Created
October 5, 2023 17:42
Revisions
-
ImaginaryDevelopment created this gist
Oct 5, 2023. There are no files selected for viewing
// Walk two sets of data and remove the overlap: time several ways of finding
// the photos that have no matching student entry.
// This is a LINQPad script; it relies on LINQPad's Dump, DumpContainer and Util.Progress.

/// Lower-cases a string using the invariant culture.
let toLower (x:string) = x.ToLowerInvariant()

/// Returns the part of `value` that follows the last occurrence of `delimiter`,
/// or all of `value` when the delimiter does not occur.
let afterLast (delimiter:string) (value:string) =
    // Ordinal search: the delimiter is a literal separator, not linguistic text.
    let idx = value.LastIndexOf(delimiter, System.StringComparison.Ordinal)
    if idx < 0 then value
    // Skip the whole delimiter, not just one character, so multi-character
    // delimiters are handled correctly.
    else value.Substring(idx + delimiter.Length)

/// Photo entries: the file's lines minus the first two, with surrounding
/// double quotes trimmed, lower-cased, and capped at the first 1,000 entries.
let photos =
    File.ReadAllLines(@"C:\Users\B\Documents\lancephotos.csv")
    |> Array.skip 2
    |> Array.map (fun line -> line.Trim('"') |> toLower)
    |> Array.truncate 1_000

/// Student entries: the file's lines minus the first two, with surrounding
/// double quotes trimmed, lower-cased, and reduced to the text after the last backslash.
let students =
    File.ReadAllLines(@"C:\Users\B\Documents\lancestudents.csv")
    |> Array.skip 2
    |> Array.map (fun line -> line.Trim('"') |> toLower |> afterLast "\\")
// |> Array.truncate 5
//(photos,students).Dump()

/// Converts milliseconds to whole seconds (integer division, remainder discarded).
let msToSeconds (ms: int64) = ms / 1000L

// The raw data had quotes in it and was not read as a CSV, so the data was bad.
// Sanity check: show whether any entry still contains a double quote.
let photosHaveQuotes = photos |> Array.exists (fun photo -> photo.Contains("\""))
let studentsHaveQuotes = students |> Array.exists (fun student -> student.Contains("\""))
(photosHaveQuotes, studentsHaveQuotes).Dump()

printfn "%i photos, %i students" photos.Length students.Length

/// Total number of photos to process; the denominator for progress figures.
let maxI = photos.Length

/// Formats a number with thousands separators and no decimals ("N0").
let commaChameleon (x:float) = x.ToString("N0")

/// Builds the progress line shown for a comparer.
/// andMyFriends = total ms elapsed, i = items processed (used for the rate),
/// v = item count to display and to express as a percentage of maxI.
let rateMe (andMyFriends:int64) i v =
    let seconds = msToSeconds andMyFriends
    // No rate can be computed until at least one whole second has elapsed.
    let rate = if seconds > 0L then int64 i / seconds else 0L
    // Scale to 0-100 so the value matches the '%' sign in the format string;
    // an empty photo set is reported as fully processed instead of NaN.
    let percent = if maxI > 0 then float v * 100.0 / float maxI else 100.0
    sprintf "Finished %s(%.2f%%) %A per second in %i seconds" (commaChameleon v) percent rate seconds

/// Runs one timed comparison strategy over every photo.
/// title: label dumped above the progress output and returned with the result.
/// fStudents: builds the lookup structure from the module-level students array.
/// fPredicate: given the lookup and a photo, returns true when the photo should be kept.
/// Returns (title, elapsed milliseconds, number of photos kept).
/// The stopwatch starts before the lookup is built, so the timing includes construction.
let genericComparer (title:string) fStudents fPredicate =
    let timer = System.Diagnostics.Stopwatch.StartNew()
    let mutable processed = 0
    let dumpProgress =
        let dc = DumpContainer()
        dc.Dump(title)
        fun (v:int) -> dc.Content <- rateMe timer.ElapsedMilliseconds processed (float v)
    let lookup = fStudents students
    let items =
        photos
        |> Array.filter (fun photo ->
            // Refresh the progress display every 500 photos.
            // Util.Progress is a single shared value, so comparers running in
            // parallel overwrite each other's percentage.
            if processed % 500 = 0 then
                Util.Progress <- processed * 100 / maxI
                dumpProgress processed
            processed <- processed + 1
            fPredicate lookup photo)
    timer.Stop()
    dumpProgress processed
    title, timer.ElapsedMilliseconds, items.Length

/// Keeps photos that are not a substring of any student entry (scans the whole set per photo).
let hashContains () =
    genericComparer "hashC" Set.ofArray (fun studs photo ->
        studs |> Set.exists (fun (student: string) -> student.Contains photo) |> not)

/// Keeps photos that are not an exact member of the student set.
let hash2 () =
    genericComparer "hash2" Set.ofArray (fun studs photo ->
        studs |> Set.contains photo |> not)

/// Keeps photos that are not a key of a Map built from the student entries.
let dic () =
    genericComparer "dic"
        (fun students -> students |> Seq.map (fun student -> student, student) |> Map.ofSeq)
        (fun studs photo -> studs |> Map.containsKey photo |> not)

// Run every enabled comparer in parallel and wait for all of them to finish.
[
    //seqVersion
    //seqVersion2
    //hashVersion
    hashContains
    hash2
    dic
]
|> List.map (fun f -> async { return f() })
|> Async.Parallel
|> Async.RunSynchronously