Last active
December 3, 2024 14:58
-
-
Save TheAngryByrd/caa24856e0bae16e36b55c709682fd7e to your computer and use it in GitHub Desktop.
f# graph based parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open System | |
open System.IO | |
open System.Collections.Generic | |
let root = __SOURCE_DIRECTORY__ | |
let srcDir = Path.Combine(root, "src") | |
// Add <OtherFlags>$(OtherFlags) --test:GraphBasedChecking --test:DumpCheckingGraph --times:timings.csv</OtherFlags> to the .fsproj or Directory.Build.props | |
let allTimingsFiles = | |
Directory.GetFiles(srcDir, "timings.csv", SearchOption.AllDirectories) | |
let allGraphFiles = | |
Directory.GetFiles(srcDir, "*.graph.md", SearchOption.AllDirectories) | |
// --- Parse timings files | |
let checkLines = "CheckOneImplFile" | |
// Name,StartTime,EndTime,Duration(s),Id,ParentId,RootId,fileName,project,qualifiedNameOfFile,userOpName,length,cache,cpuDelta(s),realDelta(s),gc0,gc1,gc2,outputDllFile,buildPhase | |
type Timings = | |
{ | |
Name: string | |
StartTime: string | |
EndTime: string | |
Duration: float | |
Id: string | |
ParentId: string | |
RootId: string | |
FileName: string | |
Project: string | |
QualifiedNameOfFile: string | |
TimingsFile: string | |
} | |
static member Parse(timingsFile, line: string) = | |
let parts = line.Split(',') | |
{ | |
Name = parts.[0] | |
StartTime = parts.[1] | |
EndTime = parts.[2] | |
Duration = float parts.[3] | |
Id = parts.[4] | |
ParentId = parts.[5] | |
RootId = parts.[6] | |
FileName = parts.[7].Trim('"') | |
Project = parts.[8] | |
QualifiedNameOfFile = parts.[9] | |
TimingsFile = timingsFile | |
} | |
let allCheckTimings = | |
allTimingsFiles | |
|> Array.map (fun x -> (x, File.ReadAllLines(x))) | |
|> Array.collect (fun (name, lines) -> | |
lines | |
|> Array.filter (fun x -> x.Contains(checkLines)) | |
|> Array.map (fun x -> (name, x)) | |
) | |
|> Array.map (Timings.Parse) | |
allCheckTimings | |
|> Array.sortByDescending (fun x -> x.Duration) | |
|> Array.iter (fun x -> printfn "%s %f - %s" x.FileName x.Duration x.TimingsFile) | |
// --- Parse graph files | |
allGraphFiles | |
|> Seq.head | |
type Node = | |
{ | |
GraphFile: string | |
Index: int | |
FileName: string | |
Dependents: Set<int> | |
Dependencies: Set<int> | |
} | |
member x.Summary() = | |
printfn | |
$"{x.Index} - {x.FileName} - Dependencies:{x.Dependencies.Count} - Dependents:{x.Dependents.Count}" | |
member x.Details(allNodes) = | |
printfn | |
$"{x.Index} - {x.FileName} - Dependencies:{x.Dependencies.Count} - Dependents:{x.Dependents.Count}" | |
printfn "Dependencies" | |
x.Dependencies | |
|> Set.iter (fun x -> printfn $" {x} - {allNodes[x]}") | |
printfn "Dependents" | |
x.Dependents | |
|> Set.iter (fun x -> printfn $" {x} - {allNodes[x]}") | |
let indexToNameRegex = | |
System.Text.RegularExpressions.Regex(@"(?<index>\d+)\[(?<filename>.*)\]") | |
let edgesRegex = | |
System.Text.RegularExpressions.Regex(@"(?<index>\d+)\s-->\s(?<dependency>\d+)") | |
let contains (t: string) (x: string) = x.Contains t | |
// let badFiles = [ | |
// contains "AssemblyAttributes.fs" | |
// contains "AssemblyInfo.fs" | |
// ] | |
let nodesPerFile = | |
allGraphFiles | |
|> Array.map (fun x -> (x, File.ReadAllLines(x))) | |
|> Array.map (fun (graphFile, lines) -> | |
let nodes = | |
lines | |
|> Array.map (fun x -> indexToNameRegex.Match(x)) | |
|> Array.choose (fun m -> | |
let filename = m.Groups.["filename"].Value.Trim('"') | |
let index = m.Groups.["index"].Value | |
// if badFiles |> Seq.exists(fun f -> f filename) then | |
// None | |
// else | |
try | |
let index = int index | |
Some( | |
index, | |
{ | |
GraphFile = graphFile | |
Index = int index | |
FileName = filename | |
Dependents = Set.empty | |
Dependencies = Set.empty | |
} | |
) | |
with e -> | |
// eprintfn "Error %s - %s" e.Message index | |
None | |
) | |
|> Map.ofArray | |
let edges = | |
lines | |
|> Array.map (fun x -> edgesRegex.Match(x)) | |
|> Array.choose (fun m -> | |
let index = m.Groups.["index"].Value | |
let dependency = m.Groups.["dependency"].Value | |
try | |
(int index, int dependency) | |
|> Some | |
with e -> | |
// eprintfn "Error %s - %s" e.Message source | |
None | |
) | |
(nodes, edges) | |
||> Array.fold (fun state (index, dependency) -> | |
let node = Map.tryFind index state | |
let dependencyNode = Map.tryFind dependency state | |
match node, dependencyNode with | |
| Some node, Some dependencyNode -> | |
let node = | |
{ node with | |
Dependencies = Set.add dependency node.Dependencies | |
} | |
let dependencyNode = | |
{ dependencyNode with | |
Dependents = Set.add index dependencyNode.Dependents | |
} | |
state | |
|> Map.add index node | |
|> Map.add dependency dependencyNode | |
| _ -> state | |
) | |
) | |
let topologicalSort = | |
nodesPerFile | |
|> Array.map (fun nodes -> | |
let stack = Stack<Node>() | |
let rec visit node = | |
if | |
not ( | |
stack | |
|> Seq.exists (fun n -> n.Index = node.Index) | |
) | |
then | |
stack.Push node | |
node.Dependents | |
|> Seq.iter (fun x -> visit (Map.find x nodes)) | |
let nodesWithNoDependencies = | |
nodes | |
|> Seq.filter (fun (KeyValue(_, x)) -> x.Dependencies.Count = 0) | |
|> Seq.map (fun (KeyValue(_, x)) -> x) | |
nodesWithNoDependencies | |
|> Seq.iter visit | |
stack | |
|> Seq.toList | |
) | |
// --- Determine best files for signatures | |
let firstFileNodes = | |
topologicalSort | |
|> Seq.head | |
|> Seq.rev | |
// |> Seq.head | |
// |> Map.find 4 | |
firstFileNodes | |
|> Seq.length | |
for node in firstFileNodes do | |
node.GraphFile | |
|> printfn "%s" | |
node.Summary() | |
// let timing = | |
// allCheckTimings | |
// |> List.filter(fun timing -> | |
// let result = timing.FileName.ToLower().Contains (node.FileName.ToLower()) | |
// // printfn $"{timing.FileName.ToLower()} = {node.FileName.ToLower()} -> {result}" | |
// result | |
// ) | |
// timing |> List.iter (fun x -> x.Duration |> printfn "%f") | |
// Notes | |
// 1. Largest path of the tree | |
// 2.Having a lot of dependent files | |
// 3. Compare timings to detect bottlenecks (run without graph and with graph) | |
// for example if a file is taking longer in graph, there are bottlenecks getting to it | |
// 4. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment