Last active
February 6, 2026 17:17
-
-
Save dpritchett/1619c136a68c4df8ae40f99bb1e34436 to your computer and use it in GitHub Desktop.
Cypher vs Datalog comparison for graph queries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // ============================================================================ | |
| // Cypher Example: Analyzing ~/bin script dependencies | |
| // ============================================================================ | |
| // Cypher is a declarative graph query language. It uses ASCII-art patterns | |
| // to describe the shape of data you're looking for in the graph. | |
| // | |
| // Core concepts: | |
| // - Nodes: (variable:Label {property: value}) | |
| // - Edges: -[:RELATIONSHIP_TYPE]-> | |
| // - Patterns: Chain nodes and edges together visually | |
| // ============================================================================ | |
| // ---------------------------------------------------------------------------- | |
| // SCHEMA SETUP | |
| // ---------------------------------------------------------------------------- | |
| // Create node types (like tables in SQL) | |
| CREATE NODE TABLE Script( | |
| name STRING PRIMARY KEY, | |
| usage INT, -- times invoked from shell history | |
| refs INT, -- times referenced by other scripts | |
| lines INT, -- lines of code | |
| last_modified DATE | |
| ); | |
| CREATE NODE TABLE Author( | |
| name STRING PRIMARY KEY, | |
| email STRING | |
| ); | |
| // Create edge types (relationships between nodes) | |
| CREATE REL TABLE CALLS(FROM Script TO Script); | |
| CREATE REL TABLE WROTE(FROM Author TO Script, committed_at DATE); | |
| // ---------------------------------------------------------------------------- | |
| // DATA INSERTION | |
| // ---------------------------------------------------------------------------- | |
| // Add some scripts | |
| CREATE (s:Script {name: 'pink', usage: 9, refs: 3, lines: 15}); | |
| CREATE (s:Script {name: 'beep', usage: 9, refs: 3, lines: 22}); | |
| CREATE (s:Script {name: 'growl', usage: 7, refs: 3, lines: 45}); | |
| CREATE (s:Script {name: 'explore-image', usage: 89, refs: 0, lines: 120}); | |
| CREATE (s:Script {name: 'mkbin', usage: 77, refs: 108, lines: 35}); | |
| CREATE (s:Script {name: 'attend', usage: 0, refs: 0, lines: 25}); | |
| // Add relationships | |
| MATCH (a:Script {name: 'explore-image'}), (b:Script {name: 'pink'}) | |
| CREATE (a)-[:CALLS]->(b); | |
| MATCH (a:Script {name: 'explore-image'}), (b:Script {name: 'beep'}) | |
| CREATE (a)-[:CALLS]->(b); | |
| MATCH (a:Script {name: 'beep'}), (b:Script {name: 'growl'}) | |
| CREATE (a)-[:CALLS]->(b); | |
| // ---------------------------------------------------------------------------- | |
| // BASIC QUERIES | |
| // ---------------------------------------------------------------------------- | |
| // Find all scripts with their usage counts | |
| MATCH (s:Script) | |
| RETURN s.name, s.usage | |
| ORDER BY s.usage DESC; | |
| // Find scripts that call pink directly | |
| MATCH (caller:Script)-[:CALLS]->(callee:Script {name: 'pink'}) | |
| RETURN caller.name AS depends_on_pink; | |
| // Find scripts that nothing depends on (leaf nodes) | |
| MATCH (s:Script) | |
| WHERE NOT ()-[:CALLS]->(s) | |
| RETURN s.name AS standalone_scripts; | |
| // ---------------------------------------------------------------------------- | |
| // PATTERN MATCHING - The core of Cypher | |
| // ---------------------------------------------------------------------------- | |
| // Two-hop dependencies: A calls B calls C | |
| MATCH (a:Script)-[:CALLS]->(b:Script)-[:CALLS]->(c:Script) | |
| RETURN a.name AS top, | |
| b.name AS middle, | |
| c.name AS bottom; | |
| // Find triangles (circular dependencies of length 3) | |
| MATCH (a:Script)-[:CALLS]->(b:Script)-[:CALLS]->(c:Script)-[:CALLS]->(a) | |
| RETURN a.name, b.name, c.name; | |
| // Variable-length paths: find ALL dependencies of explore-image | |
| // The * means "any number of hops" | |
| MATCH (root:Script {name: 'explore-image'})-[:CALLS*]->(dep:Script) | |
| RETURN DISTINCT dep.name AS transitive_dependency; | |
| // Bounded depth: dependencies 1-3 hops away | |
| MATCH (root:Script {name: 'explore-image'})-[:CALLS*1..3]->(dep:Script) | |
| RETURN DISTINCT dep.name; | |
| // ---------------------------------------------------------------------------- | |
| // FILTERING AND AGGREGATION | |
| // ---------------------------------------------------------------------------- | |
| // Scripts with high usage but nothing depends on them | |
| // These are "end-user" scripts, not utilities | |
| MATCH (s:Script) | |
| WHERE s.usage > 10 | |
| AND NOT ()-[:CALLS]->(s) | |
| RETURN s.name, s.usage | |
| ORDER BY s.usage DESC; | |
| // Dead code candidates: no usage AND nothing calls them | |
| MATCH (s:Script) | |
| WHERE s.usage = 0 | |
| AND s.refs = 0 | |
| AND NOT ()-[:CALLS]->(s) | |
| RETURN s.name AS deletion_candidate; | |
| // Count dependencies per script | |
| MATCH (s:Script) | |
| OPTIONAL MATCH (s)-[:CALLS]->(dep:Script) | |
| RETURN s.name, | |
| COUNT(dep) AS dependency_count | |
| ORDER BY dependency_count DESC; | |
| // Find the most "central" utility scripts | |
| // (called by many things) | |
| MATCH (caller:Script)-[:CALLS]->(utility:Script) | |
| RETURN utility.name, | |
| COUNT(caller) AS incoming_calls | |
| ORDER BY incoming_calls DESC | |
| LIMIT 10; | |
| // ---------------------------------------------------------------------------- | |
| // PATH ANALYSIS | |
| // ---------------------------------------------------------------------------- | |
| // Shortest path between two scripts | |
| MATCH path = shortestPath( | |
| (a:Script {name: 'explore-image'})-[:CALLS*]-(b:Script {name: 'growl'}) | |
| ) | |
| RETURN path; | |
| // All paths up to length 4 | |
| MATCH path = (a:Script {name: 'explore-image'})-[:CALLS*..4]->(b:Script) | |
| RETURN path; | |
| // What breaks if we delete beep? | |
| // Find everything that transitively depends on it | |
| MATCH (dependent:Script)-[:CALLS*]->(target:Script {name: 'beep'}) | |
| RETURN DISTINCT dependent.name AS would_break; | |
| // ---------------------------------------------------------------------------- | |
| // UPDATES AND MAINTENANCE | |
| // ---------------------------------------------------------------------------- | |
| // Update usage count after new audit | |
| MATCH (s:Script {name: 'attend'}) | |
| SET s.usage = 5, s.last_modified = date('2026-02-05'); | |
| // Add a new dependency we discovered | |
| MATCH (a:Script {name: 'attend'}), (b:Script {name: 'pink'}) | |
| MERGE (a)-[:CALLS]->(b); | |
| // Remove a script and all its relationships | |
| MATCH (s:Script {name: 'old-unused-script'}) | |
| DETACH DELETE s; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| % ============================================================================ | |
| % Datalog Example: Analyzing ~/bin script dependencies | |
| % ============================================================================ | |
| % Datalog is a declarative logic programming language. Queries are expressed | |
| % as logical rules: "this is true if these conditions hold." | |
| % | |
| % Core concepts: | |
| % - Facts: Base truths, like rows in a table | |
| % - Rules: Derived truths, defined by conditions | |
| % - Queries: Questions that find matching facts | |
| % | |
| % Syntax: | |
| % - head :- body. "head is true if body is true" | |
| % - ?[vars] := body. "find all vars where body is true" | |
| % - Comma means AND | |
| % - not means negation | |
| % ============================================================================ | |
| % ---------------------------------------------------------------------------- | |
| % SCHEMA DEFINITION (CozoDB syntax) | |
| % ---------------------------------------------------------------------------- | |
| % Define stored relations (like tables) | |
| :create script { | |
| name: String, % primary key | |
| => % separator between key and value columns | |
| usage: Int, % times invoked from shell history | |
| refs: Int, % times referenced by other scripts | |
| lines: Int % lines of code | |
| } | |
| :create calls { | |
| caller: String, | |
| callee: String | |
| } | |
| :create author { | |
| name: String, | |
| => | |
| email: String | |
| } | |
| :create wrote { | |
| author: String, | |
| script: String, | |
| => | |
| committed_at: String | |
| } | |
| % ---------------------------------------------------------------------------- | |
| % DATA INSERTION | |
| % ---------------------------------------------------------------------------- | |
| ?[name, usage, refs, lines] <- [ | |
| ["pink", 9, 3, 15], | |
| ["beep", 9, 3, 22], | |
| ["growl", 7, 3, 45], | |
| ["explore-image", 89, 0, 120], | |
| ["mkbin", 77, 108, 35], | |
| ["attend", 0, 0, 25] | |
| ] | |
| :put script {name, usage, refs, lines} | |
| ?[caller, callee] <- [ | |
| ["explore-image", "pink"], | |
| ["explore-image", "beep"], | |
| ["beep", "growl"] | |
| ] | |
| :put calls {caller, callee} | |
| % ---------------------------------------------------------------------------- | |
| % BASIC QUERIES | |
| % ---------------------------------------------------------------------------- | |
| % Find all scripts with their usage counts, sorted | |
| ?[name, usage] := *script{name, usage} | |
| :order -usage | |
| % Find scripts that call pink directly | |
| ?[caller] := *calls{caller, callee}, callee == "pink" | |
| % Find scripts that nothing depends on (leaf nodes) | |
| ?[name] := *script{name}, not *calls{callee: name} | |
| % ---------------------------------------------------------------------------- | |
| % RULES - The power of Datalog | |
| % ---------------------------------------------------------------------------- | |
| % Define transitive dependency as a recursive rule | |
| % "A depends on B if A calls B" | |
| depends[a, b] := *calls{caller: a, callee: b} | |
| % "A depends on C if A calls B and B depends on C" | |
| % This is the recursive case - follows the chain | |
| depends[a, c] := *calls{caller: a, callee: b}, depends[b, c] | |
| % Now we can query transitive dependencies easily | |
| ?[dep] := depends["explore-image", dep] | |
| % Find everything that depends on beep (would break if deleted) | |
| ?[script] := depends[script, "beep"] | |
| % ---------------------------------------------------------------------------- | |
| % NEGATION AND FILTERING | |
| % ---------------------------------------------------------------------------- | |
| % Scripts with high usage but nothing depends on them | |
| % These are "end-user" scripts, not utilities | |
| ?[name, usage] := | |
| *script{name, usage}, | |
| usage > 10, | |
| not *calls{callee: name} | |
| :order -usage | |
| % Dead code candidates: no usage AND nothing calls them | |
| ?[name] := | |
| *script{name, usage, refs}, | |
| usage == 0, | |
| refs == 0, | |
| not *calls{callee: name} | |
| % Scripts that ARE called by something | |
| ?[name] := *script{name}, *calls{callee: name} | |
| % ---------------------------------------------------------------------------- | |
| % AGGREGATION | |
| % ---------------------------------------------------------------------------- | |
| % Count how many things each script calls | |
| ?[name, count(callee)] := | |
| *script{name}, | |
| *calls{caller: name, callee} | |
| % Find the most "central" utility scripts (called by many things) | |
| ?[callee, count(caller)] := *calls{caller, callee} | |
| :order -count(caller) | |
| :limit 10 | |
| % Total lines of code across all scripts | |
| ?[total] := total = sum(lines), *script{lines} | |
| % ---------------------------------------------------------------------------- | |
| % PATH QUERIES WITH RECURSION | |
| % ---------------------------------------------------------------------------- | |
| % Define "reachable" - can we get from A to B at all? | |
| reachable[a, b] := *calls{caller: a, callee: b} | |
| reachable[a, c] := *calls{caller: a, callee: b}, reachable[b, c] | |
| % Is growl reachable from explore-image? | |
| ?[answer] := | |
| answer = if(reachable["explore-image", "growl"], "yes", "no") | |
| % Find all scripts reachable from explore-image | |
| ?[target] := reachable["explore-image", target] | |
| % Define path length (distance) | |
| dist[a, b, 1] := *calls{caller: a, callee: b} | |
| dist[a, c, n] := *calls{caller: a, callee: b}, dist[b, c, m], n = m + 1 | |
| % Shortest distance to each reachable script | |
| ?[target, min(d)] := dist["explore-image", target, d] | |
| % ---------------------------------------------------------------------------- | |
| % COMPLEX ANALYSIS | |
| % ---------------------------------------------------------------------------- | |
| % Find circular dependencies (A -> ... -> A) | |
| circular[a] := depends[a, a] | |
| ?[script] := circular[script] | |
| % Scripts that form a "core" - they both use and are used by others | |
| ?[name] := | |
| *script{name}, | |
| *calls{caller: name}, % calls something | |
| *calls{callee: name} % something calls it | |
| % "Importance score" - usage + refs + things that depend on it | |
| importance[name, score] := | |
| *script{name, usage, refs}, | |
| dep_count = count(dependent), | |
| *calls{callee: name} or not *calls{callee: name}, % handle zero case | |
| score = usage + refs + dep_count | |
| ?[name, score] := importance[name, score] | |
| :order -score | |
| % ---------------------------------------------------------------------------- | |
| % COMPOSABLE RULES - Build up vocabulary | |
| % ---------------------------------------------------------------------------- | |
| % Low-level facts | |
| is_utility[s] := *calls{callee: s} | |
| is_standalone[s] := *script{name: s}, not is_utility[s] | |
| is_unused[s] := *script{name: s, usage: 0} | |
| is_unreferenced[s] := *script{name: s, refs: 0} | |
| % Composed predicates | |
| safe_to_delete[s] := is_unused[s], is_unreferenced[s], is_standalone[s] | |
| high_value_utility[s] := is_utility[s], *script{name: s, usage}, usage > 5 | |
| % Now queries read like English | |
| ?[s] := safe_to_delete[s] | |
| ?[s] := high_value_utility[s] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| % ============================================================================ | |
| % Datalog Example: Analyzing ~/bin script dependencies | |
| % ============================================================================ | |
| % Datalog is a declarative logic programming language. Queries are expressed | |
| % as logical rules: "this is true if these conditions hold." | |
| % | |
| % Core concepts: | |
| % - Facts: Base truths, like rows in a table | |
| % - Rules: Derived truths, defined by conditions | |
| % - Queries: Questions that find matching facts | |
| % | |
| % Syntax: | |
| % - head :- body. "head is true if body is true" | |
| % - ?[vars] := body. "find all vars where body is true" | |
| % - Comma means AND | |
| % - not means negation | |
| % ============================================================================ | |
| % ---------------------------------------------------------------------------- | |
| % SCHEMA DEFINITION (CozoDB syntax) | |
| % ---------------------------------------------------------------------------- | |
| % Define stored relations (like tables) | |
| :create script { | |
| name: String, % primary key | |
| => % separator between key and value columns | |
| usage: Int, % times invoked from shell history | |
| refs: Int, % times referenced by other scripts | |
| lines: Int % lines of code | |
| } | |
| :create calls { | |
| caller: String, | |
| callee: String | |
| } | |
| :create author { | |
| name: String, | |
| => | |
| email: String | |
| } | |
| :create wrote { | |
| author: String, | |
| script: String, | |
| => | |
| committed_at: String | |
| } | |
| % ---------------------------------------------------------------------------- | |
| % DATA INSERTION | |
| % ---------------------------------------------------------------------------- | |
| ?[name, usage, refs, lines] <- [ | |
| ["pink", 9, 3, 15], | |
| ["beep", 9, 3, 22], | |
| ["growl", 7, 3, 45], | |
| ["explore-image", 89, 0, 120], | |
| ["mkbin", 77, 108, 35], | |
| ["attend", 0, 0, 25] | |
| ] | |
| :put script {name, usage, refs, lines} | |
| ?[caller, callee] <- [ | |
| ["explore-image", "pink"], | |
| ["explore-image", "beep"], | |
| ["beep", "growl"] | |
| ] | |
| :put calls {caller, callee} | |
| % ---------------------------------------------------------------------------- | |
| % BASIC QUERIES | |
| % ---------------------------------------------------------------------------- | |
| % Find all scripts with their usage counts, sorted | |
| ?[name, usage] := *script{name, usage} | |
| :order -usage | |
| % Find scripts that call pink directly | |
| ?[caller] := *calls{caller, callee}, callee == "pink" | |
| % Find scripts that nothing depends on (leaf nodes) | |
| ?[name] := *script{name}, not *calls{callee: name} | |
| % ---------------------------------------------------------------------------- | |
| % RULES - The power of Datalog | |
| % ---------------------------------------------------------------------------- | |
| % Define transitive dependency as a recursive rule | |
| % "A depends on B if A calls B" | |
| depends[a, b] := *calls{caller: a, callee: b} | |
| % "A depends on C if A calls B and B depends on C" | |
| % This is the recursive case - follows the chain | |
| depends[a, c] := *calls{caller: a, callee: b}, depends[b, c] | |
| % Now we can query transitive dependencies easily | |
| ?[dep] := depends["explore-image", dep] | |
| % Find everything that depends on beep (would break if deleted) | |
| ?[script] := depends[script, "beep"] | |
| % ---------------------------------------------------------------------------- | |
| % NEGATION AND FILTERING | |
| % ---------------------------------------------------------------------------- | |
| % Scripts with high usage but nothing depends on them | |
| % These are "end-user" scripts, not utilities | |
| ?[name, usage] := | |
| *script{name, usage}, | |
| usage > 10, | |
| not *calls{callee: name} | |
| :order -usage | |
| % Dead code candidates: no usage AND nothing calls them | |
| ?[name] := | |
| *script{name, usage, refs}, | |
| usage == 0, | |
| refs == 0, | |
| not *calls{callee: name} | |
| % Scripts that ARE called by something | |
| ?[name] := *script{name}, *calls{callee: name} | |
| % ---------------------------------------------------------------------------- | |
| % AGGREGATION | |
| % ---------------------------------------------------------------------------- | |
| % Count how many things each script calls | |
| ?[name, count(callee)] := | |
| *script{name}, | |
| *calls{caller: name, callee} | |
| % Find the most "central" utility scripts (called by many things) | |
| ?[callee, count(caller)] := *calls{caller, callee} | |
| :order -count(caller) | |
| :limit 10 | |
| % Total lines of code across all scripts | |
| ?[total] := total = sum(lines), *script{lines} | |
| % ---------------------------------------------------------------------------- | |
| % PATH QUERIES WITH RECURSION | |
| % ---------------------------------------------------------------------------- | |
| % Define "reachable" - can we get from A to B at all? | |
| reachable[a, b] := *calls{caller: a, callee: b} | |
| reachable[a, c] := *calls{caller: a, callee: b}, reachable[b, c] | |
| % Is growl reachable from explore-image? | |
| ?[answer] := | |
| answer = if(reachable["explore-image", "growl"], "yes", "no") | |
| % Find all scripts reachable from explore-image | |
| ?[target] := reachable["explore-image", target] | |
| % Define path length (distance) | |
| dist[a, b, 1] := *calls{caller: a, callee: b} | |
| dist[a, c, n] := *calls{caller: a, callee: b}, dist[b, c, m], n = m + 1 | |
| % Shortest distance to each reachable script | |
| ?[target, min(d)] := dist["explore-image", target, d] | |
| % ---------------------------------------------------------------------------- | |
| % COMPLEX ANALYSIS | |
| % ---------------------------------------------------------------------------- | |
| % Find circular dependencies (A -> ... -> A) | |
| circular[a] := depends[a, a] | |
| ?[script] := circular[script] | |
| % Scripts that form a "core" - they both use and are used by others | |
| ?[name] := | |
| *script{name}, | |
| *calls{caller: name}, % calls something | |
| *calls{callee: name} % something calls it | |
| % "Importance score" - usage + refs + things that depend on it | |
| importance[name, score] := | |
| *script{name, usage, refs}, | |
| dep_count = count(dependent), | |
| *calls{callee: name} or not *calls{callee: name}, % handle zero case | |
| score = usage + refs + dep_count | |
| ?[name, score] := importance[name, score] | |
| :order -score | |
| % ---------------------------------------------------------------------------- | |
| % COMPOSABLE RULES - Build up vocabulary | |
| % ---------------------------------------------------------------------------- | |
| % Low-level facts | |
| is_utility[s] := *calls{callee: s} | |
| is_standalone[s] := *script{name: s}, not is_utility[s] | |
| is_unused[s] := *script{name: s, usage: 0} | |
| is_unreferenced[s] := *script{name: s, refs: 0} | |
| % Composed predicates | |
| safe_to_delete[s] := is_unused[s], is_unreferenced[s], is_standalone[s] | |
| high_value_utility[s] := is_utility[s], *script{name: s, usage}, usage > 5 | |
| % Now queries read like English | |
| ?[s] := safe_to_delete[s] | |
| ?[s] := high_value_utility[s] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment