Skip to content

Instantly share code, notes, and snippets.

@dpritchett
Last active February 6, 2026 17:17
Show Gist options
  • Select an option

  • Save dpritchett/1619c136a68c4df8ae40f99bb1e34436 to your computer and use it in GitHub Desktop.

Select an option

Save dpritchett/1619c136a68c4df8ae40f99bb1e34436 to your computer and use it in GitHub Desktop.
Cypher vs Datalog comparison for graph queries
// ============================================================================
// Cypher Example: Analyzing ~/bin script dependencies
// ============================================================================
// Cypher is a declarative graph query language. It uses ASCII-art patterns
// to describe the shape of data you're looking for in the graph.
//
// Core concepts:
// - Nodes: (variable:Label {property: value})
// - Edges: -[:RELATIONSHIP_TYPE]->
// - Patterns: Chain nodes and edges together visually
// ============================================================================
// ----------------------------------------------------------------------------
// SCHEMA SETUP
// ----------------------------------------------------------------------------
// Create node types (like tables in SQL)
CREATE NODE TABLE Script(
name STRING PRIMARY KEY,
usage INT, -- times invoked from shell history
refs INT, -- times referenced by other scripts
lines INT, -- lines of code
last_modified DATE
);
CREATE NODE TABLE Author(
name STRING PRIMARY KEY,
email STRING
);
// Create edge types (relationships between nodes)
CREATE REL TABLE CALLS(FROM Script TO Script);
CREATE REL TABLE WROTE(FROM Author TO Script, committed_at DATE);
// ----------------------------------------------------------------------------
// DATA INSERTION
// ----------------------------------------------------------------------------
// Add some scripts
CREATE (s:Script {name: 'pink', usage: 9, refs: 3, lines: 15});
CREATE (s:Script {name: 'beep', usage: 9, refs: 3, lines: 22});
CREATE (s:Script {name: 'growl', usage: 7, refs: 3, lines: 45});
CREATE (s:Script {name: 'explore-image', usage: 89, refs: 0, lines: 120});
CREATE (s:Script {name: 'mkbin', usage: 77, refs: 108, lines: 35});
CREATE (s:Script {name: 'attend', usage: 0, refs: 0, lines: 25});
// Add relationships
MATCH (a:Script {name: 'explore-image'}), (b:Script {name: 'pink'})
CREATE (a)-[:CALLS]->(b);
MATCH (a:Script {name: 'explore-image'}), (b:Script {name: 'beep'})
CREATE (a)-[:CALLS]->(b);
MATCH (a:Script {name: 'beep'}), (b:Script {name: 'growl'})
CREATE (a)-[:CALLS]->(b);
// ----------------------------------------------------------------------------
// BASIC QUERIES
// ----------------------------------------------------------------------------
// Find all scripts with their usage counts
MATCH (s:Script)
RETURN s.name, s.usage
ORDER BY s.usage DESC;
// Find scripts that call pink directly
MATCH (caller:Script)-[:CALLS]->(callee:Script {name: 'pink'})
RETURN caller.name AS depends_on_pink;
// Find scripts that nothing depends on (leaf nodes)
MATCH (s:Script)
WHERE NOT ()-[:CALLS]->(s)
RETURN s.name AS standalone_scripts;
// ----------------------------------------------------------------------------
// PATTERN MATCHING - The core of Cypher
// ----------------------------------------------------------------------------
// Two-hop dependencies: A calls B calls C
MATCH (a:Script)-[:CALLS]->(b:Script)-[:CALLS]->(c:Script)
RETURN a.name AS top,
b.name AS middle,
c.name AS bottom;
// Find triangles (circular dependencies of length 3)
MATCH (a:Script)-[:CALLS]->(b:Script)-[:CALLS]->(c:Script)-[:CALLS]->(a)
RETURN a.name, b.name, c.name;
// Variable-length paths: find ALL dependencies of explore-image
// The * means "any number of hops"
MATCH (root:Script {name: 'explore-image'})-[:CALLS*]->(dep:Script)
RETURN DISTINCT dep.name AS transitive_dependency;
// Bounded depth: dependencies 1-3 hops away
MATCH (root:Script {name: 'explore-image'})-[:CALLS*1..3]->(dep:Script)
RETURN DISTINCT dep.name;
// ----------------------------------------------------------------------------
// FILTERING AND AGGREGATION
// ----------------------------------------------------------------------------
// Scripts with high usage but nothing depends on them
// These are "end-user" scripts, not utilities
MATCH (s:Script)
WHERE s.usage > 10
AND NOT ()-[:CALLS]->(s)
RETURN s.name, s.usage
ORDER BY s.usage DESC;
// Dead code candidates: no usage AND nothing calls them
MATCH (s:Script)
WHERE s.usage = 0
AND s.refs = 0
AND NOT ()-[:CALLS]->(s)
RETURN s.name AS deletion_candidate;
// Count dependencies per script
MATCH (s:Script)
OPTIONAL MATCH (s)-[:CALLS]->(dep:Script)
RETURN s.name,
COUNT(dep) AS dependency_count
ORDER BY dependency_count DESC;
// Find the most "central" utility scripts
// (called by many things)
MATCH (caller:Script)-[:CALLS]->(utility:Script)
RETURN utility.name,
COUNT(caller) AS incoming_calls
ORDER BY incoming_calls DESC
LIMIT 10;
// ----------------------------------------------------------------------------
// PATH ANALYSIS
// ----------------------------------------------------------------------------
// Shortest path between two scripts
MATCH path = shortestPath(
(a:Script {name: 'explore-image'})-[:CALLS*]-(b:Script {name: 'growl'})
)
RETURN path;
// All paths up to length 4
MATCH path = (a:Script {name: 'explore-image'})-[:CALLS*..4]->(b:Script)
RETURN path;
// What breaks if we delete beep?
// Find everything that transitively depends on it
MATCH (dependent:Script)-[:CALLS*]->(target:Script {name: 'beep'})
RETURN DISTINCT dependent.name AS would_break;
// ----------------------------------------------------------------------------
// UPDATES AND MAINTENANCE
// ----------------------------------------------------------------------------
// Update usage count after new audit
MATCH (s:Script {name: 'attend'})
SET s.usage = 5, s.last_modified = date('2026-02-05');
// Add a new dependency we discovered
MATCH (a:Script {name: 'attend'}), (b:Script {name: 'pink'})
MERGE (a)-[:CALLS]->(b);
// Remove a script and all its relationships
MATCH (s:Script {name: 'old-unused-script'})
DETACH DELETE s;
% ============================================================================
% Datalog Example: Analyzing ~/bin script dependencies
% ============================================================================
% Datalog is a declarative logic programming language. Queries are expressed
% as logical rules: "this is true if these conditions hold."
%
% Core concepts:
% - Facts: Base truths, like rows in a table
% - Rules: Derived truths, defined by conditions
% - Queries: Questions that find matching facts
%
% Syntax:
% - head :- body. "head is true if body is true"
% - ?[vars] := body. "find all vars where body is true"
% - Comma means AND
% - not means negation
% ============================================================================
% ----------------------------------------------------------------------------
% SCHEMA DEFINITION (CozoDB syntax)
% ----------------------------------------------------------------------------
% Define stored relations (like tables)
:create script {
name: String, % primary key
=> % separator between key and value columns
usage: Int, % times invoked from shell history
refs: Int, % times referenced by other scripts
lines: Int % lines of code
}
:create calls {
caller: String,
callee: String
}
:create author {
name: String,
=>
email: String
}
:create wrote {
author: String,
script: String,
=>
committed_at: String
}
% ----------------------------------------------------------------------------
% DATA INSERTION
% ----------------------------------------------------------------------------
?[name, usage, refs, lines] <- [
["pink", 9, 3, 15],
["beep", 9, 3, 22],
["growl", 7, 3, 45],
["explore-image", 89, 0, 120],
["mkbin", 77, 108, 35],
["attend", 0, 0, 25]
]
:put script {name, usage, refs, lines}
?[caller, callee] <- [
["explore-image", "pink"],
["explore-image", "beep"],
["beep", "growl"]
]
:put calls {caller, callee}
% ----------------------------------------------------------------------------
% BASIC QUERIES
% ----------------------------------------------------------------------------
% Find all scripts with their usage counts, sorted
?[name, usage] := *script{name, usage}
:order -usage
% Find scripts that call pink directly
?[caller] := *calls{caller, callee}, callee == "pink"
% Find scripts that nothing depends on (leaf nodes)
?[name] := *script{name}, not *calls{callee: name}
% ----------------------------------------------------------------------------
% RULES - The power of Datalog
% ----------------------------------------------------------------------------
% Define transitive dependency as a recursive rule
% "A depends on B if A calls B"
depends[a, b] := *calls{caller: a, callee: b}
% "A depends on C if A calls B and B depends on C"
% This is the recursive case - follows the chain
depends[a, c] := *calls{caller: a, callee: b}, depends[b, c]
% Now we can query transitive dependencies easily
?[dep] := depends["explore-image", dep]
% Find everything that depends on beep (would break if deleted)
?[script] := depends[script, "beep"]
% ----------------------------------------------------------------------------
% NEGATION AND FILTERING
% ----------------------------------------------------------------------------
% Scripts with high usage but nothing depends on them
% These are "end-user" scripts, not utilities
?[name, usage] :=
*script{name, usage},
usage > 10,
not *calls{callee: name}
:order -usage
% Dead code candidates: no usage AND nothing calls them
?[name] :=
*script{name, usage, refs},
usage == 0,
refs == 0,
not *calls{callee: name}
% Scripts that ARE called by something
?[name] := *script{name}, *calls{callee: name}
% ----------------------------------------------------------------------------
% AGGREGATION
% ----------------------------------------------------------------------------
% Count how many things each script calls
?[name, count(callee)] :=
*script{name},
*calls{caller: name, callee}
% Find the most "central" utility scripts (called by many things)
?[callee, count(caller)] := *calls{caller, callee}
:order -count(caller)
:limit 10
% Total lines of code across all scripts
?[total] := total = sum(lines), *script{lines}
% ----------------------------------------------------------------------------
% PATH QUERIES WITH RECURSION
% ----------------------------------------------------------------------------
% Define "reachable" - can we get from A to B at all?
reachable[a, b] := *calls{caller: a, callee: b}
reachable[a, c] := *calls{caller: a, callee: b}, reachable[b, c]
% Is growl reachable from explore-image?
?[answer] :=
answer = if(reachable["explore-image", "growl"], "yes", "no")
% Find all scripts reachable from explore-image
?[target] := reachable["explore-image", target]
% Define path length (distance)
dist[a, b, 1] := *calls{caller: a, callee: b}
dist[a, c, n] := *calls{caller: a, callee: b}, dist[b, c, m], n = m + 1
% Shortest distance to each reachable script
?[target, min(d)] := dist["explore-image", target, d]
% ----------------------------------------------------------------------------
% COMPLEX ANALYSIS
% ----------------------------------------------------------------------------
% Find circular dependencies (A -> ... -> A)
circular[a] := depends[a, a]
?[script] := circular[script]
% Scripts that form a "core" - they both use and are used by others
?[name] :=
*script{name},
*calls{caller: name}, % calls something
*calls{callee: name} % something calls it
% "Importance score" - usage + refs + things that depend on it
importance[name, score] :=
*script{name, usage, refs},
dep_count = count(dependent),
*calls{callee: name} or not *calls{callee: name}, % handle zero case
score = usage + refs + dep_count
?[name, score] := importance[name, score]
:order -score
% ----------------------------------------------------------------------------
% COMPOSABLE RULES - Build up vocabulary
% ----------------------------------------------------------------------------
% Low-level facts
is_utility[s] := *calls{callee: s}
is_standalone[s] := *script{name: s}, not is_utility[s]
is_unused[s] := *script{name: s, usage: 0}
is_unreferenced[s] := *script{name: s, refs: 0}
% Composed predicates
safe_to_delete[s] := is_unused[s], is_unreferenced[s], is_standalone[s]
high_value_utility[s] := is_utility[s], *script{name: s, usage}, usage > 5
% Now queries read like English
?[s] := safe_to_delete[s]
?[s] := high_value_utility[s]
% ============================================================================
% Datalog Example: Analyzing ~/bin script dependencies
% ============================================================================
% Datalog is a declarative logic programming language. Queries are expressed
% as logical rules: "this is true if these conditions hold."
%
% Core concepts:
% - Facts: Base truths, like rows in a table
% - Rules: Derived truths, defined by conditions
% - Queries: Questions that find matching facts
%
% Syntax:
% - head :- body. "head is true if body is true"
% - ?[vars] := body. "find all vars where body is true"
% - Comma means AND
% - not means negation
% ============================================================================
% ----------------------------------------------------------------------------
% SCHEMA DEFINITION (CozoDB syntax)
% ----------------------------------------------------------------------------
% Define stored relations (like tables)
:create script {
name: String, % primary key
=> % separator between key and value columns
usage: Int, % times invoked from shell history
refs: Int, % times referenced by other scripts
lines: Int % lines of code
}
:create calls {
caller: String,
callee: String
}
:create author {
name: String,
=>
email: String
}
:create wrote {
author: String,
script: String,
=>
committed_at: String
}
% ----------------------------------------------------------------------------
% DATA INSERTION
% ----------------------------------------------------------------------------
?[name, usage, refs, lines] <- [
["pink", 9, 3, 15],
["beep", 9, 3, 22],
["growl", 7, 3, 45],
["explore-image", 89, 0, 120],
["mkbin", 77, 108, 35],
["attend", 0, 0, 25]
]
:put script {name, usage, refs, lines}
?[caller, callee] <- [
["explore-image", "pink"],
["explore-image", "beep"],
["beep", "growl"]
]
:put calls {caller, callee}
% ----------------------------------------------------------------------------
% BASIC QUERIES
% ----------------------------------------------------------------------------
% Find all scripts with their usage counts, sorted
?[name, usage] := *script{name, usage}
:order -usage
% Find scripts that call pink directly
?[caller] := *calls{caller, callee}, callee == "pink"
% Find scripts that nothing depends on (leaf nodes)
?[name] := *script{name}, not *calls{callee: name}
% ----------------------------------------------------------------------------
% RULES - The power of Datalog
% ----------------------------------------------------------------------------
% Define transitive dependency as a recursive rule
% "A depends on B if A calls B"
depends[a, b] := *calls{caller: a, callee: b}
% "A depends on C if A calls B and B depends on C"
% This is the recursive case - follows the chain
depends[a, c] := *calls{caller: a, callee: b}, depends[b, c]
% Now we can query transitive dependencies easily
?[dep] := depends["explore-image", dep]
% Find everything that depends on beep (would break if deleted)
?[script] := depends[script, "beep"]
% ----------------------------------------------------------------------------
% NEGATION AND FILTERING
% ----------------------------------------------------------------------------
% Scripts with high usage but nothing depends on them
% These are "end-user" scripts, not utilities
?[name, usage] :=
*script{name, usage},
usage > 10,
not *calls{callee: name}
:order -usage
% Dead code candidates: no usage AND nothing calls them
?[name] :=
*script{name, usage, refs},
usage == 0,
refs == 0,
not *calls{callee: name}
% Scripts that ARE called by something
?[name] := *script{name}, *calls{callee: name}
% ----------------------------------------------------------------------------
% AGGREGATION
% ----------------------------------------------------------------------------
% Count how many things each script calls
?[name, count(callee)] :=
*script{name},
*calls{caller: name, callee}
% Find the most "central" utility scripts (called by many things)
?[callee, count(caller)] := *calls{caller, callee}
:order -count(caller)
:limit 10
% Total lines of code across all scripts
?[total] := total = sum(lines), *script{lines}
% ----------------------------------------------------------------------------
% PATH QUERIES WITH RECURSION
% ----------------------------------------------------------------------------
% Define "reachable" - can we get from A to B at all?
reachable[a, b] := *calls{caller: a, callee: b}
reachable[a, c] := *calls{caller: a, callee: b}, reachable[b, c]
% Is growl reachable from explore-image?
?[answer] :=
answer = if(reachable["explore-image", "growl"], "yes", "no")
% Find all scripts reachable from explore-image
?[target] := reachable["explore-image", target]
% Define path length (distance)
dist[a, b, 1] := *calls{caller: a, callee: b}
dist[a, c, n] := *calls{caller: a, callee: b}, dist[b, c, m], n = m + 1
% Shortest distance to each reachable script
?[target, min(d)] := dist["explore-image", target, d]
% ----------------------------------------------------------------------------
% COMPLEX ANALYSIS
% ----------------------------------------------------------------------------
% Find circular dependencies (A -> ... -> A)
circular[a] := depends[a, a]
?[script] := circular[script]
% Scripts that form a "core" - they both use and are used by others
?[name] :=
*script{name},
*calls{caller: name}, % calls something
*calls{callee: name} % something calls it
% "Importance score" - usage + refs + things that depend on it
importance[name, score] :=
*script{name, usage, refs},
dep_count = count(dependent),
*calls{callee: name} or not *calls{callee: name}, % handle zero case
score = usage + refs + dep_count
?[name, score] := importance[name, score]
:order -score
% ----------------------------------------------------------------------------
% COMPOSABLE RULES - Build up vocabulary
% ----------------------------------------------------------------------------
% Low-level facts
is_utility[s] := *calls{callee: s}
is_standalone[s] := *script{name: s}, not is_utility[s]
is_unused[s] := *script{name: s, usage: 0}
is_unreferenced[s] := *script{name: s, refs: 0}
% Composed predicates
safe_to_delete[s] := is_unused[s], is_unreferenced[s], is_standalone[s]
high_value_utility[s] := is_utility[s], *script{name: s, usage}, usage > 5
% Now queries read like English
?[s] := safe_to_delete[s]
?[s] := high_value_utility[s]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment