Skip to content

Instantly share code, notes, and snippets.

@lenileiro
Last active April 17, 2024 21:02
Show Gist options
  • Save lenileiro/baedf134cf208871ee4f7049535a7448 to your computer and use it in GitHub Desktop.
Save lenileiro/baedf134cf208871ee4f7049535a7448 to your computer and use it in GitHub Desktop.
Elixir Datalog modelled as ReBAC (Relationship-Based Access Control) - Inspired by Google Zanzibar authorization system
defmodule Datalog do
defstruct rules: [], facts: %{}
alias Datalog.{Fact, Rule}
alias __MODULE__
def new, do: %__MODULE__{}
def add_rule(%Datalog{rules: rules} = datalog, %Rule{} = rule) do
{:ok, %Datalog{datalog | rules: [rule | rules]}}
end
def add_rule(_, _), do: {:error, :invalid_rule}
def add_fact(%Datalog{facts: facts} = datalog, %Fact{} = fact) do
updated_facts = Map.put(facts, fact_key(fact), fact)
{:ok, %Datalog{datalog | facts: updated_facts}}
end
def add_fact(_, _), do: {:error, :invalid_fact}
def evaluate_query(%Datalog{rules: rules, facts: facts}, query_params) do
matching_rules = get_matching_rules(rules, query_params[:rule])
{:ok, apply_rules(facts, matching_rules, query_params[:rule])}
end
def evaluate_query(_, _), do: {:error, :invalid_datalog}
defp get_matching_rules(rules, nil), do: rules
defp get_matching_rules(rules, relation) do
Enum.filter(rules, fn rule -> rule.name == relation end)
end
defp apply_rules(facts, rules, query_rule) do
iter_apply_rules(facts, rules, query_rule, %{}, %{})
end
defp iter_apply_rules(all_facts, rules, query_rule, derived, seen, previous_derived \\ %{}) do
# Create tasks for each fact in all_facts
tasks =
Map.keys(all_facts)
|> Enum.map(fn fact_key ->
fact = Map.fetch!(all_facts, fact_key)
Task.async(fn ->
Enum.reduce(all_facts, {Map.new(), derived}, fn {_, existing_fact}, {acc, d_acc} ->
process_fact(fact, existing_fact, rules, acc, d_acc)
end)
end)
end)
# Wait for all tasks to complete and collect the results
{new_facts, new_derived} =
tasks
|> Task.await_many()
|> Enum.reduce({%{}, derived}, fn {fact_new_facts, fact_derived}, {acc, d_acc} ->
{Map.merge(acc, fact_new_facts), Map.merge(d_acc, fact_derived)}
end)
updated_seen = update_seen(new_facts, seen)
# Check if no new facts are derived
if new_derived == previous_derived do
if query_rule do
Map.values(new_derived)
else
Map.values(all_facts)
end
else
new_total_facts = Map.merge(all_facts, new_derived)
iter_apply_rules(new_total_facts, rules, query_rule, %{}, updated_seen, new_derived)
end
end
defp update_seen(new_facts, seen) do
Map.merge(seen, new_facts, fn _, _, new -> new end)
end
defp fact_key(fact) do
{fact.object_id, fact.subject_id, fact.object_relation}
end
defp process_fact(fact1, fact2, rules, acc, derived) do
Enum.reduce(rules, {acc, derived}, fn rule, {acc_inner, derived_inner} ->
apply_rule(fact1, fact2, rule, acc_inner, derived_inner)
end)
end
defp apply_rule(fact1, fact2, %Rule{rule: rule_fn}, acc, derived) do
case apply_rule_fn(rule_fn, fact1, fact2) do
nil ->
{acc, derived}
new_fact ->
fact_key = fact_key(new_fact)
if Map.has_key?(acc, fact_key) do
{acc, derived}
else
{Map.put(acc, fact_key, new_fact), Map.put(derived, fact_key, new_fact)}
end
end
rescue
FunctionClauseError -> {acc, derived}
end
defp apply_rule_fn(rule_fn, fact1, _fact2) when is_function(rule_fn, 1),
do: apply(rule_fn, [fact1])
defp apply_rule_fn(rule_fn, fact1, fact2) when is_function(rule_fn, 2),
do: apply(rule_fn, [fact1, fact2])
defp apply_rule_fn(_, _, _), do: nil
end
defmodule DatalogTest do
use ExUnit.Case
alias Datalog
alias Datalog.Rule
alias Datalog.Fact
describe "evaluate_query/2" do
setup do
datalog = Datalog.new()
{:ok, datalog: datalog}
end
test "valid rules and facts produce expected ancestor results", %{datalog: datalog} do
ancestor_rule_fn = fn
%Fact{object_id: grandparent, subject_id: parent, object_relation: "parent"},
%Fact{object_id: parent, subject_id: descendant, object_relation: "parent"} ->
%Fact{
object_id: grandparent,
object_namespace: "user",
subject_id: descendant,
subject_namespace: "user",
object_relation: "ancestor"
}
end
ancestor_rule = %Rule{name: "ancestor", rule: ancestor_rule_fn}
{:ok, datalog} = Datalog.add_rule(datalog, ancestor_rule)
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Alice",
object_namespace: "user",
subject_id: "Bob",
subject_namespace: "user",
object_relation: "parent"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Bob",
object_namespace: "user",
subject_id: "Charlie",
subject_namespace: "user",
object_relation: "parent"
})
query_params = %{rule: "ancestor"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
expected_results = [
%Fact{
object_id: "Alice",
object_namespace: "user",
subject_id: "Charlie",
object_relation: "ancestor",
subject_namespace: "user"
}
]
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "non-existent rules return an empty list", %{datalog: datalog} do
query_params = %{rule: "non_existing_rule"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
assert results == []
end
test "invalid Datalog structure results in error", _context do
invalid_datalog = %{invalid: "structure"}
query_params = %{object_relation: "ancestor"}
assert {:error, _} = Datalog.evaluate_query(invalid_datalog, query_params)
end
test "recursive rules correctly infer grandparent relationships", %{datalog: datalog} do
# Define a rule to infer grandparent relationships
grandparent_rule_fn = fn
%Fact{object_id: grandparent, subject_id: parent, object_relation: "parent"},
%Fact{object_id: parent, subject_id: child, object_relation: "parent"} ->
%Fact{
object_id: grandparent,
object_relation: "grandparent",
subject_id: child
}
end
grandparent_rule = %Rule{name: "grandparent_rule", rule: grandparent_rule_fn}
{:ok, datalog} = Datalog.add_rule(datalog, grandparent_rule)
# Add parent relationship facts
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Alice",
subject_id: "Bob",
object_relation: "parent"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Bob",
subject_id: "Charlie",
object_relation: "parent"
})
# Evaluate the query to infer grandparent relationships
query_params = %{rule: "grandparent_rule"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
# Expected result
expected_results = [
%Fact{
object_id: "Alice",
object_relation: "grandparent",
subject_id: "Charlie"
}
]
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "deep nested recursive relationships are accurately inferred", %{datalog: datalog} do
# Rule for direct parent-child relationship
parent_rule_fn = fn
%Fact{object_id: parent, subject_id: child, object_relation: "parent"} ->
%Fact{object_id: parent, subject_id: child, object_relation: "ancestor"}
end
parent_rule = %Rule{name: "ancestor_rule", rule: parent_rule_fn}
{:ok, datalog} = Datalog.add_rule(datalog, parent_rule)
# Rule for extending ancestor relationships
extended_ancestor_rule_fn = fn
%Fact{object_id: ancestor, subject_id: intermediate, object_relation: "ancestor"},
%Fact{object_id: intermediate, subject_id: descendant, object_relation: "parent"} ->
%Fact{object_id: ancestor, subject_id: descendant, object_relation: "ancestor"}
end
extended_ancestor_rule = %Rule{name: "ancestor_rule", rule: extended_ancestor_rule_fn}
{:ok, datalog} = Datalog.add_rule(datalog, extended_ancestor_rule)
# Add facts for a family tree
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "GreatGrandparent",
subject_id: "Grandparent",
object_relation: "parent"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Grandparent",
subject_id: "Parent",
object_relation: "parent"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Parent",
subject_id: "Child",
object_relation: "parent"
})
# Evaluate the query to infer ancestor relationships
query_params = %{rule: "ancestor_rule"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
# Define expected results including direct and extended ancestors
expected_results = [
%Fact{
object_id: "GreatGrandparent",
subject_id: "Grandparent",
object_relation: "ancestor"
},
%Fact{object_id: "GreatGrandparent", subject_id: "Parent", object_relation: "ancestor"},
%Fact{object_id: "GreatGrandparent", subject_id: "Child", object_relation: "ancestor"},
%Fact{object_id: "Grandparent", subject_id: "Parent", object_relation: "ancestor"},
%Fact{object_id: "Grandparent", subject_id: "Child", object_relation: "ancestor"},
%Fact{object_id: "Parent", subject_id: "Child", object_relation: "ancestor"}
]
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "multiple matching rules are handled correctly", %{datalog: datalog} do
# Define first rule
rule_fn_1 = fn
%Fact{object_id: id, object_relation: "relation1"} ->
%Fact{object_id: id, object_relation: "result1"}
end
rule_1 = %Rule{name: "multi_match_rule", rule: rule_fn_1}
{:ok, datalog} = Datalog.add_rule(datalog, rule_1)
# Define second rule
rule_fn_2 = fn
%Fact{object_id: id, object_relation: "relation2"} ->
%Fact{object_id: id, object_relation: "result2"}
end
rule_2 = %Rule{name: "multi_match_rule", rule: rule_fn_2}
{:ok, datalog} = Datalog.add_rule(datalog, rule_2)
# Add facts
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Object1",
object_relation: "relation1"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Object2",
object_relation: "relation2"
})
# Create a query that will trigger these rules
query_params = %{rule: "multi_match_rule"}
# Evaluate the query
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
# Define expected results
expected_results = [
%Fact{object_id: "Object1", object_relation: "result1"},
%Fact{object_id: "Object2", object_relation: "result2"}
]
# Assert that the actual results match the expected results
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "complex logical conditions in rules are processed correctly", %{datalog: datalog} do
# Complex rule function
complex_rule_fn = fn
%Fact{object_id: id1, subject_id: id2, object_relation: rel1},
%Fact{object_id: id2, subject_id: id3, object_relation: rel2}
when rel1 == "friend" and rel2 == "colleague" ->
%Fact{
object_id: id1,
subject_id: id3,
object_relation: "acquaintance",
object_namespace: "network",
subject_namespace: "network"
}
end
# Add complex rule
complex_rule = %Rule{name: "complex_relation", rule: complex_rule_fn}
{:ok, datalog} = Datalog.add_rule(datalog, complex_rule)
# Add facts
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Alice",
subject_id: "Bob",
object_relation: "friend"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Bob",
subject_id: "Charlie",
object_relation: "colleague"
})
# Create a query
query_params = %{rule: "complex_relation"}
# Evaluate the query
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
# Define expected results
expected_results = [
%Fact{
object_id: "Alice",
subject_id: "Charlie",
object_relation: "acquaintance",
object_namespace: "network",
subject_namespace: "network"
}
]
# Assert that the actual results match the expected results
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "efficient handling of large numbers of rules and facts", %{datalog: datalog} do
# Generate a large number of rules
datalog =
Enum.reduce(1..1000, datalog, fn i, acc ->
rule_fn = fn %Fact{object_id: id} ->
%Fact{object_id: id, object_relation: "processed_#{i}"}
end
rule = %Rule{name: "rule_#{i}", rule: rule_fn}
case Datalog.add_rule(acc, rule) do
{:ok, updated_datalog} -> updated_datalog
{:error, _} -> acc
end
end)
# Generate a large number of facts
datalog =
Enum.reduce(1..1000, datalog, fn j, acc ->
fact = %Fact{object_id: "object_#{j}"}
case Datalog.add_fact(acc, fact) do
{:ok, updated_datalog} -> updated_datalog
{:error, _} -> acc
end
end)
# Evaluate a query that triggers one specific rule
query_params = %{rule: "rule_500"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
# Define expected results
expected_result =
Enum.map(1..1000, fn j ->
%Fact{object_id: "object_#{j}", object_relation: "processed_500"}
end)
# Assert that the actual results match the expected results
assert Enum.sort(results) == Enum.sort(expected_result)
end
test "queries with single function rules of the same name are handled accurately", %{
datalog: datalog
} do
rule_fn_1 = fn
%Fact{object_id: object_id, subject_id: subject_id, object_relation: "relation1"} ->
%Fact{object_id: object_id, subject_id: subject_id, object_relation: "result1"}
end
rule_fn_2 = fn
%Fact{object_id: object_id, subject_id: subject_id, object_relation: "relation2"} ->
%Fact{object_id: object_id, subject_id: subject_id, object_relation: "result2"}
end
rule_1 = %Rule{name: "same_name_rule", rule: rule_fn_1}
rule_2 = %Rule{name: "same_name_rule", rule: rule_fn_2}
{:ok, datalog} = Datalog.add_rule(datalog, rule_1)
{:ok, datalog} = Datalog.add_rule(datalog, rule_2)
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Object1",
subject_id: "Subject1",
object_relation: "relation1"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Object2",
subject_id: "Subject2",
object_relation: "relation2"
})
query_params = %{rule: "same_name_rule"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
expected_results = [
%Fact{object_id: "Object1", subject_id: "Subject1", object_relation: "result1"},
%Fact{object_id: "Object2", subject_id: "Subject2", object_relation: "result2"}
]
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "queries with multiple rules of the same name are handled accurately", %{
datalog: datalog
} do
admin_rule_fn_1 = fn
%Fact{object_id: admin, object_relation: "manages", subject_id: department},
%Fact{object_id: employee, subject_id: department, object_relation: "works_in"} ->
%Fact{object_id: admin, object_relation: "admin_of", subject_id: employee}
end
admin_rule_fn_2 = fn
%Fact{object_id: admin, object_relation: "leads", subject_id: project},
%Fact{object_id: employee, subject_id: project, object_relation: "contributes_to"} ->
%Fact{object_id: admin, object_relation: "leader_of", subject_id: employee}
end
admin_rule_1 = %Rule{name: "admin_rule", rule: admin_rule_fn_1}
admin_rule_2 = %Rule{name: "admin_rule", rule: admin_rule_fn_2}
{:ok, datalog} = Datalog.add_rule(datalog, admin_rule_1)
{:ok, datalog} = Datalog.add_rule(datalog, admin_rule_2)
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Alice",
object_relation: "manages",
subject_id: "Engineering"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Bob",
subject_id: "Engineering",
object_relation: "works_in"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Charlie",
subject_id: "Engineering",
object_relation: "works_in"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Jan",
object_relation: "leads",
subject_id: "ProjectX"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Bob",
subject_id: "ProjectX",
object_relation: "contributes_to"
})
query_params = %{rule: "admin_rule"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
expected_results = [
%Fact{object_id: "Alice", object_relation: "admin_of", subject_id: "Bob"},
%Fact{object_id: "Alice", object_relation: "admin_of", subject_id: "Charlie"},
%Fact{object_id: "Jan", object_relation: "leader_of", subject_id: "Bob"}
]
assert Enum.sort(results) == Enum.sort(expected_results)
end
test "rules with interdependencies are evaluated correctly", %{datalog: datalog} do
parent_rule_fn = fn
%Fact{object_id: object_id, subject_id: subject_id, object_relation: "parent"} ->
%Fact{
object_id: object_id,
subject_id: subject_id,
object_relation: "parent"
}
end
parent_rule = %Rule{name: "parent", rule: parent_rule_fn}
{:ok, datalog} = Datalog.add_rule(datalog, parent_rule)
ancestor_rule_fn_1 = fn
%Fact{object_id: object_id, subject_id: subject_id, object_relation: "ancestor"} ->
%Fact{
object_id: object_id,
subject_id: subject_id,
object_relation: "ancestor"
}
end
ancestor_rule_fn_2 = fn
%Fact{object_id: grandparent, subject_id: parent, object_relation: "parent"},
%Fact{object_id: parent, subject_id: descendant, object_relation: "parent"} ->
%Fact{
object_id: grandparent,
subject_id: descendant,
object_relation: "ancestor"
}
end
ancestor_rule_fn_3 = fn
%Fact{object_id: object_id, subject_id: parent, object_relation: "parent"},
%Fact{object_id: parent, subject_id: subject_id, object_relation: "ancestor"} ->
%Fact{
object_id: object_id,
subject_id: subject_id,
object_relation: "ancestor"
}
end
ancestor_rule_1 = %Rule{name: "ancestor", rule: ancestor_rule_fn_1}
ancestor_rule_2 = %Rule{name: "ancestor", rule: ancestor_rule_fn_2}
ancestor_rule_3 = %Rule{name: "ancestor", rule: ancestor_rule_fn_3}
{:ok, datalog} = Datalog.add_rule(datalog, ancestor_rule_1)
{:ok, datalog} = Datalog.add_rule(datalog, ancestor_rule_2)
{:ok, datalog} = Datalog.add_rule(datalog, ancestor_rule_3)
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Alice",
subject_id: "Bob",
object_relation: "parent"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Bob",
subject_id: "Charlie",
object_relation: "parent"
})
{:ok, datalog} =
Datalog.add_fact(datalog, %Fact{
object_id: "Charlie",
subject_id: "Daisy",
object_relation: "parent"
})
query_params = %{rule: "ancestor"}
{:ok, results} = Datalog.evaluate_query(datalog, query_params)
expected_results = [
%Fact{object_id: "Alice", subject_id: "Charlie", object_relation: "ancestor"},
%Fact{object_id: "Bob", subject_id: "Daisy", object_relation: "ancestor"},
%Fact{object_id: "Alice", subject_id: "Daisy", object_relation: "ancestor"}
]
assert Enum.sort(results) == Enum.sort(expected_results)
end
end
end
defmodule Datalog.Fact do
defstruct [
:object_namespace,
:object_id,
:object_relation,
:subject_namespace,
:subject_id,
:subject_relation
]
end
defmodule Datalog.Rule do
defstruct [:name, :rule]
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment