Last active
May 13, 2022 01:56
-
-
Save shouya/11e93b2a6dd7e71ce1a1e1c89fe1b079 to your computer and use it in GitHub Desktop.
Deep diff/intersect/merge
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
defmodule Manager.Util.Structural do | |
@moduledoc """ | |
This module is build around this invariance, which I call | |
"reconstruction property": | |
for all m_1, m_2: maps, let m_common = intersect(m_1, m_2), | |
then merge(m_common, diff(m_1, m_common)) == m_1, | |
and merge(m_common, diff(m_2, m_common)) == m_2. | |
This allows us to efficiently store a list of [m_1, m_2, ..., m_K] | |
where "m_*" are shares a lot of common fields and K is large. | |
We can simply store the tuple {m_common, [dm_1, dm_2, ..., dm_K]} | |
where "dm_*" can be relatively small. This way the total space | |
needed is small. | |
""" | |
@type t :: map() | list() | |
defguard is_scalar(value) when not is_map(value) and not is_list(value) | |
@doc """ | |
Find a shared nested structure that is identical in two objects. The two | |
objects must be of the same type. (i.e. map vs map or list vs | |
list, not map vs list). | |
This binary operation has the properties: | |
- intersect(a, a) == a | |
- intersect(a, %{}) == %{} | |
- intersect(a, b) == intersect(b, a) | |
- intersect(a, intersect(b, c)) == intersect(intersect(a, b), c) | |
(similar to set intersection on map keys) | |
Examples: | |
iex> intersect(%{a: 1}, %{a: 1, b: 2}) | |
%{a: 1} | |
iex> intersect(%{a: [%{b: 2}, %{c: 2}]}, | |
...> %{a: [%{b: 2}, %{c: 1}]}) | |
%{a: [%{b: 2}, %{}]} | |
""" | |
@spec intersect(t(), t()) :: t | |
def intersect(m1, m2) when is_map(m1) and is_map(m2) do | |
k1 = MapSet.new(Map.keys(m1)) | |
k2 = MapSet.new(Map.keys(m2)) | |
common_keys = MapSet.intersection(k1, k2) | |
common_keys | |
|> Enum.flat_map(fn key -> | |
v1 = Map.fetch!(m1, key) | |
v2 = Map.fetch!(m2, key) | |
cond do | |
v1 == v2 -> | |
[{key, v1}] | |
is_list(v1) and is_list(v2) and length(v1) != length(v2) -> | |
[] | |
is_list(v1) and is_list(v2) -> | |
[{key, intersect(v1, v2)}] | |
is_map(v1) and is_map(v2) -> | |
[{key, intersect(v1, v2)}] | |
true -> | |
[] | |
end | |
end) | |
|> Map.new() | |
end | |
def intersect(l1, l2) | |
when is_list(l1) and is_list(l2) | |
when length(l1) == length(l2) do | |
for {e1, e2} <- Enum.zip(l1, l2) do | |
intersect(e1, e2) | |
end | |
end | |
@spec intersect_all([map()]) :: map() | |
def intersect_all([x | xs]) do | |
Enum.reduce(xs, x, fn y -> intersect(x, y) end) | |
end | |
@doc """ | |
Take the deep-difference of two objects. | |
Properties: | |
- diff(a, %{}) == a | |
- diff(a, a) == %{} | |
- diff(a, diff(a, b)) == intersection(a, b) | |
(similar to set difference on map keys) | |
Example: | |
iex> diff(%{a: [%{b: %{c: 1, d: 2}}]}, | |
...> %{a: [%{d: %{c: 1, d: 2}}]}) | |
%{a: [%{b: %{c: 1, d: 2}}]} | |
iex> diff(%{a: [%{b: %{c: 1, d: 2}}]}, | |
...> %{a: [%{b: %{c: 1, d: 2}}]}) | |
%{} | |
iex> diff(%{a: [%{b: %{c: 1, d: 2}}]}, | |
...> %{a: [%{b: %{c: 1, d: 4}}]}) | |
%{a: [%{b: %{d: 2}}]} | |
iex> diff(%{a: [%{b: %{c: 1, d: 2}}]}, | |
...> %{}) | |
%{a: [%{b: %{c: 1, d: 2}}]} | |
""" | |
@spec diff(t(), t()) :: t() | |
def diff(m1, m2) when is_map(m1) and is_map(m2) do | |
k1 = MapSet.new(Map.keys(m1)) | |
k2 = MapSet.new(Map.keys(m2)) | |
extra_keys = MapSet.difference(k1, k2) |> Enum.to_list() | |
common_keys = MapSet.intersection(k1, k2) |> Enum.to_list() | |
extra = Map.take(m1, extra_keys) | |
common_keys | |
|> Enum.flat_map(fn key -> | |
v1 = Map.fetch!(m1, key) | |
v2 = Map.fetch!(m2, key) | |
cond do | |
v1 == v2 -> | |
[] | |
is_list(v1) and is_list(v2) and length(v1) != length(v2) -> | |
[{key, v1}] | |
is_list(v1) and is_list(v2) -> | |
[{key, diff(v1, v2)}] | |
is_map(v1) and is_map(v2) -> | |
[{key, diff(v1, v2)}] | |
true -> | |
[{key, v1}] | |
end | |
end) | |
|> Map.new() | |
|> Map.merge(extra) | |
end | |
def diff(l1, l2) | |
when is_list(l1) and is_list(l2) | |
when length(l1) == length(l2) do | |
for {e1, e2} <- Enum.zip(l1, l2) do | |
diff(e1, e2) | |
end | |
end | |
@doc """ | |
Deep merge of two objects. | |
Invariances: | |
- merge(a, %{}) == merge(%{}, a) == a | |
- merge(a, a) == a | |
- merge(intersect(a, b), diff(a, b)) == a | |
Examples: | |
iex> merge(%{a: %{b: 1}}, %{a: %{b: 2}}) | |
%{a: %{b: 2}} | |
iex> merge(%{a: %{b: 1}}, %{a: %{c: 2}}) | |
%{a: %{b: 1, c: 2}} | |
iex> merge(%{a: %{}}, %{a: %{c: 2}}) | |
%{a: %{c: 2}} | |
""" | |
@spec merge(t(), t()) :: t() | |
def merge(m1, m2) when is_map(m1) and is_map(m2) do | |
k1 = MapSet.new(Map.keys(m1)) | |
k2 = MapSet.new(Map.keys(m2)) | |
common_keys = MapSet.intersection(k1, k2) | |
extra1 = Map.take(m1, Enum.to_list(MapSet.difference(k1, k2))) | |
extra2 = Map.take(m2, Enum.to_list(MapSet.difference(k2, k1))) | |
common_keys | |
|> Enum.flat_map(fn key -> | |
v1 = Map.fetch!(m1, key) | |
v2 = Map.fetch!(m2, key) | |
cond do | |
v1 == v2 -> | |
[{key, v2}] | |
is_list(v1) and is_list(v2) and length(v1) != length(v2) -> | |
[{key, v2}] | |
is_list(v1) and is_list(v2) -> | |
[{key, merge(v1, v2)}] | |
is_map(v1) and is_map(v2) -> | |
[{key, merge(v1, v2)}] | |
true -> | |
[{key, v2}] | |
end | |
end) | |
|> Map.new() | |
|> Map.merge(extra1) | |
|> Map.merge(extra2) | |
end | |
def merge(l1, l2) | |
when is_list(l1) and is_list(l2) | |
when length(l1) == length(l2) do | |
for {e1, e2} <- Enum.zip(l1, l2) do | |
merge(e1, e2) | |
end | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment