Last active
July 16, 2018 18:59
-
-
Save busypeoples/fca48f16dad93f9db1d5988829dc4d82 to your computer and use it in GitHub Desktop.
Implementing persistent data structures with HashTries
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// @flow | |
// Immutable Data Implementation | |
// Based on Hash Tries | |
// (NOTE: The actual implementation is based on hashed mapped tries!) | |
// But this can be seen as a basis for the actual implementation. | |
// This is only for learning purposes! | |
// Hash Trie or Hash Tree is a persistent data structure, commonly used | |
// to implement immuable maps. | |
// "In its basic form, a hash tree stores the hashes of its keys, | |
// regarded as strings of bits, in a trie, with the actual keys and (optional) | |
// values stored at the trie's "final" nodes." | |
// (https://en.wikipedia.org/wiki/Hash_tree_(persistent_data_structure)) | |
// To better understand the underlying structure, let's take a look at how | |
// a possible hash trie might look like. | |
// We always have a root node, this can be initialy empty. We could define a | |
// EmptyNode to represent the fact. | |
// {root: EmpytNode} | |
// Now how would the structure look like if we used a hash size of 32? | |
// We use a `keyHashFragment` function to retrieve the sub hash that we're looking | |
// for depending on the level we're at. | |
// f.e. keyHashFragment(SHIFT, 101574); // => 6 | |
// Which means we can access {root: {6: {...}}} the node at the given level. | |
// The other interesting aspect we want to focus on, are nodes. | |
// We can identify four different node types, to represent the basic structure. | |
// EmptyNode: to represent an empty trie. | |
// LeafNode: to represent a node containing the actual value. | |
// CollisionNode: to represent LeafNode with the same hash but different keys. | |
// ArrayNode: contains children Nodes, initialized with a fixed length. | |
// All nodes implement two functions: get and update | |
// get: requires shift, hash and key (EmptyNode always returns undefined!) | |
// update: requires shift, a function to call in case the LeafNode is found, | |
// as well as the hash and key. | |
// Note: The shift argument is only required by an ArrayNode to calculate the | |
// keyHashFragment for accessing children nodes. | |
// Using update covers updating as well as inserting and removing from | |
// the hash trie. update always returns a new hash trie, by recurrsively going | |
// through the structure and if it find the LeafNode rebuilds the structure | |
// to reflect the change. | |
// To better understand how we can implement an actual hash trie | |
// in the following we will build a minimal immutable Map. | |
// Constants | |
// We need to define a constant shift value | |
// Next we can calculate the size | |
// And finally we can derive Mask from the size, by substracting -1 | |
// See also https://github.com/facebook/immutable-js/blob/master/src/TrieUtils.js | |
const SHIFT = 5; | |
const SIZE = 1 << SHIFT; | |
const MASK = SIZE - 1; | |
// Nodes | |
// We will implement four types: Empty, Leaf, Collision and Array nodes. | |
/*:: | |
type Node = EmptyNode | LeafNode<*> | CollisionNode | ArrayNode; | |
type NodeT = "Empty" | "Leaf" | "Collision" | "Array"; | |
type Key = number | string; | |
type Hash = number; | |
*/ | |
// Node Types | |
const TYPE = { | |
EMPTY: "EMPTY", | |
LEAF: "LEAF", | |
COLLISION: "COLLISION", | |
ARRAY: "ARRAY" | |
}; | |
// An EmptyNode is needed to shortcut the traversal. | |
// `get` will always return undefined as we don't have any actual values. | |
// `update` will either create a new LeafNode, in case the function returns | |
// a value else we return the existing EmptyNode. | |
function EmptyNode() { | |
this.type = TYPE.EMPTY; | |
this.get = function( | |
shift /*:: : number*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) { | |
return undefined; | |
}; | |
this.update = function( | |
shift /*:: : number*/, | |
fn /*:: : Function*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) { | |
const value = fn(); | |
return value ? new LeafNode(hash, key, value) : this; | |
}; | |
} | |
// LeafNode contains the hash, key and value. | |
// `get`: returns the value if the provided key is the actual key. | |
// `update`: If the provided key is the actual stored key we call | |
// the function with the node value. In case a value is returned we return a | |
// new LeafNode with a new value. Otherwise we remove the node by returning | |
// an EmptyNode. | |
// If the key doesn't apply, we call the provided function. If no value is | |
// returned we return the node itself, else we merge the existing the current node | |
// with a newly created LeafNode containing the new value. | |
function LeafNode /*:: <T>*/( | |
hash /*:: : Hash*/, | |
key /*:: : Key*/, | |
value /*:: : T*/ | |
) { | |
this.type = TYPE.LEAF; | |
this.hash = hash; | |
this.key = key; | |
this.value = value; | |
this.get = function( | |
shift /*:: : number*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) { | |
return key === this.key ? this.value : undefined; | |
}; | |
this.update = function( | |
shift /*:: : number*/, | |
fn /*:: : Function*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) { | |
if (key === this.key) { | |
const value = fn(this.value); | |
return value === undefined | |
? new EmptyNode() | |
: new LeafNode(hash, key, value); | |
} | |
const value = fn(); | |
return value === undefined | |
? this | |
: mergeIntoNode(shift, this, new LeafNode(hash, key, value)); | |
}; | |
} | |
// CollisionNode takes care of situations where two nodes contain the same | |
// hash but different keys. | |
// `get`: we check if the provided hash is the same as the node's hash. | |
// If the hash is the same, we iterate over the children and try to find | |
// the first node that equals the provided key. If found we return the | |
// found node's value. If the hashes don't match we return undefined. | |
// `update`: We check if the hashes match. If they match, we resolve the | |
// collision and either return a new CollisionNode in case we have more than | |
// one child node, or return the child node itself. | |
function CollisionNode(hash /*:: : Hash*/, children /*:: : Array<Node>*/) { | |
this.type = TYPE.COLLISION; | |
this.hash = hash; | |
this.children = children; | |
this.get = function( | |
shift /*:: : number*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) { | |
if (!hash) { | |
hash = hashKey(key); | |
} | |
if (hash === this.hash) { | |
let children = this.children; | |
for (let i = 0, len = children.length; i < len; ++i) { | |
let child = children[i]; | |
if (key === child.key) { | |
return child.value; | |
} | |
} | |
} | |
return undefined; | |
}; | |
this.update = function( | |
shift /*:: : number*/, | |
fn /*:: : Function*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) /*:: : ?Node*/ { | |
if (hash === this.hash) { | |
const list = updateCollisions(this.hash, this.children, fn, key); | |
return list.length > 1 ? new CollisionNode(this.hash, list) : head(list); | |
} | |
const value = fn(); | |
return value === undefined | |
? this | |
: mergeIntoNode(shift, this, new LeafNode(hash, key, value)); | |
}; | |
} | |
// ArrayNode contains children nodes | |
// `get`: we check if we have a child node for provided shift and hahs | |
// and return the child node if found. | |
// `update`: We calculate the fragment hash for provided shift and hash. | |
// In case the child node doesn't exist, we create a new EmptyNode. | |
// Next, we call the update function of the either the existing node or the | |
// newly created EmptyNode with the provided function. | |
// Next we need to check if one of the nodes is empty. | |
// Depending wether the first or the second node are empty, we either | |
// insert or remove a new child node. | |
// Otherwise we create a new ArrayNode with the updated value | |
// for a given position inside the children array. | |
function ArrayNode(count /*:: : number*/, children /*:: : Array<Node>*/) { | |
this.type = TYPE.ARRAY; | |
this.count = count; | |
this.children = children; | |
this.get = function( | |
shift /*:: : number*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) /*:: : ?Node*/ { | |
const node = this.children[keyHashFragment(shift, hash)]; | |
if (node) { | |
return node.get(shift + SHIFT, hash, key); | |
} | |
return undefined; | |
}; | |
this.update = function( | |
shift /*:: : number*/, | |
fn /*:: : Function*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/ | |
) /*:: : ?Node*/ { | |
const fragment = keyHashFragment(shift, hash); | |
const child = this.children[fragment] || new EmptyNode(); | |
const newChild = child.update(shift + SHIFT, fn, hash, key); | |
const { EMPTY } = TYPE; | |
if (child.type === EMPTY && newChild.type !== EMPTY) { | |
return new ArrayNode( | |
this.count + 1, | |
update(fragment, newChild, this.children) | |
); | |
} | |
if (child.type !== EMPTY && newChild.type === EMPTY) { | |
return this.count - 1 <= 0 | |
? new EmptyNode() | |
: new ArrayNode( | |
this.count - 1, | |
update(fragment, undefined, this.children) | |
); | |
} | |
return new ArrayNode(this.count, update(fragment, newChild, this.children)); | |
}; | |
} | |
// Functionalities | |
// Return a hash for given level | |
// Requires shift and the hashed key | |
// i.e. we have a hash "1507722911" | |
// We can keep calling `keyHashFragment` to find the hash for given level | |
// For example, we might have a hash "602825438" and we shift by 5. | |
// ``` | |
// keyHashFragment(0, "602825438")) // => 30 | |
// keyHashFragment(5, "602825438")) // => 22 | |
// keyHashFragment(10, "602825438")) // => 24 | |
// ``` | |
// This means we could find our value at data[30][22][24] | |
// See also https://github.com/facebook/immutable-js/blob/master/src/Map.js#L299 | |
function keyHashFragment(shift /*:: : number*/, keyHash /*:: : Hash*/) { | |
return (shift === 0 ? keyHash : keyHash >>> shift) & MASK; | |
} | |
// creates a new ArrayNode containing children array with fixed length | |
// expects a list containing (hash, node) tuples | |
function createArrayNode(list /*:: : Array<[Hash, Node]>*/) /*:: : ArrayNode*/ { | |
const children = Array(SIZE); | |
const len = list.length; | |
for (let i = 0; i < len; i++) { | |
const [hash, node] = list[i]; | |
children[hash] = node; | |
} | |
return new ArrayNode(len, children); | |
} | |
// Helper function for safely accessing the first element of a given list. | |
function head /*:: <T>*/(list /*:: : ?Array<T>*/) /*:: : ?T */ { | |
return list && list.length > 0 ? list[0] : undefined; | |
} | |
// Helper function for merging two nodes into a new node | |
// This is regulary used in case of two nodes containing the same hash but | |
// different keys, in that case a new CollisionNode is returned. | |
function mergeIntoNode( | |
shift /*:: : number*/, | |
node1 /*:: : Node*/, | |
node2 /*:: : Node*/ | |
) /*:: : CollisionNode | ArrayNode */ { | |
if (node1.hash === node2.hash) { | |
return new CollisionNode(node1.hash, [node2, node1]); | |
} | |
const idx1 = keyHashFragment(shift, node1.hash); | |
const idx2 = keyHashFragment(shift, node2.hash); | |
return idx1 === idx2 | |
? createArrayNode([[idx1, mergeIntoNode(shift + SHIFT, node1, node2)]]) | |
: createArrayNode([[idx1, node1], [idx2, node2]]); | |
} | |
// Helper function for resolving collisions | |
// Provided function is called with the value of the first node for given key | |
// If the provided function returns a value we update the list with | |
// a new LeafNode containing the value. In case value is undefined | |
// we can remove the node at the found position. | |
function updateCollisions( | |
hash /*:: : Hash*/, | |
list /*:: : Array<Node>*/, | |
fn /*:: : Function*/, | |
key /*:: : Key*/ | |
) /*:: : Array<Node>*/ { | |
let found = undefined, | |
len = list.length, | |
idx = 0; | |
for (let idx = 0; idx < len; idx++) { | |
const child = list[idx]; | |
if (child.key === key) { | |
found = child; | |
break; | |
} | |
} | |
const value = found ? fn(found.value) : fn(); | |
return value === undefined | |
? spliceOut(idx, list) | |
: update(idx, new LeafNode(hash, key, value), list); | |
} | |
// Updates a value | |
// Copies the original array and updates a value for given position | |
function update( | |
idx /*:: : number*/, | |
val /*:: : any*/, | |
from /*Array<any>*/ | |
) /*:: : Array<any>*/ { | |
const len = from.length; | |
const to = Array(len); | |
for (let i = 0; i < len; i++) { | |
to[i] = from[i]; | |
} | |
to[idx] = val; | |
return to; | |
} | |
// Remove a value | |
// Copies the original array and removes a value for given position | |
// See also https://github.com/facebook/immutable-js/blob/master/src/Map.js#L790 | |
function spliceIn( | |
idx /*:: : number*/, | |
val /*:: : any*/, | |
from /*:: : Array<any>*/ | |
) /*:: : Array<any>*/ { | |
const len = from.length + 1; | |
const to = new Array(len); | |
let extend = 0; | |
for (let i = 0; i < len; i++) { | |
if (i === idx) { | |
to[i] = val; | |
extend = -1; | |
} else { | |
to[i] = from[i + extend]; | |
} | |
} | |
return to; | |
} | |
// Insert a value | |
// Copies the original array and adds a new value at given position | |
// See also https://github.com/facebook/immutable-js/blob/master/src/Map.js#L809 | |
function spliceOut( | |
idx /*:: : number*/, | |
from /*:: : Array<any>*/ | |
) /*:: : Array<any>*/ { | |
const len = from.length - 1; | |
const to = new Array(len); | |
let extend = 0; | |
for (let i = 0; i < len; i++) { | |
if (i === idx) { | |
extend = 1; | |
} | |
to[i] = from[i + extend]; | |
} | |
return to; | |
} | |
// Convenience function: enables to pass in a value, which will get wrapped | |
// into a function. No need to define a function in user land if we don't | |
// the previous value. | |
function wrapValue /*:: <T>*/(fn /*:: : Function | T*/) /*:: : Function*/ { | |
if (typeof fn === "function") { | |
return fn; | |
} | |
return function() { | |
return fn; | |
}; | |
} | |
function updateMap /*:: <T>*/( | |
fn /*:: : ?(Function | T)*/, | |
hash /*:: : Hash*/, | |
key /*:: : Key*/, | |
node /*:: : Node*/ | |
) /*:: : Map*/ { | |
// We initialize the shift with 0 | |
// The value is transformed into a function, if it's not a function. | |
// This is for convenience, so we can call update with a value if needed. | |
// i.e. updateMap("new Value",...) | |
// Furthermore we pass in the needed hash and key. | |
return new Map(node.update(0, wrapValue(fn), hash, key)); | |
} | |
// some hashing functionality | |
function hashKey(key /*:: : Key*/) /*:: : Hash*/ { | |
let hash = 0, | |
str = "" + key, | |
len = str.length; | |
for (let i = 0; i < len; i = i + 1) { | |
let char = str.charCodeAt(i); | |
hash = ((hash << SHIFT) - hash + char) | 0; | |
} | |
return Math.abs(hash); | |
} | |
class Map { | |
/*:: root : Node */ | |
constructor(root /*:: : Node */) /*:: : void*/ { | |
this.root = root; | |
} | |
static of(root /*:: : Node*/) /*:: : Map*/ { | |
return new Map(root); | |
} | |
set(key /*:: : Key*/, value /*:: : any*/) /*:: : Map*/ { | |
return updateMap(value, hashKey(key), key, this.root); | |
} | |
update(key /*:: : Key*/, value /*:: : any*/) /*:: : Map*/ { | |
return updateMap(value, hashKey(key), key, this.root); | |
} | |
get(key /*:: : Key*/) /*:: : any*/ { | |
return this.root.get(0, hashKey(key), key); | |
} | |
has(key /*:: : Key*/) /*:: : bool*/ { | |
return this.root.get(hashKey(key), key) !== undefined; | |
} | |
remove(key /*:: : Key*/) /*:: : Map*/ { | |
return updateMap(undefined, hashKey(key), key, this.root); | |
} | |
/*:: isEmpty : Node => bool*/ | |
isEmpty(node /*:: : Node*/) /*:: : bool*/ { | |
return node && node.type === TYPE.EMPTY; | |
} | |
} | |
// Create an initial empty Map | |
function emptyMap() /*:: : Map */ { | |
return new Map(new EmptyNode()); | |
} | |
exports.emptyMap = emptyMap; | |
exports.keyHashFragment = keyHashFragment; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const { assert } = require("chai"); | |
const { emptyMap } = require("./HashTrie"); | |
describe("Test HashTrie", function() { | |
it("should set key 'A' with value 'test'", function() { | |
const map = emptyMap().set("A", "test"); | |
assert.equal(map.get("A"), "test"); | |
}); | |
it("should update key 'A' with 1 to value 2", function() { | |
const inc = function(a) { | |
return a + 1; | |
}; | |
const map = emptyMap().set("A", 1); | |
const updatedMap = map.update("A", inc); | |
assert.equal(updatedMap.get("A"), 2); | |
}); | |
it("should remove key 'A'", function() { | |
const map = emptyMap().set("A", 1); | |
const updatedMap = map.remove("A"); | |
assert.isUndefined(updatedMap.get("A")); | |
}); | |
}); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment