Last active
October 27, 2021 01:41
-
-
Save viclib/507c1a1f7c1206158938 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is a simple interpreter for the untyped lambda calculus | |
// in JavaScript, used as an example of the possible benefits of | |
// hash-consing. Skip to the end of the file to see the tests. | |
// ~ SrPeixinho | |
// 2^32 and its reciprocal. Kept for parity with the original file;
// nothing below currently reads them.
var i32size = Math.pow(2, 32);
var i32sizeInv = 1 / i32size;

// Global instrumentation counters, printed at the end of the script.
var stats = {
  beta_reductions: 0,        // calls to Runtime.apply (actual beta steps)
  reductions: 0,             // calls to Runtime.normal
  unmemoized_reductions: 0,  // normal() calls that missed the memo table
  subs_calls: 0,             // calls to Runtime.subs
  unmemoized_subs_calls: 0,  // subs() calls that missed the memo table
  max_var: 0                 // largest de-Bruijn index seen while normalizing
};
// A tiny graph-reduction runtime for the untyped lambda calculus.
// Terms are packed into 32-bit integers; application nodes index into
// the left_/right_ arrays. When `hashCons` is true, structurally equal
// applications are shared through an open hash table, which also makes
// the normal-form and substitution memo tables effective.
function Runtime(hashCons){
  this.subsMemo = {};      // memo for subs(), keyed by a string of its args
  this.left_ = [];         // function side of each allocated application
  this.right_ = [];        // argument side of each allocated application
  this.normal_ = {};       // memo: node -> its normal form
  this.buckets = 1000003;  // prime bucket count for the consing table
  this.hash = [];
  this.hashCons = hashCons;
  for (var b = 0; b < this.buckets; b++){
    this.hash.push([]);
  }
}
Runtime.prototype.app = function(x, y){
  // Applies term "x" to term "y", allocating an application node.
  // This is where the memory allocation happens. With hash-consing on,
  // an existing node with the same (x, y) children is returned instead
  // of allocating a duplicate.
  var bucket;
  if (this.hashCons){
    var bucketIndex = ((x >>> 0) * 31 + (y >>> 0)) % this.buckets;
    bucket = this.hash[bucketIndex];
    for (var j = 0, l = bucket.length; j < l; j += 1){
      var cached = bucket[j];  // was an assignment to the hoisted `ptr` before its `var`
      if (x === this.left(cached) && y === this.right(cached))
        return cached;
    }
  }
  // Fresh node: tag bit 0x80000000 marks "application"; the low 20 bits
  // index into the left_/right_ arrays.
  var pos = this.left_.length;
  var ptr = 0x80000000 | pos;
  this.left_[pos] = x;
  this.right_[pos] = y;
  if (this.hashCons)
    bucket.push(ptr);  // was bucket[j] = ptr, relying on j === bucket.length
  return ptr;
};
// True when the node's top 12 tag bits mark an application cell.
Runtime.prototype.isApp = function(node){
  var tag = node >>> 20;
  return tag === 0x800;
};
// Function (left child) of an application node; the low 20 bits of the
// node are an index into left_.
Runtime.prototype.left = function(node){
  var index = node & 0x000FFFFF;
  return this.left_[index];
};
// Argument (right child) of an application node; the low 20 bits of the
// node are an index into right_.
Runtime.prototype.right = function(node){
  var index = node & 0x000FFFFF;
  return this.right_[index];
};
// Truthy when the node carries at least one lambda in its abstraction
// counter (bits 20-29). Note: returns the masked bits themselves, not a
// boolean — callers rely only on truthiness.
Runtime.prototype.isLam = function(node){
  var lamBits = node & 0x3FF00000;
  return lamBits;
};
// Wraps "node" in one more lambda by bumping the abstraction counter
// held in bits 20-29.
Runtime.prototype.lam = function(node){
  var oneLambda = 0x00100000;
  return node + oneLambda;
};
// Strips one binder: for an application node the "body" is its right
// child; otherwise remove one lambda from the abstraction counter.
Runtime.prototype.body = function(node){
  if ((node >>> 20) === 0x800){
    return this.right(node);
  }
  return node - 0x00100000;
};
// Variables are bare de-Bruijn indices: every tag bit (>= 20) is zero.
Runtime.prototype.isVar = function(node){
  var tag = node >>> 20;
  return !tag;
};
// Reduces term "x" to its normal form (normal-order evaluation).
// With hash-consing on, results are memoized per node in normal_, so a
// shared subterm is only ever normalized once.
Runtime.prototype.normal = function(x){
  ++stats.reductions;
  var self = this; // NOTE(review): looks unused — candidate for removal
  if (this.hashCons && this.normal_[x])
    return this.normal_[x];
  if (this.isApp(x)){
    // Normalize both children first; if the left side became a lambda,
    // the cascade below beta-reduces via apply().
    var l = this.normal(this.left(x));
    var r = this.normal(this.right(x));
    // NOTE(review): ll/lr appear to be written but never read.
    var ll = this.left(l);
    var lr = this.right(l);
  };
  ++stats.unmemoized_reductions;
  var normal = (
    // Variable: already normal. The assignment (stats.max_var=x) also
    // evaluates to x, so both branches yield x.
    this.isVar(x) ? (stats.max_var < x ? (stats.max_var=x) : x)
    // Lambda: normalize the body under the binder, then re-wrap.
    : this.isLam(x) ? this.lam(this.normal(this.body(x)))
    // Application of a lambda: perform the beta-reduction.
    : this.isLam(l) ? this.apply(l,r)
    // Plain application of a non-lambda: rebuild from normalized parts.
    : this.app(l,r));
  if (this.hashCons)
    this.normal_[x] = normal;
  return normal;
};
// De-Bruijn substitution: replaces, inside "x", the variables bound by
// the "d"-th enclosing abstraction with term "t", shifting free
// variables by "w". Memoized on a string key of all four arguments.
Runtime.prototype.subs = function (t,d,w,x){
  // Substitutes by "t" all variables of "x",
  // that are bound to "d"th abstraction
  // above it. Add "x" to free variables.
  ++stats.subs_calls;
  var hash = ""+t+"_"+d+"_"+w+"_"+x; // bad, bad, todo: improve
  if (this.hashCons && this.subsMemo[hash])
    return this.subsMemo[hash];
  ++stats.unmemoized_subs_calls;
  // Don't even bother trying to read this.
  // NOTE(review): the memo is written even when hashCons is off — it is
  // just never read in that case (wasted memory, not a behavior bug).
  // Cases: application — recurse into both children; lambda — recurse
  // under the binder with d+1; variable — substitute when it matches d
  // (re-shifting t's own frees), shift when free (x > d), keep otherwise.
  return this.subsMemo[hash]
    = this.isApp(x) ? this.app(this.subs(t,d,w,this.left(x)),this.subs(t,d,w,this.right(x)))
    : this.isLam(x) ? this.lam(this.subs(t,d+1,w,this.body(x)))
    : this.isVar(x) ? (t && x===d ? this.subs(0,-1,d,t) : x>d ? x+w : x)
    : x;
};
// Beta-reduction: substitute argument "x" into the body of lambda "f",
// then normalize the result.
Runtime.prototype.apply = function(f,x){
  ++stats.beta_reductions;
  var substituted = this.subs(x, 0, -1, this.body(f));
  return this.normal(substituted);
};
// Renders a term: applications as "(f x)", lambdas as "(λ body)",
// variables as "v<de-Bruijn index>".
Runtime.prototype.toString = function(x){
  if (this.isApp(x)){
    return "("+this.toString(this.left(x))+" "+this.toString(this.right(x))+")";
  }
  if (this.isLam(x)){
    return "(λ "+this.toString(this.body(x))+")";
  }
  return "v"+x;
};
// Change this to "false" to disable hash consing.
var useHashConsing = true;
// Starts runtime + helpers.
var rt = new Runtime(useHashConsing);
// Shorthand constructors, declared with `var` so they are not implicit
// globals (the original assignments would throw in strict mode).
var L = function(x){ return rt.lam(x); };
var A = function(a,b){ return rt.app(a,b); };
// A few combinators (de-Bruijn encoded).
var id = L(0);                         // λx. x
var succ = L(L(L(A(1,A(A(2,1),0)))));  // Church successor
var mul = L(L(L(A(2,A(1,0)))));        // Church multiplication
// Church numbers up to 100.
var c = [L(L(0))];
for (var i = 0; i < 100; ++i)
  c.push(A(succ, c[c.length - 1]));
// This is a rather stupid way to take the power of 2, but
// may be a good example. It creates a scott-encoded binary
// tree of depth = n with all leaves = 1, and then sums it.
// NOTE(review): the term below is an opaque de-Bruijn encoding;
// treat it as data.
pow2 = L(A(A(A(A(A(A(0,L(L(L(L(L(A(A(A(2,2),A(4,3)),A(4,3))))))))
,L(L(L(L(A(1,3)))))),L(L(A(1,0)))),L(L(L(A(A(A(A(A(0,2),L(0)),
L(L(0))),L(L(L(A(1,A(A(2,1),0)))))),A(A(A(1,2),L(0)),
L(L(0)))))))),L(0)),L(L(0))))
// Test = (pow2 church_9) -- 2^9
main = A(pow2,c[9]);
// Outputs results: the normal form of main, then the instrumentation
// counters accumulated in `stats` during reduction.
console.log("Normal of `main`:");
console.log(rt.toString(rt.normal(main)));
console.log();
console.log("Node count :",rt.left_.length);
console.log("Beta-reductions :",stats.beta_reductions);
console.log("Reductions :",stats.reductions,"("+(stats.reductions-stats.unmemoized_reductions)+" memoized)");
console.log("Calls to subs :",stats.subs_calls,"("+(stats.subs_calls-stats.unmemoized_subs_calls)+" memoized)");
console.log("Max bruijn var :",stats.max_var);
//Result on my computer: | |
// | |
//With hash consing: | |
// | |
//Node count : 3589 | |
//Beta-reductions : 893 | |
//Reductions : 210516 (204195 memoized) | |
//Calls to subs : 15095 (5556 memoized) | |
//Max bruijn var : 30 | |
//real 0m0.227s | |
//user 0m0.195s | |
//sys 0m0.036s | |
// | |
// Without hash consing: | |
// | |
//Node count : 695773 | |
//Beta-reductions : 13616 | |
//Reductions : 893424 (0 memoized) | |
//Calls to subs : 972923 (0 memoized) | |
//Max bruijn var : 30 | |
//real 0m0.943s | |
//user 0m0.853s | |
//sys 0m0.093s | |
// | |
// I believe hash consing + auto memoizing is useful and **not** replicable by memoizing because: | |
// | |
// | |
// 1. Saving **a lot** of memory (think voxel worlds...) | |
// | |
// Suppose you use a balanced tree to represent an array. | |
// | |
// data Tree a = Node (Tree a) (Tree a) | Leaf a | |
// | |
// Suppose that you fill an array of length 8 with zeros. | |
// | |
// tree = Node | |
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0))) | |
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0))) | |
// | |
// Without hash consing, we have this on memory: | |
// a = 0 0 | |
// b = 0 0 | |
// c = 0 0 | |
// d = 0 0 | |
// e = a b | |
// f = c d | |
// tree = e f | |
// So, 8*(8-1)*2 = 3584 bytes. | |
// | |
// With hash consing, we have this: | |
// a = 0 0 | |
// b = a a | |
// tree = b b | |
// So, log2(8)*2*32 = 192 bytes | |
// | |
// Similarly, for a 1024x1024 array we have | |
// | |
// No hash consing : 67043328 bytes = 67 mb | |
// Hash consing : 1280 bytes = 0.00128 mb | |
// | |
// I actually *really* needed this for | |
// | |
// | |
// 2. Automatic memoization | |
// | |
// With hash consing, the naive fib is O(N) instead of O(2^N). | |
// How cool would it be to teach students the naive fib
// and have it actually work? But there is more to this than that.
// You can argue that this can be replicated by just appending | |
// "memoize" to your function definition, but that is not true. | |
// because: | |
// | |
// a) Except if you are memoizing functions from ints to ints | |
// and so on, you need to take the hash of the structure and | |
// that can be expensive. You essentially can't memoize | |
// `length` on haskell in a way that actually makes it faster. | |
// With hash-consing, `length [GIANT_LIST]` will only take a lot | |
// of time once. Then never again. | |
// | |
// b) It still doesn't memoize subexpressions. | |
// | |
// c) It protects the whole language from duplicated computation.
// Not just what you specifically tag.
// | |
// And I really believe this will make real programs MUCH faster. | |
// Think in how much duplicated computation happens under the hoods. | |
// Hash-consing is the kind of thing that has a bad rep because it | |
// takes a 2~3 constant factor out of small benchmarks. | |
// But for real-life programs the win can be stupidly large.
// | |
// | |
// 3. O(1) equality | |
// | |
// With memoization, you can compare any structure for equality in O(1) | |
// regardless of how deep it is. How cool would it be to be able to | |
// guiltlessly compare for equality in production code, without having
// to create IDs for hashing and so on. | |
// | |
// | |
// 4. O(1) hashing | |
// | |
// Since everything is hashed, "hashable" isn't necessary anymore. | |
// You can take the hash of anything in O(1) as well. | |
// | |
// | |
// If it is not unbearably hard to implement, I don't see why hash- | |
// -consing shouldn't be a compiler option.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment