Last active
October 27, 2021 01:41
-
-
Save viclib/507c1a1f7c1206158938 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// This is a simple interpreter for the untyped lambda calculus | |
// in JavaScript, used as an example of the possible benefits of | |
// hash-consing. Skip to the end of the file to see the tests. | |
// ~ SrPeixinho | |
// 2^32 and its reciprocal. Kept for parity with the original file;
// nothing below currently reads them.
var i32size = Math.pow(2, 32);
var i32sizeInv = 1 / i32size;

// Global instrumentation counters, printed at the end of the script.
var stats = {
  beta_reductions: 0,        // calls to Runtime.apply (actual beta steps)
  reductions: 0,             // calls to Runtime.normal
  unmemoized_reductions: 0,  // normal() calls that missed the memo table
  subs_calls: 0,             // calls to Runtime.subs
  unmemoized_subs_calls: 0,  // subs() calls that missed the memo table
  max_var: 0                 // largest de-Bruijn index seen while normalizing
};
// A tiny graph-reduction runtime for the untyped lambda calculus.
// Terms are packed into 32-bit integers; application nodes index into
// the left_/right_ arrays. When `hashCons` is true, structurally equal
// applications are shared through an open hash table, which also makes
// the normal-form and substitution memo tables effective.
function Runtime(hashCons){
  this.subsMemo = {};      // memo for subs(), keyed by a string of its args
  this.left_ = [];         // function side of each allocated application
  this.right_ = [];        // argument side of each allocated application
  this.normal_ = {};       // memo: node -> its normal form
  this.buckets = 1000003;  // prime bucket count for the consing table
  this.hash = [];
  this.hashCons = hashCons;
  for (var b = 0; b < this.buckets; b++){
    this.hash.push([]);
  }
}
Runtime.prototype.app = function(x, y){
  // Applies term "x" to term "y", allocating an application node.
  // This is where the memory allocation happens. With hash-consing on,
  // an existing node with the same (x, y) children is returned instead
  // of allocating a duplicate.
  var bucket;
  if (this.hashCons){
    var bucketIndex = ((x >>> 0) * 31 + (y >>> 0)) % this.buckets;
    bucket = this.hash[bucketIndex];
    for (var j = 0, l = bucket.length; j < l; j += 1){
      var cached = bucket[j];  // was an assignment to the hoisted `ptr` before its `var`
      if (x === this.left(cached) && y === this.right(cached))
        return cached;
    }
  }
  // Fresh node: tag bit 0x80000000 marks "application"; the low 20 bits
  // index into the left_/right_ arrays.
  var pos = this.left_.length;
  var ptr = 0x80000000 | pos;
  this.left_[pos] = x;
  this.right_[pos] = y;
  if (this.hashCons)
    bucket.push(ptr);  // was bucket[j] = ptr, relying on j === bucket.length
  return ptr;
};
// True when the node's top 12 tag bits mark an application cell.
Runtime.prototype.isApp = function(node){
  var tag = node >>> 20;
  return tag === 0x800;
};
// Function (left child) of an application node; the low 20 bits of the
// node are an index into left_.
Runtime.prototype.left = function(node){
  var index = node & 0x000FFFFF;
  return this.left_[index];
};
// Argument (right child) of an application node; the low 20 bits of the
// node are an index into right_.
Runtime.prototype.right = function(node){
  var index = node & 0x000FFFFF;
  return this.right_[index];
};
// Truthy when the node carries at least one lambda in its abstraction
// counter (bits 20-29). Note: returns the masked bits themselves, not a
// boolean — callers rely only on truthiness.
Runtime.prototype.isLam = function(node){
  var lamBits = node & 0x3FF00000;
  return lamBits;
};
// Wraps "node" in one more lambda by bumping the abstraction counter
// held in bits 20-29.
Runtime.prototype.lam = function(node){
  var oneLambda = 0x00100000;
  return node + oneLambda;
};
// Strips one binder: for an application node the "body" is its right
// child; otherwise remove one lambda from the abstraction counter.
Runtime.prototype.body = function(node){
  if ((node >>> 20) === 0x800){
    return this.right(node);
  }
  return node - 0x00100000;
};
// Variables are bare de-Bruijn indices: every tag bit (>= 20) is zero.
Runtime.prototype.isVar = function(node){
  var tag = node >>> 20;
  return !tag;
};
// Reduces term "x" to its normal form (normal-order evaluation).
// With hash-consing on, results are memoized per node in normal_, so a
// shared subterm is only ever normalized once.
Runtime.prototype.normal = function(x){
  ++stats.reductions;
  var self = this; // NOTE(review): looks unused — candidate for removal
  if (this.hashCons && this.normal_[x])
    return this.normal_[x];
  if (this.isApp(x)){
    // Normalize both children first; if the left side became a lambda,
    // the cascade below beta-reduces via apply().
    var l = this.normal(this.left(x));
    var r = this.normal(this.right(x));
    // NOTE(review): ll/lr appear to be written but never read.
    var ll = this.left(l);
    var lr = this.right(l);
  };
  ++stats.unmemoized_reductions;
  var normal = (
    // Variable: already normal. The assignment (stats.max_var=x) also
    // evaluates to x, so both branches yield x.
    this.isVar(x) ? (stats.max_var < x ? (stats.max_var=x) : x)
    // Lambda: normalize the body under the binder, then re-wrap.
    : this.isLam(x) ? this.lam(this.normal(this.body(x)))
    // Application of a lambda: perform the beta-reduction.
    : this.isLam(l) ? this.apply(l,r)
    // Plain application of a non-lambda: rebuild from normalized parts.
    : this.app(l,r));
  if (this.hashCons)
    this.normal_[x] = normal;
  return normal;
};
// De-Bruijn substitution: replaces, inside "x", the variables bound by
// the "d"-th enclosing abstraction with term "t", shifting free
// variables by "w". Memoized on a string key of all four arguments.
Runtime.prototype.subs = function (t,d,w,x){
  // Substitutes by "t" all variables of "x",
  // that are bound to "d"th abstraction
  // above it. Add "x" to free variables.
  ++stats.subs_calls;
  var hash = ""+t+"_"+d+"_"+w+"_"+x; // bad, bad, todo: improve
  if (this.hashCons && this.subsMemo[hash])
    return this.subsMemo[hash];
  ++stats.unmemoized_subs_calls;
  // Don't even bother trying to read this.
  // NOTE(review): the memo is written even when hashCons is off — it is
  // just never read in that case (wasted memory, not a behavior bug).
  // Cases: application — recurse into both children; lambda — recurse
  // under the binder with d+1; variable — substitute when it matches d
  // (re-shifting t's own frees), shift when free (x > d), keep otherwise.
  return this.subsMemo[hash]
    = this.isApp(x) ? this.app(this.subs(t,d,w,this.left(x)),this.subs(t,d,w,this.right(x)))
    : this.isLam(x) ? this.lam(this.subs(t,d+1,w,this.body(x)))
    : this.isVar(x) ? (t && x===d ? this.subs(0,-1,d,t) : x>d ? x+w : x)
    : x;
};
// Beta-reduction: substitute argument "x" into the body of lambda "f",
// then normalize the result.
Runtime.prototype.apply = function(f,x){
  ++stats.beta_reductions;
  var substituted = this.subs(x, 0, -1, this.body(f));
  return this.normal(substituted);
};
// Renders a term: applications as "(f x)", lambdas as "(λ body)",
// variables as "v<de-Bruijn index>".
Runtime.prototype.toString = function(x){
  if (this.isApp(x)){
    return "("+this.toString(this.left(x))+" "+this.toString(this.right(x))+")";
  }
  if (this.isLam(x)){
    return "(λ "+this.toString(this.body(x))+")";
  }
  return "v"+x;
};
// Change this to "false" to disable hash consing.
var useHashConsing = true;
// Starts runtime + helpers.
var rt = new Runtime(useHashConsing);
// Shorthand constructors, declared with `var` so they are not implicit
// globals (the original assignments would throw in strict mode).
var L = function(x){ return rt.lam(x); };
var A = function(a,b){ return rt.app(a,b); };
// A few combinators (de-Bruijn encoded).
var id = L(0);                         // λx. x
var succ = L(L(L(A(1,A(A(2,1),0)))));  // Church successor
var mul = L(L(L(A(2,A(1,0)))));        // Church multiplication
// Church numbers up to 100.
var c = [L(L(0))];
for (var i = 0; i < 100; ++i)
  c.push(A(succ, c[c.length - 1]));
// This is a rather stupid way to take the power of 2, but
// may be a good example. It creates a scott-encoded binary
// tree of depth = n with all leaves = 1, and then sums it.
// NOTE(review): the term below is an opaque de-Bruijn encoding;
// treat it as data.
pow2 = L(A(A(A(A(A(A(0,L(L(L(L(L(A(A(A(2,2),A(4,3)),A(4,3))))))))
,L(L(L(L(A(1,3)))))),L(L(A(1,0)))),L(L(L(A(A(A(A(A(0,2),L(0)),
L(L(0))),L(L(L(A(1,A(A(2,1),0)))))),A(A(A(1,2),L(0)),
L(L(0)))))))),L(0)),L(L(0))))
// Test = (pow2 church_9) -- 2^9
main = A(pow2,c[9]);
// Outputs results: the normal form of main, then the instrumentation
// counters accumulated in `stats` during reduction.
console.log("Normal of `main`:");
console.log(rt.toString(rt.normal(main)));
console.log();
console.log("Node count :",rt.left_.length);
console.log("Beta-reductions :",stats.beta_reductions);
console.log("Reductions :",stats.reductions,"("+(stats.reductions-stats.unmemoized_reductions)+" memoized)");
console.log("Calls to subs :",stats.subs_calls,"("+(stats.subs_calls-stats.unmemoized_subs_calls)+" memoized)");
console.log("Max bruijn var :",stats.max_var);
//Result on my computer: | |
// | |
//With hash consing: | |
// | |
//Node count : 3589 | |
//Beta-reductions : 893 | |
//Reductions : 210516 (204195 memoized) | |
//Calls to subs : 15095 (5556 memoized) | |
//Max bruijn var : 30 | |
//real 0m0.227s | |
//user 0m0.195s | |
//sys 0m0.036s | |
// | |
// Without hash consing: | |
// | |
//Node count : 695773 | |
//Beta-reductions : 13616 | |
//Reductions : 893424 (0 memoized) | |
//Calls to subs : 972923 (0 memoized) | |
//Max bruijn var : 30 | |
//real 0m0.943s | |
//user 0m0.853s | |
//sys 0m0.093s | |
// | |
// I believe hash consing + auto memoizing is useful and **not** replicable by memoizing because: | |
// | |
// | |
// 1. Saving **a lot** of memory (think voxel worlds...) | |
// | |
// Suppose you use a balanced tree to represent an array. | |
// | |
// data Tree a = Node (Tree a) (Tree a) | Leaf a | |
// | |
// Suppose that you fill an array of length 8 with zeros. | |
// | |
// tree = Node | |
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0))) | |
// (Node (Node (Leaf 0) (Leaf 0)) (Node (Leaf 0) (Leaf 0))) | |
// | |
// Without hash consing, we have this on memory: | |
// a = 0 0 | |
// b = 0 0 | |
// c = 0 0 | |
// d = 0 0 | |
// e = a b | |
// f = c d | |
// tree = e f | |
// So, 8*(8-1)*2 = 3584 bytes. | |
// | |
// With hash consing, we have this: | |
// a = 0 0 | |
// b = a a | |
// tree = b b | |
// So, log2(8)*2*32 = 192 bytes | |
// | |
// Similarly, for a 1024x1024 array we have | |
// | |
// No hash consing : 67043328 bytes = 67 mb | |
// Hash consing : 1280 bytes = 0.00128 mb | |
// | |
// I actually *really* needed this for | |
// | |
// | |
// 2. Automatic memoization | |
// | |
// With hash consing, the naive fib is O(N) instead of O(2^N). | |
// How cool would it be to teach students the naive fib
// and have it actually work? But there is more to this than that.
// You can argue that this can be replicated by just appending | |
// "memoize" to your function definition, but that is not true. | |
// because: | |
// | |
// a) Except if you are memoizing functions from ints to ints | |
// and so on, you need to take the hash of the structure and | |
// that can be expensive. You essentially can't memoize | |
// `length` on haskell in a way that actually makes it faster. | |
// With hash-consing, `length [GIANT_LIST]` will only take a lot | |
// of time once. Then never again. | |
// | |
// b) It still doesn't memoize subexpressions. | |
// | |
// c) It protects the whole language from duplicated computation.
// Not just what you specifically tag.
// | |
// And I really believe this will make real programs MUCH faster. | |
// Think in how much duplicated computation happens under the hoods. | |
// Hash-consing is the kind of thing that has a bad rep because it | |
// takes a 2~3 constant factor out of small benchmarks. | |
// But for real-life programs the win can be stupidly large.
// | |
// | |
// 3. O(1) equality | |
// | |
// With memoization, you can compare any structure for equality in O(1) | |
// regardless of how deep it is. How cool would it be to be able to | |
// guiltlessly compare for equality in production code, without having
// to create IDs for hashing and so on. | |
// | |
// | |
// 4. O(1) hashing | |
// | |
// Since everything is hashed, "hashable" isn't necessary anymore. | |
// You can take the hash of anything in O(1) as well. | |
// | |
// | |
// If it is not unbearably hard to implement, I don't see why hash- | |
// -consing shouldn't be a compiler option.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment