Created
November 22, 2013 11:45
-
-
Save chiral/7598645 to your computer and use it in GitHub Desktop.
An implimentation of "Tree Edit Distance" algorithm. This is a straight forward code for the formula presented on the PFI's blog page: http://research.preferred.jp/2012/02/tree-edit-distance/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function attach(obj,methods) { | |
var wrap = function(a,f) { | |
return function() { return f.apply(a,arguments); } | |
}; | |
for (var name in methods) { | |
if (obj[name]) throw "conflict:"+name; | |
obj[name] = wrap(obj,methods[name]); | |
} | |
return obj; | |
} | |
function to_key_default($,$node) { | |
var n = $node[0]; | |
var a = n.attributes; | |
var r=[n.tagName.toLowerCase()]; | |
for (var i=0; i<a.length; i++) { | |
var s=a[i].nodeName; | |
if (s=='id' || s=='class') { | |
if (s=='id') s='#'; | |
if (s=='class') s='.'; | |
if (a[i].nodeValue) s+=a[i].nodeValue; | |
} | |
r.push(s); | |
} | |
return '<'+r.join(' ')+'>'; | |
} | |
function $children($,$node) { | |
var cs=[]; | |
$node.children().each(function(){ | |
cs.push($(this)); | |
}); | |
return cs; | |
} | |
function traverse($,$siblings,nodes,to_key) { | |
var siblings=[]; | |
var N=$siblings.length; | |
for (var i=N-1; i>=0; i--) { | |
var $cs=$children($,$siblings[i]); | |
var children=traverse($,$cs,nodes,to_key); | |
siblings.push(nodes.length); | |
nodes.push({ | |
id: nodes.length, | |
children: children, | |
key: to_key($,$siblings[i]) | |
}); | |
} | |
return siblings; | |
} | |
var init = function($,$node,to_key) { | |
var nodes = []; | |
if (!to_key) to_key=to_key_default; | |
var children=traverse($,$children($,$node),nodes,to_key); | |
nodes.push({id:nodes.length, | |
children:children, | |
key:to_key($,$node)}); | |
return attach({ | |
nodes:nodes, F:[{from:0,to:nodes.length-1}] | |
},{ | |
index: function(F) { | |
return F[F.length-1].from*this.nodes.length+F[0].to; | |
}, | |
max_index: function() { | |
return this.nodes.length*this.nodes.length; | |
}, | |
R: function(F) { | |
return F[0].to; | |
}, | |
C: function(F) { | |
var r=[]; | |
var c=this.nodes[F[0].to].children; | |
for (var i=c.length-1; i>=0; i--){ | |
var from=i>0?c[i-1]+1:F[0].from; | |
r.push({from:from,to:c[i]}); | |
} | |
return r; | |
}, | |
L: function(F,r) { | |
if (!r) r=[]; | |
for (var i=1; i<F.length; i++) { | |
r.push(F[i]); | |
} | |
return r; | |
}, | |
}); | |
} | |
var memoized = function(T1,T2) { | |
var tbl={}; | |
var index=function(F1,F2) { | |
var index1=T1.index(F1); | |
var index2=T2.index(F2); | |
var M1=T1.max_index(); | |
return M1*index2+index1; | |
}; | |
var size=function(F) { | |
return F[0].to-F[F.length-1].from+1; | |
}; | |
return attach({},{ | |
gamma: function(id1,id2) { | |
if (id1==null || id2==null) { | |
if (id1!=null) return 1; | |
if (id2!=null) return 1; | |
return 0; | |
} | |
if (T1.nodes[id1].key!=T2.nodes[id2].key) return 1; | |
return 0; | |
}, | |
distance: function(F1,F2) { | |
if (F1.length==0 || F2.length==0) { | |
if (F1.length) return size(F1); | |
if (F2.length) return size(F2); | |
return 0; | |
} | |
var ti=index(F1,F2); | |
var t=tbl[ti]; | |
if (t!=null) return t.d; | |
var d={}; | |
d.edit=this.gamma(T1.R(F1),T2.R(F2))+ | |
this.distance(T1.C(F1),T2.C(F2))+ | |
this.distance(T1.L(F1),T2.L(F2)); | |
d.del1=this.gamma(T1.R(F1),null)+ | |
this.distance(T1.L(F1,T1.C(F1)),F2); | |
d.del2=this.gamma(null,T2.R(F2))+ | |
this.distance(F1,T2.L(F2,T2.C(F2))); | |
for (var op in d) { | |
if (t==null || d[op]<t.d) t={op:op,d:d[op]}; | |
} | |
tbl[ti]=t; | |
return t.d; | |
}, | |
get_ops: function(F1,F2,ops) { | |
var key_str=function(T,F) { | |
var r=[]; | |
var to=F[0].to,from=F[F.length-1].from; | |
for (var i=to; i>=from; i--) { | |
r.push(T.nodes[i].key); | |
} | |
return r.join(','); | |
}; | |
if (F1.length==0 || F2.length==0) { | |
if (F1.length) ops.push('rest1:'+key_str(T1,F1)); | |
if (F2.length) ops.push('rest2:'+key_str(T2,F2)); | |
return; | |
} | |
var ti=index(F1,F2); | |
var t=tbl[ti]; | |
var k1=''+T1.nodes[F1[0].to].key; | |
var k2=''+T2.nodes[F2[0].to].key; | |
switch (t.op) { | |
case "del1": | |
ops.push('del1:'+k1); | |
this.get_ops(T1.L(F1,T1.C(F1)),F2,ops); | |
break; | |
case "del2": | |
ops.push('del2:'+k2); | |
this.get_ops(F1,T2.L(F2,T2.C(F2)),ops); | |
break; | |
case "edit": | |
ops.push(k1+(k1==k2?'=':'/')+k2); | |
this.get_ops(T1.C(F1),T2.C(F2),ops); | |
this.get_ops(T1.L(F1),T2.L(F2),ops); | |
break; | |
} | |
} | |
}); | |
} | |
exports.calc = function($,$node1,$node2,ops,to_key) { | |
var T1 = init($,$node1); | |
var T2 = init($,$node2); | |
var m=memoized(T1,T2); | |
var d=m.distance(T1.F,T2.F); | |
if (ops) m.get_ops(T1.F,T2.F,ops); | |
return d; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment