Created
June 17, 2019 06:04
-
-
Save jonarddoci/7193c20e0da9965b6fdcbf0a80f7fcb4 to your computer and use it in GitHub Desktop.
scrappy trie, nodes with data, multiple key lookup implementation (duplicates not handled yet)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<!-- does not handle duplicates well, need to use hashing to fix that --> | |
<!doctype html> | |
<html> | |
<head> | |
<meta charset="utf-8"> | |
<title>test</title> | |
<base href="/"> | |
<meta name="viewport" content="width=device-width, initial-scale=1"> | |
</head> | |
<body> | |
<script> | |
var dataSource = [ | |
{ | |
type: "residential", | |
locationName: "home", | |
postalCode: "12340" | |
}, | |
{ | |
type: "office", | |
locationName: "my hq", | |
postalCode: "90001" | |
}, | |
{ | |
type: "office", | |
locationName: "my office1", | |
postalCode: "10002" | |
}, | |
{ | |
type: "office", | |
locationName: "my office2", | |
postalCode: "10003" | |
} | |
]; | |
var Trie = { | |
root: null | |
}; | |
var locationKeyMakers = [ | |
function(data) { | |
return data.locationName.split(/\W+/).filter(function(value) {return !!value;}); | |
}, | |
function(data) { | |
return [data.locationName]; | |
}, | |
function(data) { | |
return [data.postalCode]; | |
} | |
]; | |
function addData(trie, data, keyMakerList) { | |
var keys = {}; | |
keyMakerList.map(function(keyMaker) {return keyMaker(data)} ).reduce(function(o1, o2) {return o1.concat(o2)}) | |
.forEach(function(key) {keys[key] = key}); | |
for (var key in keys) { | |
if (keys.hasOwnProperty(key)) { | |
insertTrieNode(trie, data, key); | |
} | |
} | |
} | |
function insertTrieNode(trie, data, key) { | |
if(!key || !data || !trie) { | |
return; | |
} | |
if (!trie.root) { | |
trie.root = { | |
key: '', | |
children: {} | |
}; | |
} | |
var iterator = trie.root; | |
var parent = trie.root; | |
for (var i = 0; i < key.length; i++) { | |
var charAt = key[i]; | |
if (charAt in iterator.children) { | |
if (i === key.length - 1) { | |
iterator.children[charAt].data = iterator.children[charAt].data.concat(data) | |
} else { | |
iterator = iterator.children[charAt]; | |
parent = iterator; | |
} | |
} else { | |
if (i === (key.length - 1)) { | |
parent.children[charAt] = { | |
key: charAt, | |
data: [data], | |
children: {} | |
} | |
} else { | |
parent.children[charAt] = { | |
key: charAt, | |
children: {} | |
}; | |
iterator = parent.children[charAt]; | |
parent = iterator; | |
} | |
} | |
} | |
} | |
function gatherChildren(trieNode) { | |
if (!trieNode) { | |
return []; | |
} | |
var iterator = trieNode; | |
if (!iterator.children) { | |
if (iterator.data) { | |
return iterator.data; | |
} else { | |
return []; | |
} | |
} | |
results = []; | |
if (iterator.data) { | |
results = results.concat(iterator.data); | |
} | |
for (var childKey in iterator.children) { | |
if (iterator.children.hasOwnProperty(childKey)) { | |
results = results.concat(gatherChildren(iterator.children[childKey])); | |
} | |
} | |
return results; | |
} | |
function prefixSearch(trie, prefix) { | |
if (!trie || !trie.root || !prefix) { | |
return []; | |
} | |
var iterator = trie.root; | |
for (var i = 0; i < prefix.length; i++) { | |
var charAt = prefix[i]; | |
if (!iterator.children[charAt]) { | |
return []; | |
} | |
if (i === (prefix.length-1)) { | |
return gatherChildren(iterator.children[charAt]) | |
} else { | |
iterator = iterator.children[charAt]; | |
} | |
} | |
} | |
addData(Trie, dataSource[0], locationKeyMakers); | |
addData(Trie, dataSource[1], locationKeyMakers); | |
addData(Trie, dataSource[2], locationKeyMakers); | |
console.log(Trie); | |
// should return all offices, since they start with my | |
console.log(prefixSearch(Trie, 'my')); | |
// should return hq | |
console.log(prefixSearch(Trie, '9')); | |
// should return home and hq | |
console.log(prefixSearch(Trie, 'h')); | |
console.log(prefixSearch(Trie, 'my office1')); | |
function benchmark() { | |
var searchData = buildBenchmarkDataSource(); | |
var prefixSearches = buildBenchmarkSearchStrings(); | |
// build trie | |
var startDate = new Date().getTime(); | |
var benchMarkTrie = {}; | |
searchData.forEach(function (value) { addData(benchMarkTrie, value, [function(data) {return [data.locationName]}])}); | |
var endDate = new Date().getTime(); | |
console.log(" trie build: ", endDate - startDate); | |
// end build trie | |
//////////////// | |
//////////////// ARRAY SEARCH | |
//////////////// | |
startDate = new Date().getTime(); | |
var arraySearchResults = {}; | |
for (var i = 0; i < prefixSearches.length; i++) { | |
arraySearchResults[i] = arraySearch(searchData, prefixSearches[i]); | |
} | |
endDate = new Date().getTime(); | |
console.log(" array search: ", endDate - startDate); | |
//////////////// | |
//////////////// ARRAY END | |
//////////////// | |
//////////////// | |
//////////////// TRIE SEARCH | |
//////////////// | |
startDate = new Date().getTime(); | |
var trieSearchResults = {}; | |
for (var j = 0; j < prefixSearches.length; j++) { | |
trieSearchResults[j] = prefixSearch(benchMarkTrie, prefixSearches[j]); | |
} | |
endDate = new Date().getTime(); | |
console.log(" TRIE search: ", endDate - startDate); | |
//////////////// | |
//////////////// TRIE END | |
//////////////// | |
for (var k = 0; k < prefixSearches.length; k++) { | |
if (arraySearchResults[k].length !== trieSearchResults[k].length) { | |
console.log(k, ' results do not match query:', prefixSearches[k], 'array results ', arraySearchResults[k], 'trie results', trieSearchResults[k]) | |
} | |
} | |
} | |
function buildBenchmarkSearchStrings() { | |
var data = []; | |
for (var i = 1; i < 1000; i++) { | |
data.push(getRandomString(2)); | |
} | |
return data; | |
} | |
function buildBenchmarkDataSource() { | |
var data = []; | |
for (var i = 0; i < 100000; i++) { | |
data.push({ | |
locationName: getRandomString(), | |
postalCode: getRandomString(), | |
type: getRandomString() | |
}) | |
} | |
return data; | |
} | |
function arraySearch(array, prefix) { | |
var results = []; | |
for (var i = 0; i < array.length; i++) { | |
var el = array[i]; | |
if (el.locationName.startsWith(prefix)) { | |
results.push(el); | |
} | |
} | |
return results; | |
} | |
function getRandomString(length) { | |
length = length || 20; | |
return (Math.random().toString(36) + Math.random().toString(36) + Math.random().toString(36) + Math.random().toString(36)).substring(2, length + 2); | |
} | |
// benchmark(); | |
</script> | |
</body> | |
</html> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment