-
-
Save verdy-p/5e167a3101e26af297719394d44d0c94 to your computer and use it in GitHub Desktop.
Compare some hash function for lua table implementation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--[[ | |
370104 words from https://github.com/dwyl/english-words/blob/master/words_alpha.txt | |
3/4 5/8 7/8 9/16 11/16 13/16 15/32 17/32 19/32 21/32 | |
(h<<5) + (h>>2) + x (3)84874 (4)81670 (4)113326 (4)80336 (2)96169 (5)111520 (4)125243 (3)79309 (4)87762 (5)95643 | |
(h<<4) + (h>>3) + x (2)84819 (1)81351 (3)113283 (5)80439 (3)96264 (2)111197 (3)125200 (4)79486 (2)87582 (2)95430 | |
(h<<5) - (h>>2) + x (1)84464 (2)81428 (2)113108 (2)80201 (4)96464 (4)111469 (1)125052 (2)79222 (3)87666 (3)95466 | |
(h<<4) - (h>>3) + x (4)85050 (3)81587 (1)113084 (1)80112 (1)96131 (1)111134 (2)125185 (1)79163 (1)87361 (1)95239 | |
((h + x) * 0xAAAB) >> 3 (5)85143 (5)81973 (5)113538 (3)80323 (5)96593 (3)111401 (5)125306 (5)79636 (5)88120 (4)95536 | |
]] | |
local seed = 0 | |
local dictfile = "words_alpha.txt" | |
local hash = {} | |
function hash.h129(h, x) | |
return (h<<5) + (h>>2) + x | |
end | |
function hash.h129_43(h,x) | |
return (h<<4) + (h>>3) + x | |
end | |
function hash.h127(h, x) | |
return (h<<4) - (h>>3) + x | |
end | |
function hash.h127_52(h, x) | |
return (h<<5) - (h>>2) + x | |
end | |
function hash.aaab(h, x) | |
return ((h + x) * 0xAAAB) >> 3 | |
end | |
local function shuffle(t) | |
local n = #t | |
for i = 1, n-1 do | |
local idx = math.random(i, n) | |
t[i], t[idx] = t[idx], t[i] | |
end | |
return t | |
end | |
local function readdict(filename) | |
local dict = {} | |
local n = 1 | |
for line in io.lines(filename) do | |
dict[n] = line | |
n = n + 1 | |
end | |
return dict | |
end | |
local function hashstring(str, func) | |
local h = seed ~ #str | |
str:gsub(".", function (c) | |
h = h ~ ( func(h, c:byte()) & 0xffffffff ) | |
return "" -- this make gsub faster | |
end) | |
return h | |
end | |
local function test(dict, slots, hashfunc) | |
local size = 1 << slots | |
local colliding = 0 | |
for i = 1, #dict-slots+1, slots do | |
local tbl = {} | |
for j = 0, slots-1 do | |
local h = hashstring(dict[i+j], hashfunc) % size | |
if tbl[h] then | |
colliding = colliding + 1 | |
else | |
tbl[h] = true | |
end | |
end | |
end | |
return colliding | |
end | |
local function genguid(n) | |
local random = math.random | |
local dict = {} | |
for i = 1, n do | |
local tmp = {} | |
for j = 1, 16 do | |
tmp[j] = string.format("%02x", random(0,255)) | |
end | |
dict[i] = table.concat(tmp) | |
end | |
return dict | |
end | |
local dict = shuffle(readdict(dictfile)) | |
-- local dict = genguid(100000) | |
print(#dict, "words") | |
local results = {} | |
for key in pairs(hash) do | |
results[key] = {} | |
end | |
local N = 22 | |
for i = 3, N, 2 do | |
local group = i | |
print(string.format("%d/%d", group, 1<<group)) | |
local rank = {} | |
for key, func in pairs(hash) do | |
local c = test(dict, group, func) | |
table.insert(rank, { c = c , key = key }) | |
end | |
table.sort(rank, function(a,b) return a.c < b.c end) | |
for r, obj in ipairs(rank) do | |
results[obj.key][i] = { c = obj.c , rank = r } | |
end | |
end | |
for key in pairs(hash) do | |
local tmp = {} | |
for i = 3, N, 2 do | |
local obj = results[key][i] | |
table.insert(tmp, string.format("(%d)%d", obj.rank, obj.c)) | |
end | |
print(key, table.concat(tmp, "\t")) | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment