Last active
January 21, 2018 15:29
-
-
Save rasky/5f2a2b33b81b3673bca6934d0d1e97c0 to your computer and use it in GitHub Desktop.
Bloom filter parameters simulation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
# -*- encoding: utf-8 -*- | |
import math | |
N0 = 128 # number of 8-byte ints in intlist before we switch to bloom | |
E = 0.003 # desired false-positive error rate | |
P = 0.5 # desired fill ratio | |
R = 0.85 # tightening ratio | |
S = 2.0 # growth ratio | |
def log2(x): | |
return math.log(x) / math.log(2) | |
class AsciiOutput: | |
FMT = "%2s: %4s %5s %16s | %16s %16s | %16s %16s" | |
def __init__(self): | |
print(self.FMT % ("", "Hash", "Checks", "Partition size", "Step size", "Step count", "Tot size", "Tot count")) | |
print("-"*110) | |
@staticmethod | |
def intWithCommas(x): | |
if x < 0: | |
return '-' + intWithCommas(-x) | |
result = '' | |
while x >= 1000: | |
x, r = divmod(x, 1000) | |
result = ",%03d%s" % (r, result) | |
return "%d%s" % (x, result) | |
def printline(self, *args): | |
args = list(args) | |
args[3:] = [self.intWithCommas(a) for a in args[3:]] | |
print(self.FMT % tuple(args)) | |
def finalize(self): | |
pass | |
out = AsciiOutput() | |
# Compute first N so that first M (memory size) is about double the size | |
# of the first intlist. Doubling memory size as we grow is an acceptable | |
# pattern. | |
E0 = E * (1-R) * 3 | |
N0 = int(S * N0 * 64 * ((math.log(P) * math.log(1.0-P)) / abs(math.log(E0)))) | |
cnt = 0 | |
tot = 0 | |
hfn = 0 | |
for i in range(24): | |
e = E0 * math.pow(R, i) | |
n = N0 * math.pow(S, i) | |
k = int(math.ceil(-log2(e))) | |
m = n / ((math.log(P) * math.log(1.0-P)) / abs(math.log(e))) | |
m = int((m+7)/8) | |
m = (m + 63) / 64 * 64 | |
tot += m | |
cnt += n | |
hfn += k | |
out.printline(i, k, hfn, int(m/k), m, n, tot, cnt) | |
out.finalize() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Output with default parameters: