Created
August 23, 2022 20:58
-
-
Save vbezhenar/6289ef718f2e14725bf33cbbc8c6d7a2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import murmur332 from './murmur332.js'; | |
// https://en.wikipedia.org/wiki/Bloom_filter | |
// https://hur.st/bloomfilter/ | |
export default class BloomFilter { | |
#data; | |
#dataBitMask; | |
#hashCount; | |
/** | |
* Total number of bits: `(2 ** dataLengthP2) * 32` | |
* @param dataLengthP2 log2 size of the data array. | |
* Must be between 0 and 25. | |
* @param hashCount must be > 0 | |
*/ | |
constructor(dataLengthP2, hashCount) { | |
if (!(dataLengthP2 >= 0 && dataLengthP2 <= 25)) throw new Error('dataLengthP2 must be between 0 and 25'); | |
if (!(hashCount > 0)) throw new Error('hashCount must bet > 0'); | |
/* eslint-disable no-bitwise */ | |
const dataLength = 1 << dataLengthP2; | |
const dataBitMask = (1 << (dataLengthP2 + 5)) - 1; | |
/* eslint-enable no-bitwise */ | |
this.#data = new Int32Array(dataLength); | |
this.#dataBitMask = dataBitMask; | |
this.#hashCount = hashCount; | |
} | |
add(value) { | |
const valueArray = BloomFilter.#valueToInt32Array(value); | |
const data = this.#data; | |
const dataBitMask = this.#dataBitMask; | |
const hashCount = this.#hashCount; | |
for (let i = 0; i < hashCount; i += 1) { | |
const hashValue = murmur332(valueArray, i); | |
/* eslint-disable no-bitwise */ | |
const dataBitIndex = hashValue & dataBitMask; | |
const dataItemIndex = dataBitIndex >> 5; | |
const itemBitIndex = dataBitIndex & 0x1f; | |
data[dataItemIndex] |= (1 << itemBitIndex); | |
/* eslint-enable no-bitwise */ | |
} | |
} | |
has(value) { | |
const valueArray = BloomFilter.#valueToInt32Array(value); | |
const data = this.#data; | |
const dataBitMask = this.#dataBitMask; | |
const hashCount = this.#hashCount; | |
for (let i = 0; i < hashCount; i += 1) { | |
const hashValue = murmur332(valueArray, i); | |
/* eslint-disable no-bitwise */ | |
const dataBitIndex = hashValue & dataBitMask; | |
const dataItemIndex = dataBitIndex >> 5; | |
const itemBitIndex = dataBitIndex & 0x1f; | |
if ((data[dataItemIndex] & (1 << itemBitIndex)) === 0) return false; | |
/* eslint-enable no-bitwise */ | |
} | |
return true; | |
} | |
static #valueToInt32Array(value) { | |
switch (typeof value) { | |
case 'string': | |
return BloomFilter.#stringToInt32Array(value); | |
default: | |
throw new Error(`unsupported value type: ${typeof value}`); | |
} | |
} | |
static #stringToInt32Array(string) { | |
const stringLength = string.length; | |
if (stringLength === 0) return new Int32Array(0); | |
const arrayLength = Math.trunc((stringLength - 1) / 4) + 1; | |
const array = new Int32Array(arrayLength); | |
let nextArrayItem = 0; | |
let nextCharShift = 0; | |
let nextArrayIndex = 0; | |
for (let charIndex = 0; charIndex < stringLength; charIndex += 1) { | |
const charCode = string.charCodeAt(charIndex); | |
if (charCode > 127) throw new Error(`unsupported string "${string}": only ASCII strings are supported`); | |
/* eslint-disable no-bitwise */ | |
nextArrayItem |= charCode << nextCharShift; | |
/* eslint-enable no-bitwise */ | |
if (charIndex % 4 === 3) { | |
array[nextArrayIndex] = nextArrayItem; | |
nextArrayItem = 0; | |
nextCharShift = 0; | |
nextArrayIndex += 1; | |
} else { | |
nextCharShift += 8; | |
} | |
} | |
if (nextArrayItem !== 0) array[nextArrayIndex] = nextArrayItem; | |
return array; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment