Last active
January 3, 2019 04:05
-
-
Save Buildstarted/4618872 to your computer and use it in GitHub Desktop.
Basic Bloom Filter implementation inspired by Kellabyte
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
namespace BST | |
{ | |
using System; | |
using System.Collections; | |
public class BloomFilter<T> | |
{ | |
private readonly int _size; | |
//how many times to run the hash method | |
private readonly int _hashCount; | |
private readonly BitArray _arr; | |
/// <summary> | |
/// Initialize the bloom filter with the size and hashCount to use | |
/// </summary> | |
/// <param name="size">Size of the bit array used to hold results</param> | |
/// <param name="hashCount">The number of hash methods to run on each item</param> | |
public BloomFilter(int size, int hashCount) | |
{ | |
_size = size; | |
_hashCount = hashCount; | |
_arr = new BitArray(_size); | |
} | |
/// <summary> | |
/// Computes a hash for a given item and stores | |
/// it in a lookup | |
/// </summary> | |
/// <param name="item"></param> | |
public void Add(T item) | |
{ | |
int baseHash = item.GetHashCode(); | |
for (int i = 0; i < _hashCount; i++) | |
{ | |
int index = ComputeHash(baseHash, i); | |
_arr[index] = true; | |
} | |
} | |
/// <summary> | |
/// Checks the lookup to see if an item probably | |
/// exists | |
/// </summary> | |
/// <param name="item"></param> | |
/// <returns></returns> | |
public bool Contains(T item) | |
{ | |
int basehash = item.GetHashCode(); | |
for (int i = 0; i < _hashCount; i++) | |
{ | |
int index = ComputeHash(basehash, i); | |
if (!_arr[index]) | |
{ | |
return false; | |
} | |
} | |
return true; | |
} | |
public double Ratio | |
{ | |
get | |
{ | |
int result = 0; | |
foreach (bool bit in _arr) | |
{ | |
if (bit) result++; | |
} | |
return (double)result / _arr.Count; | |
} | |
} | |
private int ComputeHash(int hashCode, int offset) | |
{ | |
//the hashcode needs to be calculated based on the | |
//offset suplied by the index | |
//this ensures that each time the method is run we get | |
//a different hashcode per offset | |
int result = (hashCode + (offset * hashCode)) % _arr.Count; | |
return Math.Abs((int)result); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment