Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Created February 22, 2025 09:50
Show Gist options
  • Save jweinst1/7f74ce691cbd242240a1c76f58afd73c to your computer and use it in GitHub Desktop.
Save jweinst1/7f74ce691cbd242240a1c76f58afd73c to your computer and use it in GitHub Desktop.
A hash map that doesn't resolve collisions and tries to be a bloom filter but gives location and membership info
#include <map>
#include <string>
#include <vector>
#include <cstdio>
#include <cstdlib>
#include <cstdint>
#include <ctime>
#include <optional>
#include <chrono>
#include <memory>
using namespace std::chrono;
static void put_rand_chars_in_string(std::string& foo, size_t n) {
while(n--) {
foo.push_back((rand() % 90) + 32);
}
}
template<size_t tableSize>
struct TableMap {
uint64_t slots[tableSize] = {0};
std::hash<std::string> hasher;
void printColls() const {
std::map<size_t, std::vector<size_t>> collMap;
for (size_t i = 0; i < tableSize; ++i)
{
uint64_t ptr = slots[i];
auto found = collMap.find(ptr);
if (found != collMap.end()) {
found->second.push_back(i);
} else {
collMap[ptr] = std::vector<size_t>{i};
}
}
for (const auto& score: collMap) {
printf("Hits %zu, magnitude %zu Ratio %f\n",
score.first, score.second.size(),
((double)score.second.size()) / (double)tableSize);
}
}
void insert(const std::string& key, uint64_t num) {
uint64_t* got = &slots[hasher(key) % tableSize];
*got += 1;
}
};
static void bigTest() {
static constexpr size_t testSize = 1000000;
static constexpr size_t mapSize = testSize * 3;
std::vector<std::string> myStrings;
auto tmap = std::make_unique<TableMap<mapSize>>();
for (int i = 0; i < testSize; ++i)
{
std::string sample;
put_rand_chars_in_string(sample, 10);
myStrings.push_back(sample);
}
auto start = high_resolution_clock::now();
for (uint64_t i = 0; i < testSize; ++i)
{
tmap->insert(myStrings[i], i);
}
auto stop = high_resolution_clock::now();
auto duration = duration_cast<microseconds>(stop - start);
std::printf("Time taken %llu\n", duration.count());
std::printf("Table size %zu\n", sizeof(tmap->slots) / sizeof(tmap->slots[0]));
std::printf("Test size %zu\n", testSize);
tmap->printColls();
}
/*
Time taken 10424
Table size 3000000
Test size 1000000
Hits 0, magnitude 2149442 Ratio 0.716481
Hits 1, magnitude 716803 Ratio 0.238934
Hits 2, magnitude 119357 Ratio 0.039786
Hits 3, magnitude 13196 Ratio 0.004399
Hits 4, magnitude 1122 Ratio 0.000374
Hits 5, magnitude 74 Ratio 0.000025
Hits 6, magnitude 5 Ratio 0.000002
Hits 7, magnitude 1 Ratio 0.000000
*/
int main(int argc, char const *argv[])
{
srand(time(nullptr));
bigTest();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment