Created
February 22, 2025 09:50
-
-
Save jweinst1/7f74ce691cbd242240a1c76f58afd73c to your computer and use it in GitHub Desktop.
A hash map that doesn't resolve collisions and tries to be a bloom filter but gives location and membership info
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <map> | |
#include <string> | |
#include <vector> | |
#include <cstdio> | |
#include <cstdlib> | |
#include <cstdint> | |
#include <ctime> | |
#include <optional> | |
#include <chrono> | |
#include <memory> | |
using namespace std::chrono; | |
static void put_rand_chars_in_string(std::string& foo, size_t n) { | |
while(n--) { | |
foo.push_back((rand() % 90) + 32); | |
} | |
} | |
template<size_t tableSize> | |
struct TableMap { | |
uint64_t slots[tableSize] = {0}; | |
std::hash<std::string> hasher; | |
void printColls() const { | |
std::map<size_t, std::vector<size_t>> collMap; | |
for (size_t i = 0; i < tableSize; ++i) | |
{ | |
uint64_t ptr = slots[i]; | |
auto found = collMap.find(ptr); | |
if (found != collMap.end()) { | |
found->second.push_back(i); | |
} else { | |
collMap[ptr] = std::vector<size_t>{i}; | |
} | |
} | |
for (const auto& score: collMap) { | |
printf("Hits %zu, magnitude %zu Ratio %f\n", | |
score.first, score.second.size(), | |
((double)score.second.size()) / (double)tableSize); | |
} | |
} | |
void insert(const std::string& key, uint64_t num) { | |
uint64_t* got = &slots[hasher(key) % tableSize]; | |
*got += 1; | |
} | |
}; | |
static void bigTest() { | |
static constexpr size_t testSize = 1000000; | |
static constexpr size_t mapSize = testSize * 3; | |
std::vector<std::string> myStrings; | |
auto tmap = std::make_unique<TableMap<mapSize>>(); | |
for (int i = 0; i < testSize; ++i) | |
{ | |
std::string sample; | |
put_rand_chars_in_string(sample, 10); | |
myStrings.push_back(sample); | |
} | |
auto start = high_resolution_clock::now(); | |
for (uint64_t i = 0; i < testSize; ++i) | |
{ | |
tmap->insert(myStrings[i], i); | |
} | |
auto stop = high_resolution_clock::now(); | |
auto duration = duration_cast<microseconds>(stop - start); | |
std::printf("Time taken %llu\n", duration.count()); | |
std::printf("Table size %zu\n", sizeof(tmap->slots) / sizeof(tmap->slots[0])); | |
std::printf("Test size %zu\n", testSize); | |
tmap->printColls(); | |
} | |
/* | |
Time taken 10424 | |
Table size 3000000 | |
Test size 1000000 | |
Hits 0, magnitude 2149442 Ratio 0.716481 | |
Hits 1, magnitude 716803 Ratio 0.238934 | |
Hits 2, magnitude 119357 Ratio 0.039786 | |
Hits 3, magnitude 13196 Ratio 0.004399 | |
Hits 4, magnitude 1122 Ratio 0.000374 | |
Hits 5, magnitude 74 Ratio 0.000025 | |
Hits 6, magnitude 5 Ratio 0.000002 | |
Hits 7, magnitude 1 Ratio 0.000000 | |
*/ | |
int main(int argc, char const *argv[]) | |
{ | |
srand(time(nullptr)); | |
bigTest(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment