Skip to content

Instantly share code, notes, and snippets.

@jweinst1
Last active March 10, 2025 02:07
Show Gist options
  • Save jweinst1/6fda9ddb5769633ca1bfc2a2f3bbde60 to your computer and use it in GitHub Desktop.
Save jweinst1/6fda9ddb5769633ca1bfc2a2f3bbde60 to your computer and use it in GitHub Desktop.
hash merge tree — a small LSM-style key/value store experiment in C++ (memtable flushed to mmap'd on-disk hash-table files)
#include <map>
#include <unordered_map>
#include <string>
#include <vector>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cstdint>
#include <ctime>
#include <optional>
#include <chrono>
#include <filesystem>
#include <regex>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
using namespace std::chrono;
// Matches "<letters>-<digits>" tokens inside a file name, e.g. "level-3" or "chunk-12".
static const std::regex HASH_FILE_REGEX("[a-zA-Z]+-[0-9]+");
// Token labels recognized by DataLocation::fromString.
static const std::string HASH_FILE_CHUNK = "chunk";
static const std::string HASH_FILE_LEVEL = "level";
/**
 * Open-addressing (linear probing) cache mapping a key hash to the chunk
 * index it was flushed to. A stored hash of 0 marks an empty slot, so a key
 * whose hash is exactly 0 cannot be cached (pre-existing limitation).
 */
struct ChunkCache {
    struct Slot {
        size_t hash = 0;   // 0 == empty slot
        size_t chunk = 0;
    };
    size_t count = 0;   // number of live insertions since the last expand
    size_t size = 0;    // slot capacity
    // Owned storage. Was a raw new[] whose destructor was commented out (a
    // leak); std::vector restores RAII and makes copies/destruction safe.
    std::vector<Slot> slots;
    ChunkCache(size_t amount): size(amount), slots(amount) {
    }
    // Triples the table and re-inserts every occupied slot. The previous
    // version re-inserted empty slots too, inflating `count` with phantoms.
    void expand() {
        std::vector<Slot> oldSlots = std::move(slots);
        size *= 3;
        count = 0;
        slots.assign(size, Slot{});
        for (const Slot& s : oldSlots) {
            if (s.hash != 0) {
                insert(s.hash, s.chunk);
            }
        }
    }
    // Returns the chunk recorded for `hash`, or (size_t)-1 when absent.
    // Probes at most one full lap; an empty slot terminates the search.
    size_t get(size_t hash) const {
        size_t i = hash % size;
        for (size_t probes = 0; probes < size; ++probes) {
            if (slots[i].hash == hash) {
                return slots[i].chunk;
            }
            if (slots[i].hash == 0) {
                return (size_t)-1;
            }
            i = (i + 1 < size) ? i + 1 : 0;
        }
        return (size_t)-1;
    }
    // Inserts hash -> chunk, growing first once load reaches 1/2. Duplicate
    // hashes occupy separate slots; get() returns the first one found.
    void insert(size_t hash, size_t chunk) {
        if (count >= size / 2) {
            expand();
        }
        ++count;
        size_t i = hash % size;
        for (size_t probes = 0; probes < size; ++probes) {
            if (slots[i].hash == 0) {
                slots[i].hash = hash;
                slots[i].chunk = chunk;
                return;
            }
            i = (i + 1 < size) ? i + 1 : 0;
        }
    }
};
// Returns the size in bytes of the file at fpath, or nullopt if stat() fails
// (e.g. the path does not exist or is not accessible).
static std::optional<size_t> getFileSizeOnDisk(const std::string& fpath) {
    struct stat info;
    const int rc = stat(fpath.c_str(), &info);
    if (rc != 0) {
        return std::nullopt;
    }
    return static_cast<size_t>(info.st_size);
}
// Appends n pseudo-random characters (codes 32..121 inclusive) to foo.
// Uses rand(), so output depends on the current srand() seed.
static void put_rand_chars_in_string(std::string& foo, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        foo.push_back(static_cast<char>((rand() % 90) + 32));
    }
}
// True iff base ends with suffix. The previous implementation compared
// base.find(suffix) — the FIRST occurrence — against the tail position, so it
// returned false for inputs like ("abcabc", "abc"); it also computed
// base.size() - suffix.size() with unsigned wraparound when suffix is longer.
static bool stringEndsWith(const std::string& base, const std::string& suffix) {
    if (suffix.size() > base.size()) {
        return false;
    }
    return base.compare(base.size() - suffix.size(), suffix.size(), suffix) == 0;
}
// True iff base starts with prefix. rfind(prefix, 0) only tests position 0,
// whereas the previous find(prefix) scanned the entire string on a mismatch
// before concluding the answer was "no" — same result, O(n*m) wasted work.
static bool stringStartsWith(const std::string& base, const std::string& prefix) {
    return base.rfind(prefix, 0) == 0;
}
// Identifies where a table file lives on disk: its compaction level and its
// chunk index within that level. Round-trips through the file-name format
// "level-<L>-chunk-<C>".
struct DataLocation {
    size_t level = 0;
    size_t chunk = 0;
    static std::optional<DataLocation> fromString(const std::string& base);
    void toString(std::string& target);
};
// Renders this location as its on-disk file name, "level-<L>-chunk-<C>".
void DataLocation::toString(std::string& target) {
    char name[256] = {'\0'};
    snprintf(name, sizeof(name), "level-%zu-chunk-%zu", level, chunk);
    target.assign(name);
}
// parses file patterns
// Parses "level-<N>" and "chunk-<M>" tokens out of a file name/path (tokens
// may appear anywhere, in any order). Returns nullopt when either token is
// missing or its number fails to parse. The previous version declared
// foundLevel/foundChunk but never set or checked them, so arbitrary strings
// (any stray file in the data directory) parsed as {level 0, chunk 0}.
std::optional<DataLocation> DataLocation::fromString(const std::string& base) {
    DataLocation dl;
    bool foundLevel = false;
    bool foundChunk = false;
    auto wordsBegin = std::sregex_iterator(base.begin(), base.end(), HASH_FILE_REGEX);
    auto wordsEnd = std::sregex_iterator();
    for (std::sregex_iterator i = wordsBegin; i != wordsEnd; ++i) {
        const std::string match = i->str();
        const auto splitter = match.find('-');
        const auto label = match.substr(0, splitter);
        if (label == HASH_FILE_LEVEL) {
            try {
                dl.level = std::stoll(match.substr(splitter + 1));
                foundLevel = true;
            } catch (const std::exception&) {
                return std::nullopt;
            }
        } else if (label == HASH_FILE_CHUNK) {
            try {
                dl.chunk = std::stoll(match.substr(splitter + 1));
                foundChunk = true;
            } catch (const std::exception&) {
                return std::nullopt;
            }
        }
    }
    if (!foundLevel || !foundChunk) {
        return std::nullopt;
    }
    return dl;
}
// Writes len bytes from str to stdout followed by a newline; when prefix is
// non-null it is printed first with a trailing space. The byte range need
// not be NUL-terminated.
static void print_mem_as_str(const char* prefix, const void* str, size_t len) {
    if (prefix != nullptr) {
        printf("%s ", prefix);
    }
    const char* cursor = static_cast<const char*>(str);
    for (size_t i = 0; i < len; ++i) {
        putc(cursor[i], stdout);
    }
    putc('\n', stdout);
}
// Lexicographic "less than" over two raw byte ranges, with the usual rule
// that a proper prefix compares less than the longer range.
static bool cmp_mem_lt(const void* lhs, size_t llen, const void* rhs, size_t rlen) {
    const size_t common = (llen < rlen) ? llen : rlen;
    const int order = memcmp(lhs, rhs, common);
    if (order != 0) {
        return order < 0;
    }
    return llen < rlen;
}
// Byte-wise equality of two raw ranges; ranges of differing length are never
// equal, and memcmp is skipped entirely in that case.
static bool cmp_mem_eq(const void* lhs, size_t llen, const void* rhs, size_t rlen) {
    return llen == rlen && memcmp(lhs, rhs, llen) == 0;
}
// Grows the vector by n zero-initialized bytes and returns the offset where
// the new region begins. The caller is expected to fill the region in.
static size_t resizeByteVector(std::vector<unsigned char>& bytes, size_t n) {
    const size_t start = bytes.size();
    bytes.resize(start + n);
    return start;
}
// Appends n bytes copied from data and returns the offset of the first
// appended byte. Uses vector::insert rather than resize+memcpy — one call
// instead of two, and it avoids invoking memcpy with a null source when a
// caller passes (nullptr, 0), which is undefined behavior.
static size_t extendByteVector(std::vector<unsigned char>& bytes, const void* data, size_t n) {
    const size_t off_before = bytes.size();
    const unsigned char* src = static_cast<const unsigned char*>(data);
    bytes.insert(bytes.end(), src, src + n);
    return off_before;
}
// Appends n copies of byte c and returns the offset of the first one.
static size_t extendByteVectorWithByte(std::vector<unsigned char>& bytes, unsigned char c, size_t n) {
    const size_t start = bytes.size();
    bytes.insert(bytes.end(), n, c);
    return start;
}
// Serializes value as [u64 length in host byte order][raw characters],
// appending to bytes; returns the offset of the length field.
static size_t extendByteVectorWithString(std::vector<unsigned char>& bytes, const std::string& value) {
    const size_t first = bytes.size();
    const uint64_t numSize = value.size();
    const unsigned char* sizePtr = reinterpret_cast<const unsigned char*>(&numSize);
    bytes.insert(bytes.end(), sizePtr, sizePtr + sizeof(numSize));
    bytes.insert(bytes.end(), value.begin(), value.end());
    return first;
}
/**
 * Non-owning view over a byte range: a raw pointer plus a length. The caller
 * must keep the underlying buffer alive for as long as the segment is used.
 * */
struct DataSegment {
    unsigned char* dataPtr;
    size_t len;
    DataSegment(unsigned char* data, size_t size): dataPtr(data), len(size) {}
    unsigned char* data() const { return dataPtr;}
    size_t size() const { return len; }
};
// A single on-disk hash table (written by dumpMemTableToDisk) mapped into
// memory read/write. NOTE(review): instances are passed around by value
// (optional returns, loadLevelsInDir); copies share the same fd and mapping,
// so only one copy should end up calling closeDown().
struct HashTableFile {
    unsigned char* mapped = nullptr;  // whole-file mmap, or nullptr once closed
    int fd = -1;
    size_t file_size = 0;
    size_t level = 0;  // parsed from the "level-N" token of the file name
    size_t chunk = 0;  // parsed from the "chunk-N" token of the file name
    std::string fpath;
    // Synchronously flushes dirty mapped pages back to the file.
    bool syncToDisk() {
        return msync(mapped, file_size, MS_SYNC) == 0;
    }
    // Opens and memory-maps an existing table file. Returns nullopt when the
    // file cannot be stat'ed, its name lacks level/chunk tokens, open fails,
    // or mmap fails.
    static std::optional<HashTableFile> fromFile(const std::string& fpath) {
        HashTableFile hf;
        const auto checkSize = getFileSizeOnDisk(fpath);
        if (!checkSize.has_value()) {
            return std::nullopt;
        }
        const auto checkLevelAndChunk = DataLocation::fromString(fpath);
        if (!checkLevelAndChunk.has_value()) {
            return std::nullopt;
        }
        const DataLocation& dl = checkLevelAndChunk.value();
        hf.level = dl.level;
        hf.chunk = dl.chunk;
        hf.file_size = checkSize.value();
        hf.fpath = fpath;
        hf.fd = open(fpath.c_str(), O_RDWR, 0666);
        if (hf.fd == -1) {
            return std::nullopt;
        }
        void* mapResult = mmap(nullptr, hf.file_size, PROT_READ | PROT_WRITE, MAP_SHARED, hf.fd, 0);
        if (mapResult == MAP_FAILED) {
            close(hf.fd);
            return std::nullopt;
        }
        hf.mapped = static_cast<unsigned char*>(mapResult);
        return hf;
    }
    // Scans dpath and groups every parseable table file into levels[level].
    // Guarantees at least one (possibly empty) level-0 slot afterwards.
    static void loadLevelsInDir(const std::string& dpath,
                                std::vector<std::vector<HashTableFile>>& levels) {
        const std::filesystem::path targetDir{dpath};
        for (auto const& dir_entry : std::filesystem::directory_iterator{targetDir}) {
            const auto result = HashTableFile::fromFile(dir_entry.path().string());
            if (result.has_value()) {
                const auto myVal = result.value();
                // grow the level list until this file's level index fits
                while (levels.size() < myVal.level + 1) {
                    levels.push_back(std::vector<HashTableFile>{});
                }
                levels[myVal.level].push_back(myVal);
            }
        }
        if (levels.size() == 0) {
            // have at least 0 level ready
            levels.push_back(std::vector<HashTableFile>{});
        }
    }
    unsigned char* data() const { return mapped; }
    size_t size() const { return file_size; }
    // Borrowed view over the mapped bytes; valid only until closeDown().
    DataSegment asDataSegment() const {
        return DataSegment{mapped, file_size};
    }
    // Unmaps and closes the file; guarded so repeat calls are harmless.
    void closeDown() {
        if (fd != -1 && mapped != nullptr) {
            munmap(mapped, file_size);
            close(fd);
            fd = -1;
            mapped = nullptr;
        }
    }
protected:
    HashTableFile() = default;
};
// Writes the whole byte vector to a new file at path, truncating any existing
// file. Returns false when the file cannot be opened or the write comes up
// short — the previous version ignored fwrite's return value and reported
// success even on failed or partial writes.
static bool byteVectorToFile(const std::vector<unsigned char>& bytes, const std::string& path) {
    FILE* fp = std::fopen(path.c_str(), "wb+");
    if (fp == nullptr) {
        return false;
    }
    // element size 1 so the returned count is directly comparable to size();
    // skip the call entirely for an empty vector (data() may be null)
    const size_t written = bytes.empty()
        ? 0
        : std::fwrite(bytes.data(), 1, bytes.size(), fp);
    const bool ok = (written == bytes.size());
    std::fclose(fp);
    return ok;
}
// Hashes a key exactly the way std::unordered_map<std::string, ...> does by
// default — std::hash<std::string> — so the serialized tables and the
// in-memory map always agree on hash values.
static inline size_t hashWithMapFunc(const std::string& key) {
    // todo prevent 1
    static const std::hash<std::string> hasher;
    return hasher(key);
}
// Reads the first u64 of a serialized table: the number of hash slots (the
// header then holds that many hashes followed by that many offsets). Uses
// memcpy instead of dereferencing a cast pointer — the old uint64_t* cast of
// an unsigned char* buffer violated strict aliasing and assumed alignment.
static uint64_t getSlotSizeOfTable(const DataSegment& bytes) {
    uint64_t slots = 0;
    memcpy(&slots, bytes.data(), sizeof(slots)); // one set for hash, other for index
    return slots;
}
// Total bytes minus slotCount*8. NOTE(review): the serialized header is
// (2*slotCount + 1) u64 words, so this looks off by slotCount+1 words if it
// is meant to measure just the key/value payload; the function is unused in
// this file — confirm intent before relying on it. Reads the count with
// memcpy instead of a strict-aliasing-violating uint64_t* cast.
static uint64_t getKeyAndValSizeOfTable(const std::vector<unsigned char>& bytes) {
    uint64_t slots = 0;
    memcpy(&slots, bytes.data(), sizeof(slots));
    return bytes.size() - (slots * sizeof(uint64_t));
}
// Advances a slot index by one, wrapping back to 0 when it would reach max.
static inline size_t incrementNumberSlot(size_t cur, size_t max) {
    return cur >= (max - 1) ? 0 : cur + 1;
}
// Serializes `pairs` into the on-disk table layout, overwriting `bytes`:
//   [u64 slotCount][u64 hash x slotCount][u64 offset x slotCount]
//   [(u64 keyLen, key bytes, u64 valLen, val bytes) records ...]
// slotCount equals the map's bucket_count(). Each entry's probe sequence
// starts at its bucket index and wraps linearly on collision — this must be
// compatible with hashSearchTable, which probes from hash % slotCount
// (assumes bucket(key) == hash % bucket_count — TODO confirm this holds for
// the standard library in use).
static void toHashedTable(const std::unordered_map<std::string, std::string>& pairs,
                          std::vector<unsigned char>& bytes) {
    bytes.clear();
    const size_t bucket_size = pairs.bucket_count();
    size_t hash_block_size = pairs.bucket_count() * 2; // [hash][index]
    // zeroed header: 1 count word + hash block + offset block
    extendByteVectorWithByte(bytes, 0, (hash_block_size + 1) * sizeof(uint64_t));
    uint64_t* hash_nums = (uint64_t*)bytes.data();
    hash_nums[0] = hash_block_size / 2; // slot count
    size_t hash_i = 0;
    for (size_t bucket_i = 0; bucket_i < pairs.bucket_count(); ++bucket_i) {
        hash_i = bucket_i; // probe sequence starts at the entry's bucket
        for (auto it = pairs.begin(bucket_i); it != pairs.end(bucket_i); ++it) {
            uint64_t spot = extendByteVectorWithString(bytes, it->first);
            uint64_t itemHash = hashWithMapFunc(it->first);
            extendByteVectorWithString(bytes, it->second);
            // re-read the base pointer: the extends above may reallocate
            uint64_t* hash_table = ((uint64_t*)bytes.data()) + 1;
            while (hash_table[hash_i] != 0) { // linear probe for a free slot
                hash_i = incrementNumberSlot(hash_i, bucket_size);
            }
            hash_table[hash_i] = itemHash;
            hash_table[hash_i + bucket_size] = spot; // offset of the key record
        }
    }
}
// Given a slot index returned by hashSearchTable, copies that entry's value
// into `val`. Walks: offset table -> key record -> value record.
void convertIndexToValueString(const DataSegment& bytes, uint64_t index, std::string& val) {
    // consider cacheing?
    uint64_t tableSize = getSlotSizeOfTable(bytes);
    // offsets live after the count word (1) and the hash block (tableSize words)
    uint64_t keyIndex = ((const uint64_t*)(bytes.data()))[1 + index + tableSize];
    const unsigned char* dataSpot = bytes.data() + keyIndex;
    uint64_t keySize = 0;
    uint64_t valSize = 0;
    memcpy(&keySize, dataSpot, sizeof(uint64_t));
    dataSpot += sizeof(uint64_t) + keySize; // skip over the key record
    memcpy(&valSize, dataSpot, sizeof(uint64_t));
    dataSpot += sizeof(uint64_t);
    val.assign((const char*)dataSpot, valSize);
}
// Looks up `key` in a serialized table, returning the hash-slot index of the
// matching entry or nullopt. Probes linearly from hash % slotCount, wrapping
// once around the table; an empty slot (0) terminates the search early. On a
// hash match, the stored key bytes are compared to rule out collisions.
std::optional<uint64_t> hashSearchTable(const DataSegment& bytes, const std::string& key) {
    const uint64_t* numReader = (uint64_t*)bytes.data();
    size_t numHashes = numReader[0];
    size_t numHashesMinusOne = numHashes - 1; // NOTE(review): unused
    const size_t myHash = hashWithMapFunc(key);
    size_t myHashSpot = (myHash % numHashes); // for dual index
    ++numReader; // step past the count word onto the hash block
    size_t hash_i = myHashSpot;
    if (numReader[hash_i] == 0) {
        return std::nullopt;
    }
    // implicit delete marker of 1: a tombstoned slot never stops the probe
    // and (assuming no real hash equals 1 — see "todo prevent 1") never matches
    if (numReader[hash_i] == myHash) {
        // offsets live numHashes words past the hash slots
        const unsigned char* byteSpot = bytes.data() + numReader[hash_i + numHashes];
        uint64_t keySize = 0;
        memcpy(&keySize, byteSpot, sizeof(uint64_t));
        byteSpot += sizeof(uint64_t);
        if (cmp_mem_eq(byteSpot, keySize, key.data(), key.size())) {
            return hash_i;
        }
    }
    ++hash_i;
    // probe forward to the end of the table ...
    while (hash_i < numHashes) {
        if (numReader[hash_i] == 0) {
            return std::nullopt;
        } else if (numReader[hash_i] == myHash) {
            const unsigned char* byteSpot = bytes.data() + numReader[hash_i + numHashes];
            uint64_t keySize = 0;
            memcpy(&keySize, byteSpot, sizeof(uint64_t));
            byteSpot += sizeof(uint64_t);
            if (cmp_mem_eq(byteSpot, keySize, key.data(), key.size())) {
                return hash_i;
            }
        }
        ++hash_i;
    }
    hash_i = 0;
    // ... then wrap around and probe from 0 back up to the start slot
    while (hash_i < myHashSpot) {
        if (numReader[hash_i] == 0) {
            return std::nullopt;
        } else if (numReader[hash_i] == myHash) {
            const unsigned char* byteSpot = bytes.data() + numReader[hash_i + numHashes];
            uint64_t keySize = 0;
            memcpy(&keySize, byteSpot, sizeof(uint64_t));
            byteSpot += sizeof(uint64_t);
            if (cmp_mem_eq(byteSpot, keySize, key.data(), key.size())) {
                return hash_i;
            }
        }
        ++hash_i;
    }
    return std::nullopt;
}
// Deletes `key` from a serialized table by overwriting its hash slot with
// the tombstone value 1. Returns false when the key is not present.
bool hashTableDelete(DataSegment& bytes, const std::string& key) {
    const auto found = hashSearchTable(bytes, key);
    if (!found.has_value()) {
        return false;
    }
    ((uint64_t*)bytes.data())[found.value() + 1] = 1; // delete marker
    return true;
}
// Copies every live entry of serialized table t1 into `merger`, returning the
// number of entries merged. Slot values 0 (empty) and 1 (tombstone) are
// skipped. Because unordered_map::insert keeps an existing key, entries
// already in `merger` take precedence over t1's.
size_t hashTableMergeInto(std::unordered_map<std::string, std::string>& merger, const DataSegment& t1) {
    // iter indexes for now
    uint64_t t1size = getSlotSizeOfTable(t1);
    size_t keysMerged = 0;
    const uint64_t* reader1 = (const uint64_t*)t1.data();
    // Hash slot k lives at word 1+k and its offset at word 1+t1size+k, so the
    // word index i must span [1, t1size] inclusive. The previous bound
    // (i < t1size) never visited the final slot, silently dropping any entry
    // stored there.
    for (size_t i = 1; i <= t1size; ++i)
    {
        if (reader1[i] < 2) // 0 = empty, 1 = tombstone
            continue;
        const unsigned char* byteSpot = t1.data() + reader1[i + t1size];
        uint64_t keySize = 0;
        uint64_t valSize = 0;
        memcpy(&keySize, byteSpot, sizeof(uint64_t));
        byteSpot += sizeof(uint64_t);
        const std::string curKey((const char*)byteSpot, keySize);
        byteSpot += keySize;
        memcpy(&valSize, byteSpot, sizeof(uint64_t));
        byteSpot += sizeof(uint64_t);
        const std::string curVal((const char*)byteSpot, valSize);
        merger.insert(std::make_pair(curKey, curVal));
        ++keysMerged;
    }
    return keysMerged;
}
// In-memory write buffer plus a hash->chunk cache that records which level-0
// file each flushed key went to.
struct MemTable {
    ChunkCache _cache;
    std::unordered_map<std::string, std::string> _items;
    size_t currentDataSize = 0; // approximate bytes of buffered keys + values
    // NOTE(review): 40M slots (two size_t each) allocates a very large cache
    // up front — on LP64 that is ~640 MB; confirm this sizing is intentional.
    MemTable(): _cache(40 * 1024 * 1024) {
        _items.max_load_factor(0.8);
    }
    // Records, for every buffered key, that it is being flushed to `chunk`.
    void cacheAtChunk(size_t chunk) {
        //hashWithMapFunc(const std::string& key)
        for (const auto& pair : _items) {
            _cache.insert(hashWithMapFunc(pair.first), chunk);
        }
    }
};
// Tunables for opening a HashMergeTable.
struct HashMergeTableConfig {
    size_t maxMemTableSize = 1024 * 1024; // bytes buffered before a flush to disk
    std::string dpath;                    // directory holding the level-*-chunk-* files
};
// A small LSM-flavored key/value store: writes accumulate in a MemTable and
// are flushed to mmap'd level-0 hash-table files once maxMemTableSize bytes
// are buffered.
class HashMergeTable {
public:
    // Loads existing table files from cfg.dpath; nullopt when the directory
    // cannot be iterated.
    static std::optional<HashMergeTable> openDB(const HashMergeTableConfig& cfg);
    // Syncs and then unmaps/closes every table file. Call once on shutdown.
    static void closeDB(HashMergeTable& table);
    bool get(const std::string& key, std::string& val);
    bool del(const std::string& key);
    bool put(const std::string&key, const std::string& value);
    void debug() {
        //_memTable.debugPrintCollisions();
    }
    // msync()s every mapped table file across all levels.
    void syncAllLevelFiles() {
        for (auto& level : _levels) {
            for (auto& hfile : level) {
                hfile.syncToDisk();
            }
        }
    }
    // Flushes the memtable into a new level-0 chunk file.
    bool dumpMemTableToDisk();
private:
    void closeDownAllFiles();
    HashMergeTable() = default;
    HashMergeTableConfig _cfg;
    std::string _dirPath; // NOTE(review): never set or read — _cfg.dpath is used instead
    MemTable _memTable;
    std::vector<std::vector<HashTableFile>> _levels; // _levels[level] = chunk files
};
void HashMergeTable::closeDownAllFiles() {
for (auto& level : _levels) {
for (auto& hfile : level) {
hfile.closeDown();
}
}
}
std::optional<HashMergeTable> HashMergeTable::openDB(const HashMergeTableConfig& cfg) {
HashMergeTable ht;
try {
HashTableFile::loadLevelsInDir(cfg.dpath, ht._levels);
} catch (std::filesystem::filesystem_error const& ex) {
(void)ex;
return std::nullopt;
}
ht._cfg = cfg;
return ht;
}
// Flushes every mapped table file to disk, then unmaps and closes them all.
void HashMergeTable::closeDB(HashMergeTable& table) {
    table.syncAllLevelFiles();
    table.closeDownAllFiles();
}
// Looks up `key`: first in the in-memory table, then — via the hash->chunk
// cache — in the level-0 table file the key was flushed to. Fills `val` and
// returns true on a hit. NOTE(review): only level 0 is consulted; deeper
// levels are never searched here. The previous version also printed a stray
// debug line ("Got dup!") to stdout on every miss; that is removed.
bool HashMergeTable::get(const std::string& key, std::string& val) {
    const auto found = _memTable._items.find(key);
    if (found != _memTable._items.end()) {
        val = found->second;
        return true;
    }
    const auto foundCache = _memTable._cache.get(hashWithMapFunc(key));
    if (foundCache != (size_t)-1) {
        const DataSegment got = _levels[0][foundCache].asDataSegment();
        const auto result = hashSearchTable(got, key);
        if (result.has_value()) {
            convertIndexToValueString(got, result.value(), val);
            return true;
        }
    }
    return false;
}
// Deletes `key` from the store: removes it from the memtable if buffered,
// otherwise tombstones it in the first on-disk table that contains it.
// Fixes a severe bug: the previous version erased the hard-coded string
// "banana" from the memtable instead of the requested key.
bool HashMergeTable::del(const std::string& key) {
    const size_t erased_count = _memTable._items.erase(key);
    if (erased_count > 0) {
        return true;
    }
    for (const auto& level : _levels) {
        for (const auto& table : level) {
            DataSegment got = table.asDataSegment();
            if (hashTableDelete(got, key)) {
                return true;
            }
        }
    }
    return false;
}
// Serializes the memtable into the next level-0 chunk file, memory-maps it
// back in, and resets the in-memory buffer. Returns false when the file
// cannot be written or re-opened (the memtable is left intact then).
bool HashMergeTable::dumpMemTableToDisk() {
    unsigned long nextChunk = _levels[0].size(); // next free chunk index at level 0
    // Record hash -> chunk for every buffered key BEFORE writing; if the
    // write below fails, these cache entries dangle — TODO confirm acceptable.
    _memTable.cacheAtChunk(nextChunk);
    std::vector<unsigned char> bytes;
    toHashedTable(_memTable._items, bytes);
    DataLocation dl;
    std::string pathString;
    dl.level = 0;
    dl.chunk = nextChunk;
    dl.toString(pathString);
    const std::string fullPath = _cfg.dpath + "/" + pathString;
    if (!byteVectorToFile(bytes, fullPath)) {
        return false;
    }
    std::optional<HashTableFile> newHash = HashTableFile::fromFile(fullPath);
    if (!newHash.has_value()) {
        return false;
    }
    _levels[0].push_back(newHash.value());
    _memTable._items.clear();
    _memTable.currentDataSize = 0;
    return true;
}
// Buffers key -> value in the memtable, flushing to disk once the buffer
// exceeds the configured threshold. Returns false only when a flush fails.
bool HashMergeTable::put(const std::string& key, const std::string& value) {
    // insert_or_assign so a repeated put updates the stored value — plain
    // insert() silently kept the old value for an existing key.
    _memTable._items.insert_or_assign(key, value);
    _memTable.currentDataSize += key.size() + value.size();
    if (_memTable.currentDataSize > _cfg.maxMemTableSize) {
        if (!dumpMemTableToDisk()) {
            // consider error handling
            return false;
        }
    }
    return true;
}
// tests begin here.
// Benchmark: serializes ~500k entries into the on-disk table format, then
// times 1M lookups (every other key was never inserted, giving misses).
static void bigTest1() {
    static constexpr size_t testSize = 1000000;
    static const std::string myValue = "foobar";
    std::vector<unsigned char> myBytes;
    std::vector<std::string> myKeys;
    std::unordered_map<std::string, std::string> myMap;
    std::string myString;
    myMap.max_load_factor(0.5);
    for (size_t i = 0; i < testSize; ++i) // size_t: avoid signed/unsigned compare
    {
        put_rand_chars_in_string(myString, 16);
        myKeys.push_back(myString);
        // add some missed lookups
        if (i % 2 == 0) myMap.insert(std::make_pair(myString, myValue));
        myString.clear();
    }
    {
        auto start = high_resolution_clock::now();
        toHashedTable(myMap, myBytes);
        auto stop = high_resolution_clock::now();
        auto duration = duration_cast<microseconds>(stop - start);
        // cast: the duration rep is a signed type that %llu did not match
        std::printf("Time taken insert %lld\n", (long long)duration.count());
    }
    DataSegment ds(myBytes.data(), myBytes.size());
    uint64_t adder = 0;
    auto start = high_resolution_clock::now();
    for (size_t j = 0; j < testSize; ++j)
    {
        const auto result = hashSearchTable(ds, myKeys[j]);
        if (result.has_value()) {
            adder += result.value();
        }
    }
    auto stop = high_resolution_clock::now();
    // cast: uint64_t is unsigned long on LP64, so plain %llu was UB
    printf("Total adder %llu\n", (unsigned long long)adder);
    auto duration = duration_cast<microseconds>(stop - start);
    std::printf("Time taken search %lld\n", (long long)duration.count());
}
// End-to-end benchmark: 40M random 8-char keys pushed through the store and
// then looked back up, with timings printed. Uses a scratch directory "foo"
// created up front and removed at the end.
static void bigTest2() {
    //Time taken HT insert 19004083
    //Time taken HT search 10317895
    static constexpr size_t testSize = 40000000;
    static const std::string myValue = "foobar";
    static const std::string myDir = "foo";
    std::filesystem::create_directory(myDir);
    std::vector<std::string> myKeys;
    HashMergeTableConfig cfg;
    cfg.dpath = myDir;
    cfg.maxMemTableSize = 1024 * 1024 * 2;
    std::optional<HashMergeTable> ht = HashMergeTable::openDB(cfg);
    if (!ht.has_value()) {
        fprintf(stderr, "FATAL cannot open db at %s\n", cfg.dpath.c_str());
        abort();
    }
    // bind by reference — `auto myTable = ht.value();` copied the entire
    // table object (memtable, chunk cache, level vectors) needlessly
    auto& myTable = ht.value();
    std::string myString;
    myKeys.reserve(testSize); // one allocation instead of repeated regrowth
    for (size_t i = 0; i < testSize; ++i) // size_t: avoid signed/unsigned compare
    {
        put_rand_chars_in_string(myString, 8);
        myKeys.push_back(myString);
        myString.clear();
    }
    auto start = high_resolution_clock::now();
    for (const auto& k : myKeys) {
        myTable.put(k, myValue);
    }
    auto stop = high_resolution_clock::now();
    auto duration = duration_cast<microseconds>(stop - start);
    // cast: the duration rep is a signed type that %llu did not match
    std::printf("Time taken HT insert %lld\n", (long long)duration.count());
    std::string valRet;
    auto start2 = high_resolution_clock::now();
    for (const auto& k : myKeys) {
        myTable.get(k, valRet);
    }
    auto stop2 = high_resolution_clock::now();
    auto duration2 = duration_cast<microseconds>(stop2 - start2);
    std::printf("Time taken HT search %lld\n", (long long)duration2.count());
    HashMergeTable::closeDB(myTable);
    std::filesystem::remove_all(myDir);
}
// Reports a failed check (expression text plus source line) to stderr;
// stays silent when the condition holds.
static void checkCondition(int cond, const char* express, unsigned lineno) {
    if (cond) {
        return;
    }
    fprintf(stderr, "FAIL %s line %u\n", express, lineno);
}
// Evaluates cond and reports it with its own source text and line number.
#define CHECKIT(cond) checkCondition(cond, #cond, __LINE__)
// Builds a three-entry table, verifies "foo1" is findable, tombstones it,
// and verifies it is gone.
static void testDelete() {
    std::unordered_map<std::string, std::string> myMap;
    myMap.insert(std::make_pair("foo", "bar"));
    myMap.insert(std::make_pair("foo1", "bar"));
    myMap.insert(std::make_pair("foo2", "bar"));
    std::vector<unsigned char> bytes;
    toHashedTable(myMap, bytes);
    DataSegment ds(bytes.data(), bytes.size());
    auto res = hashSearchTable(ds, "foo1");
    CHECKIT(res.has_value());
    if (!res.has_value()) {
        fprintf(stderr, "Expected foo1 to be in map\n");
    }
    CHECKIT(hashTableDelete(ds, "foo1"));
    res = hashSearchTable(ds, "foo1");
    CHECKIT(!res.has_value());
    if (res.has_value()) {
        // cast: uint64_t is unsigned long on LP64, so plain %llu was UB
        fprintf(stderr, "Expected foo1 to NOT be in map, val=%llu\n",
                (unsigned long long)res.value());
    }
}
// Checks that extendByteVectorWithByte appends n copies of the byte after
// any data already present.
static void testByteExtendVec() {
    std::vector<unsigned char> buf;
    buf.push_back(3);
    extendByteVectorWithByte(buf, 3, 10);
    CHECKIT(buf.size() == 11);
    CHECKIT(buf[1] == 3);
    CHECKIT(buf[10] == 3);
}
// Verifies merging two serialized tables into one map: the overlapping keys
// ("foo", "foo3") are deduplicated and the first-merged table's values win.
static void testMerges() {
    std::unordered_map<std::string, std::string> myMapNew;
    printf("Hash of foo is %zu\n", hashWithMapFunc(std::string("foo")));
    std::unordered_map<std::string, std::string> myMap;
    myMap.max_load_factor(0.5);
    myMap.insert(std::make_pair("foo", "bar"));
    myMap.insert(std::make_pair("foo1", "bar"));
    myMap.insert(std::make_pair("foo2", "bar"));
    myMap.insert(std::make_pair("foo3", "barstobars"));
    std::unordered_map<std::string, std::string> myMap2;
    myMap2.max_load_factor(0.5);
    myMap2.insert(std::make_pair("foo", "bar"));
    myMap2.insert(std::make_pair("foo92", "bar"));
    myMap2.insert(std::make_pair("foo45", "bar"));
    myMap2.insert(std::make_pair("foo3", "barstobars"));
    std::vector<unsigned char> bytes1;
    std::vector<unsigned char> bytes2;
    toHashedTable(myMap, bytes1);
    toHashedTable(myMap2, bytes2);
    DataSegment ds1(bytes1.data(), bytes1.size());
    DataSegment ds2(bytes2.data(), bytes2.size());
    CHECKIT(hashTableMergeInto(myMapNew, ds1) == 4);
    hashTableMergeInto(myMapNew, ds2);
    printf("size of map %zu\n", myMapNew.size());
    // 4 + 4 entries with 2 overlapping keys -> 6 distinct keys
    CHECKIT(myMapNew.size() == 6);
    const std::string barString("bar");
    CHECKIT(myMapNew.find("foo")->second == barString);
}
// Verifies that a slot index found by hashSearchTable resolves back to the
// stored value via convertIndexToValueString.
static void testValueString() {
    std::unordered_map<std::string, std::string> myMap;
    std::vector<unsigned char> bytes1;
    myMap.max_load_factor(0.5);
    myMap.insert(std::make_pair("foo", "bar"));
    myMap.insert(std::make_pair("foo1", "bar"));
    myMap.insert(std::make_pair("foo2", "bar"));
    myMap.insert(std::make_pair("foo3", "barstobars"));
    toHashedTable(myMap, bytes1);
    DataSegment ds1(bytes1.data(), bytes1.size());
    std::optional<uint64_t> got = hashSearchTable(ds1, "foo");
    CHECKIT(got.has_value());
    std::string myVal;
    convertIndexToValueString(ds1, got.value(), myVal);
    CHECKIT(myVal == "bar");
}
static void testDataLocation() {
DataLocation dl;
dl.level = 5;
dl.chunk = 1;
std::string dlString;
dl.toString(dlString);
CHECKIT(dlString == "level-5-chunk-1");
const std::optional<DataLocation> result = DataLocation::fromString(dlString);
CHECKIT(result.has_value());
CHECKIT(result.value().level == 5);
CHECKIT(result.value().chunk == 1);
}
// Entry point: seeds the RNG and runs the large end-to-end benchmark.
// The unit tests are kept available but commented out.
int main(int argc, char const *argv[])
{
    //Time taken HT insert 6887719
    //Time taken HT search 6620036
    (void)argc;
    (void)argv;
    srand(time(nullptr));
    //bigTest1();
    bigTest2();
    //testDelete();
    //testByteExtendVec();
    //testMerges();
    //testValueString();
    //testDataLocation();
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment