Last active
March 10, 2025 02:07
-
-
Save jweinst1/6fda9ddb5769633ca1bfc2a2f3bbde60 to your computer and use it in GitHub Desktop.
hash merge tree
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdint>
#include <ctime>
#include <chrono>
#include <filesystem>
#include <functional>
#include <map>
#include <optional>
#include <regex>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
// Bring std::chrono clocks/durations in for the benchmark timing below.
using namespace std::chrono;
// Matches the "label-number" tokens inside data file names, e.g. "level-3".
static const std::regex HASH_FILE_REGEX("[a-zA-Z]+-[0-9]+");
// Token labels used by the DataLocation file-name format "level-L-chunk-C".
static const std::string HASH_FILE_CHUNK = "chunk";
static const std::string HASH_FILE_LEVEL = "level";
// Open-addressing hash -> chunk-index cache with linear probing.
// A slot whose hash is 0 is treated as empty, so hash value 0 cannot be
// stored (see the TODO on hashWithMapFunc about reserved hash values).
struct ChunkCache {
    struct Slot {
        size_t hash = 0;   // 0 == empty slot
        size_t chunk = 0;  // chunk index recorded for this hash
    };
    size_t count = 0;      // occupied slots
    size_t size = 0;       // total slot capacity
    Slot* slots = nullptr;

    explicit ChunkCache(size_t amount): size(amount), slots(new Slot[amount]()) {
    }
    // Rule of five: the original left the destructor commented out (leak)
    // because the implicit shallow copy would have double-freed `slots`.
    // A deep copy plus a cheap move makes by-value ownership safe.
    ChunkCache(const ChunkCache& other)
        : count(other.count), size(other.size), slots(new Slot[other.size]()) {
        memcpy(slots, other.slots, other.size * sizeof(Slot)); // Slot is trivially copyable
    }
    ChunkCache(ChunkCache&& other) noexcept
        : count(other.count), size(other.size), slots(other.slots) {
        other.count = 0;
        other.size = 0;
        other.slots = nullptr;
    }
    // Unified copy/move assignment via copy-and-swap (self-assign safe).
    ChunkCache& operator=(ChunkCache other) noexcept {
        std::swap(count, other.count);
        std::swap(size, other.size);
        std::swap(slots, other.slots);
        return *this;
    }
    ~ChunkCache() {
        delete[] slots;
    }
    // Triples the capacity and re-inserts every live entry.
    void expand() {
        const size_t oldSize = size;
        Slot* oldSlots = slots;
        size = oldSize * 3;
        count = 0;
        slots = new Slot[size]();
        for (size_t i = 0; i < oldSize; ++i) {
            // Original bug: empty slots (hash == 0) were re-inserted too,
            // inflating `count` by oldSize and doing useless probe work.
            if (oldSlots[i].hash != 0) {
                insert(oldSlots[i].hash, oldSlots[i].chunk);
            }
        }
        delete[] oldSlots;
    }
    // Returns the chunk recorded for `hash`, or (size_t)-1 when absent.
    // Linear probe from hash % size, stopping at the first empty slot.
    size_t get(size_t hash) const {
        const size_t mySpot = hash % size;
        for (size_t step = 0; step < size; ++step) {
            const size_t i = (mySpot + step) % size;
            if (slots[i].hash == hash) {
                return slots[i].chunk;
            }
            if (slots[i].hash == 0) {
                return (size_t)-1;
            }
        }
        return (size_t)-1;
    }
    // Records hash -> chunk. Duplicate hashes occupy additional slots; get()
    // returns the first one inserted. Expands at 50% load so probing always
    // finds a free slot.
    void insert(size_t hash, size_t chunk) {
        if (count >= size / 2) {
            expand();
        }
        ++count;
        const size_t mySpot = hash % size;
        for (size_t step = 0; step < size; ++step) {
            const size_t i = (mySpot + step) % size;
            if (slots[i].hash == 0) {
                slots[i].hash = hash;
                slots[i].chunk = chunk;
                return;
            }
        }
    }
};
static std::optional<size_t> getFileSizeOnDisk(const std::string& fpath) { | |
struct stat statbuf; | |
if (stat(fpath.c_str(), &statbuf) == -1) { | |
return std::nullopt; | |
} | |
return statbuf.st_size; | |
} | |
// Appends n pseudo-random printable characters (ASCII 32..121) to foo,
// consuming one rand() call per character.
static void put_rand_chars_in_string(std::string& foo, size_t n) {
    for (size_t i = 0; i < n; ++i) {
        foo.push_back(static_cast<char>(rand() % 90 + 32));
    }
}
// True when `base` ends with `suffix` (empty suffix and equal-length cases
// included).
// Original bug: it used find(), which locates the FIRST occurrence, so a
// suffix that also appears earlier in the string was misreported — e.g.
// stringEndsWith("abcab", "ab") returned false. It also underflowed
// base.size() - suffix.size() when the suffix was longer than the base.
static bool stringEndsWith(const std::string& base, const std::string& suffix) {
    if (suffix.size() > base.size()) {
        return false;
    }
    return base.compare(base.size() - suffix.size(), suffix.size(), suffix) == 0;
}
// True when `base` begins with `prefix`.
// Uses compare(), which inspects only the first prefix.size() characters;
// the original find() could scan the whole string before concluding "no".
static bool stringStartsWith(const std::string& base, const std::string& prefix) {
    return base.compare(0, prefix.size(), prefix) == 0;
}
// Identifies where a serialized table lives on disk: compaction level and
// chunk index within that level. Round-trips through the file-name format
// "level-<L>-chunk-<C>".
struct DataLocation {
    size_t level = 0;
    size_t chunk = 0;
    // Parses "level-N"/"chunk-N" tokens out of a file name or path.
    static std::optional<DataLocation> fromString(const std::string& base);
    // Writes the canonical "level-L-chunk-C" name into target.
    void toString(std::string& target);
};
// Renders this location as the canonical file name "level-<L>-chunk-<C>".
void DataLocation::toString(std::string& target) {
    char formatted[256] = {0};
    snprintf(formatted, sizeof(formatted), "level-%zu-chunk-%zu", level, chunk);
    target = formatted;
}
// parses file patterns | |
std::optional<DataLocation> DataLocation::fromString(const std::string& base) { | |
DataLocation dl; | |
bool foundLevel = false; | |
bool foundChunk = false; | |
auto wordsBegin = std::sregex_iterator(base.begin(), base.end(), HASH_FILE_REGEX); | |
auto wordsEnd = std::sregex_iterator(); | |
for (std::sregex_iterator i = wordsBegin; i != wordsEnd; ++i) { | |
const std::string match = i->str(); | |
const auto splitter = match.find('-'); | |
const auto label = match.substr(0, splitter); | |
if ( label == HASH_FILE_LEVEL) { | |
try { | |
dl.level = std::stoll(match.substr(splitter + 1)); | |
} catch (const std::exception&) { | |
return std::nullopt; | |
} | |
} else if (label == HASH_FILE_CHUNK) { | |
try { | |
dl.chunk = std::stoll(match.substr(splitter + 1)); | |
} catch (const std::exception&) { | |
return std::nullopt; | |
} | |
} | |
} | |
return dl; | |
} | |
// Debug helper: prints `len` raw bytes from `str` to stdout, optionally
// preceded by "<prefix> ", terminated with a newline.
static void print_mem_as_str(const char* prefix, const void* str, size_t len) {
    if (prefix) {
        printf("%s ", prefix);
    }
    const char* cursor = static_cast<const char*>(str);
    for (size_t i = 0; i < len; ++i) {
        putc(cursor[i], stdout);
    }
    putc('\n', stdout);
}
// Lexicographic "less than" over two raw byte ranges; a strict prefix
// compares as smaller, mirroring std::string ordering.
static bool cmp_mem_lt(const void* lhs, size_t llen, const void* rhs, size_t rlen) {
    const size_t common = (llen < rlen) ? llen : rlen;
    const int cmp = memcmp(lhs, rhs, common);
    if (cmp != 0) {
        return cmp < 0;
    }
    return llen < rlen; // equal prefixes: shorter range is smaller
}
// Byte-wise equality: ranges match only when both length and content match.
static bool cmp_mem_eq(const void* lhs, size_t llen, const void* rhs, size_t rlen) {
    return llen == rlen && memcmp(lhs, rhs, llen) == 0;
}
// Note: this will change the size of the vector, so | |
// make sure to fill it in! | |
// Grows `bytes` by n zero-initialized bytes and returns the offset where
// the new region begins.
// Note: this will change the size of the vector, so make sure to fill it in!
static size_t resizeByteVector(std::vector<unsigned char>& bytes, size_t n) {
    const size_t regionStart = bytes.size();
    bytes.resize(regionStart + n);
    return regionStart;
}
// Appends n bytes copied from `data` to `bytes`; returns the offset of the
// copied region.
// n == 0 is guarded so memcpy never receives a null source/destination
// pointer (undefined behavior even for zero-length copies).
static size_t extendByteVector(std::vector<unsigned char>& bytes, const void* data, size_t n) {
    const size_t off_before = bytes.size();
    if (n != 0) {
        bytes.resize(off_before + n);
        memcpy(bytes.data() + off_before, data, n);
    }
    return off_before;
}
// Appends n copies of byte c to `bytes`; returns the offset of the first
// appended byte.
// n == 0 is guarded so memset never receives the (possibly null) data()
// pointer of an empty vector.
static size_t extendByteVectorWithByte(std::vector<unsigned char>& bytes, unsigned char c, size_t n) {
    const size_t off_before = bytes.size();
    if (n != 0) {
        bytes.resize(off_before + n);
        memset(bytes.data() + off_before, c, n);
    }
    return off_before;
}
// Serializes `value` as [uint64 length][raw bytes] appended to `bytes`;
// returns the offset of the length prefix. (Single resize + two memcpy
// calls instead of two extendByteVector round-trips.)
static size_t extendByteVectorWithString(std::vector<unsigned char>& bytes, const std::string& value) {
    const uint64_t length = value.size();
    const size_t recordStart = bytes.size();
    bytes.resize(recordStart + sizeof(length) + value.size());
    memcpy(bytes.data() + recordStart, &length, sizeof(length));
    memcpy(bytes.data() + recordStart + sizeof(length), value.data(), value.size());
    return recordStart;
}
/** | |
* Non owning pointer to bytes | |
* */ | |
/**
 * Non owning pointer to bytes
 * */
struct DataSegment {
    unsigned char* dataPtr;  // borrowed; lifetime owned by the caller (vector or mmap)
    size_t len;              // number of valid bytes at dataPtr
    DataSegment(unsigned char* data, size_t size): dataPtr(data), len(size) {}
    unsigned char* data() const { return dataPtr;}
    size_t size() const { return len; }
};
// A serialized hash table mmap'ed read/write from disk. Lifetime is managed
// manually via closeDown(): instances are copied by value into the level
// vectors, so an unmapping destructor would double-release the mapping.
struct HashTableFile {
    unsigned char* mapped = nullptr;  // mmap'ed file contents, file_size bytes
    int fd = -1;                      // descriptor backing the mapping; -1 when closed
    size_t file_size = 0;             // size of the file (and the mapping) in bytes
    size_t level = 0;                 // compaction level parsed from the file name
    size_t chunk = 0;                 // chunk index parsed from the file name
    std::string fpath;                // path the mapping was created from
    // Synchronously flushes dirty mapped pages back to the file.
    bool syncToDisk() {
        return msync(mapped, file_size, MS_SYNC) == 0;
    }
    // Opens and mmap's an existing table file whose name carries "level-N"
    // and "chunk-N" tokens (see DataLocation::fromString). Returns nullopt
    // on stat/parse/open/mmap failure.
    static std::optional<HashTableFile> fromFile(const std::string& fpath) {
        HashTableFile hf;
        const auto checkSize = getFileSizeOnDisk(fpath);
        if (!checkSize.has_value()) {
            return std::nullopt;
        }
        const auto checkLevelAndChunk = DataLocation::fromString(fpath);
        if (!checkLevelAndChunk.has_value()) {
            return std::nullopt;
        }
        const DataLocation& dl = checkLevelAndChunk.value();
        hf.level = dl.level;
        hf.chunk = dl.chunk;
        hf.file_size = checkSize.value();
        hf.fpath = fpath;
        // NOTE(review): the 0666 mode is ignored without O_CREAT; and mmap
        // of a zero-length file fails — files are assumed non-empty here.
        hf.fd = open(fpath.c_str(), O_RDWR, 0666);
        if (hf.fd == -1) {
            return std::nullopt;
        }
        void* mapResult = mmap(nullptr, hf.file_size, PROT_READ | PROT_WRITE, MAP_SHARED, hf.fd, 0);
        if (mapResult == MAP_FAILED) {
            close(hf.fd);
            return std::nullopt;
        }
        hf.mapped = static_cast<unsigned char*>(mapResult);
        return hf;
    }
    // Scans `dpath` and groups every parseable table file into
    // levels[level]; guarantees at least an (empty) level 0 on return.
    // Propagates std::filesystem::filesystem_error if the directory is
    // unreadable.
    static void loadLevelsInDir(const std::string& dpath,
        std::vector<std::vector<HashTableFile>>& levels) {
        const std::filesystem::path targetDir{dpath};
        for (auto const& dir_entry : std::filesystem::directory_iterator{targetDir}) {
            const auto result = HashTableFile::fromFile(dir_entry.path().string());
            if (result.has_value()) {
                const auto myVal = result.value();
                // Grow the level list until this file's level exists.
                while (levels.size() < myVal.level + 1) {
                    levels.push_back(std::vector<HashTableFile>{});
                }
                levels[myVal.level].push_back(myVal);
            }
        }
        if (levels.size() == 0) {
            // have at least 0 level ready
            levels.push_back(std::vector<HashTableFile>{});
        }
    }
    unsigned char* data() const { return mapped; }
    size_t size() const { return file_size; }
    // Borrow the whole mapping as a non-owning byte view.
    DataSegment asDataSegment() const {
        return DataSegment{mapped, file_size};
    }
    // Releases the mapping and descriptor; safe to call more than once.
    void closeDown() {
        if (fd != -1 && mapped != nullptr) {
            munmap(mapped, file_size);
            close(fd);
            fd = -1;
            mapped = nullptr;
        }
    }
protected:
    HashTableFile() = default;
};
// Writes the whole buffer to `path`, truncating any existing file.
// Returns false when the file cannot be opened, the write is short, or the
// close fails (the original ignored fwrite's return value entirely).
static bool byteVectorToFile(const std::vector<unsigned char>& bytes, const std::string& path) {
    FILE* fp = std::fopen(path.c_str(), "wb+");
    if (fp == nullptr) {
        return false;
    }
    bool ok = true;
    if (!bytes.empty()) { // fwrite with a null data() pointer is avoided
        ok = std::fwrite(bytes.data(), 1, bytes.size(), fp) == bytes.size();
    }
    if (std::fclose(fp) != 0) {
        ok = false;
    }
    return ok;
}
// Hashes a key exactly the way std::unordered_map<std::string, ...> does:
// the map's default hasher IS std::hash<std::string>, so call it directly
// instead of constructing a throwaway static map to borrow hash_function()
// (the original approach paid a guarded static-init on every first call).
// TODO: the on-disk format reserves hash values 0 (empty slot) and 1
// (delete marker); a real key hashing to 0 or 1 would corrupt lookups.
static inline size_t hashWithMapFunc(const std::string& key) {
    return std::hash<std::string>{}(key);
}
static uint64_t getSlotSizeOfTable(const DataSegment& bytes) { | |
return ((uint64_t*)bytes.data())[0]; // one set for hash, other for index | |
} | |
// Bytes occupied by the key/value records: total size minus the hash-slot
// array (slotCount uint64 entries, read from the header's first word).
static uint64_t getKeyAndValSizeOfTable(const std::vector<unsigned char>& bytes) {
    uint64_t slotCount = 0;
    memcpy(&slotCount, bytes.data(), sizeof(slotCount));
    return bytes.size() - slotCount * sizeof(uint64_t);
}
// Advances a probe index with wrap-around: indices at or past max-1 wrap
// back to slot 0.
static inline size_t incrementNumberSlot(size_t cur, size_t max) {
    return (cur + 1 < max) ? cur + 1 : 0;
}
// Serializes `pairs` into the on-disk open-addressed format:
//   [uint64 slotCount][slotCount hash slots][slotCount offset slots]
//   [records: (uint64 keyLen)(key)(uint64 valLen)(value)...]
// Placement starts each entry's probe at its unordered_map bucket index,
// which assumes the implementation's bucket is hash % bucket_count so that
// hashSearchTable (probing from hash % slotCount) finds the entry again.
// NOTE(review): a key whose hash is 0 or 1 would collide with the
// empty/deleted slot sentinels (see the TODO on hashWithMapFunc).
static void toHashedTable(const std::unordered_map<std::string, std::string>& pairs,
    std::vector<unsigned char>& bytes) {
    bytes.clear();
    const size_t bucket_size = pairs.bucket_count();
    size_t hash_block_size = pairs.bucket_count() * 2; // [hash][index]
    // Reserve a zeroed header: slot count + hash array + offset array.
    extendByteVectorWithByte(bytes, 0, (hash_block_size + 1) * sizeof(uint64_t));
    uint64_t* hash_nums = (uint64_t*)bytes.data();
    hash_nums[0] = hash_block_size / 2;  // slot count == bucket_count
    size_t hash_i = 0;
    for (size_t bucket_i = 0; bucket_i < pairs.bucket_count(); ++bucket_i) {
        hash_i = bucket_i;  // probe starts at the entry's bucket index
        for (auto it = pairs.begin(bucket_i); it != pairs.end(bucket_i); ++it) {
            uint64_t spot = extendByteVectorWithString(bytes, it->first);
            uint64_t itemHash = hashWithMapFunc(it->first);
            extendByteVectorWithString(bytes, it->second);
            // Re-fetch the header pointer: the appends above may reallocate.
            uint64_t* hash_table = ((uint64_t*)bytes.data()) + 1;
            // Linear probe to the first free slot (hash 0 == empty).
            while (hash_table[hash_i] != 0) {
                hash_i = incrementNumberSlot(hash_i, bucket_size);
            }
            hash_table[hash_i] = itemHash;
            hash_table[hash_i + bucket_size] = spot;  // parallel offset slot
        }
    }
}
// Copies the value stored at hash-slot `index` of a serialized table into
// `val`. `index` must come from hashSearchTable over the same segment.
void convertIndexToValueString(const DataSegment& bytes, uint64_t index, std::string& val) {
    // consider cacheing?
    uint64_t tableSize = getSlotSizeOfTable(bytes);
    // Offset array sits right after the hash array: word [0] is the slot
    // count, [1..tableSize] the hashes, [1+tableSize..2*tableSize] the
    // record offsets.
    uint64_t keyIndex = ((const uint64_t*)(bytes.data()))[1 + index + tableSize];
    const unsigned char* dataSpot = bytes.data() + keyIndex;
    uint64_t keySize = 0;
    uint64_t valSize = 0;
    // Record layout: [uint64 keyLen][key bytes][uint64 valLen][value bytes].
    memcpy(&keySize, dataSpot, sizeof(uint64_t));
    dataSpot += sizeof(uint64_t) + keySize;  // skip length word and the key
    memcpy(&valSize, dataSpot, sizeof(uint64_t));
    dataSpot += sizeof(uint64_t);
    val.assign((const char*)dataSpot, valSize);
}
std::optional<uint64_t> hashSearchTable(const DataSegment& bytes, const std::string& key) { | |
const uint64_t* numReader = (uint64_t*)bytes.data(); | |
size_t numHashes = numReader[0]; | |
size_t numHashesMinusOne = numHashes - 1; | |
const size_t myHash = hashWithMapFunc(key); | |
size_t myHashSpot = (myHash % numHashes); // for dual index | |
++numReader; | |
size_t hash_i = myHashSpot; | |
if (numReader[hash_i] == 0) { | |
return std::nullopt; | |
} | |
// implict delete market of 1. | |
if (numReader[hash_i] == myHash) { | |
const unsigned char* byteSpot = bytes.data() + numReader[hash_i + numHashes]; | |
uint64_t keySize = 0; | |
memcpy(&keySize, byteSpot, sizeof(uint64_t)); | |
byteSpot += sizeof(uint64_t); | |
if (cmp_mem_eq(byteSpot, keySize, key.data(), key.size())) { | |
return hash_i; | |
} | |
} | |
++hash_i; | |
while (hash_i < numHashes) { | |
if (numReader[hash_i] == 0) { | |
return std::nullopt; | |
} else if (numReader[hash_i] == myHash) { | |
const unsigned char* byteSpot = bytes.data() + numReader[hash_i + numHashes]; | |
uint64_t keySize = 0; | |
memcpy(&keySize, byteSpot, sizeof(uint64_t)); | |
byteSpot += sizeof(uint64_t); | |
if (cmp_mem_eq(byteSpot, keySize, key.data(), key.size())) { | |
return hash_i; | |
} | |
} | |
++hash_i; | |
} | |
hash_i = 0; | |
while (hash_i < myHashSpot) { | |
if (numReader[hash_i] == 0) { | |
return std::nullopt; | |
} else if (numReader[hash_i] == myHash) { | |
const unsigned char* byteSpot = bytes.data() + numReader[hash_i + numHashes]; | |
uint64_t keySize = 0; | |
memcpy(&keySize, byteSpot, sizeof(uint64_t)); | |
byteSpot += sizeof(uint64_t); | |
if (cmp_mem_eq(byteSpot, keySize, key.data(), key.size())) { | |
return hash_i; | |
} | |
} | |
++hash_i; | |
} | |
return std::nullopt; | |
} | |
// Marks `key` as deleted in the serialized table by overwriting its hash
// slot with the sentinel value 1. Returns false when the key is absent.
bool hashTableDelete(DataSegment& bytes, const std::string& key) {
    const auto slot = hashSearchTable(bytes, key);
    if (!slot.has_value()) {
        return false;
    }
    // +1 skips the leading slot-count word to reach the hash array.
    ((uint64_t*)bytes.data())[slot.value() + 1] = 1; // delete marker
    return true;
}
size_t hashTableMergeInto(std::unordered_map<std::string, std::string>& merger, const DataSegment& t1) { | |
// iter indexes for now | |
uint64_t t1size = getSlotSizeOfTable(t1); | |
size_t keysMerged = 0; | |
const uint64_t* reader1 = (const uint64_t*)t1.data(); | |
for (size_t i = 1; i < t1size; ++i) | |
{ | |
if (reader1[i] < 2) | |
continue; | |
const unsigned char* byteSpot = t1.data() + reader1[i + t1size]; | |
uint64_t keySize = 0; | |
uint64_t valSize = 0; | |
memcpy(&keySize, byteSpot, sizeof(uint64_t)); | |
byteSpot += sizeof(uint64_t); | |
const std::string curKey((const char*)byteSpot, keySize); | |
byteSpot += keySize; | |
memcpy(&valSize, byteSpot, sizeof(uint64_t)); | |
byteSpot += sizeof(uint64_t); | |
const std::string curVal((const char*)byteSpot, valSize); | |
merger.insert(std::make_pair(curKey, curVal)); | |
++keysMerged; | |
} | |
return keysMerged; | |
} | |
// In-memory write buffer plus a hash -> level-0-chunk cache used to route
// reads to the right on-disk chunk after the buffer has been flushed.
struct MemTable {
    // NOTE(review): 40M slots * sizeof(Slot) (two size_t each) is roughly
    // 640 MB on LP64, allocated up front.
    ChunkCache _cache;
    std::unordered_map<std::string, std::string> _items;
    size_t currentDataSize = 0;  // approximate bytes of buffered keys + values
    MemTable(): _cache(40 * 1024 * 1024) {
        _items.max_load_factor(0.8);
    }
    // Records every currently-buffered key as living in `chunk`, so gets
    // can still find it once the memtable is flushed to that chunk file.
    void cacheAtChunk(size_t chunk) {
        for (const auto& pair : _items) {
            _cache.insert(hashWithMapFunc(pair.first), chunk);
        }
    }
};
// Tuning/placement options passed to HashMergeTable::openDB.
struct HashMergeTableConfig {
    size_t maxMemTableSize = 1024 * 1024;  // flush threshold: bytes of buffered key+value data
    std::string dpath;                     // directory holding the level/chunk files
};
// A minimal log-structured key/value store: writes buffer in a MemTable and
// flush to mmap'ed level-0 chunk files; reads consult the memtable first,
// then the hash -> chunk cache.
class HashMergeTable {
public:
    // Opens a store over cfg.dpath, loading existing level files; nullopt
    // when the directory cannot be read.
    static std::optional<HashMergeTable> openDB(const HashMergeTableConfig& cfg);
    // Syncs and unmaps every file; the table must not be used afterwards.
    static void closeDB(HashMergeTable& table);
    bool get(const std::string& key, std::string& val);
    bool del(const std::string& key);
    bool put(const std::string&key, const std::string& value);
    void debug() {
        //_memTable.debugPrintCollisions();
    }
    // msync every mapped chunk file across all levels.
    void syncAllLevelFiles() {
        for (auto& level : _levels) {
            for (auto& hfile : level) {
                hfile.syncToDisk();
            }
        }
    }
    // Serializes the memtable into a new level-0 chunk file and resets it.
    bool dumpMemTableToDisk();
private:
    void closeDownAllFiles();
    HashMergeTable() = default;
    HashMergeTableConfig _cfg;
    std::string _dirPath;  // NOTE(review): appears unused; the path comes from _cfg.dpath
    MemTable _memTable;
    std::vector<std::vector<HashTableFile>> _levels;  // _levels[level][chunk]
};
void HashMergeTable::closeDownAllFiles() { | |
for (auto& level : _levels) { | |
for (auto& hfile : level) { | |
hfile.closeDown(); | |
} | |
} | |
} | |
std::optional<HashMergeTable> HashMergeTable::openDB(const HashMergeTableConfig& cfg) { | |
HashMergeTable ht; | |
try { | |
HashTableFile::loadLevelsInDir(cfg.dpath, ht._levels); | |
} catch (std::filesystem::filesystem_error const& ex) { | |
(void)ex; | |
return std::nullopt; | |
} | |
ht._cfg = cfg; | |
return ht; | |
} | |
// Flushes all mapped files to disk, then releases their mappings and
// descriptors. The table must not be used after this call.
void HashMergeTable::closeDB(HashMergeTable& table) {
    table.syncAllLevelFiles();
    table.closeDownAllFiles();
}
bool HashMergeTable::get(const std::string& key, std::string& val) { | |
const auto found = _memTable._items.find(key); | |
if ( found != _memTable._items.end()) { | |
val = found->second; | |
return true; | |
} | |
const auto foundCache = _memTable._cache.get(hashWithMapFunc(key)); | |
if (foundCache != (size_t)-1) { | |
//for (const auto& chunk : foundCache->second) { | |
const DataSegment got = _levels[0][foundCache].asDataSegment(); | |
const auto result = hashSearchTable(got, key); | |
if (result.has_value()) { | |
convertIndexToValueString(got, result.value(), val); | |
return true; | |
} | |
//} | |
} | |
puts("Got dup!"); | |
return false; | |
} | |
bool HashMergeTable::del(const std::string& key) { | |
size_t erased_count = _memTable._items.erase("banana"); | |
if (erased_count > 0) { | |
return true; | |
} | |
for (const auto& level: _levels) { | |
for (const auto& table: level) { | |
DataSegment got = table.asDataSegment(); | |
if (hashTableDelete(got, key)) { | |
return true; | |
} | |
} | |
} | |
return false; | |
} | |
bool HashMergeTable::dumpMemTableToDisk() { | |
unsigned long nextChunk = _levels[0].size(); | |
_memTable.cacheAtChunk(nextChunk); | |
std::vector<unsigned char> bytes; | |
toHashedTable(_memTable._items, bytes); | |
DataLocation dl; | |
std::string pathString; | |
dl.level = 0; | |
dl.chunk = nextChunk; | |
dl.toString(pathString); | |
const std::string fullPath = _cfg.dpath + "/" + pathString; | |
if (!byteVectorToFile(bytes, fullPath)) { | |
return false; | |
} | |
std::optional<HashTableFile> newHash = HashTableFile::fromFile(fullPath); | |
if (!newHash.has_value()) { | |
return false; | |
} | |
_levels[0].push_back(newHash.value()); | |
_memTable._items.clear(); | |
_memTable.currentDataSize = 0; | |
return true; | |
} | |
bool HashMergeTable::put(const std::string& key, const std::string& value) { | |
_memTable._items.insert(std::make_pair(key, value)); | |
_memTable.currentDataSize += key.size() + value.size(); | |
if (_memTable.currentDataSize > _cfg.maxMemTableSize) { | |
if (!dumpMemTableToDisk()) { | |
// consider error handling | |
return false; | |
} | |
} | |
return true; | |
} | |
// tests begin here. | |
static void bigTest1() { | |
static constexpr size_t testSize = 1000000; | |
static const std::string myValue = "foobar"; | |
std::vector<unsigned char> myBytes; | |
std::vector<std::string> myKeys; | |
std::unordered_map<std::string, std::string> myMap; | |
std::string myString; | |
myMap.max_load_factor(0.5); | |
for (int i = 0; i < testSize; ++i) | |
{ | |
put_rand_chars_in_string(myString, 16); | |
myKeys.push_back(myString); | |
// add some missed lookups | |
if (i % 2 == 0) myMap.insert(std::make_pair(myString, myValue)); | |
myString.clear(); | |
} | |
{ | |
auto start = high_resolution_clock::now(); | |
toHashedTable(myMap, myBytes); | |
auto stop = high_resolution_clock::now(); | |
auto duration = duration_cast<microseconds>(stop - start); | |
std::printf("Time taken insert %llu\n", duration.count()); | |
} | |
DataSegment ds(myBytes.data(), myBytes.size()); | |
uint64_t adder = 0; | |
auto start = high_resolution_clock::now(); | |
for (int j = 0; j < testSize; ++j) | |
{ | |
const auto result = hashSearchTable(ds, myKeys[j]); | |
if (result.has_value()) { | |
adder += result.value(); | |
} | |
} | |
auto stop = high_resolution_clock::now(); | |
printf("Total adder %llu\n", adder); | |
auto duration = duration_cast<microseconds>(stop - start); | |
std::printf("Time taken search %llu\n", duration.count()); | |
} | |
static void bigTest2() { | |
//Time taken HT insert 19004083 | |
//Time taken HT search 10317895 | |
static constexpr size_t testSize = 40000000; | |
static const std::string myValue = "foobar"; | |
static const std::string myDir = "foo"; | |
std::filesystem::create_directory(myDir); | |
std::vector<std::string> myKeys; | |
HashMergeTableConfig cfg; | |
cfg.dpath = myDir; | |
cfg.maxMemTableSize = 1024 * 1024 * 2; | |
std::optional<HashMergeTable> ht = HashMergeTable::openDB(cfg); | |
if (!ht.has_value()) { | |
fprintf(stderr, "FATAL cannot open db at %s\n", cfg.dpath.c_str()); | |
abort(); | |
} | |
auto myTable = ht.value(); | |
std::string myString; | |
for (int i = 0; i < testSize; ++i) | |
{ | |
put_rand_chars_in_string(myString, 8); | |
myKeys.push_back(myString); | |
myString.clear(); | |
} | |
auto start = high_resolution_clock::now(); | |
for (const auto& k : myKeys) { | |
myTable.put(k, myValue); | |
} | |
auto stop = high_resolution_clock::now(); | |
auto duration = duration_cast<microseconds>(stop - start); | |
std::printf("Time taken HT insert %llu\n", duration.count()); | |
std::string valRet; | |
auto start2 = high_resolution_clock::now(); | |
for (const auto& k : myKeys) { | |
myTable.get(k, valRet); | |
} | |
auto stop2 = high_resolution_clock::now(); | |
auto duration2 = duration_cast<microseconds>(stop2 - start2); | |
std::printf("Time taken HT search %llu\n", duration2.count()); | |
HashMergeTable::closeDB(myTable); | |
std::filesystem::remove_all(myDir); | |
} | |
// Test helper: reports a failed expectation (expression text + line number)
// on stderr; passing conditions produce no output.
static void checkCondition(int cond, const char* express, unsigned lineno) {
    if (cond) {
        return;
    }
    fprintf(stderr, "FAIL %s line %u\n", express, lineno);
}
#define CHECKIT(cond) checkCondition(cond, #cond, __LINE__) | |
static void testDelete() { | |
std::unordered_map<std::string, std::string> myMap; | |
myMap.insert(std::make_pair("foo", "bar")); | |
myMap.insert(std::make_pair("foo1", "bar")); | |
myMap.insert(std::make_pair("foo2", "bar")); | |
std::vector<unsigned char> bytes; | |
toHashedTable(myMap, bytes); | |
DataSegment ds(bytes.data(), bytes.size()); | |
auto res = hashSearchTable(ds, "foo1"); | |
CHECKIT(res.has_value()); | |
if (!res.has_value()) { | |
fprintf(stderr, "Expected foo1 to be in map\n"); | |
} | |
CHECKIT(hashTableDelete(ds, "foo1")); | |
res = hashSearchTable(ds, "foo1"); | |
CHECKIT(!res.has_value()); | |
if (res.has_value()) { | |
fprintf(stderr, "Expected foo1 to NOT be in map, val=%llu\n", res.value()); | |
} | |
} | |
// Checks extendByteVectorWithByte appends exactly n copies of the byte
// after the existing content.
static void testByteExtendVec() {
    std::vector<unsigned char> buffer;
    buffer.push_back(3);
    extendByteVectorWithByte(buffer, 3, 10);
    CHECKIT(buffer.size() == 11);
    CHECKIT(buffer[1] == 3);
    CHECKIT(buffer[10] == 3);
}
static void testMerges() { | |
std::unordered_map<std::string, std::string> myMapNew; | |
printf("Hash of foo is %zu\n", hashWithMapFunc(std::string("foo"))); | |
std::unordered_map<std::string, std::string> myMap; | |
myMap.max_load_factor(0.5); | |
myMap.insert(std::make_pair("foo", "bar")); | |
myMap.insert(std::make_pair("foo1", "bar")); | |
myMap.insert(std::make_pair("foo2", "bar")); | |
myMap.insert(std::make_pair("foo3", "barstobars")); | |
std::unordered_map<std::string, std::string> myMap2; | |
myMap2.max_load_factor(0.5); | |
myMap2.insert(std::make_pair("foo", "bar")); | |
myMap2.insert(std::make_pair("foo92", "bar")); | |
myMap2.insert(std::make_pair("foo45", "bar")); | |
myMap2.insert(std::make_pair("foo3", "barstobars")); | |
std::vector<unsigned char> bytes1; | |
std::vector<unsigned char> bytes2; | |
toHashedTable(myMap, bytes1); | |
toHashedTable(myMap2, bytes2); | |
DataSegment ds1(bytes1.data(), bytes1.size()); | |
DataSegment ds2(bytes2.data(), bytes2.size()); | |
CHECKIT(hashTableMergeInto(myMapNew, ds1) == 4); | |
hashTableMergeInto(myMapNew, ds2); | |
printf("size of map %zu\n", myMapNew.size()); | |
CHECKIT(myMapNew.size() == 6); | |
const std::string barString("bar"); | |
CHECKIT(myMapNew.find("foo")->second == barString); | |
} | |
static void testValueString() { | |
std::unordered_map<std::string, std::string> myMap; | |
std::vector<unsigned char> bytes1; | |
myMap.max_load_factor(0.5); | |
myMap.insert(std::make_pair("foo", "bar")); | |
myMap.insert(std::make_pair("foo1", "bar")); | |
myMap.insert(std::make_pair("foo2", "bar")); | |
myMap.insert(std::make_pair("foo3", "barstobars")); | |
toHashedTable(myMap, bytes1); | |
DataSegment ds1(bytes1.data(), bytes1.size()); | |
std::optional<uint64_t> got = hashSearchTable(ds1, "foo"); | |
CHECKIT(got.has_value()); | |
std::string myVal; | |
convertIndexToValueString(ds1, got.value(), myVal); | |
CHECKIT(myVal == "bar"); | |
} | |
static void testDataLocation() { | |
DataLocation dl; | |
dl.level = 5; | |
dl.chunk = 1; | |
std::string dlString; | |
dl.toString(dlString); | |
CHECKIT(dlString == "level-5-chunk-1"); | |
const std::optional<DataLocation> result = DataLocation::fromString(dlString); | |
CHECKIT(result.has_value()); | |
CHECKIT(result.value().level == 5); | |
CHECKIT(result.value().chunk == 1); | |
} | |
int main(int argc, char const *argv[]) | |
{ | |
//Time taken HT insert 6887719 | |
//Time taken HT search 6620036 | |
srand(time(nullptr)); | |
//bigTest1(); | |
bigTest2(); | |
//testDelete(); | |
//testByteExtendVec(); | |
//testMerges(); | |
//testValueString(); | |
//testDataLocation(); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment