Last active
January 10, 2025 03:30
-
-
Save sillypog/74319d8cde7e5038ebc3c99ae8c67e50 to your computer and use it in GitHub Desktop.
A cache where lowest ranked items are evicted first.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* Speed up retrieval of rankable objects with a cache. | |
* | |
* We have a primary data store which implements the DataSource interface. | |
* Any class implementing this interface is likely to be slow as it has to | |
* fetch the data from disk or across the network. Therefore, it will be | |
* useful to temporarily store some of that data in a faster, in memory | |
* cache. This cache is not big enough to store all of the data from the | |
* primary store though. | |
* | |
* The objects stored in the primary data store will implement the Rankable | |
* interface. The rank of an object tells our cache how important it is to | |
* keep it in the cache. The lower the rank, the more likely the object is | |
* to be evicted when the cache is at capacity. | |
* | |
* RetainBestCache is an implementation of a cache with this behaviour. | |
*/ | |
#include <cassert> | |
#include <exception> | |
#include <iostream> | |
#include <queue> | |
#include <string> | |
#include <thread> | |
#include <type_traits> | |
#include <unordered_map> | |
using namespace std; | |
// using namespace std::literals::chrono_literals | |
/** | |
* Interfaces are predefined for the Rankable and DataSource objects. | |
* | |
* Our primary data store and the data within it must implement these | |
* interfaces. | |
*/ | |
class Rankable { | |
public: | |
virtual long getRank() = 0; | |
}; | |
// V should extend Rankable but there's no clear way to specify that in C++. | |
// This traits check doesn't seem to work. | |
template <typename K, typename V, typename = enable_if<is_base_of<Rankable, V>::value>> | |
class DataSource { | |
public: | |
virtual V& get(K key) = 0; | |
}; | |
/** | |
* In this example, the data in the primary store is objects of the Item class. | |
*/ | |
class Item : public Rankable { | |
private: | |
string value; | |
long rank; | |
public: | |
Item() {} | |
Item(string v, long r) : value(v), rank(r) {} | |
long getRank() override { | |
return rank; | |
} | |
string& getValue() { | |
return value; | |
} | |
}; | |
/** | |
* In this example, our primary store contains a small number of named Item objects. | |
* | |
* Fetching data from this store is artificially slow. This is meant to simulate time | |
* taken to fetch data from the network or disk, data decompression, validation, etc. | |
*/ | |
class ItemStore : public DataSource<string, Item> { | |
private: | |
unordered_map<string, Item> map; | |
public: | |
ItemStore() { | |
map.insert({"a", {"One Hundred", 100}}); | |
map.insert({"b", {"Thirty Three", 33}}); | |
map.insert({"c", {"Forty Five", 45}}); | |
} | |
Item& get(string key) override { | |
cout << "Fetching " << key << " from ItemStore..." << endl; | |
this_thread::sleep_for(2000ms); | |
auto result = map.find(key); | |
if (result == map.end()) { | |
throw invalid_argument("Item not found"); | |
} | |
return result->second; | |
} | |
}; | |
/** | |
* RetainBestCache is the implementation of the class to solve the problem of slow data | |
* retrieval from the primary store. | |
* | |
* The capacity of the cache is determined when it is instantiated. That many Rankable | |
* objects can be stored in an unordered_map within the cache. This allows O(1) lookup | |
* of objects by their key, as well as O(1) insertion and removal of objects. | |
* | |
* There is also a priority_queue containing the ranks and keys of each of these objects. | |
* The lowest ranked object is always at the front of the queue, allowing O(1) identification | |
* of the lowest ranked object. Insertion of new objects is O(logn). | |
* | |
* Rather than returning the Rankable object directly, that data is wrapped in a CacheResult | |
* struct which provides information about whether the data was in the cache or not even | |
* available in the primary store. | |
*/ | |
enum class CacheStatus { | |
MISS, | |
HIT, | |
NOT_FOUND | |
}; | |
template <typename V> | |
struct CacheResult { | |
CacheStatus status; | |
V* item; | |
}; | |
template <typename K> | |
auto minHeapComparison = [](const pair<long, K>& a, const pair<long, K>& b) { | |
return a.first > b.first; | |
}; | |
template <typename K, typename V, typename = enable_if<is_base_of<Rankable, V>::value>> | |
class RetainBestCache { | |
private: | |
DataSource<K, V>& dataSource; | |
unordered_map<K, V*> cache; | |
priority_queue<pair<long, K>, vector<pair<long, K>>, decltype(minHeapComparison<K>)> ranks; | |
unsigned long capacity; | |
public: | |
RetainBestCache(ItemStore& s, unsigned long c) : dataSource(s), capacity(c) {} | |
CacheResult<V> get(string key) { | |
cout << "Checking cache for " << key << endl; | |
V* value; | |
CacheStatus status; | |
auto result = cache.find(key); | |
if (result == cache.end()) { | |
// The key wasn't in there, we need to get it from the datastore. | |
status = CacheStatus::MISS; | |
try { | |
value = &(dataSource.get(key)); | |
} catch (const invalid_argument& e) { | |
return {CacheStatus::NOT_FOUND, nullptr}; | |
} | |
// Add this to the cache if appropriate | |
if (cache.size() < capacity) { | |
cache.insert({key, value}); | |
ranks.push({value->getRank(), key}); | |
} else { | |
// If this item is higher rank than our lowest, we'll need to make room for it | |
pair<long, K> newRank {value->getRank(), key}; | |
pair<long, K> lowest = ranks.top(); | |
if (newRank > lowest) { | |
// Remove the lowest from both cache stores | |
ranks.pop(); | |
cache.erase(lowest.second); | |
// Add the new value to both cache stores | |
cache.insert({key, value}); | |
ranks.push(newRank); | |
} | |
} | |
} else { | |
value = result->second; | |
status = CacheStatus::HIT; | |
} | |
return {status, value}; | |
} | |
}; | |
int main() { | |
ItemStore store; | |
RetainBestCache<string, Item> cache(store, 2); | |
CacheResult<Item> result; | |
result = cache.get("a"); // Expect cache miss. | |
assert(result.status == CacheStatus::MISS); | |
assert(result.item->getValue() == "One Hundred"); | |
cout << "Item a has value " << result.item->getValue() << endl; | |
string& s1 = result.item->getValue(); | |
s1 += " Edited"; | |
result = cache.get("a"); // Expect cache hit. | |
assert(result.status == CacheStatus::HIT); | |
assert(result.item->getValue() == "One Hundred Edited"); | |
cout << "Item a has value " << result.item->getValue() << endl; | |
result = cache.get("b"); // Expect cache miss. After addition, cache is filled to capacity | |
assert(result.status == CacheStatus::MISS); | |
assert(result.item->getValue() == "Thirty Three"); | |
cout << "Item b has value " << result.item->getValue() << endl; | |
string& sB = result.item->getValue(); | |
sB += " Edited"; | |
result = cache.get("b"); // Expect cache hit. | |
assert(result.status == CacheStatus::HIT); | |
assert(result.item->getValue() == "Thirty Three Edited"); | |
cout << "Item b has value " << result.item->getValue() << endl; | |
result = cache.get("c"); // Expect cache miss. b will be evicted | |
assert(result.status == CacheStatus::MISS); | |
assert(result.item->getValue() == "Forty Five"); | |
cout << "Item c has value " << result.item->getValue() << endl; | |
result = cache.get("c"); // Expect cache hit. | |
assert(result.status == CacheStatus::HIT); | |
assert(result.item->getValue() == "Forty Five"); | |
cout << "Item c has value " << result.item->getValue() << endl; | |
result = cache.get("b"); // Expect cache miss due to previous eviction. | |
assert(result.status == CacheStatus::MISS); | |
assert(result.item->getValue() == "Thirty Three Edited"); | |
cout << "Item b has value " << result.item->getValue() << endl; | |
result = cache.get("b"); // Expect cache miss as b shouldn't have been readded. | |
assert(result.status == CacheStatus::MISS); | |
assert(result.item->getValue() == "Thirty Three Edited"); | |
cout << "Item b has value " << result.item->getValue() << endl; | |
result = cache.get("a"); // Expect cache hit. | |
assert(result.status == CacheStatus::HIT); | |
assert(result.item->getValue() == "One Hundred Edited"); | |
cout << "Item a has value " << result.item->getValue() << endl; | |
result = cache.get("c"); // Expect cache hit. | |
assert(result.status == CacheStatus::HIT); | |
assert(result.item->getValue() == "Forty Five"); | |
cout << "Item c has value " << result.item->getValue() << endl; | |
result = cache.get("d"); // This isn't in cache or datastore. | |
assert(result.status == CacheStatus::NOT_FOUND); | |
cout << "There is no item d" << endl; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Also available at https://commie.io/#TJqqZa83 for easier commenting.