Skip to content

Instantly share code, notes, and snippets.

@sillypog
Last active January 10, 2025 03:30
Show Gist options
  • Save sillypog/74319d8cde7e5038ebc3c99ae8c67e50 to your computer and use it in GitHub Desktop.
Save sillypog/74319d8cde7e5038ebc3c99ae8c67e50 to your computer and use it in GitHub Desktop.
A cache where lowest ranked items are evicted first.
/**
* Speed up retrieval of rankable objects with a cache.
*
* We have a primary data store which implements the DataSource interface.
* Any class implementing this interface is likely to be slow as it has to
* fetch the data from disk or across the network. Therefore, it will be
* useful to temporarily store some of that data in a faster, in memory
* cache. This cache is not big enough to store all of the data from the
* primary store though.
*
* The objects stored in the primary data store will implement the Rankable
* interface. The rank of an object tells our cache how important it is to
* keep it in the cache. The lower the rank, the more likely the object is
* to be evicted when the cache is at capacity.
*
* RetainBestCache is an implementation of a cache with this behaviour.
*/
#include <cassert>
#include <exception>
#include <iostream>
#include <queue>
#include <string>
#include <thread>
#include <type_traits>
#include <unordered_map>
using namespace std;
// using namespace std::literals::chrono_literals
/**
* Interfaces are predefined for the Rankable and DataSource objects.
*
* Our primary data store and the data within it must implement these
* interfaces.
*/
class Rankable {
public:
virtual long getRank() = 0;
};
// V should extend Rankable but there's no clear way to specify that in C++.
// This traits check doesn't seem to work.
template <typename K, typename V, typename = enable_if<is_base_of<Rankable, V>::value>>
class DataSource {
public:
virtual V& get(K key) = 0;
};
/**
* In this example, the data in the primary store is objects of the Item class.
*/
class Item : public Rankable {
private:
string value;
long rank;
public:
Item() {}
Item(string v, long r) : value(v), rank(r) {}
long getRank() override {
return rank;
}
string& getValue() {
return value;
}
};
/**
* In this example, our primary store contains a small number of named Item objects.
*
* Fetching data from this store is artificially slow. This is meant to simulate time
* taken to fetch data from the network or disk, data decompression, validation, etc.
*/
class ItemStore : public DataSource<string, Item> {
private:
unordered_map<string, Item> map;
public:
ItemStore() {
map.insert({"a", {"One Hundred", 100}});
map.insert({"b", {"Thirty Three", 33}});
map.insert({"c", {"Forty Five", 45}});
}
Item& get(string key) override {
cout << "Fetching " << key << " from ItemStore..." << endl;
this_thread::sleep_for(2000ms);
auto result = map.find(key);
if (result == map.end()) {
throw invalid_argument("Item not found");
}
return result->second;
}
};
/**
* RetainBestCache is the implementation of the class to solve the problem of slow data
* retrieval from the primary store.
*
* The capacity of the cache is determined when it is instantiated. That many Rankable
* objects can be stored in an unordered_map within the cache. This allows O(1) lookup
* of objects by their key, as well as O(1) insertion and removal of objects.
*
* There is also a priority_queue containing the ranks and keys of each of these objects.
* The lowest ranked object is always at the front of the queue, allowing O(1) identification
* of the lowest ranked object. Insertion of new objects is O(logn).
*
* Rather than returning the Rankable object directly, that data is wrapped in a CacheResult
* struct which provides information about whether the data was in the cache or not even
* available in the primary store.
*/
enum class CacheStatus {
MISS,
HIT,
NOT_FOUND
};
template <typename V>
struct CacheResult {
CacheStatus status;
V* item;
};
template <typename K>
auto minHeapComparison = [](const pair<long, K>& a, const pair<long, K>& b) {
return a.first > b.first;
};
template <typename K, typename V, typename = enable_if<is_base_of<Rankable, V>::value>>
class RetainBestCache {
private:
DataSource<K, V>& dataSource;
unordered_map<K, V*> cache;
priority_queue<pair<long, K>, vector<pair<long, K>>, decltype(minHeapComparison<K>)> ranks;
unsigned long capacity;
public:
RetainBestCache(ItemStore& s, unsigned long c) : dataSource(s), capacity(c) {}
CacheResult<V> get(string key) {
cout << "Checking cache for " << key << endl;
V* value;
CacheStatus status;
auto result = cache.find(key);
if (result == cache.end()) {
// The key wasn't in there, we need to get it from the datastore.
status = CacheStatus::MISS;
try {
value = &(dataSource.get(key));
} catch (const invalid_argument& e) {
return {CacheStatus::NOT_FOUND, nullptr};
}
// Add this to the cache if appropriate
if (cache.size() < capacity) {
cache.insert({key, value});
ranks.push({value->getRank(), key});
} else {
// If this item is higher rank than our lowest, we'll need to make room for it
pair<long, K> newRank {value->getRank(), key};
pair<long, K> lowest = ranks.top();
if (newRank > lowest) {
// Remove the lowest from both cache stores
ranks.pop();
cache.erase(lowest.second);
// Add the new value to both cache stores
cache.insert({key, value});
ranks.push(newRank);
}
}
} else {
value = result->second;
status = CacheStatus::HIT;
}
return {status, value};
}
};
int main() {
ItemStore store;
RetainBestCache<string, Item> cache(store, 2);
CacheResult<Item> result;
result = cache.get("a"); // Expect cache miss.
assert(result.status == CacheStatus::MISS);
assert(result.item->getValue() == "One Hundred");
cout << "Item a has value " << result.item->getValue() << endl;
string& s1 = result.item->getValue();
s1 += " Edited";
result = cache.get("a"); // Expect cache hit.
assert(result.status == CacheStatus::HIT);
assert(result.item->getValue() == "One Hundred Edited");
cout << "Item a has value " << result.item->getValue() << endl;
result = cache.get("b"); // Expect cache miss. After addition, cache is filled to capacity
assert(result.status == CacheStatus::MISS);
assert(result.item->getValue() == "Thirty Three");
cout << "Item b has value " << result.item->getValue() << endl;
string& sB = result.item->getValue();
sB += " Edited";
result = cache.get("b"); // Expect cache hit.
assert(result.status == CacheStatus::HIT);
assert(result.item->getValue() == "Thirty Three Edited");
cout << "Item b has value " << result.item->getValue() << endl;
result = cache.get("c"); // Expect cache miss. b will be evicted
assert(result.status == CacheStatus::MISS);
assert(result.item->getValue() == "Forty Five");
cout << "Item c has value " << result.item->getValue() << endl;
result = cache.get("c"); // Expect cache hit.
assert(result.status == CacheStatus::HIT);
assert(result.item->getValue() == "Forty Five");
cout << "Item c has value " << result.item->getValue() << endl;
result = cache.get("b"); // Expect cache miss due to previous eviction.
assert(result.status == CacheStatus::MISS);
assert(result.item->getValue() == "Thirty Three Edited");
cout << "Item b has value " << result.item->getValue() << endl;
result = cache.get("b"); // Expect cache miss as b shouldn't have been readded.
assert(result.status == CacheStatus::MISS);
assert(result.item->getValue() == "Thirty Three Edited");
cout << "Item b has value " << result.item->getValue() << endl;
result = cache.get("a"); // Expect cache hit.
assert(result.status == CacheStatus::HIT);
assert(result.item->getValue() == "One Hundred Edited");
cout << "Item a has value " << result.item->getValue() << endl;
result = cache.get("c"); // Expect cache hit.
assert(result.status == CacheStatus::HIT);
assert(result.item->getValue() == "Forty Five");
cout << "Item c has value " << result.item->getValue() << endl;
result = cache.get("d"); // This isn't in cache or datastore.
assert(result.status == CacheStatus::NOT_FOUND);
cout << "There is no item d" << endl;
}
@sillypog
Copy link
Author

sillypog commented Aug 2, 2022

Also available at https://commie.io/#TJqqZa83 for easier commenting.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment