Skip to content

Instantly share code, notes, and snippets.

@unrays
Created May 24, 2026 20:19
Show Gist options
  • Select an option

  • Save unrays/33f61b9ed729c31796658a26d31e47e9 to your computer and use it in GitHub Desktop.

Select an option

Save unrays/33f61b9ed729c31796658a26d31e47e9 to your computer and use it in GitHub Desktop.
High-performance sharded string interner using custom memory allocation and low-contention concurrency design.
// Copyright (c) May 2026 Félix-Olivier Dumas. All rights reserved.
// Licensed under the terms described in the LICENSE file
#pragma once
#include <cstddef>
#include <stdexcept>
#include <atomic>
#include <iostream>
#include <new>
#include <utility>
#include <vector>
#include <cstring>
#include <mutex>
#include <shared_mutex>
#include <string_view>
#include <unordered_set>
#include <functional>
#include <array>
namespace exotic::memory {
class memory_resource {
public:
virtual ~memory_resource() = default;
public:
[[nodiscard]] void* allocate(std::size_t bytes, std::size_t alignment) noexcept {
if (alignment == 0 || (alignment & (alignment - 1)) != 0) [[unlikely]]
throw std::invalid_argument("alignment must be a power of two");
return do_allocate(bytes, alignment);
}
void deallocate(void* p, std::size_t bytes, std::size_t alignment) noexcept {
if (alignment == 0 || (alignment & (alignment - 1)) != 0) [[unlikely]]
throw std::invalid_argument("alignment must be a power of two");
do_deallocate(p, bytes, alignment);
}
bool is_equal(const memory_resource& other) const noexcept {
return do_is_equal(other);
}
protected:
virtual void* do_allocate(std::size_t bytes, std::size_t alignment) = 0;
virtual void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) = 0;
virtual bool do_is_equal(const memory_resource& other) const noexcept = 0;
};
} // namespace exotic::memory
namespace exotic::memory {
struct monotonic_atomic_buffer : public memory_resource {
public:
explicit monotonic_atomic_buffer(std::size_t p_capacity)
: capacity_(p_capacity)
, offset_(0)
, buffer_(new std::byte[p_capacity])
{}
monotonic_atomic_buffer(const monotonic_atomic_buffer&) = delete;
monotonic_atomic_buffer& operator=(const monotonic_atomic_buffer&) = delete;
~monotonic_atomic_buffer() noexcept override {
delete[] buffer_;
}
protected:
void* do_allocate(std::size_t bytes, std::size_t alignment) override {
std::size_t current = offset_.load(std::memory_order_relaxed);
while (true) {
std::size_t aligned = (current + (alignment - 1)) & ~(alignment - 1);
std::size_t next = aligned + bytes;
if (next > capacity_)
return nullptr;
if (offset_.compare_exchange_weak(
current,
next,
std::memory_order_relaxed)) {
return buffer_ + aligned;
}
}
}
void do_deallocate(void*, std::size_t, std::size_t) override {}
bool do_is_equal(const memory_resource& other) const noexcept override {
return this == &other;
}
public:
[[nodiscard]] constexpr std::size_t capacity() const noexcept { return capacity_; }
[[nodiscard]] std::size_t size() const noexcept { return offset_.load(std::memory_order_relaxed); }
private:
std::byte* buffer_;
std::atomic<std::size_t> offset_;
std::size_t capacity_;
};
} // namespace exotic::memory
namespace exotic::memory {
template<typename Tp>
struct unsynchronized_chunk_allocator {
private:
static constexpr std::size_t default_chunk_capacity = 1024;
static constexpr std::size_t default_initial_reserve = 10;
static constexpr std::size_t default_chunk_refill = 5;
static constexpr std::size_t default_chunk_refill_treshold = 1;
public:
struct Chunk {
std::byte* buffer_ = nullptr;
std::size_t capacity_ = 0;
std::size_t offset_ = 0;
};
public:
explicit unsynchronized_chunk_allocator(memory_resource* upstream, std::size_t reserve = default_initial_reserve)
: ressource_{ upstream }
{
if (upstream == nullptr) [[unlikely]]
throw std::invalid_argument("Upstream memory resource cannot be null");
if (reserve == 0) [[unlikely]]
throw std::invalid_argument("reserve must be >= 1");
free_chunks_.reserve(std::size_t{ default_initial_reserve });
for (std::size_t i = 0; i < reserve - 1; ++i) {
void* raw = ressource_->allocate(sizeof(Tp) * default_chunk_capacity, alignof(Tp));
free_chunks_.emplace_back(Chunk{ static_cast<std::byte*>(raw), default_chunk_capacity, 0 });
}
void* raw = ressource_->allocate(sizeof(Tp) * default_chunk_capacity, alignof(Tp));
active_chunk_ = Chunk{ static_cast<std::byte*>(raw), default_chunk_capacity, 0 };
}
~unsynchronized_chunk_allocator() noexcept {
const std::size_t chunk_bytes = sizeof(Tp) * default_chunk_capacity;
const std::size_t chunk_align = alignof(Tp);
for (auto& chunk : free_chunks_) {
if (chunk.buffer_) {
ressource_->deallocate(chunk.buffer_, chunk_bytes, chunk_align);
}
}
if (active_chunk_.buffer_) {
ressource_->deallocate(active_chunk_.buffer_, chunk_bytes, chunk_align);
}
}
public:
void refill(std::size_t count = default_chunk_refill) {
for (std::size_t i = 0; i < count; ++i) {
void* raw = ressource_->allocate(sizeof(Tp) * default_chunk_capacity, alignof(Tp));
free_chunks_.emplace_back(Chunk{ static_cast<std::byte*>(raw), default_chunk_capacity, 0 });
}
}
public:
[[nodiscard]] Tp* allocate(std::size_t count) {
if (free_chunks_.size() <= default_chunk_refill_treshold) {
refill();
}
if (!active_chunk_.buffer_) {
if (free_chunks_.empty()) throw std::bad_alloc();
active_chunk_ = std::move(free_chunks_.back());
free_chunks_.pop_back();
}
std::size_t requested = sizeof(Tp) * count;
std::size_t aligned = (active_chunk_.offset_ + (alignof(Tp) - 1)) & ~(alignof(Tp) - 1);
std::size_t next_offset = aligned + requested;
if (next_offset >= active_chunk_.capacity_) {
if (free_chunks_.empty()) throw std::bad_alloc();
active_chunk_ = std::move(free_chunks_.back());
free_chunks_.pop_back();
aligned = (active_chunk_.offset_ + (alignof(Tp) - 1)) & ~(alignof(Tp) - 1);
next_offset = aligned + requested;
if (next_offset > active_chunk_.capacity_) throw std::bad_alloc();
}
active_chunk_.offset_ = next_offset;
return reinterpret_cast<Tp*>(active_chunk_.buffer_ + aligned);
}
[[nodiscard]] void* allocate_bytes(std::size_t bytes, std::size_t alignment) {
if (free_chunks_.size() <= default_chunk_refill_treshold) {
refill();
}
if (!active_chunk_.buffer_) {
if (free_chunks_.empty()) throw std::bad_alloc();
active_chunk_ = std::move(free_chunks_.back());
free_chunks_.pop_back();
}
std::size_t aligned = (active_chunk_.offset_ + (alignment - 1)) & ~(alignment - 1);
std::size_t next_offset = aligned + bytes;
if (next_offset >= active_chunk_.capacity_) {
if (free_chunks_.empty()) throw std::bad_alloc();
active_chunk_ = std::move(free_chunks_.back());
free_chunks_.pop_back();
aligned = (active_chunk_.offset_ + (alignment - 1)) & ~(alignment - 1);
next_offset = aligned + bytes;
if (next_offset > active_chunk_.capacity_) throw std::bad_alloc();
}
active_chunk_.offset_ = next_offset;
return reinterpret_cast<void*>(active_chunk_.buffer_ + aligned);
}
private:
std::vector<Chunk> free_chunks_;
Chunk active_chunk_;
memory_resource* ressource_;
};
} // namespace exotic::memory
namespace exotic::intern {
class alignas(std::hardware_destructive_interference_size) StringInterner {
public:
explicit StringInterner(exotic::memory::unsynchronized_chunk_allocator<char>&& upstream)
: ressource_{ std::move(upstream) } {}
public:
const char* intern(std::string_view sv) {
{
std::shared_lock<std::shared_mutex> rlock(shared_mutex_);
if (auto it = table_.find(sv); it != table_.end())
return it->data();
}
{
std::unique_lock<std::shared_mutex> wlock(shared_mutex_);
if (auto it = table_.find(sv); it != table_.end())
return it->data();
char* cptr = static_cast<char*>(ressource_.allocate_bytes(sv.size() + 1, 1));
std::memcpy(cptr, sv.data(), sv.size());
cptr[sv.size()] = '\0';
table_.emplace(std::string_view(cptr));
return cptr;
}
}
private:
std::unordered_set<std::string_view> table_;
exotic::memory::unsynchronized_chunk_allocator<char> ressource_;
std::shared_mutex shared_mutex_;
};
template<std::size_t N>
struct ShardedStringInterner {
public:
using Shard = StringInterner;
explicit ShardedStringInterner(exotic::memory::memory_resource* upstream) {
for (std::size_t i = 0; i < N; ++i) {
shards_[i] = ::new Shard(exotic::memory::unsynchronized_chunk_allocator<char>(upstream));
}
}
~ShardedStringInterner() noexcept {
for (auto* shard : shards_) {
delete shard;
}
}
public:
const char* intern(std::string_view sv) {
std::hash<std::string_view> h;
std::size_t index = h(sv) % N;
return shards_[index]->intern(sv);
}
private:
std::array<Shard*, N> shards_;
};
} // namespace exotic::intern
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment