Created
May 24, 2026 20:19
-
-
Save unrays/33f61b9ed729c31796658a26d31e47e9 to your computer and use it in GitHub Desktop.
High-performance sharded string interner using custom memory allocation and low-contention concurrency design.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // Copyright (c) May 2026 Félix-Olivier Dumas. All rights reserved. | |
| // Licensed under the terms described in the LICENSE file | |
| #pragma once | |
| #include <cstddef> | |
| #include <stdexcept> | |
| #include <atomic> | |
| #include <iostream> | |
| #include <new> | |
| #include <utility> | |
| #include <vector> | |
| #include <cstring> | |
| #include <mutex> | |
| #include <shared_mutex> | |
| #include <string_view> | |
| #include <unordered_set> | |
| #include <functional> | |
| #include <array> | |
| namespace exotic::memory { | |
| class memory_resource { | |
| public: | |
| virtual ~memory_resource() = default; | |
| public: | |
| [[nodiscard]] void* allocate(std::size_t bytes, std::size_t alignment) noexcept { | |
| if (alignment == 0 || (alignment & (alignment - 1)) != 0) [[unlikely]] | |
| throw std::invalid_argument("alignment must be a power of two"); | |
| return do_allocate(bytes, alignment); | |
| } | |
| void deallocate(void* p, std::size_t bytes, std::size_t alignment) noexcept { | |
| if (alignment == 0 || (alignment & (alignment - 1)) != 0) [[unlikely]] | |
| throw std::invalid_argument("alignment must be a power of two"); | |
| do_deallocate(p, bytes, alignment); | |
| } | |
| bool is_equal(const memory_resource& other) const noexcept { | |
| return do_is_equal(other); | |
| } | |
| protected: | |
| virtual void* do_allocate(std::size_t bytes, std::size_t alignment) = 0; | |
| virtual void do_deallocate(void* ptr, std::size_t bytes, std::size_t alignment) = 0; | |
| virtual bool do_is_equal(const memory_resource& other) const noexcept = 0; | |
| }; | |
| } // namespace exotic::memory | |
| namespace exotic::memory { | |
| struct monotonic_atomic_buffer : public memory_resource { | |
| public: | |
| explicit monotonic_atomic_buffer(std::size_t p_capacity) | |
| : capacity_(p_capacity) | |
| , offset_(0) | |
| , buffer_(new std::byte[p_capacity]) | |
| {} | |
| monotonic_atomic_buffer(const monotonic_atomic_buffer&) = delete; | |
| monotonic_atomic_buffer& operator=(const monotonic_atomic_buffer&) = delete; | |
| ~monotonic_atomic_buffer() noexcept override { | |
| delete[] buffer_; | |
| } | |
| protected: | |
| void* do_allocate(std::size_t bytes, std::size_t alignment) override { | |
| std::size_t current = offset_.load(std::memory_order_relaxed); | |
| while (true) { | |
| std::size_t aligned = (current + (alignment - 1)) & ~(alignment - 1); | |
| std::size_t next = aligned + bytes; | |
| if (next > capacity_) | |
| return nullptr; | |
| if (offset_.compare_exchange_weak( | |
| current, | |
| next, | |
| std::memory_order_relaxed)) { | |
| return buffer_ + aligned; | |
| } | |
| } | |
| } | |
| void do_deallocate(void*, std::size_t, std::size_t) override {} | |
| bool do_is_equal(const memory_resource& other) const noexcept override { | |
| return this == &other; | |
| } | |
| public: | |
| [[nodiscard]] constexpr std::size_t capacity() const noexcept { return capacity_; } | |
| [[nodiscard]] std::size_t size() const noexcept { return offset_.load(std::memory_order_relaxed); } | |
| private: | |
| std::byte* buffer_; | |
| std::atomic<std::size_t> offset_; | |
| std::size_t capacity_; | |
| }; | |
| } // namespace exotic::memory | |
| namespace exotic::memory { | |
| template<typename Tp> | |
| struct unsynchronized_chunk_allocator { | |
| private: | |
| static constexpr std::size_t default_chunk_capacity = 1024; | |
| static constexpr std::size_t default_initial_reserve = 10; | |
| static constexpr std::size_t default_chunk_refill = 5; | |
| static constexpr std::size_t default_chunk_refill_treshold = 1; | |
| public: | |
| struct Chunk { | |
| std::byte* buffer_ = nullptr; | |
| std::size_t capacity_ = 0; | |
| std::size_t offset_ = 0; | |
| }; | |
| public: | |
| explicit unsynchronized_chunk_allocator(memory_resource* upstream, std::size_t reserve = default_initial_reserve) | |
| : ressource_{ upstream } | |
| { | |
| if (upstream == nullptr) [[unlikely]] | |
| throw std::invalid_argument("Upstream memory resource cannot be null"); | |
| if (reserve == 0) [[unlikely]] | |
| throw std::invalid_argument("reserve must be >= 1"); | |
| free_chunks_.reserve(std::size_t{ default_initial_reserve }); | |
| for (std::size_t i = 0; i < reserve - 1; ++i) { | |
| void* raw = ressource_->allocate(sizeof(Tp) * default_chunk_capacity, alignof(Tp)); | |
| free_chunks_.emplace_back(Chunk{ static_cast<std::byte*>(raw), default_chunk_capacity, 0 }); | |
| } | |
| void* raw = ressource_->allocate(sizeof(Tp) * default_chunk_capacity, alignof(Tp)); | |
| active_chunk_ = Chunk{ static_cast<std::byte*>(raw), default_chunk_capacity, 0 }; | |
| } | |
| ~unsynchronized_chunk_allocator() noexcept { | |
| const std::size_t chunk_bytes = sizeof(Tp) * default_chunk_capacity; | |
| const std::size_t chunk_align = alignof(Tp); | |
| for (auto& chunk : free_chunks_) { | |
| if (chunk.buffer_) { | |
| ressource_->deallocate(chunk.buffer_, chunk_bytes, chunk_align); | |
| } | |
| } | |
| if (active_chunk_.buffer_) { | |
| ressource_->deallocate(active_chunk_.buffer_, chunk_bytes, chunk_align); | |
| } | |
| } | |
| public: | |
| void refill(std::size_t count = default_chunk_refill) { | |
| for (std::size_t i = 0; i < count; ++i) { | |
| void* raw = ressource_->allocate(sizeof(Tp) * default_chunk_capacity, alignof(Tp)); | |
| free_chunks_.emplace_back(Chunk{ static_cast<std::byte*>(raw), default_chunk_capacity, 0 }); | |
| } | |
| } | |
| public: | |
| [[nodiscard]] Tp* allocate(std::size_t count) { | |
| if (free_chunks_.size() <= default_chunk_refill_treshold) { | |
| refill(); | |
| } | |
| if (!active_chunk_.buffer_) { | |
| if (free_chunks_.empty()) throw std::bad_alloc(); | |
| active_chunk_ = std::move(free_chunks_.back()); | |
| free_chunks_.pop_back(); | |
| } | |
| std::size_t requested = sizeof(Tp) * count; | |
| std::size_t aligned = (active_chunk_.offset_ + (alignof(Tp) - 1)) & ~(alignof(Tp) - 1); | |
| std::size_t next_offset = aligned + requested; | |
| if (next_offset >= active_chunk_.capacity_) { | |
| if (free_chunks_.empty()) throw std::bad_alloc(); | |
| active_chunk_ = std::move(free_chunks_.back()); | |
| free_chunks_.pop_back(); | |
| aligned = (active_chunk_.offset_ + (alignof(Tp) - 1)) & ~(alignof(Tp) - 1); | |
| next_offset = aligned + requested; | |
| if (next_offset > active_chunk_.capacity_) throw std::bad_alloc(); | |
| } | |
| active_chunk_.offset_ = next_offset; | |
| return reinterpret_cast<Tp*>(active_chunk_.buffer_ + aligned); | |
| } | |
| [[nodiscard]] void* allocate_bytes(std::size_t bytes, std::size_t alignment) { | |
| if (free_chunks_.size() <= default_chunk_refill_treshold) { | |
| refill(); | |
| } | |
| if (!active_chunk_.buffer_) { | |
| if (free_chunks_.empty()) throw std::bad_alloc(); | |
| active_chunk_ = std::move(free_chunks_.back()); | |
| free_chunks_.pop_back(); | |
| } | |
| std::size_t aligned = (active_chunk_.offset_ + (alignment - 1)) & ~(alignment - 1); | |
| std::size_t next_offset = aligned + bytes; | |
| if (next_offset >= active_chunk_.capacity_) { | |
| if (free_chunks_.empty()) throw std::bad_alloc(); | |
| active_chunk_ = std::move(free_chunks_.back()); | |
| free_chunks_.pop_back(); | |
| aligned = (active_chunk_.offset_ + (alignment - 1)) & ~(alignment - 1); | |
| next_offset = aligned + bytes; | |
| if (next_offset > active_chunk_.capacity_) throw std::bad_alloc(); | |
| } | |
| active_chunk_.offset_ = next_offset; | |
| return reinterpret_cast<void*>(active_chunk_.buffer_ + aligned); | |
| } | |
| private: | |
| std::vector<Chunk> free_chunks_; | |
| Chunk active_chunk_; | |
| memory_resource* ressource_; | |
| }; | |
| } // namespace exotic::memory | |
| namespace exotic::intern { | |
| class alignas(std::hardware_destructive_interference_size) StringInterner { | |
| public: | |
| explicit StringInterner(exotic::memory::unsynchronized_chunk_allocator<char>&& upstream) | |
| : ressource_{ std::move(upstream) } {} | |
| public: | |
| const char* intern(std::string_view sv) { | |
| { | |
| std::shared_lock<std::shared_mutex> rlock(shared_mutex_); | |
| if (auto it = table_.find(sv); it != table_.end()) | |
| return it->data(); | |
| } | |
| { | |
| std::unique_lock<std::shared_mutex> wlock(shared_mutex_); | |
| if (auto it = table_.find(sv); it != table_.end()) | |
| return it->data(); | |
| char* cptr = static_cast<char*>(ressource_.allocate_bytes(sv.size() + 1, 1)); | |
| std::memcpy(cptr, sv.data(), sv.size()); | |
| cptr[sv.size()] = '\0'; | |
| table_.emplace(std::string_view(cptr)); | |
| return cptr; | |
| } | |
| } | |
| private: | |
| std::unordered_set<std::string_view> table_; | |
| exotic::memory::unsynchronized_chunk_allocator<char> ressource_; | |
| std::shared_mutex shared_mutex_; | |
| }; | |
| template<std::size_t N> | |
| struct ShardedStringInterner { | |
| public: | |
| using Shard = StringInterner; | |
| explicit ShardedStringInterner(exotic::memory::memory_resource* upstream) { | |
| for (std::size_t i = 0; i < N; ++i) { | |
| shards_[i] = ::new Shard(exotic::memory::unsynchronized_chunk_allocator<char>(upstream)); | |
| } | |
| } | |
| ~ShardedStringInterner() noexcept { | |
| for (auto* shard : shards_) { | |
| delete shard; | |
| } | |
| } | |
| public: | |
| const char* intern(std::string_view sv) { | |
| std::hash<std::string_view> h; | |
| std::size_t index = h(sv) % N; | |
| return shards_[index]->intern(sv); | |
| } | |
| private: | |
| std::array<Shard*, N> shards_; | |
| }; | |
| } // namespace exotic::intern |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment