Last active
August 28, 2024 22:48
-
-
Save bqqbarbhg/38bbe9e7707e76ef5df8f0206696f89f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#if 1 | |
#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <mutex>
#include <new>
#include <thread>
#include <unordered_set>
#include <utility>
#include <vector>
// Assertion helper for the GC stress test: prints and traps into the
// debugger instead of aborting, so the faulting state can be inspected live.
// noinline keeps each call site small and gives the breakpoint a stable
// location. MSVC-specific (__declspec, __debugbreak).
__declspec(noinline) void ebi_assert(bool cond)
{
    if (!cond)
    {
        printf("ASSERTION HIT!\n");
        __debugbreak();
    }
}
// Removes vec[index] in O(1) by overwriting it with the last element and
// shrinking the vector. Does not preserve element order.
// Precondition: !vec.empty() && index < vec.size().
template <typename T>
void swap_out(std::vector<T> &vec, size_t index) {
    if (index + 1 < vec.size()) {
        // Move instead of copy (the original copied); the guard also avoids
        // a self-move when index already refers to the last element.
        vec[index] = std::move(vec.back());
    }
    vec.pop_back();
}
#define MAX_SLOTS 128  // max GC-visible local root slots per thread
#define MAX_FREED 32   // per-thread deferred-free buffer size before flushing to the VM
#define MAX_MARK 32    // per-thread mark buffer size before flushing to the VM
#define MAX_THREADS 64 // fixed-size thread table in EbiVm
using mutex_guard = std::lock_guard<std::mutex>;
struct EbiVm;
// Minimal non-owning view over a contiguous array (pre-C++20 std::span
// stand-in). Aggregate-initialized as { pointer, count }.
template <typename T>
struct span
{
    T *data;      // first element (not owned)
    size_t count; // number of elements
    T *begin() const { return data; }
    T *end() const { return data + count; }
};
// Magic value ("ebit" as little-endian ASCII) used to sanity-check that a
// header's type pointer still refers to a live EbiType.
#define EBI_TYPE_MAGIC 0x74696265
struct EbiType
{
    uint32_t magic;                    // must equal EBI_TYPE_MAGIC
    std::vector<uint32_t> slotOffsets; // pointer-slot indices (in void* units) inside an instance
};
// Header prepended to every GC allocation; the user pointer is header + 1.
struct EbiAllocation
{
    EbiType *type;       // layout descriptor; doubles as a liveness canary via its magic
    uint32_t mark;       // last sweep mark that reached this allocation
    uint32_t size;       // user payload size in bytes
    uint32_t freeStatus; // 0 = live, 1 = queued for / released by the collector
    bool reallyFreed;    // set just before free(); lets stale readers assert
};
// A deferred free request: the pointer may only be reclaimed once every
// thread has checkpointed past `generation`.
struct EbiFree
{
    void *pointer;
    uint32_t generation; // generation in which the free was requested
};
// Per-mutator-thread GC state. The "Hot" half is touched on every VM
// operation by the owning thread only; the "Cold" half is a snapshot
// published at checkpoints for the collector thread to read.
struct EbiThread
{
    // -- Hot
    EbiVm *vm;
    uint32_t index;                    // slot in EbiVm::threads
    uint32_t localGeneration;          // last VM generation this thread synced to
    uint32_t localMark;                // sweep mark currently in effect for this thread
    std::atomic_uint32_t vmGeneration; // written by the collector to request a checkpoint
    uint64_t allocationCount;
    uint64_t allocationBytes;
    void *locals[MAX_SLOTS];           // GC root slots held by this thread
    uint32_t numLocals = 0;
    EbiFree freeList[MAX_FREED];       // pending frees; flushed to the VM when full
    uint32_t numFreed = 0;
    void *markList[MAX_MARK];          // pending marks; flushed to the VM when full
    uint32_t numMarked = 0;
    // -- Cold
    char prePad[64]; // presumably padding to keep the collector-read fields off the hot cache line — TODO confirm 64B assumption
    std::atomic_uint32_t seenGeneration; // last generation acknowledged; the collector waits on this
    void *localsSnapshot[MAX_SLOTS];     // copy of locals[] taken at the last checkpoint
    uint32_t numLocalsSnapshot = 0;
    uint64_t allocationCountSnapshot;
    uint64_t allocationBytesSnapshot;
    char postPad[64];
};
// Global VM / collector state shared by all threads.
struct EbiVm
{
    std::atomic_uint32_t generation; // bumped by collectStart; threads acknowledge via seenGeneration
    std::atomic_uint32_t sweepMark;  // current mark value; bumped by markStart
    std::mutex freeListMutex;
    std::vector<EbiFree> freeList;         // frees flushed by mutator threads (guarded by freeListMutex)
    std::vector<EbiFree> internalFreeList; // collector-private: waiting for their generation to pass
    std::vector<EbiFree> activeFreeList;   // collector-private: eligible, waiting to become unreferenced
    std::mutex markSetMutex;
    std::unordered_set<void*> markSet;         // marks flushed by mutator threads (guarded by markSetMutex)
    std::unordered_set<void*> internalMarkSet; // collector-private scratch
    std::vector<void*> internalMarkList;       // collector-private mark worklist
    std::vector<void*> activeMarkList;         // collector-private: batch currently being expanded
    EbiThread threads[MAX_THREADS];
    uint32_t numThreads = 0;
    uint64_t freedAllocationCount = 0;
    uint64_t freedAllocationBytes = 0;
    bool markedRoots = false;           // roots already scanned in the current sweep?
    uint32_t markStartGeneration = ~0u; // generation after which marking may begin
    std::mutex allocMutex;
    std::unordered_set<EbiAllocation*> allocs; // every live allocation, enumerated by the sweep
};
// Recovers the allocation header that sits immediately before a user
// pointer, asserting that the allocation still looks alive.
EbiAllocation *ebiAllocHeader(void *ptr)
{
    char *raw = (char*)ptr;
    EbiAllocation *header = (EbiAllocation*)(raw - sizeof(EbiAllocation));
    ebi_assert(header->type != nullptr);
    ebi_assert(header->type->magic == EBI_TYPE_MAGIC);
    ebi_assert(!header->reallyFreed);
    return header;
}
// Registers a new mutator thread with the VM and returns its slot,
// initialized to the current generation and sweep mark.
// NOTE(review): numThreads is a plain uint32_t and main() calls this from
// several std::threads concurrently — the increment races; confirm intent.
EbiThread *ebiNewThread(EbiVm *vm)
{
    uint32_t index = vm->numThreads++;
    ebi_assert(index < MAX_THREADS); // previously unchecked: overrunning the fixed table is UB
    EbiThread *thread = &vm->threads[index];
    thread->vm = vm;
    thread->index = index;
    thread->localMark = vm->sweepMark;
    thread->localGeneration = vm->generation;
    thread->seenGeneration.store(vm->generation);
    thread->vmGeneration.store(vm->generation);
    return thread;
}
// Publishes the thread-local mark buffer into the VM-wide mark set and
// resets the buffer.
void flushMarks(EbiThread &thread)
{
    EbiVm *vm = thread.vm;
    void **first = thread.markList;
    void **last = first + thread.numMarked;
    {
        mutex_guard mg { vm->markSetMutex };
        vm->markSet.insert(first, last);
    }
    thread.numMarked = 0;
}
// Slow path of a checkpoint: snapshots this thread's roots and counters
// for the collector, adopts the current sweep mark, flushes pending marks,
// and finally acknowledges the generation. seenGeneration is stored last
// (seq_cst) so the collector only observes a fully written snapshot.
// Fix: removed the unused local `threadIndex` the original computed.
__declspec(noinline) void ebiCheckpointSlow(EbiThread &thread)
{
    uint32_t generation = thread.vmGeneration.load(std::memory_order_relaxed);
    thread.numLocalsSnapshot = thread.numLocals;
    for (uint32_t i = 0; i < thread.numLocals; i++) {
        thread.localsSnapshot[i] = thread.locals[i];
    }
    thread.allocationBytesSnapshot = thread.allocationBytes;
    thread.allocationCountSnapshot = thread.allocationCount;
    thread.localMark = thread.vm->sweepMark;
    flushMarks(thread);
    thread.localGeneration = generation;
    thread.seenGeneration.store(generation);
}
// Fast-path checkpoint: a single relaxed load on the common path; drops
// into the slow path only when the collector has advanced the generation.
__forceinline void ebiCheckpoint(EbiThread &thread)
{
    uint32_t requested = thread.vmGeneration.load(std::memory_order_relaxed);
    if (requested == thread.localGeneration) return;
    ebiCheckpointSlow(thread);
}
// Allocates `size` bytes of GC-managed memory described by `type`.
// Layout is [EbiAllocation header][payload]; returns the payload pointer,
// or nullptr on allocation failure.
void *ebiAllocate(EbiThread &thread, EbiType *type, size_t size)
{
    ebi_assert(size <= 0xffffffffu); // header stores size as uint32_t; the cast below used to truncate silently
    EbiAllocation *alloc = (EbiAllocation*)malloc(sizeof(EbiAllocation) + size);
    if (!alloc) return nullptr;
    alloc->mark = thread.localMark; // born with the current mark so an in-flight sweep keeps it
    alloc->size = (uint32_t)size;
    alloc->type = type;
    alloc->freeStatus = 0;
    alloc->reallyFreed = false;
    {
        mutex_guard mg { thread.vm->allocMutex };
        thread.vm->allocs.insert(alloc);
    }
    thread.allocationCount += 1;
    thread.allocationBytes += size;
    return alloc + 1; // skip past the header
}
// Publishes the thread-local deferred-free buffer into the VM-wide free
// list and resets the buffer.
void flushFree(EbiThread &thread)
{
    EbiVm *vm = thread.vm;
    const EbiFree *first = thread.freeList;
    const EbiFree *last = first + thread.numFreed;
    {
        mutex_guard mg { vm->freeListMutex };
        vm->freeList.insert(vm->freeList.end(), first, last);
    }
    thread.numFreed = 0;
}
// Queues `pointer` for deferred reclamation: it is only actually released
// once every thread has checkpointed past the generation recorded here.
// Fix: removed the unused local `alloc` which re-derived the header by
// hand (without ebiAllocHeader's validity checks) and was never read.
void ebiFree(EbiThread &thread, void *pointer)
{
    if (!pointer) return;
    uint32_t index = thread.numFreed++;
    thread.freeList[index] = { pointer, thread.localGeneration };
    if (index + 1 == MAX_FREED) {
        flushFree(thread);
    }
}
// Mutator-side write-barrier mark: if a sweep is active (localMark != 0)
// and the allocation hasn't reached the current mark yet, stamp it and
// buffer the pointer so the collector can trace through it later.
// NOTE(review): alloc->mark is read/modified from multiple threads without
// synchronization; presumably tolerated by this stress test — confirm.
void ebiMark(EbiThread &thread, void *pointer)
{
    if (thread.localMark != 0 && pointer != nullptr) {
        EbiAllocation *alloc = ebiAllocHeader(pointer);
        if (alloc->mark < thread.localMark) {
            alloc->mark = thread.localMark;
            uint32_t index = thread.numMarked++;
            thread.markList[index] = pointer;
            if (index + 1 == MAX_MARK) {
                flushMarks(thread);
            }
        }
    }
}
// GC-aware pointer store: marks both the old and the new referent so
// neither can be missed by an in-flight sweep, then performs the store.
void ebiSet(EbiThread &thread, void **slot, void *pointer)
{
    ebiMark(thread, *slot);
    ebiMark(thread, pointer);
    *slot = pointer;
}
// Collector-side mark: stamps the allocation with `mark` and pushes it on
// the internal worklist for tracing. No-op for null or already-marked
// pointers.
void ebiMark(EbiVm &vm, void *pointer, uint32_t mark)
{
    if (pointer == nullptr) return;
    EbiAllocation *alloc = ebiAllocHeader(pointer);
    if (alloc->mark >= mark) return;
    alloc->mark = mark;
    vm.internalMarkList.push_back(pointer);
}
// Actually releases an allocation. The flags are written just before
// free() so racing observers that dereference a stale pointer trip an
// assert rather than silently reading freed memory.
void deallocate(EbiVm &vm, void *pointer)
{
    EbiAllocation *alloc = (EbiAllocation*)((char*)pointer - sizeof(EbiAllocation)); // manual header recovery (cf. ebiAllocHeader)
    vm.freedAllocationBytes += alloc->size;
    vm.freedAllocationCount++;
    {
        mutex_guard mg { vm.allocMutex };
        vm.allocs.erase(alloc);
    }
    alloc->reallyFreed = true;
    alloc->freeStatus = 1;
    free(alloc);
}
// Begins a new collection cycle: bumps the VM generation and asks every
// registered thread to checkpoint by publishing the new value.
void collectStart(EbiVm &vm)
{
    uint32_t next = vm.generation.fetch_add(1) + 1;
    span<EbiThread> registered { vm.threads, vm.numThreads };
    // Notify all threads of the new generation
    for (EbiThread &t : registered) {
        t.vmGeneration.store(next);
    }
}
// One collection step. Returns true when this generation's bookkeeping is
// done: waits for every thread to acknowledge the generation, gathers
// their checkpoint snapshots, and frees queued allocations that are both
// past their generation and no longer held by any thread's snapshot.
// Fixes: (1) the stats loop read the live allocationBytes/Count counters
// (a data race; the checkpoint snapshots exist exactly for this and were
// never read); (2) printf passed size_t for %llu, which is UB on
// platforms where the types differ — now cast explicitly.
bool collectStep(EbiVm &vm)
{
    span<EbiThread> threads { vm.threads, vm.numThreads };
    uint32_t generation = vm.generation.load();
    // 1. Make sure all threads have seen this generation
    for (EbiThread &thread : threads) {
        uint32_t gen = thread.seenGeneration.load(std::memory_order_relaxed);
        if (gen < generation) return false;
    }
    // Pairs with the threads' seenGeneration stores: everything written
    // before the acknowledgement is visible after this fence.
    std::atomic_thread_fence(std::memory_order_acquire);
    // 2. Collect all held pointers (from the checkpoint snapshots)
    std::unordered_set<void*> heldPointers;
    uint64_t totalBytes = 0;
    uint64_t totalAllocs = 0;
    for (EbiThread &thread : threads) {
        span<void*> slots { thread.localsSnapshot, thread.numLocalsSnapshot };
        for (void* slot : slots) {
            if (slot) {
                heldPointers.insert(slot);
            }
        }
        totalBytes += thread.allocationBytesSnapshot;
        totalAllocs += thread.allocationCountSnapshot;
    }
    // 3. Collect accumulated free list
    {
        mutex_guard mg { vm.freeListMutex };
        vm.internalFreeList.insert(vm.internalFreeList.end(), vm.freeList.begin(), vm.freeList.end());
        vm.freeList.clear();
    }
    // 4. Collect allocations that can be possibly freed
    for (size_t i = 0; i < vm.internalFreeList.size(); i++) {
        EbiFree free = vm.internalFreeList[i];
        if (free.generation < generation) {
            EbiAllocation *alloc = ebiAllocHeader(free.pointer);
            if (alloc->freeStatus == 0) {
                vm.activeFreeList.push_back(free);
                alloc->freeStatus = 1;
            }
            swap_out(vm.internalFreeList, i);
            i--;
        }
    }
    // 5. Try to free allocations no snapshot still references
    {
        mutex_guard mg { vm.markSetMutex };
        for (size_t i = 0; i < vm.activeFreeList.size(); i++) {
            EbiFree free = vm.activeFreeList[i];
            if (heldPointers.find(free.pointer) == heldPointers.end()) {
                vm.markSet.erase(free.pointer);
                deallocate(vm, free.pointer);
                swap_out(vm.activeFreeList, i);
                i--;
            }
        }
    }
    uint64_t memoryUsed = totalBytes - vm.freedAllocationBytes;
    uint64_t allocCount = totalAllocs - vm.freedAllocationCount;
    if (generation % 1 == 0) { // always true; presumably a debug-throttle knob left at "every generation" — confirm
        printf("%u: %.2fk allocs, %.2fkB memory (%llu+%llu leftover)\n", generation, (double)allocCount / 1e3, (double)memoryUsed / 1e3,
            (unsigned long long)vm.internalFreeList.size(), (unsigned long long)vm.activeFreeList.size());
    }
    return true;
}
void markRoots(EbiVm &vm);
// Starts a mark/sweep pass: advances the sweep mark and schedules marking
// to begin two generations from now, so every thread has checkpointed
// under the new mark before roots are scanned.
void markStart(EbiVm &vm)
{
    vm.sweepMark.fetch_add(1);
    vm.markedRoots = false;
    vm.markStartGeneration = vm.generation + 2;
}
// Runs (a slice of) the mark phase. Returns true once marking has
// converged and unreachable allocations were swept; false when more
// steps are still needed.
bool collectMark(EbiVm &vm)
{
    uint32_t mark = vm.sweepMark.load();
    span<EbiThread> threads { vm.threads, vm.numThreads };
    // Wait until every thread has had time to adopt the new sweep mark.
    if (vm.generation < vm.markStartGeneration) return false;
    if (!vm.markedRoots) {
        vm.markedRoots = true;
        // 1. Mark all thread local roots
        for (EbiThread &thread : threads) {
            span<void*> slots { thread.localsSnapshot, thread.numLocalsSnapshot };
            for (void* slot : slots) {
                if (slot) {
                    ebiMark(vm, slot, mark);
                }
            }
        }
        markRoots(vm);
    }
    // 2. Mark the heap
    uint32_t markCycles = 0;
    for (;;) {
        // Drain marks flushed by mutators into the private worklist.
        {
            mutex_guard mg { vm.markSetMutex };
            vm.internalMarkSet.swap(vm.markSet);
        }
        for (void *pointer : vm.internalMarkSet) {
            vm.internalMarkList.push_back(pointer);
        }
        vm.internalMarkSet.clear();
        if (vm.internalMarkList.empty()) break;
        markCycles++;
        // Expand the current batch; ebiMark appends newly reached
        // pointers to internalMarkList for the next iteration.
        vm.activeMarkList.swap(vm.internalMarkList);
        for (void *pointer : vm.activeMarkList) {
            EbiAllocation *alloc = ebiAllocHeader(pointer);
            void **slots = (void**)pointer;
            for (uint32_t slot : alloc->type->slotOffsets) {
                void *slotPtr = slots[slot];
                if (slotPtr) {
                    ebiMark(vm, slotPtr, mark);
                }
            }
        }
        vm.activeMarkList.clear();
    }
    // If any tracing happened this step, give mutators another round to
    // flush barrier marks before declaring convergence.
    if (markCycles > 0) {
        return false;
    }
    // 3. Sweep: anything still below the current mark is unreachable.
    std::vector<EbiAllocation*> toFree;
    {
        mutex_guard mg { vm.allocMutex };
        for (EbiAllocation *alloc : vm.allocs) {
            if (alloc->mark < mark) {
                toFree.push_back(alloc);
            }
        }
    }
    // deallocate() takes allocMutex itself, so collect first, free after.
    for (EbiAllocation *alloc : toFree) {
        deallocate(vm, alloc + 1);
    }
    return true;
}
// Dedicated collector loop: each iteration starts a new generation, waits
// for every thread to checkpoint, advances marking, and — once a sweep has
// finished and the generation count hits a multiple of 6 — starts the
// next sweep. Runs forever.
void collectorThread(EbiVm &vm)
{
    bool collectionDone = true;
    for (;;) {
        std::this_thread::sleep_for(std::chrono::milliseconds{ 1 });
        collectStart(vm);
        // Spin until every thread acknowledged the new generation.
        while (!collectStep(vm)) { }
        if (collectMark(vm)) {
            collectionDone = true;
        }
        if (collectionDone && vm.generation % 6 == 0) {
            collectionDone = false;
            markStart(vm);
        }
    }
}
// Type descriptor lookup; the primary template returns nullptr.
// Specialize per GC-visible type (see ebiTypeof<Tree> below).
template <typename T>
EbiType *ebiTypeof() { return nullptr; }
template <typename T> | |
T *newArray(EbiThread &thread, size_t count) | |
{ | |
EbiType *type = ebiTypeof<T>(); | |
T *data = (T*)ebiAllocate(thread, type, count * sizeof(T)); | |
for (T &t : span<T> { data, count }) { | |
new (&t) T(); | |
} | |
return data; | |
} | |
template <typename T> | |
T *newObject(EbiThread &thread) | |
{ | |
return newArray<T>(thread, 1); | |
} | |
#define TREE_CHILDREN 12
// Test payload: a node with TREE_CHILDREN GC-managed child pointers.
struct Tree
{
    Tree *children[TREE_CHILDREN];
};
template <> | |
static EbiType *ebiTypeof<Tree>() { | |
static EbiType type = [](){ | |
EbiType type = { EBI_TYPE_MAGIC }; | |
for (uint32_t i = 0; i < TREE_CHILDREN; i++) { | |
type.slotOffsets.push_back(i); | |
} | |
return type; | |
}(); | |
return &type; | |
} | |
Tree g_tree; // global root of the shared tree; kept alive via markRoots()
struct xorshift64_state {
    uint64_t a; // must be seeded non-zero: zero is a fixed point of xorshift
};
// One step of Marsaglia's xorshift64 generator.
// Fix: the original used only two shifts (<<7, >>9), which is not one of
// the valid full-period xorshift parameterizations; (13, 7, 17) is the
// canonical full-period triple from Marsaglia's paper.
uint64_t xorshift64(struct xorshift64_state *state)
{
    uint64_t x = state->a;
    x ^= x << 13;
    x ^= x >> 7;
    x ^= x << 17;
    return state->a = x;
}
// Explicitly frees a subtree: detaches it from its parent slot first
// (through the ebiSet write barrier), recursively detaches and frees the
// children, then queues the node itself for deferred reclamation.
void freeTree(EbiThread &thread, Tree **pTree)
{
    Tree *tree = *pTree;
    if (!tree) return;
    ebiSet(thread, (void**)pTree, nullptr);
    for (Tree *&branch : tree->children) {
        freeTree(thread, &branch);
    }
    ebiFree(thread, tree);
}
// Marks the children of the global tree root so an active sweep keeps
// everything reachable from g_tree alive.
void markRoots(EbiVm &vm)
{
    uint32_t mark = vm.sweepMark.load();
    for (Tree *child : g_tree.children) {
        ebiMark(vm, child, mark);
    }
}
// Mutator workload: a random walk over the shared tree that descends,
// detaches, frees, and replaces branches. thread.locals[0] always mirrors
// the currently held node so the GC sees it as a root. Runs forever.
// Fix: removed the unused local `old` in the detach branch.
void mutatorThread(EbiThread &thread, uint64_t seed)
{
    Tree *tree = nullptr;
    thread.numLocals = 1;
    thread.locals[0] = nullptr;
    uint32_t steps = 100000;
    xorshift64_state rng = { seed };
    for (uint32_t iter = 0; ; iter++) {
        uint32_t inst = (uint32_t)xorshift64(&rng);
        ebiCheckpoint(thread);
        if (iter % steps == 0) {
            std::this_thread::sleep_for(std::chrono::milliseconds{1});
        }
        if (!tree) {
            tree = &g_tree; // restart the walk from the global root
        }
        uint32_t op = inst & 0xff;
        uint32_t arg = inst >> 8;
        if (op < 64) {
            // Descend into a random existing child.
            Tree *branch = tree->children[arg % TREE_CHILDREN];
            if (branch) {
                ebiAllocHeader(branch); // liveness check only
                thread.locals[0] = branch;
                tree = branch;
            }
        } else if (op < 70) {
            // Drop the held node.
            thread.locals[0] = nullptr;
            tree = nullptr;
        } else if (op < 75) {
            // Detach a random child (left for the GC) and drop our hold.
            Tree **branch = &tree->children[arg % TREE_CHILDREN];
            ebiSet(thread, (void**)branch, nullptr);
            tree = nullptr;
            thread.locals[0] = nullptr;
        } else if (op < 78) {
            // Explicitly free a random subtree.
            Tree **branch = &tree->children[arg % TREE_CHILDREN];
            freeTree(thread, branch);
            thread.locals[0] = nullptr;
        } else if (op < 240) {
            // Replace a random child with a fresh node and hold it.
            Tree **branch = &tree->children[arg % TREE_CHILDREN];
            tree = newObject<Tree>(thread);
            ebiSet(thread, (void**)branch, tree);
            thread.locals[0] = tree;
        } else {
            // Free a random subtree, then install a fresh node in its place.
            Tree **branch = &tree->children[arg % TREE_CHILDREN];
            tree = newObject<Tree>(thread);
            freeTree(thread, branch);
            ebiSet(thread, (void**)branch, tree);
            thread.locals[0] = tree;
        }
    }
}
// Read-only workload: walks the tree while holding at most one node in
// locals[0], checking that every node it touches still looks alive.
// Fix: removed `loadGeneration`, which was assigned but never read.
// NOTE(review): freeStatus is only ever set to 0 or 1 in this file, so the
// `!= 2` checks can never fire — possibly a disabled assertion; confirm.
void observerThread(EbiThread &thread, uint64_t seed)
{
    Tree *tree = nullptr;
    thread.numLocals = 1;
    thread.locals[0] = nullptr;
    uint32_t steps = 1000;
    xorshift64_state rng = { seed };
    for (uint32_t iter = 0; ; iter++) {
        uint32_t inst = (uint32_t)xorshift64(&rng);
        if (tree && tree != &g_tree) {
            EbiAllocation *allocation = ebiAllocHeader(tree);
            ebi_assert(allocation->freeStatus != 2);
        }
        if (iter % steps == 0) {
            // std::this_thread::sleep_for(std::chrono::milliseconds{1});
        }
        ebiCheckpoint(thread);
        if (!tree) {
            tree = &g_tree;
            thread.locals[0] = nullptr;
        }
        uint32_t op = inst & 0x7f;
        uint32_t arg = inst >> 8;
        if (op < 64) {
            // Descend into a random existing child and hold it.
            Tree *branch = tree->children[arg % TREE_CHILDREN];
            if (branch) {
                tree = branch;
                thread.locals[0] = tree;
                EbiAllocation *allocation = ebiAllocHeader(branch);
                ebi_assert(allocation->freeStatus != 2);
            }
        } else if (op < 126) {
            // Nop
        } else {
            // Drop the held node.
            thread.locals[0] = nullptr;
            tree = nullptr;
        }
    }
}
// Depth-first random traversal that holds one local root slot per
// recursion level, exercising deep locals stacks. The depth cap of 64
// keeps numLocals within MAX_SLOTS (128).
// Fix: removed a sleep_for that sat after `break` and was unreachable.
void recursiveObserve(EbiThread &thread, xorshift64_state &rng, Tree *tree, uint32_t depth)
{
    if (depth > 64) return;
    ebiCheckpoint(thread);
    uint32_t loc = thread.numLocals++;
    thread.locals[loc] = nullptr;
    for (uint32_t i = 0; i < 70; i++) {
        uint32_t value = (uint32_t)xorshift64(&rng);
        uint32_t index = value % TREE_CHILDREN;
        if (value % 10000 == 0) {
            break;
        }
        Tree *branch = tree->children[index];
        if (!branch) continue;
        EbiAllocation *allocation = ebiAllocHeader(branch);
        ebi_assert(allocation->freeStatus != 2);
        thread.locals[loc] = branch;
        recursiveObserve(thread, rng, branch, depth + 1);
    }
    thread.numLocals--;
}
// Thread entry: endlessly re-runs the recursive traversal from the root.
// Fix: removed the unused local `steps`.
void recursiveThread(EbiThread &thread, uint64_t seed)
{
    xorshift64_state rng = { seed };
    for (;;) {
        recursiveObserve(thread, rng, &g_tree, 0);
    }
}
EbiVm g_vm; // single global VM instance shared by all test threads
#define NUM_OBSERVERS 10 // NOTE(review): unused; thread counts are hard-coded in main() — confirm
int main(int argc, char **argv) | |
{ | |
EbiVm *vm = &g_vm; | |
vm->generation.store(1); | |
std::vector<std::thread> observers; | |
std::thread mt1 { [=] { mutatorThread(*ebiNewThread(vm), 1); } }; | |
std::thread mt2 { [=] { mutatorThread(*ebiNewThread(vm), 2); } }; | |
std::thread ot { [=] { observerThread(*ebiNewThread(vm), 100); } }; | |
std::vector<std::thread> rts; | |
for (size_t i = 0; i < 4; i++) { | |
rts.emplace_back([=] { recursiveThread(*ebiNewThread(vm), i*1000); }); | |
} | |
std::thread ct { [=] { collectorThread(*vm); } }; | |
ct.join(); | |
return 0; | |
} | |
#endif |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment