alloc.cc
// #define NDEBUG
#include <bits/stdc++.h>
#define DEBUGPRINTF
#undef DEBUGPRINTF
using namespace std;
int cpuid = 0;
mutex _map_mutex;
#define ROUNDUP(a, sz) ((((uintptr_t)a) + (sz)-1) & ~((sz)-1))
// #define ROUNDDOWN(a, sz) ((((uintptr_t)a)) & ~((sz)-1))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define ThreadNums 3
int cpu_count()
{
    return ThreadNums + 1; // worker threads plus the main thread
}
unordered_map<thread::id, int> _map;
int cpu_current()
{
    lock_guard<mutex> lk(_map_mutex);
    return _map[this_thread::get_id()];
}
typedef struct mutex_t
{
    mutex t;
    int flag; // 1 for available, 0 for locked
} mutex_t;
static inline void lock_init(mutex_t *lock)
{
    lock->flag = 1;
    assert(lock->flag == 1);
}
static inline void lock(mutex_t *lock)
{
    lock->t.lock();
    lock->flag = 0;
}
static inline void unlock(mutex_t *lock)
{
    lock->flag = 1;
    lock->t.unlock();
}
static inline bool try_lock(mutex_t *lock)
{
    if (lock->t.try_lock())
    {
        lock->flag = 0;
        return true;
    }
    return false;
}
#define LOG2(X) ((unsigned)(8 * sizeof(unsigned long long) - __builtin_clzll((X)) - 1))
// return the smallest power of 2 that is >= x
static inline size_t next_pow2(size_t x)
{
    if (sizeof(size_t) == 8)
        return x == 1 ? 1 : (size_t)1 << (size_t)(64 - __builtin_clzl(x - 1));
    else
        return x == 1 ? 1 : (size_t)1 << (size_t)(32 - __builtin_clz(x - 1));
}
// return the largest power of 2 that is <= s
static inline size_t pre_pow2(size_t s)
{
    assert(s >= 1ul);
    size_t x = next_pow2(s);
    if (s == x)
        return s;
    else
        return x >> 1;
}
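/*
 * A minimal sanity-check sketch, not part of the original allocator: it
 * illustrates the expected behavior of LOG2 / next_pow2 / pre_pow2 on a few
 * values. The function name power_of_two_examples is hypothetical; call it
 * from main() during debugging if desired.
 */
[[maybe_unused]] static void power_of_two_examples()
{
    assert(LOG2(1) == 0 && LOG2(4096) == 12);
    assert(next_pow2(1) == 1 && next_pow2(5) == 8 && next_pow2(4096) == 4096);
    assert(pre_pow2(1) == 1 && pre_pow2(5) == 4 && pre_pow2(4096) == 4096);
}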
#define B (1ul)
#define KB (1024 * B)
#define MB (1024 * KB)
#define GB (1024 * MB)
#define TB (1024 * GB)
// binary-tree index helpers for the buddy bitmap (0-based, root at index 0)
#define LCHILD(x) ((x)*2 + 1)
#define RCHILD(x) ((x)*2 + 2)
#define PARENT(x) (((x)-1) / 2)
#define BUDDY(x) ((((x)-1) ^ 1) + 1)
typedef struct list_t
{
    struct list_t *prev, *next;
} list_t;
#define MIN_BUDDY_ALLOC_SIZE (4 * KB)
// Please forgive my poor English.
struct buddy_allocator
{
    /*
     * MIN_BUDDY_ALLOC_SIZE is (4 * KB).
     * MAX_BUDDY_ALLOC_SIZE is calculated as pre_pow2(heap-size):
     * the largest power of 2 that is <= the heap size,
     * and it must be larger than MIN_BUDDY_ALLOC_SIZE.
     */
    size_t MAX_BUDDY_ALLOC_SIZE;
    uintptr_t buddy_start;    // buddy allocator start address
    size_t buddy_size;        // buddy allocator size (in bytes)
    uintptr_t reserved_start; // start address of the memory reserved for management
    size_t reserved_size;     // size of the memory reserved for management (in bytes)
    uintptr_t unused_start;   // unused memory start address
    size_t unused_size;       // unused memory size (in bytes)
    uint8_t bucket_num;       // the number of buckets (free lists)
    list_t *buckets;          // buckets[bucket_num]; each bucket is a free list for blocks of one specific size
    uint8_t *bitmap;          // 0 for available, 1 for used
    size_t bitmap_size;
    mutex_t _mutex;
    /*
     * bitmap and buckets must be modified together;
     * hold the lock while touching either of them
     */
};
bool get_bitmap(uint8_t *base, size_t index)
{
    size_t i = index / 8;
    size_t j = index % 8;
    return (base[i] >> j) & 1;
}
void set_bitmap(uint8_t *base, size_t index)
{
    size_t i = index / 8;
    size_t j = index % 8;
    base[i] |= (uint8_t)(1 << j);
}
void unset_bitmap(uint8_t *base, size_t index)
{
    size_t i = index / 8;
    size_t j = index % 8;
    base[i] &= ~(uint8_t)(1 << j);
}
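/*
 * A minimal sketch (not in the original) showing the bitmap helpers'
 * round-trip behavior on a single byte.
 */
[[maybe_unused]] static void bitmap_example()
{
    uint8_t byte = 0;
    set_bitmap(&byte, 3); // set bit 3 -> byte == 0b00001000
    assert(get_bitmap(&byte, 3) == true);
    assert(byte == 0x08);
    unset_bitmap(&byte, 3); // clear it again
    assert(get_bitmap(&byte, 3) == false && byte == 0);
}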
/*
 * Initialize a list to empty. Because these are circular lists, an "empty"
 * list is an entry where both links point to itself. This makes insertion
 * and removal simpler because they don't need any branches.
 */
static inline void list_init(list_t *list)
{
    list->prev = list;
    list->next = list;
}
/*
 * Append the provided entry to the end of the list. This assumes the entry
 * isn't in a list already because it overwrites the linked list pointers.
 */
static inline void list_push(list_t *list, list_t *entry)
{
    list_t *prev = list->prev;
    entry->prev = prev;
    entry->next = list;
    prev->next = entry;
    list->prev = entry;
}
/*
 * Remove the provided entry from whichever list it's currently in. This
 * assumes that the entry is in a list. You don't need to provide the list
 * because the lists are circular, so the list's pointers will automatically
 * be updated if the first or last entries are removed.
 */
static inline void list_remove(list_t *entry)
{
    list_t *prev = entry->prev;
    list_t *next = entry->next;
    prev->next = next;
    next->prev = prev;
}
static inline bool list_empty(list_t *list)
{
    if (!list || (list->next == list && list->prev == list))
        return true;
    return false;
}
/*
 * Remove and return the last entry in the list, or NULL if the list is empty.
 */
static inline list_t *list_pop(list_t *list)
{
    if (list_empty(list))
        return NULL;
    list_t *back = list->prev;
    list_remove(back);
    return back;
}
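/*
 * Usage sketch (illustrative, not part of the allocator): list_push appends
 * at the tail and list_pop removes from the tail, so the free lists behave
 * like a stack (LIFO).
 */
[[maybe_unused]] static void list_example()
{
    list_t head, a, b;
    list_init(&head);
    assert(list_empty(&head));
    list_push(&head, &a);
    list_push(&head, &b);
    assert(list_pop(&head) == &b); // last pushed, first popped
    assert(list_pop(&head) == &a);
    assert(list_pop(&head) == NULL); // empty again
}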
/**
 * @brief convert a block pointer into the index of that block in the bitmap
 *
 * @param allocator the allocator the block belongs to
 * @param _ptr the block pointer
 * @param bucket the bucket the block belongs to; a bucket stands for one specific size
 * @return size_t the index of the block in the bitmap
 */
size_t ptr_to_index(struct buddy_allocator *allocator, void *_ptr, size_t bucket)
{
    uintptr_t ptr = (uintptr_t)_ptr;
    size_t start_index = ((size_t)1 << bucket) - 1;
    size_t piece_size = (allocator->MAX_BUDDY_ALLOC_SIZE) / ((size_t)1 << bucket);
    size_t cnt = (ptr - allocator->buddy_start) / piece_size;
    assert(cnt < ((size_t)1 << bucket));
    return start_index + cnt;
}
/** @brief convert a block index into the block pointer
 *
 * @param allocator the allocator the block belongs to
 * @param index the index of the block in the bitmap
 * @return void* the block pointer
 */
void *index_to_ptr(struct buddy_allocator *allocator, size_t index)
{
    size_t bucket = LOG2(index + 1);
    size_t start_index = ((size_t)1 << bucket) - 1;
    size_t piece_size = (allocator->MAX_BUDDY_ALLOC_SIZE) / ((size_t)1 << bucket);
    size_t cnt = index - start_index;
    return (void *)(allocator->buddy_start + (cnt * piece_size));
}
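/*
 * Worked example of the heap-order index math above (an illustrative sketch,
 * not in the original). With MAX_BUDDY_ALLOC_SIZE = 16KB and buddy_start = 0:
 *   bucket 0: index 0        -> one 16KB block
 *   bucket 1: indices 1..2   -> two 8KB blocks
 *   bucket 2: indices 3..6   -> four 4KB blocks
 * So the 4KB block at offset 8KB lives at index 3 + (8KB / 4KB) = 5.
 */
[[maybe_unused]] static void index_math_example()
{
    struct buddy_allocator a = {};
    a.MAX_BUDDY_ALLOC_SIZE = 16 * KB;
    a.buddy_start = 0;
    assert(ptr_to_index(&a, (void *)(8 * KB), 2) == 5);
    assert(index_to_ptr(&a, 5) == (void *)(8 * KB));
    assert(BUDDY(5) == 6 && PARENT(5) == 2 && LCHILD(2) == 5);
}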
/**
 * @brief initialize a buddy allocator: calculate the allocator's fields
 * @param allocator the allocator to be initialized
 * @param start the start address of the heap
 * @param end the end address of the heap
 * @return bool true on success, false on failure
 */
bool buddy_init_variable(struct buddy_allocator *allocator, uintptr_t start, uintptr_t end)
{
    size_t total_size = end - start; // total size of the heap
    // calculate the maximum possible size of the buddy allocator;
    // at this point we have not yet reserved memory for management,
    // so there is no place to store the bitmap and buckets
    allocator->MAX_BUDDY_ALLOC_SIZE = pre_pow2(total_size);
    // calculate the reserved size of the buddy allocator
    // (for the bitmap and the buckets)
    {
        allocator->bucket_num = LOG2(allocator->MAX_BUDDY_ALLOC_SIZE) - LOG2(MIN_BUDDY_ALLOC_SIZE) + 1;
        allocator->bitmap_size = MAX(1, sizeof(uint8_t) * ((1 << (allocator->bucket_num)) / 8));
        allocator->reserved_size = (sizeof(list_t) * (allocator->bucket_num) + sizeof(uint8_t) * (allocator->bitmap_size));
        allocator->reserved_start = end - allocator->reserved_size;
    }
    // now that memory for management is reserved,
    // re-calculate the maximum size of the buddy allocator
    allocator->MAX_BUDDY_ALLOC_SIZE = pre_pow2(allocator->reserved_start - start);
    if (allocator->MAX_BUDDY_ALLOC_SIZE < MIN_BUDDY_ALLOC_SIZE)
    {
        memset(allocator, 0, sizeof(struct buddy_allocator));
        return false;
    }
    /*
     * re-calculate the allocator's fields:
     * MAX_BUDDY_ALLOC_SIZE is now less than or equal to its previous value,
     * so reserved_size is large enough to store the bitmap and buckets
     * for the current MAX_BUDDY_ALLOC_SIZE
     */
    allocator->bucket_num = LOG2(allocator->MAX_BUDDY_ALLOC_SIZE) - LOG2(MIN_BUDDY_ALLOC_SIZE) + 1;
    allocator->bitmap_size = MAX(1, sizeof(uint8_t) * ((1 << (allocator->bucket_num)) / 8));
    // the size of the memory managed by the buddy allocator
    allocator->buddy_size = allocator->MAX_BUDDY_ALLOC_SIZE;
    allocator->buddy_start = start;
    /*
     * up to about half of the heap goes unused here, because
     * MAX_BUDDY_ALLOC_SIZE must be a power of 2 and we also have to
     * reserve memory for the bitmap and buckets
     */
    allocator->unused_start = start + allocator->MAX_BUDDY_ALLOC_SIZE;
    allocator->unused_size = allocator->reserved_start - (allocator->buddy_start + allocator->buddy_size);
#ifdef DEBUGPRINTF
    printf("\n\nTotal heap size: %zuMB\n", total_size / MB);
    printf("Total Heap address:\t%p-%p\t%zuKB\n", (void *)start, (void *)end, (end - start) / KB);
    printf("-------------------------------------------------------\n");
    printf("Buddy allocator:\t%p-%p\t%zuKB\n", (void *)allocator->buddy_start, (void *)(allocator->buddy_start + allocator->buddy_size), allocator->buddy_size / KB);
    printf("Unused Heap address:\t%p-%p\t%zuKB\n", (void *)allocator->unused_start, (void *)(allocator->unused_start + allocator->unused_size), allocator->unused_size / KB);
    printf("Reserved for Buddy:\t%p-%p\t%zuB\n", (void *)allocator->reserved_start, (void *)(allocator->reserved_start + allocator->reserved_size), allocator->reserved_size);
    printf("-------------------------------------------------------\n");
    printf("BUCKET_COUNT:\t%u\n", (unsigned)allocator->bucket_num);
    printf("MIN_BUDDY_ALLOC_SIZE:\t%zuKB\n", MIN_BUDDY_ALLOC_SIZE / KB);
    printf("MAX_BUDDY_ALLOC_SIZE:\t%zuKB\n", allocator->MAX_BUDDY_ALLOC_SIZE / KB);
    printf("-------------------------------------------------------\n");
#endif
    return true;
}
/**
 * @brief initialize a buddy allocator
 * @param allocator the allocator to be initialized
 * @param start the start address of the heap
 * @param end the end address of the heap
 * @return bool true on success, false on failure
 */
bool buddy_init(struct buddy_allocator *allocator, uintptr_t start, uintptr_t end)
{
    // just to set @buddy_start and @buddy_size to 0
    memset(allocator, 0, sizeof(struct buddy_allocator));
#ifdef DEBUGPRINTF
    printf("Waste for alignment: %zuB\n", ROUNDUP(start, MIN_BUDDY_ALLOC_SIZE) - start);
#endif
    // round up the start address to MIN_BUDDY_ALLOC_SIZE
    start = ROUNDUP(start, MIN_BUDDY_ALLOC_SIZE); // commenting this line out also works
    if (end < start + MIN_BUDDY_ALLOC_SIZE)
        return false;
    bool flag = buddy_init_variable(allocator, start, end);
    if (!flag)
        return false;
    // initialize the pointers to the bitmap and the buckets
    allocator->buckets = (list_t *)(allocator->reserved_start);
    allocator->bitmap = (uint8_t *)(allocator->reserved_start + sizeof(list_t) * allocator->bucket_num);
    // initialize the buckets (each of them is a list)
    for (int i = 0; i < allocator->bucket_num; i++)
    {
        list_init(&(allocator->buckets[i]));
    }
    memset(allocator->bitmap, 0, allocator->bitmap_size);
    // after initialization, buckets[0] holds one block of size MAX_BUDDY_ALLOC_SIZE
    // and the other buckets are empty
    list_push(&(allocator->buckets[0]), (list_t *)(allocator->buddy_start));
    return true;
}
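/*
 * Usage sketch for a single buddy allocator (illustrative, not in the
 * original; the function name buddy_usage_example is hypothetical, and the
 * arena comes from malloc only so the example is self-contained).
 */
[[maybe_unused]] static void buddy_usage_example()
{
    const size_t heap = 1 * MB;
    uintptr_t base = (uintptr_t)malloc(heap);
    struct buddy_allocator a;
    if (base && buddy_init(&a, base, base + heap))
    {
        void *p = buddy_alloc(&a, 8 * KB); // size must be a power of 2
        if (p)
            buddy_free(&a, p, 8 * KB); // the caller remembers the size
    }
    free((void *)base);
}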
/**
 * @brief use a buddy allocator to allocate memory
 *
 * @param allocator the allocator to be used
 * @param size the size of the memory to be allocated; must be a power of 2,
 *        at least MIN_BUDDY_ALLOC_SIZE and at most MAX_BUDDY_ALLOC_SIZE
 * @return void* the start address of the allocated memory
 */
void *buddy_alloc(struct buddy_allocator *allocator, size_t size)
{
    if (size == 0 || size > allocator->MAX_BUDDY_ALLOC_SIZE || size < MIN_BUDDY_ALLOC_SIZE || size != next_pow2(size))
        return NULL;
    // which bucket stands for the requested size
    ssize_t target_bucket = allocator->bucket_num - 1 - LOG2(size / MIN_BUDDY_ALLOC_SIZE);
    // each bucket is a list storing the free blocks of that size
    list_t *l = &((allocator->buckets)[target_bucket]);
    // A BIG LOCK SAVES THE PEACE
    lock(&(allocator->_mutex));
    list_t *ret = list_pop(l);
    if (!ret)
    {
        /*
         * the target bucket is empty:
         * we need to split the nearest available block larger than the
         * requested size to get a block of the size we want
         */
        ssize_t cur_bucket = target_bucket - 1;
        while (cur_bucket > -1)
        {
            ret = list_pop(&((allocator->buckets)[cur_bucket]));
            if (!ret)
            {
                // this bucket is empty as well;
                // continue searching in the next (larger) bucket
                cur_bucket--;
                continue;
            }
            /*
             * now we have a large-enough block (from cur_bucket);
             * split it until we get a block of the size we want
             */
            size_t cur_index;
            // once cur_bucket == target_bucket, we have the block we want
            while (cur_bucket != target_bucket)
            {
                // get the index of the current block
                cur_index = ptr_to_index(allocator, ret, cur_bucket);
                assert(get_bitmap(allocator->bitmap, cur_index) == false);
                // mark it as not available in the bitmap
                set_bitmap(allocator->bitmap, cur_index);
                assert(get_bitmap(allocator->bitmap, cur_index) == true);
                assert(cur_bucket + 1 < allocator->bucket_num && cur_bucket + 1 >= 0);
                /*
                 * split the current block into two blocks:
                 * the right half is pushed onto the free list (buckets[cur_bucket+1]);
                 * the left half keeps being split recursively
                 */
                list_push(&((allocator->buckets)[cur_bucket + 1]), (list_t *)(index_to_ptr(allocator, RCHILD(cur_index))));
                // the left half
                ret = (list_t *)(index_to_ptr(allocator, LCHILD(cur_index)));
                cur_bucket++;
            }
            // now we have the block we want
            cur_index = ptr_to_index(allocator, ret, cur_bucket);
            assert(get_bitmap(allocator->bitmap, cur_index) == false);
            set_bitmap(allocator->bitmap, cur_index);
            assert(get_bitmap(allocator->bitmap, cur_index) == true);
            assert(get_bitmap(allocator->bitmap, PARENT(cur_index)) == true);
            break;
        }
    }
    else
    {
        /*
         * the target bucket is not empty:
         * ret is the block we want
         */
        size_t cur_index = ptr_to_index(allocator, ret, target_bucket);
        assert(get_bitmap(allocator->bitmap, cur_index) == false);
        set_bitmap(allocator->bitmap, cur_index);
        assert(get_bitmap(allocator->bitmap, cur_index) == true);
    }
    unlock(&(allocator->_mutex));
    return (void *)ret;
}
/**
 * @brief use a buddy allocator to free memory
 *
 * @param allocator the allocator to be used
 * @param _ptr the start address of the memory to be freed
 * @param size the size of the memory to be freed
 */
void buddy_free(struct buddy_allocator *allocator, void *_ptr, size_t size)
{
    if (!_ptr)
        return;
    if (size == 0 || size > allocator->MAX_BUDDY_ALLOC_SIZE || size < MIN_BUDDY_ALLOC_SIZE || size != next_pow2(size))
    {
        assert(0);
        return;
    }
    size_t cur_bucket = allocator->bucket_num - 1 - LOG2(size / MIN_BUDDY_ALLOC_SIZE);
    size_t index = ptr_to_index(allocator, _ptr, cur_bucket);
    lock(&(allocator->_mutex));
    assert(get_bitmap(allocator->bitmap, index) == true);
    // mark it as available in the bitmap
    unset_bitmap(allocator->bitmap, index);
    /*
     * if a block and its buddy are both available, merge them;
     * merge upwards until we reach the root (index == 0) or the buddy is in use
     */
    while (index && get_bitmap(allocator->bitmap, BUDDY(index)) == false)
    {
        list_t *pptr = (list_t *)index_to_ptr(allocator, BUDDY(index));
        assert(get_bitmap(allocator->bitmap, BUDDY(index)) == false);
        // remove the buddy block from its free list
        list_remove(pptr);
        assert(get_bitmap(allocator->bitmap, PARENT(index)) == true);
        // the block and its buddy are merged now,
        // so the parent block becomes available
        unset_bitmap(allocator->bitmap, PARENT(index));
        index = PARENT(index);
    }
    assert(get_bitmap(allocator->bitmap, index) == false);
    cur_bucket = LOG2(index + 1);
    assert(cur_bucket < allocator->bucket_num);
    list_t *p1 = (list_t *)index_to_ptr(allocator, index);
    // p1 is the merged block; push it onto the free list
    list_push(&((allocator->buckets)[cur_bucket]), p1);
    unlock(&(allocator->_mutex));
}
/**
 * @brief check whether _ptr is in the range of an allocator's memory
 *
 * @param allocator the allocator to be checked against
 * @param _ptr the start address of the memory to be checked
 * @return true if _ptr is in the range of the allocator's memory
 */
bool is_in_buddy_allocator_range(struct buddy_allocator *allocator, void *_ptr)
{
    uintptr_t ptr = (uintptr_t)_ptr;
    return ptr >= allocator->buddy_start && ptr < allocator->buddy_start + allocator->buddy_size;
}
/*
 * About half of a buddy allocator's memory may go unused.
 * That unused memory can be used to construct a new buddy allocator;
 * we do this repeatedly until the unused memory is smaller than
 * MIN_BUDDY_ALLOC_SIZE.
 *
 * The more allocators we chain, the smaller the unused memory:
 * about 1/(2^n) of the total memory stays unused,
 * where n is MAX_BUDDY_ALLOCATOR_NUMS.
 * MAX_BUDDY_ALLOCATOR_NUMS must be less than UINT16_MAX.
 */
#define MAX_BUDDY_ALLOCATOR_NUMS (20)
struct buddy_allocator allocators[MAX_BUDDY_ALLOCATOR_NUMS];
uint8_t allocator_inited_num = 0;
void buddy_init_for_heap(uintptr_t start, uintptr_t end)
{
    if (buddy_init(&allocators[0], start, end))
        allocator_inited_num++;
    // initialize the remaining buddy allocators
    for (int i = 1; i < MAX_BUDDY_ALLOCATOR_NUMS; i++)
    {
        // use the previous allocator's unused space to construct a new allocator
        if (buddy_init(&allocators[i], allocators[i - 1].unused_start, allocators[i - 1].unused_start + allocators[i - 1].unused_size))
            allocator_inited_num++;
        else
            break;
    }
#ifdef DEBUGPRINTF
    printf("allocator_inited_num : %d\n", allocator_inited_num);
#endif
}
/**
 * @brief Get a block from a buddy allocator ! CONCURRENCY SAFE !
 *
 * @param size the size of the memory to be allocated; must be a power of 2,
 *        at least MIN_BUDDY_ALLOC_SIZE and at most MAX_BUDDY_ALLOC_SIZE
 * @return void* the start address of the allocated memory
 */
void *get_block_from_buddy(size_t size)
{
    void *ret = NULL;
    // start from the smallest buddy allocator
    for (int i = allocator_inited_num - 1; i >= 0; i--)
    {
        ret = buddy_alloc(&allocators[i], size);
        if (ret)
            break;
    }
    return ret;
}
/** @brief free a block back to its buddy allocator ! CONCURRENCY SAFE !
 *
 * @param _ptr the start address of the memory to be freed
 * @param size the size of the memory to be freed
 */
void free_block_to_buddy(void *_ptr, size_t size)
{
    for (int i = allocator_inited_num - 1; i >= 0; i--)
    {
        if (is_in_buddy_allocator_range(&allocators[i], _ptr))
        {
            buddy_free(&allocators[i], _ptr, size);
            return;
        }
    }
    // NEVER REACH HERE
    assert(0);
}
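/*
 * Heap-level usage sketch (illustrative, not in the original). Because
 * buddy_init_for_heap fills the global allocators[] array, call something
 * like this INSTEAD of kalloc_init, not alongside it.
 */
[[maybe_unused]] static void heap_usage_example()
{
    const size_t heap = 8 * MB;
    uintptr_t base = (uintptr_t)malloc(heap);
    buddy_init_for_heap(base, base + heap);
    void *p = get_block_from_buddy(4 * KB); // served by whichever allocator has room
    if (p)
        free_block_to_buddy(p, 4 * KB); // routed back to the owning allocator
    free((void *)base);
}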
/*
 * This slab allocator is not a real slab allocator.
 * It just demonstrates the basic idea of one:
 * a per-cpu cache for specific sizes of memory.
 */
/*
 * Each slab_allocator is designed to hold one fixed size of memory.
 * Each cpu has several slab_allocators, whose sizes range over
 * [MIN_SLAB_BLOCK_SIZE, MAX_SLAB_BLOCK_SIZE].
 *
 * A slab_allocator has three lists: the @free / @partial / @full lists.
 * Each list links to some @slab_headers.
 * Each @slab_header links to some @slab_block_headers.
 * Each @slab_block_header stands for one block of memory.
 */
struct slab_block_header
{
    list_t to_block;
    size_t cnt;
};
struct slab_header
{
    list_t to_slab;  // links to the free/full/partial slab list
    list_t to_block; // links to the blocks
};
#define MAX_SLAB_BLOCK_SIZE (32 * KB)
#define MIN_SLAB_BLOCK_SIZE ((2 * sizeof(uintptr_t)) * 2) // must be >= 3 * sizeof(uintptr_t)
// each slab_allocator allocates blocks of one fixed size
// each slab_allocator gets 2*MIN_BUDDY_ALLOC_SIZE of memory at initialization
struct slab_allocator
{
    size_t blocks_per_slab;
    size_t slab_block_size; // ranges over [MIN_SLAB_BLOCK_SIZE, MAX_SLAB_BLOCK_SIZE]
    list_t free_slab;       // completely unused
    list_t full_slab;       // completely used
    list_t partial_slab;    // partially used
};
size_t get_request_size_slab(struct slab_allocator *allocator)
{
    size_t request_size = allocator->slab_block_size + sizeof(struct slab_header);
    request_size = MAX(next_pow2(request_size), MIN_BUDDY_ALLOC_SIZE);
    size_t cnt2 = (request_size - sizeof(struct slab_header)) / allocator->slab_block_size;
    int waste_percent = (allocator->slab_block_size - sizeof(struct slab_header)) * 100 /
                        (cnt2 * allocator->slab_block_size);
    if (waste_percent > 13)
    {
        request_size = allocator->slab_block_size * 8;
        request_size += sizeof(struct slab_header);
        request_size = MAX(next_pow2(request_size), MIN_BUDDY_ALLOC_SIZE);
        cnt2 = (request_size - sizeof(struct slab_header)) / allocator->slab_block_size;
        waste_percent = (allocator->slab_block_size - sizeof(struct slab_header)) * 100 /
                        (cnt2 * allocator->slab_block_size);
        assert(waste_percent <= 13);
        /*
         * observed values:
         * waste_percent: 0%
         * waste_percent: 2%
         * waste_percent: 5%
         * waste_percent: 6%
         * waste_percent: 13%
         */
    }
#ifdef DEBUGPRINTF
    printf("waste_percent: %d%%\n", waste_percent);
#endif
    return request_size;
}
/**
 * @brief get a block from the buddy allocator, construct a slab_header and
 *        some slab blocks in it, and add it to the slab allocator
 */
bool add_slab_to_allocator(struct slab_allocator *allocator)
{
    size_t request_size = get_request_size_slab(allocator);
    void *ptr = get_block_from_buddy(request_size);
    if (!ptr)
    {
        return false;
    }
    struct slab_header *header = (struct slab_header *)ptr;
    list_init(&(header->to_slab));
    list_init(&(header->to_block));
    list_push(&(allocator->free_slab), (list_t *)(header));
    /*
     * we got a block from the buddy allocator:
     * / block /
     * split it into
     * / slab_header / slab_block_header / slab_block_header / slab_block_header / ..... /
     */
    uintptr_t start = (uintptr_t)ptr + sizeof(struct slab_header);
    size_t cnt = 0;
    while (start + allocator->slab_block_size <= (uintptr_t)ptr + request_size)
    {
        assert(allocator->slab_block_size >= sizeof(struct slab_block_header));
        struct slab_block_header *kh = (struct slab_block_header *)(start);
        // the slab_block_header's position inside the slab, used by slab_free
        kh->cnt = cnt;
        list_push(&(header->to_block), (&(kh->to_block)));
        start += allocator->slab_block_size;
        cnt++;
    }
    // @cnt is stored in kalloc_header as a uint16_t, so it must fit
    assert(cnt <= UINT16_MAX);
    allocator->blocks_per_slab = cnt;
#ifdef DEBUGPRINTF
    size_t waste_size = (uintptr_t)ptr + request_size - start;
    printf("Request: %zuKB \tWaste size: %zu, alloc %zu * %zuB\n", request_size / KB, waste_size, cnt, allocator->slab_block_size);
#endif
    return true;
}
/**
 * @brief init a slab allocator
 *
 * @param allocator the slab allocator to be initialized
 * @param size the block size to be served; must be a power of 2,
 *        at least MIN_SLAB_BLOCK_SIZE and at most MAX_SLAB_BLOCK_SIZE
 */
bool slab_allocator_init(struct slab_allocator *allocator, size_t size)
{
    if (size < MIN_SLAB_BLOCK_SIZE || size > MAX_SLAB_BLOCK_SIZE || size != next_pow2(size))
        return false;
    allocator->slab_block_size = size;
    list_init(&(allocator->free_slab));
    list_init(&(allocator->full_slab));
    list_init(&(allocator->partial_slab));
    bool flag = add_slab_to_allocator(allocator);
    int cnt = MAX_SLAB_BLOCK_SIZE / allocator->slab_block_size;
    cnt -= 1;
    while (cnt > 0)
    {
        add_slab_to_allocator(allocator);
        cnt--;
    }
    return flag;
}
/** @brief init one cpu's slab allocators
 */
void slab_init(struct slab_allocator *allocators)
{
    // the number of slab allocators each cpu has
    size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
    size_t size = MIN_SLAB_BLOCK_SIZE;
    for (size_t i = 0; i < len; i++, size *= 2)
    {
        if (!slab_allocator_init(&allocators[i], size))
        {
            allocators[i].slab_block_size = 0;
            assert(0); // not enough space to init this cpu's slab allocator
        }
    }
}
// the start address of the slab_allocator array;
// this array contains every cpu's slab_allocators,
// and one cpu's slab_allocators are contiguous in memory
uintptr_t slab_allocators_for_all_cpu;
/** @brief initialize the slab allocators of all cpus
 */
void slab_init_for_all_cpu()
{
    int cpu_num = cpu_count();
    size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
    // get a block from the buddy allocator to store the slab_allocator array
    size_t size = MAX(MIN_BUDDY_ALLOC_SIZE, sizeof(struct slab_allocator) * len * cpu_num + sizeof(uint16_t) * cpu_num);
    size = next_pow2(size);
    slab_allocators_for_all_cpu = (uintptr_t)get_block_from_buddy(size);
    if (!slab_allocators_for_all_cpu)
    {
        assert(0);
        return;
    }
    memset((void *)slab_allocators_for_all_cpu, 0, size);
#ifdef DEBUGPRINTF
    size_t wasted_size = size - sizeof(struct slab_allocator) * cpu_num * len - sizeof(uint16_t) * cpu_num;
    printf("slab_init_for_all_cpu Waste size: %zuB\n", wasted_size);
#endif
    size_t offset = 0;
    for (int i = 0; i < cpu_num; i++)
    {
        // initialize cpu(i)'s slab allocators
        slab_init((struct slab_allocator *)(slab_allocators_for_all_cpu + offset));
        offset += sizeof(struct slab_allocator) * len;
    }
}
void slab_shrink()
{
    int cur_cpu = cpu_current();
    size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
    uintptr_t cur_slab_allocator = slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * len * cur_cpu;
    for (size_t index = 0; index < len; index++)
    {
        struct slab_allocator *allocator = (struct slab_allocator *)(cur_slab_allocator + sizeof(struct slab_allocator) * index);
        // move any partial slab whose blocks are all free back to the free list
        list_t *cur = allocator->partial_slab.next;
        while (cur != &(allocator->partial_slab))
        {
            list_t *next = cur->next;
            list_t *block_cur = ((struct slab_header *)cur)->to_block.next;
            size_t cnt = 0;
            while (block_cur != &(((struct slab_header *)cur)->to_block))
            {
                list_t *block_next = block_cur->next;
                cnt++;
                block_cur = block_next;
            }
            assert(cnt <= allocator->blocks_per_slab);
            if (cnt == allocator->blocks_per_slab)
            {
                list_remove(cur);
                list_push(&(allocator->free_slab), cur);
            }
            cur = next;
        }
        // return free slabs to the buddy allocator, keeping one in reserve
        cur = allocator->free_slab.next;
        while (cur != &(allocator->free_slab))
        {
            list_t *next = cur->next;
            if (next == &(allocator->free_slab))
                break;
            list_remove(cur);
            free_block_to_buddy(cur, get_request_size_slab(allocator));
            cur = next;
        }
    }
}
/**
 * @brief get some memory from the slab allocator
 *
 * @param size the size of the memory to be allocated; must be a power of 2,
 *        at least MIN_SLAB_BLOCK_SIZE and at most MAX_SLAB_BLOCK_SIZE
 * @return the pointer to the start address of the memory, or NULL on failure
 */
void *slab_alloc(size_t size)
{
    if (size < MIN_SLAB_BLOCK_SIZE || size > MAX_SLAB_BLOCK_SIZE || size != next_pow2(size))
        return NULL;
    // each cpu uses its own slab_allocators
    int cur_cpu = cpu_current();
    uintptr_t cur_slab_allocator = slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * (LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1) * cur_cpu;
    // cur_slab_allocator is the start address of the current cpu's slab_allocators
    size_t index = LOG2(size) - LOG2(MIN_SLAB_BLOCK_SIZE);
    // get the slab_allocator of the target size
    struct slab_allocator *alloc = (struct slab_allocator *)(cur_slab_allocator + sizeof(struct slab_allocator) * index);
    if (alloc->slab_block_size != size)
        return NULL; // should not happen
    void *ret = NULL;
    if (!list_empty(&(alloc->partial_slab)))
    {
        list_t *node = list_pop(&(alloc->partial_slab));
        struct slab_header *header = (struct slab_header *)node;
        ret = list_pop(&(header->to_block));
        if (list_empty(&(header->to_block)))
        {
            list_push(&(alloc->full_slab), node);
        }
        else
        {
            list_push(&(alloc->partial_slab), node);
        }
    }
    else if (!list_empty(&(alloc->free_slab)))
    {
        list_t *node = list_pop(&(alloc->free_slab));
        struct slab_header *header = (struct slab_header *)node;
        ret = list_pop(&(header->to_block));
        if (list_empty(&(header->to_block)))
        {
            list_push(&(alloc->full_slab), node);
        }
        else
        {
            list_push(&(alloc->partial_slab), node);
        }
    }
    else
    {
        // if both the free and partial slab lists are empty, add a new slab to
        // the free list, i.e. get a block from the buddy allocator for the
        // current cpu to use
        if (!add_slab_to_allocator(alloc))
            return NULL;
        // At some point we should shrink the slab allocator, giving blocks
        // back to the buddy allocator; otherwise some cpus may end up with
        // little memory while other cpus hold a lot of it.
        return slab_alloc(size);
    }
    return ret;
}
// used for kalloc_header
size_t MAGIC;
struct kalloc_header
{
    size_t size;
    union
    {
        uint16_t cnt;  // only used for the slab allocator
        size_t canary; // for error detection
    };
};
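/*
 * Layout note added for clarity (a sketch of an invariant the code relies
 * on): kalloc_header is written over the start of a slab block, where the
 * freed block's slab_block_header also lives, so the header must fit inside
 * the smallest slab block.
 */
static_assert(sizeof(struct kalloc_header) <= MIN_SLAB_BLOCK_SIZE,
              "kalloc_header must fit in the smallest slab block");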
void slab_free(void *_ptr, size_t size)
{
    if (!_ptr || size < MIN_SLAB_BLOCK_SIZE || size > MAX_SLAB_BLOCK_SIZE || size != next_pow2(size))
        return;
    int cur_cpu = cpu_current();
    uintptr_t cur_slab_allocator = slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * (LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1) * cur_cpu;
    size_t index = LOG2(size) - LOG2(MIN_SLAB_BLOCK_SIZE);
    struct slab_allocator *alloc = (struct slab_allocator *)(cur_slab_allocator + sizeof(struct slab_allocator) * index);
    if (alloc->slab_block_size != size)
        return;
    struct kalloc_header *kh = (struct kalloc_header *)(_ptr);
    size_t cnt = kh->cnt;
    /*
     * / slab_header / slab_block_header / ... / slab_block_header / ...
     * |                                       |
     * target                                  _ptr (block number cnt)
     */
    void *target = (void *)((uintptr_t)(_ptr) - sizeof(struct slab_header) - cnt * size);
    struct slab_header *header = (struct slab_header *)(target);
    struct slab_block_header *sbh = (struct slab_block_header *)(_ptr);
    list_push(&(header->to_block), &(sbh->to_block));
    sbh->cnt = cnt; // <--- CAUTION: this is important; restore the block index clobbered by user data
    list_remove(&(header->to_slab)); // equivalent to list_remove(header)
    /*
     * the slab may belong on free_slab or partial_slab;
     * for simplicity we always push it onto partial_slab instead of
     * traversing the whole block list (slab_shrink sorts this out later)
     */
    list_push(&(alloc->partial_slab), &(header->to_slab));
    int cpu_num = cpu_count();
    size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
    uint16_t *free_cnt = (uint16_t *)(slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * len * cpu_num + sizeof(uint16_t) * cur_cpu);
    (*free_cnt)++;
    if (*free_cnt == UINT16_MAX)
    {
        slab_shrink();
#ifdef DEBUGPRINTF
        printf("CPU %d: slab_shrink\n", cur_cpu);
#endif
    }
}
/*
 * @brief initialize the kallocator,
 * including the slab allocator and the buddy allocator.
 * Done by the boot cpu only.
 */
static void kalloc_init(uintptr_t start, uintptr_t end)
{
    srand(time(NULL));
    MAGIC = ((size_t)(rand()) << 32) + rand();
    buddy_init_for_heap(start, end);
    slab_init_for_all_cpu();
}
/**
 * @brief make a canary for error detection
 */
size_t make_canary(size_t canary, uint16_t cnt)
{
    /*
     * |63(or 31)......16|15......0| bits
     * |     canary      |   cnt   |
     */
    size_t ret = cnt;
    size_t mask = (SIZE_MAX) << (sizeof(cnt) * 8);
    mask &= canary;
    return ret | mask;
}
bool check_canary(size_t num, size_t canary, uint16_t cnt)
{
    size_t ret = cnt;
    size_t mask = (SIZE_MAX) << (sizeof(cnt) * 8);
    mask &= canary;
    return (ret | mask) == num;
}
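/*
 * Round-trip sketch (illustrative, not in the original): the low 16 bits of
 * the packed word carry cnt, the high bits carry the canary, and
 * check_canary accepts exactly that combination.
 */
[[maybe_unused]] static void canary_example()
{
    size_t canary = 0xDEADBEEFCAFEF00Dull;
    uint16_t cnt = 42;
    size_t packed = make_canary(canary, cnt);
    assert((packed & 0xFFFF) == cnt);              // low bits hold cnt
    assert(check_canary(packed, canary, cnt));     // matching pair passes
    assert(!check_canary(packed, canary, (uint16_t)(cnt + 1))); // mismatch fails
}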
/**
 * @brief alloc memory of the given size
 *
 * @param _size the size of the memory to be allocated
 * @return void* the start address of the allocated memory, NULL on failure
 */
static void *
kalloc(size_t _size)
{
    if (_size == 0)
        return NULL;
    void *ret = NULL;
    size_t size = MAX(next_pow2(_size + sizeof(struct kalloc_header)), MIN_SLAB_BLOCK_SIZE);
    // for small sizes, use the slab allocator; each cpu has its own slab allocators
    if (size >= MIN_SLAB_BLOCK_SIZE && size <= MAX_SLAB_BLOCK_SIZE)
    {
        // slab alloc
        void *receive = slab_alloc(size); // known bug for size == 32768
        if (!receive)
            return NULL;
        struct slab_block_header *block_header = (struct slab_block_header *)receive;
        uint16_t cnt = block_header->cnt;
        struct kalloc_header *ptr = (struct kalloc_header *)(receive);
        ptr->size = size;
        ptr->canary = make_canary(MAGIC ^ size, cnt);
        ret = ptr + 1;
    }
    else // otherwise use the buddy allocator; all cpus share the buddy allocators
    {
        struct kalloc_header *ptr = (struct kalloc_header *)get_block_from_buddy(size);
        if (!ptr)
            return NULL;
        ptr->size = size;
        ptr->canary = make_canary(MAGIC ^ size, (uint16_t)(rand())); // cnt is meaningless in the buddy allocator
        ret = ptr + 1;
    }
    return ret;
}
/**
 * @brief free memory allocated by kalloc
 *
 * @param ptr the start address of the memory to be freed
 */
static void kfree(void *ptr)
{
    if (!ptr)
        return;
    struct kalloc_header *header = (struct kalloc_header *)(ptr)-1;
    if (!check_canary(header->canary, MAGIC ^ header->size, header->cnt) || header->size < MIN_SLAB_BLOCK_SIZE)
    {
        assert(0);
        return;
    }
    if (header->size >= MIN_SLAB_BLOCK_SIZE && header->size <= MAX_SLAB_BLOCK_SIZE)
    {
        slab_free(header, header->size);
    }
    else
    {
        free_block_to_buddy(header, header->size);
    }
}
int main(int argc, char **argv)
{
    setbuf(stdout, NULL);
    bool use_buddy = true;
    if (argc >= 2)
    {
        use_buddy = atoi(argv[1]);
    }
    if (use_buddy)
        printf("use buddy\n");
    else
        printf("use malloc\n");
    const size_t size = 125 * MB;
    uintptr_t ptr = (uintptr_t)malloc(size);
    clock_t s = clock();
    kalloc_init(ptr, ptr + size);
    printf("kalloc_init: %ld clocks\n", (clock() - s));
    auto test = [&]()
    {
        _map_mutex.lock();
        _map[this_thread::get_id()] = cpuid++;
        printf("thread %d\n", _map[this_thread::get_id()]);
        _map_mutex.unlock();
        clock_t start_time = clock();
        int cnt = 1;
        size_t total_size = 0;
        vector<void *> parr;
#define BASE_SIZE (32 * B)
        for (int i = 0; i < 5000; i++)
        {
            void *p;
            if (use_buddy)
                p = kalloc(BASE_SIZE);
            else
                p = malloc(BASE_SIZE);
            if (p)
            {
                total_size += BASE_SIZE;
                cnt++;
                // printf("%d thread : %zuB\n", _map[this_thread::get_id()], BASE_SIZE * (i + 1));
                memset(p, 0xff, BASE_SIZE);
                parr.push_back(p);
                // kfree(p);
            }
        }
        for (auto &&i : parr)
        {
            if (use_buddy)
                kfree(i);
            else
                free(i);
        }
        printf("thread %d: used %ld clocks, alloc'd %d times, total %zuKB\n", _map[this_thread::get_id()], clock() - start_time, cnt, total_size / KB);
    };
    vector<thread> threads;
    for (int i = 0; i < ThreadNums; i++)
    {
        thread t(test);
        threads.push_back(move(t));
    }
    test(); // the main thread participates as well
    for (auto &&i : threads)
    {
        i.join();
    }
    return 0;
}