@Delta-in-hub
Last active April 5, 2022 06:25
alloc.cc
// #define NDEBUG
#include <bits/stdc++.h>
#define DEBUGPRINTF
#undef DEBUGPRINTF
using namespace std;
int cpuid = 0;
mutex _map_mutex;
#define ROUNDUP(a, sz) ((((uintptr_t)a) + (sz)-1) & ~((sz)-1))
// #define ROUNDDOWN(a, sz) ((((uintptr_t)a)) & ~((sz)-1))
#define MAX(x, y) ((x) > (y) ? (x) : (y))
#define ThreadNums 3
int cpu_count()
{
return ThreadNums + 1;
}
unordered_map<thread::id, int> _map;
int cpu_current()
{
lock_guard<mutex> lk(_map_mutex);
return _map[this_thread::get_id()];
}
typedef struct mutex_t
{
mutex t;
    int flag; // 1 for available, 0 for locked
} mutex_t;
static inline void lock_init(mutex_t *lock)
{
lock->flag = 1;
assert(lock->flag == 1);
}
static inline void lock(mutex_t *lock)
{
lock->t.lock();
lock->flag = 0;
}
static inline void unlock(mutex_t *lock)
{
lock->flag = 1;
lock->t.unlock();
}
static inline bool try_lock(mutex_t *lock)
{
if (lock->t.try_lock())
{
lock->flag = 0;
return true;
}
return false;
}
#define LOG2(X) ((unsigned)(8 * sizeof(unsigned long long) - __builtin_clzll((X)) - 1))
// return the smallest power of 2 that is >= x
static inline size_t next_pow2(size_t x)
{
if (sizeof(size_t) == 8)
return x == 1 ? 1 : (size_t)1 << (size_t)(64 - __builtin_clzl(x - 1));
else
return x == 1 ? 1 : (size_t)1 << (size_t)(32 - __builtin_clz(x - 1));
}
// return the largest power of 2 that is <= s
static inline size_t pre_pow2(size_t s)
{
assert(s >= 1ul);
size_t x = next_pow2(s);
if (s == x)
return s;
else
return x >> 1;
}
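// A small self-check sketch for the power-of-2 helpers above. It is not part
// of the allocator and is not called anywhere; it only documents the expected
// rounding behavior with concrete values.
static inline void pow2_selfcheck()
{
    assert(next_pow2(1) == 1);
    assert(next_pow2(5) == 8);       // 5 rounds up to the next power of 2
    assert(next_pow2(4096) == 4096); // powers of 2 map to themselves
    assert(pre_pow2(5) == 4);        // 5 rounds down to the previous power of 2
    assert(pre_pow2(4096) == 4096);
}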
#define B (1ul)
#define KB (1024 * B)
#define MB (1024 * KB)
#define GB (1024 * MB)
#define TB (1024 * GB)
#define LCHILD(x) (x * 2 + 1)
#define RCHILD(x) (x * 2 + 2)
#define PARENT(x) ((x - 1) / 2)
#define BUDDY(x) (((x - 1) ^ 1) + 1)
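/*
 * The buddy block tree is stored implicitly by index, heap-style: the root
 * block is index 0 and the two halves of block x live at 2x+1 and 2x+2.
 * A few compile-time sanity checks of the index arithmetic above:
 */
static_assert(LCHILD(5) == 11, "left half of block 5");
static_assert(RCHILD(5) == 12, "right half of block 5");
static_assert(PARENT(11) == 5 && PARENT(12) == 5, "both halves merge back into block 5");
static_assert(BUDDY(11) == 12 && BUDDY(12) == 11, "the two halves are each other's buddy");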
typedef struct list_t
{
struct list_t *prev, *next;
} list_t;
#define MIN_BUDDY_ALLOC_SIZE (4 * KB)
// Please forgive my poor English
struct buddy_allocator
{
/*
 * MIN_BUDDY_ALLOC_SIZE is (4 * KB)
 * MAX_BUDDY_ALLOC_SIZE is calculated by the following formula:
 * pre_pow2(heap-size): the largest power of 2 that is <= heap-size,
 * and it must be larger than MIN_BUDDY_ALLOC_SIZE
 */
size_t MAX_BUDDY_ALLOC_SIZE;
uintptr_t buddy_start; // buddy allocator start address
size_t buddy_size; // buddy allocator size(in bytes)
uintptr_t reserved_start; // reserved memory for management start address
size_t reserved_size; // reserved memory for management size(in bytes)
uintptr_t unused_start; // unused memory start address
size_t unused_size; // unused memory size(in bytes)
uint8_t bucket_num; // the number of bucket(free list)
list_t *buckets; // buckets[BUCKET_COUNT] , each bucket is a free list for a series of specific size block.
    uint8_t *bitmap; // 0 for available, 1 for used
size_t bitmap_size;
mutex_t _mutex;
    /*
     * bitmap and buckets must be modified together; lock both of them at the same time
     */
};
bool get_bitmap(uint8_t *base, size_t index)
{
size_t i = index / 8;
size_t j = index % 8;
return (base[i] >> j) & 1;
}
void set_bitmap(uint8_t *base, size_t index)
{
size_t i = index / 8;
size_t j = index % 8;
base[i] |= (uint8_t)(1 << j);
}
void unset_bitmap(uint8_t *base, size_t index)
{
size_t i = index / 8;
size_t j = index % 8;
base[i] &= ~(uint8_t)(1 << j);
}
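// A small usage sketch of the bitmap helpers (not called anywhere): bits are
// packed 8 per byte, least-significant bit first within each byte.
static inline void bitmap_selfcheck()
{
    uint8_t bits[2] = {0, 0};
    set_bitmap(bits, 9); // byte 1, bit 1
    assert(get_bitmap(bits, 9) == true);
    assert(bits[1] == 0x02);
    unset_bitmap(bits, 9);
    assert(get_bitmap(bits, 9) == false);
}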
/*
* Initialize a list to empty. Because these are circular lists, an "empty"
* list is an entry where both links point to itself. This makes insertion
* and removal simpler because they don't need any branches.
*/
static inline void list_init(list_t *list)
{
list->prev = list;
list->next = list;
}
/*
* Append the provided entry to the end of the list. This assumes the entry
* isn't in a list already because it overwrites the linked list pointers.
*/
static inline void list_push(list_t *list, list_t *entry)
{
list_t *prev = list->prev;
entry->prev = list->prev;
entry->next = list;
prev->next = entry;
list->prev = entry;
}
/*
* Remove the provided entry from whichever list it's currently in. This
* assumes that the entry is in a list. You don't need to provide the list
* because the lists are circular, so the list's pointers will automatically
* be updated if the first or last entries are removed.
*/
static inline void list_remove(list_t *entry)
{
list_t *prev = entry->prev;
list_t *next = entry->next;
prev->next = next;
next->prev = prev;
}
static inline bool list_empty(list_t *list)
{
if (!list || (list->next == list && list->prev == list))
return true;
return false;
}
/*
* Remove and return the first entry in the list or NULL if the list is empty.
*/
static inline list_t *list_pop(list_t *list)
{
if (list_empty(list))
return NULL;
list_t *back = list->prev;
list_remove(back);
return back;
}
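// A small usage sketch of the list primitives (not called anywhere). Note that
// list_push appends at the tail and list_pop also removes from the tail, so a
// free-list bucket behaves as a LIFO stack of blocks.
static inline void list_selfcheck()
{
    list_t head, a, b;
    list_init(&head);
    assert(list_empty(&head));
    list_push(&head, &a);
    list_push(&head, &b);
    assert(list_pop(&head) == &b); // most recently pushed comes back first
    assert(list_pop(&head) == &a);
    assert(list_pop(&head) == NULL);
    assert(list_empty(&head));
}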
/**
* @brief convert a block pointer into the index of that block in bitmap
*
* @param allocator which allocator the block belongs to
* @param _ptr the block pointer
* @param bucket which bucket the block belongs to , a bucket stands for a specific size
* @return size_t the index of the block in bitmap
*/
size_t ptr_to_index(struct buddy_allocator *allocator, void *_ptr, size_t bucket)
{
uintptr_t ptr = (uintptr_t)_ptr;
size_t start_index = (1 << bucket) - 1;
size_t piece_size = (allocator->MAX_BUDDY_ALLOC_SIZE) / (1 << bucket);
size_t cnt = (ptr - allocator->buddy_start) / piece_size;
assert(cnt < (1 << bucket));
return start_index + cnt;
}
/** @brief convert a block index into the block pointer
 *
 * @param allocator which allocator the block belongs to
 * @param index the index of the block in bitmap (the bucket is derived from the index)
 * @return void* the block pointer
 */
void *index_to_ptr(struct buddy_allocator *allocator, size_t index)
{
size_t bucket = LOG2(index + 1);
size_t start_index = (1 << bucket) - 1;
size_t piece_size = (allocator->MAX_BUDDY_ALLOC_SIZE) / (1 << bucket);
size_t cnt = index - start_index;
return (void *)(allocator->buddy_start + (cnt * piece_size));
}
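// A worked example of the index mapping above (not called anywhere). Bucket b
// splits MAX_BUDDY_ALLOC_SIZE into 2^b pieces whose bitmap indices start at
// 2^b - 1. The buddy_start value below is an arbitrary address for the sketch.
static inline void index_mapping_selfcheck()
{
    struct buddy_allocator a;
    a.MAX_BUDDY_ALLOC_SIZE = 64 * KB;
    a.buddy_start = 0x100000;
    // bucket 2 has 4 pieces of 16KB each, at bitmap indices 3..6;
    // the piece at offset 16KB is the 2nd piece, so its index is 3 + 1 = 4
    assert(ptr_to_index(&a, (void *)(a.buddy_start + 16 * KB), 2) == 4);
    assert(index_to_ptr(&a, 4) == (void *)(a.buddy_start + 16 * KB));
}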
/**
 * @brief initialize a buddy allocator: calculate the fields of the buddy allocator
* @param allocator which allocator to be initialized
* @param start the start address of heap
* @param end the end address of heap
* @return bool true if success , false if failed
*/
bool buddy_init_variable(struct buddy_allocator *allocator, uintptr_t start, uintptr_t end)
{
size_t total_size = end - start; // total size of heap
    // calculate the max possible size of the buddy allocator.
    // we have not yet reserved memory for management,
    // so there is no place to store the bitmap and buckets yet
allocator->MAX_BUDDY_ALLOC_SIZE = pre_pow2(total_size);
// calculate the reserved size of buddy allocator
// for bitmap and buckets
{
allocator->bucket_num = LOG2(allocator->MAX_BUDDY_ALLOC_SIZE) - LOG2(MIN_BUDDY_ALLOC_SIZE) + 1;
allocator->bitmap_size = MAX(1, sizeof(uint8_t) * ((1 << (allocator->bucket_num)) / 8));
allocator->reserved_size = (sizeof(list_t) * (allocator->bucket_num) + sizeof(uint8_t) * (allocator->bitmap_size));
allocator->reserved_start = end - allocator->reserved_size;
}
    // now we have reserved memory for management;
    // re-calculate the max size of the buddy allocator
allocator->MAX_BUDDY_ALLOC_SIZE = pre_pow2(allocator->reserved_start - start);
if (allocator->MAX_BUDDY_ALLOC_SIZE < MIN_BUDDY_ALLOC_SIZE)
{
memset(allocator, 0, sizeof(struct buddy_allocator));
return false;
}
    /*
     * re-calculate the fields of the buddy allocator.
     * now MAX_BUDDY_ALLOC_SIZE is less than or equal to its previous value,
     * so reserved_size is enough to store the bitmap and buckets for the
     * current MAX_BUDDY_ALLOC_SIZE
     */
allocator->bucket_num = LOG2(allocator->MAX_BUDDY_ALLOC_SIZE) - LOG2(MIN_BUDDY_ALLOC_SIZE) + 1;
allocator->bitmap_size = MAX(1, sizeof(uint8_t) * ((1 << (allocator->bucket_num)) / 8));
// the size of memory managed by buddy allocator
allocator->buddy_size = allocator->MAX_BUDDY_ALLOC_SIZE;
allocator->buddy_start = start;
    /*
     * mark the leftover memory as unused.
     * up to about half of the heap can end up here, because the buddy
     * region must be a power of 2 and we also reserve memory for the
     * bitmap and buckets
     */
allocator->unused_start = start + allocator->MAX_BUDDY_ALLOC_SIZE;
allocator->unused_size = allocator->reserved_start - (allocator->buddy_start + allocator->buddy_size);
#ifdef DEBUGPRINTF
printf("\n\nTotal heap size: %zuMB\n", total_size / MB);
printf("Total Heap address:\t%p-%p\t%zuKB\n", start, end, (end - start) / KB);
printf("-------------------------------------------------------\n");
printf("Buddy allocator:\t%p-%p\t%zuKB\n", allocator->buddy_start, allocator->buddy_start + allocator->buddy_size, allocator->buddy_size / KB);
printf("Unused Heap address:\t%p-%p\t%zuKB\n", allocator->unused_start, allocator->unused_start + allocator->unused_size, allocator->unused_size / KB);
printf("Reserved for Buddy:\t%p-%p\t%zuB\n", allocator->reserved_start, allocator->reserved_start + allocator->reserved_size, allocator->reserved_size);
printf("-------------------------------------------------------\n");
printf("BUCKET_COUNT:\t%zu\n", allocator->bucket_num);
printf("MIN_BUDDY_ALLOC_SIZE:\t%zuKB\n", MIN_BUDDY_ALLOC_SIZE / KB);
printf("MAX_BUDDY_ALLOC_SIZE:\t%zuKB\n", allocator->MAX_BUDDY_ALLOC_SIZE / KB);
printf("-------------------------------------------------------\n");
#endif
return true;
}
/**
* @brief initialize a buddy allocator
* @param allocator which allocator to be initialized
* @param start the start address of heap
* @param end the end address of heap
* @return bool true if success , false if failed
*/
bool buddy_init(struct buddy_allocator *allocator, uintptr_t start, uintptr_t end)
{
    // just to set @buddy_start and @buddy_size to 0
memset(allocator, 0, sizeof(struct buddy_allocator));
#ifdef DEBUGPRINTF
printf("Waste for alignment: %zuB\n", ROUNDUP(start, MIN_BUDDY_ALLOC_SIZE) - start);
#endif
// round up the start address to MIN_BUDDY_ALLOC_SIZE
    start = ROUNDUP(start, MIN_BUDDY_ALLOC_SIZE); // commenting out this line also works
if (end < start + MIN_BUDDY_ALLOC_SIZE)
return false;
bool flag = buddy_init_variable(allocator, start, end);
if (!flag)
return false;
// initialize the pointer of bitmap and buckets
allocator->buckets = (list_t *)(allocator->reserved_start);
allocator->bitmap = (uint8_t *)(allocator->reserved_start + sizeof(list_t) * allocator->bucket_num);
// initialize the buckets(each of them is a list)
for (int i = 0; i < allocator->bucket_num; i++)
{
list_init(&(allocator->buckets[i]));
}
memset(allocator->bitmap, 0, allocator->bitmap_size);
    // after initialization, buckets[0] has one block of size MAX_BUDDY_ALLOC_SIZE
    // and the other buckets are empty
list_push(&(allocator->buckets[0]), (list_t *)(allocator->buddy_start));
return true;
}
/**
* @brief use a buddy allocator to allocate memory
*
* @param allocator which allocator to be used
 * @param size the size of memory to be allocated; must be a power of 2, at least MIN_BUDDY_ALLOC_SIZE and at most MAX_BUDDY_ALLOC_SIZE
* @return void* the start address of allocated memory
*/
void *buddy_alloc(struct buddy_allocator *allocator, size_t size)
{
if (size == 0 || size > allocator->MAX_BUDDY_ALLOC_SIZE || size < MIN_BUDDY_ALLOC_SIZE || size != next_pow2(size))
return NULL;
// which bucket stands for the size of memory to be allocated
ssize_t target_bucket = allocator->bucket_num - 1 - LOG2(size / MIN_BUDDY_ALLOC_SIZE);
// each bucket is a list to store the free blocks
list_t *l = &((allocator->buckets)[target_bucket]);
// A BIG LOCK SAVES THE PEACE
lock(&(allocator->_mutex));
list_t *ret = list_pop(l);
if (!ret)
{
        /*
         * target bucket is empty.
         * we need to split the nearest available block larger than the
         * requested size to get a block of the size we want
         */
ssize_t cur_bucket = target_bucket - 1;
while (cur_bucket > -1)
{
ret = list_pop(&((allocator->buckets)[cur_bucket]));
if (!ret)
{
                // this bucket is empty too;
                // continue to search the next bucket
cur_bucket--;
continue;
}
            /*
             * now we got a proper block (cur_bucket);
             * we need to split it until we get a block of the size we want
             */
size_t cur_index;
            // once cur_bucket == target_bucket, we have the block we want
while (cur_bucket != target_bucket)
{
// get the index of the current block
cur_index = ptr_to_index(allocator, ret, cur_bucket);
assert(get_bitmap(allocator->bitmap, cur_index) == false);
                // set it to not-available in bitmap
set_bitmap(allocator->bitmap, cur_index);
assert(get_bitmap(allocator->bitmap, cur_index) == true);
assert(cur_bucket + 1 < allocator->bucket_num && cur_bucket + 1 >= 0);
                /*
                 * split the current block into two blocks:
                 * the right half is pushed to the free list (buckets[cur_bucket+1]),
                 * the left half keeps being split recursively
                 */
list_push(&((allocator->buckets)[cur_bucket + 1]), (list_t *)(index_to_ptr(allocator, RCHILD(cur_index))));
// the left one
ret = (list_t *)(index_to_ptr(allocator, LCHILD(cur_index)));
cur_bucket++;
}
// now we get the block we want
cur_index = ptr_to_index(allocator, ret, cur_bucket);
assert(get_bitmap(allocator->bitmap, cur_index) == false);
set_bitmap(allocator->bitmap, cur_index);
assert(get_bitmap(allocator->bitmap, cur_index) == true);
assert(get_bitmap(allocator->bitmap, PARENT(cur_index)) == true);
break;
}
}
else
{
/*
* target bucket is not empty
* ret is the block we want
*/
size_t cur_index = ptr_to_index(allocator, ret, target_bucket);
assert(get_bitmap(allocator->bitmap, cur_index) == false);
set_bitmap(allocator->bitmap, cur_index);
assert(get_bitmap(allocator->bitmap, cur_index) == true);
}
unlock(&(allocator->_mutex));
return (void *)ret;
}
/**
* @brief use a buddy allocator to free memory
*
* @param allocator which allocator to be used
* @param _ptr the start address of memory to be freed
* @param size the size of memory to be freed
*/
void buddy_free(struct buddy_allocator *allocator, void *_ptr, size_t size)
{
if (!_ptr)
return;
if (size == 0 || size > allocator->MAX_BUDDY_ALLOC_SIZE || size < MIN_BUDDY_ALLOC_SIZE || size != next_pow2(size))
{
assert(0);
return;
}
size_t cur_bucket = allocator->bucket_num - 1 - LOG2(size / MIN_BUDDY_ALLOC_SIZE);
size_t index = ptr_to_index(allocator, _ptr, cur_bucket);
lock(&(allocator->_mutex));
assert(get_bitmap(allocator->bitmap, index) == true);
    // set it to available in bitmap
unset_bitmap(allocator->bitmap, index);
    /*
     * if a block and its buddy are both available, merge them;
     * recursively merge until we reach the root (index = 0) or the buddy is not available
     */
while (index && get_bitmap(allocator->bitmap, BUDDY(index)) == false)
{
// if (index == 0 || get_bitmap(allocator->bitmap, BUDDY(index)))
// break;
list_t *pptr = (list_t *)index_to_ptr(allocator, BUDDY(index));
assert(get_bitmap(allocator->bitmap, BUDDY(index)) == false);
// remove the buddy block from the free list
list_remove(pptr);
assert(get_bitmap(allocator->bitmap, PARENT(index)) == true);
// now we merge the block and its buddy
        // so the parent block is available
unset_bitmap(allocator->bitmap, PARENT(index));
index = PARENT(index);
}
assert(get_bitmap(allocator->bitmap, index) == false);
cur_bucket = LOG2(index + 1);
assert(cur_bucket < allocator->bucket_num && cur_bucket >= 0);
list_t *p1 = (list_t *)index_to_ptr(allocator, index);
// p1 is the merged block,push it to the free list
list_push(&((allocator->buckets)[cur_bucket]), p1);
unlock(&(allocator->_mutex));
}
/**
 * @brief check whether _ptr is in the range of an allocator's memory
*
* @param allocator which allocator to be used
* @param _ptr the start address of memory to be checked
 * @return true if _ptr is in the range of an allocator's memory
*/
bool is_in_buddy_allocator_range(struct buddy_allocator *allocator, void *_ptr)
{
uintptr_t ptr = (uintptr_t)_ptr;
return ptr >= allocator->buddy_start && ptr < allocator->buddy_start + allocator->buddy_size;
}
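// A minimal end-to-end sketch of a single buddy allocator (not called
// anywhere). The 1 * MB heap size and the 8 * KB request are arbitrary example
// values; buddy_init zeroes the allocator itself, mirroring how
// buddy_init_for_heap uses it below.
static inline void buddy_usage_sketch()
{
    size_t heap_size = 1 * MB;
    void *heap = malloc(heap_size);
    struct buddy_allocator a;
    if (heap && buddy_init(&a, (uintptr_t)heap, (uintptr_t)heap + heap_size))
    {
        void *p = buddy_alloc(&a, 8 * KB); // size must be a power of 2
        assert(p && is_in_buddy_allocator_range(&a, p));
        buddy_free(&a, p, 8 * KB);
    }
    free(heap);
}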
/*
 * About half of the buddy allocator's memory may be left unused.
 * That unused memory can be used to construct a new buddy allocator;
 * do this recursively until the unused memory is less than MIN_BUDDY_ALLOC_SIZE.
 *
 * The larger MAX_BUDDY_ALLOCATOR_NUMS is, the smaller the unused memory:
 * roughly 1/(2^n) of the total memory stays unused, where n is
 * MAX_BUDDY_ALLOCATOR_NUMS. MAX_BUDDY_ALLOCATOR_NUMS must be less than UINT16_MAX.
 */
#define MAX_BUDDY_ALLOCATOR_NUMS (20)
struct buddy_allocator allocators[MAX_BUDDY_ALLOCATOR_NUMS];
uint8_t allocator_inited_num = 0;
void buddy_init_for_heap(uintptr_t start, uintptr_t end)
{
if (buddy_init(&allocators[0], start, end))
allocator_inited_num++;
    // initialize the remaining buddy allocators
for (int i = 1; i < MAX_BUDDY_ALLOCATOR_NUMS; i++)
{
        // use the previous allocator's unused space to construct the new allocator
if (buddy_init(&allocators[i], allocators[i - 1].unused_start, allocators[i - 1].unused_start + allocators[i - 1].unused_size))
allocator_inited_num++;
else
break;
}
#ifdef DEBUGPRINTF
printf("allocator_inited_num : %d\n", allocator_inited_num);
#endif
}
/**
* @brief Get a block from buddy allocator ! CONCURRENCY SAFE !
*
 * @param size the size of memory to be allocated; must be a power of 2, at least MIN_BUDDY_ALLOC_SIZE and at most MAX_BUDDY_ALLOC_SIZE
* @return void* the start address of the allocated memory
*/
void *get_block_from_buddy(size_t size)
{
void *ret = NULL;
    // start from the smallest buddy allocator
for (int i = allocator_inited_num - 1; i >= 0; i--)
{
ret = buddy_alloc(&allocators[i], size);
if (ret)
break;
}
return ret;
}
/** @brief free a block to buddy allocator ! CONCURRENCY SAFE !
*
* @param _ptr the start address of memory to be freed
* @param size the size of memory to be freed
*/
void free_block_to_buddy(void *_ptr, size_t size)
{
for (int i = allocator_inited_num - 1; i >= 0; i--)
{
if (is_in_buddy_allocator_range(&allocators[i], _ptr))
{
buddy_free(&allocators[i], _ptr, size);
return;
}
}
// NEVER REACH HERE
assert(0);
}
/*
 * This slab allocator is not a real slab allocator;
 * it just borrows the basic idea: a per-cpu cache
 * for specific sizes of memory.
 */
/*
 * each slab_allocator is designed to hold a fixed size of memory.
 * each cpu has several slab_allocators, whose sizes range over
 * [MIN_SLAB_BLOCK_SIZE, MAX_SLAB_BLOCK_SIZE].
 *
 * a slab_allocator has three lists: the @ free / partial / full lists.
 * each @list links to some @slab_header.
 * each @slab_header links to some @slab_block_header.
 * each @slab_block_header stands for one block of memory.
 */
struct slab_block_header
{
list_t to_block;
size_t cnt;
};
struct slab_header
{
list_t to_slab; // link to free/full/partial _slab
list_t to_block; // link to blocks
};
#define MAX_SLAB_BLOCK_SIZE (32 * KB)
#define MIN_SLAB_BLOCK_SIZE ((2 * sizeof(uintptr_t)) * 2) // must be >= 3 * sizeof(uintptr_t)
// each slab_allocator allocates fixed-size memory
// each slab_allocator gets 2 * MIN_BUDDY_ALLOC_SIZE of memory for initialization
struct slab_allocator
{
size_t blocks_per_slab;
    size_t slab_block_size; // in the range [MIN_SLAB_BLOCK_SIZE, MAX_SLAB_BLOCK_SIZE]
list_t free_slab; // totally not used
list_t full_slab; // totally used
list_t partial_slab; // partially used
};
size_t get_request_size_slab(struct slab_allocator *allocator)
{
size_t request_size = allocator->slab_block_size + sizeof(struct slab_header);
request_size = MAX(next_pow2(request_size), MIN_BUDDY_ALLOC_SIZE);
size_t cnt2 = (request_size - sizeof(struct slab_header)) / allocator->slab_block_size;
int waste_percent = (allocator->slab_block_size - sizeof(struct slab_header)) * 100 /
(cnt2 * allocator->slab_block_size);
if (waste_percent > 13)
{
request_size = allocator->slab_block_size * 8;
request_size += sizeof(struct slab_header);
request_size = MAX(next_pow2(request_size), MIN_BUDDY_ALLOC_SIZE);
cnt2 = (request_size - sizeof(struct slab_header)) / allocator->slab_block_size;
waste_percent = (allocator->slab_block_size - sizeof(struct slab_header)) * 100 /
(cnt2 * allocator->slab_block_size);
assert(waste_percent <= 13);
/*
waste_percent: 0%
waste_percent: 2%
waste_percent: 5%
waste_percent: 6%
waste_percent: 13%
*/
}
#ifdef DEBUGPRINTF
printf("waste_percent: %d%%\n", waste_percent);
#endif
return request_size;
}
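// A worked example of the request-size math above (not called anywhere),
// assuming a 64-bit build where sizeof(struct slab_header) == 32:
static inline void request_size_selfcheck()
{
    struct slab_allocator a;
    a.slab_block_size = 32;
    // 32 + 32 rounds up to MIN_BUDDY_ALLOC_SIZE (4KB), which holds
    // (4096 - 32) / 32 = 127 blocks with zero percent waste
    assert(get_request_size_slab(&a) == 4 * KB);
}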
/**
 * @brief get a block from the buddy allocator, construct a slab_header and some slab blocks from it, and add it to the slab allocator
*/
bool add_slab_to_allocator(struct slab_allocator *allocator)
{
    size_t request_size = get_request_size_slab(allocator);
void *ptr = get_block_from_buddy(request_size);
if (!ptr)
{
return false;
}
struct slab_header *header = (struct slab_header *)ptr;
list_init(&(header->to_slab));
list_init(&(header->to_block));
list_push(&(allocator->free_slab), (list_t *)(header));
/*
* got a block from buddy allocator
* / block /
* split it into
* / slab_header / slab_block_header / slab_block_header / slab_block_header / ...../
*
*/
uintptr_t start = (uintptr_t)ptr + sizeof(struct slab_header);
size_t cnt = 0;
while (start + allocator->slab_block_size <= (uintptr_t)ptr + request_size)
{
// sizeof(struct slab_block_header);
assert(allocator->slab_block_size >= sizeof(struct slab_block_header));
struct slab_block_header *kh = (struct slab_block_header *)(start);
        // the slab_block_header's position within the slab, used in slab_free
kh->cnt = cnt;
list_push(&(header->to_block), (&(kh->to_block)));
start += allocator->slab_block_size;
cnt++;
}
    // because we will put @cnt into kalloc_header and store it in a uint16_t
assert(cnt <= UINT16_MAX);
allocator->blocks_per_slab = cnt;
#ifdef DEBUGPRINTF
printf("waste_percent: %zd%%\n", waste_percent);
size_t waste_size = (uintptr_t)ptr + request_size - start;
printf("Requset: %zuKB \tWaste size: %zu, alloc %zu * %zuB\n", request_size / KB, waste_size, cnt, allocator->slab_alloc_size);
#endif
return true;
}
/**
* @brief init a slab allocator
*
* @param allocator the slab allocator to be init
 * @param size the size of memory to be allocated; must be a power of 2, at least MIN_SLAB_BLOCK_SIZE and at most MAX_SLAB_BLOCK_SIZE
*/
bool slab_allocator_init(struct slab_allocator *allocator, size_t size)
{
if (size < MIN_SLAB_BLOCK_SIZE || size > MAX_SLAB_BLOCK_SIZE || size != next_pow2(size))
return false;
allocator->slab_block_size = size;
list_init(&(allocator->free_slab));
list_init(&(allocator->full_slab));
list_init(&(allocator->partial_slab));
bool flag = add_slab_to_allocator(allocator);
int cnt = MAX_SLAB_BLOCK_SIZE / allocator->slab_block_size;
cnt -= 1;
while (cnt > 0)
{
add_slab_to_allocator(allocator);
cnt--;
}
return flag;
}
/** @brief init a cpu's slab allocator
*/
void slab_init(struct slab_allocator *allocators)
{
    // the number of slab_allocators each cpu has
size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
size_t size = MIN_SLAB_BLOCK_SIZE;
for (size_t i = 0; i < len; i++, size *= 2)
{
if (!slab_allocator_init(&allocators[i], size))
{
allocators[i].slab_block_size = 0;
            assert(0); // not enough space to init the slab allocator for this cpu
}
}
}
// the start memory address of the slab_allocator array;
// this array contains all cpus' slab_allocators,
// and each cpu's slab_allocators occupy contiguous memory
uintptr_t slab_allocators_for_all_cpu;
/** @brief initialize all cpus' slab allocators
 */
void slab_init_for_all_cpu()
{
int cpu_num = cpu_count();
size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
// get a block from buddy allocator to store slab_allocator array
size_t size = MAX(MIN_BUDDY_ALLOC_SIZE, sizeof(struct slab_allocator) * len * cpu_num + sizeof(uint16_t) * cpu_num);
size = next_pow2(size);
slab_allocators_for_all_cpu = (uintptr_t)get_block_from_buddy(size);
if (!slab_allocators_for_all_cpu)
{
assert(0);
return;
}
memset((void *)slab_allocators_for_all_cpu, 0, size);
#ifdef DEBUGPRINTF
size_t wasted_size = size - sizeof(struct slab_allocator) * cpu_num * len - sizeof(uint16_t) * cpu_num;
printf("slab_init_for_all_cpu Waste size: %zuB\n", wasted_size);
#endif
size_t offset = 0;
for (int i = 0; i < cpu_num; i++)
{
        // initialize the slab_allocators for cpu i
slab_init((struct slab_allocator *)(slab_allocators_for_all_cpu + offset));
offset += sizeof(struct slab_allocator) * len;
}
}
void slab_shrink()
{
int cur_cpu = cpu_current();
size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
uintptr_t cur_slab_allocator = slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * len * cur_cpu;
for (size_t index = 0; index < len; index++)
{
struct slab_allocator *allocator = (struct slab_allocator *)(cur_slab_allocator + sizeof(struct slab_allocator) * index);
list_t *cur = allocator->partial_slab.next;
while (cur != &(allocator->partial_slab))
{
list_t *next = cur->next;
list_t *block_cur = ((struct slab_header *)cur)->to_block.next;
size_t cnt = 0;
while (block_cur != &(((struct slab_header *)cur)->to_block))
{
list_t *block_next = block_cur->next;
cnt++;
block_cur = block_next;
}
assert(cnt <= allocator->blocks_per_slab);
if (cnt == allocator->blocks_per_slab)
{
list_remove(cur);
list_push(&(allocator->free_slab), cur);
}
cur = next;
}
cur = allocator->free_slab.next;
while (cur != &(allocator->free_slab))
{
list_t *next = cur->next;
if (next == &(allocator->free_slab))
break;
list_remove(cur);
        free_block_to_buddy(cur, get_request_size_slab(allocator));
cur = next;
}
}
}
/**
 * @brief get some memory from the slab allocator
 *
 * @param size the size of memory to be allocated; must be a power of 2, at least MIN_SLAB_BLOCK_SIZE and at most MAX_SLAB_BLOCK_SIZE
 * @return the pointer to the start memory address; NULL if failed
*/
void *slab_alloc(size_t size)
{
if (size < MIN_SLAB_BLOCK_SIZE || size > MAX_SLAB_BLOCK_SIZE || size != next_pow2(size))
return NULL;
    // each cpu uses its own slab_allocators
int cur_cpu = cpu_current();
uintptr_t cur_slab_allocator = slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * (LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1) * cur_cpu;
// cur_slab_allocator is the start address of the slab_allocator of current cpu
size_t index = LOG2(size) - LOG2(MIN_SLAB_BLOCK_SIZE);
// get the slab_allocator of target size
struct slab_allocator *alloc = (struct slab_allocator *)(cur_slab_allocator + sizeof(struct slab_allocator) * index);
if (alloc->slab_block_size != size)
return NULL; // should not happen
void *ret = NULL;
if (!list_empty(&(alloc->partial_slab)))
{
list_t *node = list_pop(&(alloc->partial_slab));
struct slab_header *header = (struct slab_header *)node;
ret = list_pop(&(header->to_block));
if (list_empty(&(header->to_block)))
{
list_push(&(alloc->full_slab), node);
}
else
{
list_push(&(alloc->partial_slab), node);
}
}
else if (!list_empty(&(alloc->free_slab)))
{
list_t *node = list_pop(&(alloc->free_slab));
// sizeof(struct slab_block_header);
struct slab_header *header = (struct slab_header *)node;
ret = list_pop(&(header->to_block));
if (list_empty(&(header->to_block)))
{
list_push(&(alloc->full_slab), node);
}
else
{
list_push(&(alloc->partial_slab), node);
}
}
else
{
        // if both the partial and free slab lists are empty, add a new slab,
        // i.e. get a block from the buddy allocator for the current cpu to use
        if (!add_slab_to_allocator(alloc))
            return NULL;
        // Maybe at some point we should shrink the slab allocator and give blocks back
        // to the buddy allocator; otherwise some cpus end up with little memory while
        // other cpus hold a lot.
        return slab_alloc(size);
}
return ret;
}
// used for kalloc_header
size_t MAGIC;
struct kalloc_header
{
size_t size;
union
{
uint16_t cnt; // only used for slab allocator
size_t canary; // for error detection
};
};
void slab_free(void *_ptr, size_t size)
{
if (!_ptr || size < MIN_SLAB_BLOCK_SIZE || size > MAX_SLAB_BLOCK_SIZE || size != next_pow2(size))
return;
int cur_cpu = cpu_current();
uintptr_t cur_slab_allocator = slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * (LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1) * cur_cpu;
size_t index = LOG2(size) - LOG2(MIN_SLAB_BLOCK_SIZE);
struct slab_allocator *alloc = (struct slab_allocator *)(cur_slab_allocator + sizeof(struct slab_allocator) * index);
if (alloc->slab_block_size != size)
return;
size_t request_size = size + sizeof(struct slab_header);
request_size = MAX(next_pow2(request_size), MIN_BUDDY_ALLOC_SIZE);
struct kalloc_header *kh = (struct kalloc_header *)(_ptr);
size_t cnt = kh->cnt;
/*
* / slab_header / slab_block_header / slab_block_header / ....
* | |
*target _ptr
*/
void *target = (void *)((uintptr_t)(_ptr) - sizeof(struct slab_header) - cnt * size);
struct slab_header *header = (struct slab_header *)(target);
struct slab_block_header *sbh = (struct slab_block_header *)(_ptr);
list_push(&(header->to_block), &(sbh->to_block));
sbh->cnt = cnt; // <--- CAUTION: this is important
list_remove(&(header->to_slab)); // equal to list_remove(header);
    /*
     * The slab may now belong in free_slab or partial_slab;
     * for simplicity we always push it to partial_slab rather than
     * traversing the whole block list to decide
     */
list_push(&(alloc->partial_slab), &(header->to_slab));
int cpu_num = cpu_count();
size_t len = LOG2(MAX_SLAB_BLOCK_SIZE) - LOG2(MIN_SLAB_BLOCK_SIZE) + 1;
uint16_t *free_cnt = (uint16_t *)(slab_allocators_for_all_cpu + sizeof(struct slab_allocator) * len * cpu_num + sizeof(uint16_t) * cur_cpu);
(*free_cnt)++;
if (*free_cnt == UINT16_MAX)
{
slab_shrink();
#ifdef DEBUGPRINTF
printf("CPU %d: slab_shrink\n", cur_cpu);
#endif
}
}
/*
 * @brief initialize the kallocator,
 * including the slab allocator and the buddy allocator.
 * Done by the boot cpu only
 */
static void kalloc_init(uintptr_t start, uintptr_t end)
{
srand(time(NULL));
MAGIC = ((size_t)(rand()) << 32) + rand();
buddy_init_for_heap(start, end);
slab_init_for_all_cpu();
}
/**
* @brief make canary for error detection
*/
size_t make_canary(size_t canary, uint16_t cnt)
{
/*
* |31/63............. 15.....0| bits
* | canary | cnt |
*/
size_t ret = cnt;
size_t mask = (SIZE_MAX) << (sizeof(cnt) * 8);
mask &= canary;
return ret | mask;
}
bool check_canary(size_t num, size_t canary, uint16_t cnt)
{
size_t ret = cnt;
size_t mask = (SIZE_MAX) << (sizeof(cnt) * 8);
mask &= canary;
return (ret | mask) == num;
}
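// A small usage sketch of the canary scheme (not called anywhere): the low
// 16 bits carry the slab block index @cnt, the high bits come from the
// per-boot canary value. The constants below are arbitrary example values,
// assuming a 64-bit size_t as the rest of the file does.
static inline void canary_selfcheck()
{
    size_t canary = 0x1234567890ABCDEFull;
    uint16_t cnt = 7;
    size_t num = make_canary(canary, cnt);
    assert((num & 0xFFFF) == cnt);
    assert(check_canary(num, canary, cnt));
    assert(!check_canary(num, canary, (uint16_t)(cnt + 1))); // a corrupted cnt is caught
}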
/**
* @brief alloc memory of size
*
* @param _size the size of memory to be allocated
* @return void* the start address of the allocated memory , NULL if failed
*/
static void *
kalloc(size_t _size)
{
if (_size == 0)
return NULL;
void *ret = NULL;
size_t size = MAX(next_pow2(_size + sizeof(struct kalloc_header)), MIN_SLAB_BLOCK_SIZE);
    // for small sizes use the slab allocator; each cpu has its own slab allocators
if (size >= MIN_SLAB_BLOCK_SIZE && size <= MAX_SLAB_BLOCK_SIZE)
{
// slab alloc
void *receive = slab_alloc(size); // bug for size==32768
if (!receive)
return NULL;
struct slab_block_header *block_header = (struct slab_block_header *)receive;
uint16_t cnt = block_header->cnt;
struct kalloc_header *ptr = (struct kalloc_header *)(receive);
ptr->size = size;
ptr->canary = make_canary(MAGIC ^ size, cnt);
ret = ptr + 1;
}
    else // otherwise use the buddy allocator; all cpus share the buddy allocators
{
struct kalloc_header *ptr = (struct kalloc_header *)get_block_from_buddy(size);
if (!ptr)
return NULL;
ptr->size = size;
ptr->canary = make_canary(MAGIC ^ size, (uint16_t)(rand())); // cnt is useless in buddy allocator
ret = ptr + 1;
}
return ret;
}
/**
* @brief free memory allocated by kalloc
*
* @param ptr the start address of the memory to be freed
*/
static void kfree(void *ptr)
{
if (!ptr)
return;
struct kalloc_header *header = (struct kalloc_header *)(ptr)-1;
if (!check_canary(header->canary, MAGIC ^ header->size, header->cnt) || header->size < MIN_SLAB_BLOCK_SIZE)
{
assert(0);
return;
}
if (header->size >= MIN_SLAB_BLOCK_SIZE && header->size <= MAX_SLAB_BLOCK_SIZE)
{
slab_free(header, header->size);
}
else
{
free_block_to_buddy(header, header->size);
}
}
signed main(int argc, char **argv)
{
setbuf(stdout, 0);
bool use_buddy = true;
if (argc >= 2)
{
use_buddy = atoi(argv[1]);
}
if (use_buddy)
printf("use buddy\n");
else
printf("use malloc\n");
const size_t size = 125 * MB;
uintptr_t ptr = (uintptr_t)malloc(size);
clock_t s = clock();
kalloc_init(ptr, ptr + size);
printf("kalloc_init: %ld clocks\n", (clock() - s));
auto test = [&]()
{
_map_mutex.lock();
_map[this_thread::get_id()] = cpuid++;
printf("thread %d\n", _map[this_thread::get_id()]);
_map_mutex.unlock();
clock_t start_time = clock();
int cnt = 1;
size_t total_size = 0;
vector<void *> parr;
#define BASE_SIZE (32 * B)
for (int i = 0; i < 5e3; i++)
{
void *p;
if (use_buddy)
p = kalloc(BASE_SIZE);
else
p = malloc(BASE_SIZE);
if (p)
{
total_size += BASE_SIZE;
cnt++;
// printf("%d thread : %zuB\n", _map[this_thread::get_id()], BASE_SIZE * (i + 1));
memset(p, 0xff, BASE_SIZE);
parr.push_back(p);
// kfree(p);
}
}
for (auto &&i : parr)
{
if (use_buddy)
kfree(i);
else
free(i);
}
printf("%d thread :use %ld clocks us alloc %d times , total %zuKB \n", _map[this_thread::get_id()], clock() - start_time, cnt, total_size / KB);
};
vector<thread> threads;
for (int i = 0; i < ThreadNums; i++)
{
thread t(test);
threads.push_back(move(t));
}
test();
for (auto &&i : threads)
{
i.join();
}
return 0;
}