Last active
December 2, 2022 23:41
-
-
Save ktsaou/42b7a2f617faf709cd20b090c332a9ec to your computer and use it in GitHub Desktop.
spinlock vs mutex benchmark
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
// compile with:
// gcc -O2 -o spinlock spinlock.c -lpthread && ./spinlock
//
// verification and spinlock stats can be enabled with this:
// gcc -O2 -DSPINLOCK_VERIFY_AND_STATS=1 -o spinlock spinlock.c -lpthread && ./spinlock
#define _GNU_SOURCE | |
#define __USE_GNU | |
#include <pthread.h> | |
#include <errno.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdarg.h> | |
#include <stddef.h> | |
#include <ctype.h> | |
#include <string.h> | |
#include <strings.h> | |
#include <arpa/inet.h> | |
#include <netinet/tcp.h> | |
#include <sys/ioctl.h> | |
#include <libgen.h> | |
#include <dirent.h> | |
#include <fcntl.h> | |
#include <getopt.h> | |
#include <grp.h> | |
#include <pwd.h> | |
#include <limits.h> | |
#include <locale.h> | |
#include <net/if.h> | |
#include <poll.h> | |
#include <signal.h> | |
#include <syslog.h> | |
#include <sys/mman.h> | |
#include <sys/resource.h> | |
#include <sys/socket.h> | |
#include <sys/syscall.h> | |
#include <sys/time.h> | |
#include <sys/types.h> | |
#include <sys/wait.h> | |
#include <sys/un.h> | |
#include <time.h> | |
#include <unistd.h> | |
#include <uuid/uuid.h> | |
#include <spawn.h> | |
#include <uv.h> | |
#include <assert.h> | |
// Branch-prediction hints built on the compiler builtin: the condition is
// normalized to 0/1 with !! and the second argument is the expected value.
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

// Expands to a for-loop header: the statement that follows runs only while
// (x) is false, and assert(x) is evaluated after each pass of that statement.
// This lets the caller print a diagnostic right before the assertion fires.
// Must be followed by a statement or a block.
#define assert_with_message(x) for ( ; !(x) ; assert(x) )
// Returns the kernel thread id of the calling thread.
// The id is obtained with the raw SYS_gettid syscall the first time a thread
// calls this function and is then served from a thread-local cache
// (-1 marks "not fetched yet").
// NOTE(review): newer glibc versions declare their own gettid() in
// <unistd.h>; this definition presumably has to keep this exact, non-static
// signature to stay compatible with that declaration — confirm on the target.
pid_t gettid(void) {
    static __thread pid_t cached_tid = -1;

    if(unlikely(cached_tid == -1))
        cached_tid = (pid_t)syscall(SYS_gettid);

    return cached_tid;
}
// Time unit conversion factors. They are unsigned long long so that products
// with second counts are computed in (at least) 64-bit unsigned arithmetic.
#define USEC_PER_SEC 1000000ULL
#define NSEC_PER_SEC 1000000000ULL
#define NSEC_PER_USEC 1000ULL

// A timestamp or a duration, expressed in microseconds.
typedef unsigned long long usec_t;
// Reads the clock identified by clk_id and returns its value in microseconds.
// Returns 0 (after reporting the failure on stderr) when clock_gettime() fails.
usec_t now_usec(clockid_t clk_id) {
    struct timespec ts = { 0, 0 };

    if(unlikely(clock_gettime(clk_id, &ts) == -1)) {
        fprintf(stderr, "clock_gettime(%d, &timespec) failed: %s\n", (int)clk_id, strerror(errno));
        return 0;
    }

    // whole seconds converted to usec, plus the sub-second part converted from nsec
    return (usec_t)ts.tv_sec * USEC_PER_SEC + ((ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC);
}
// Time each worker sleeps while holding the lock. All-zero means "no work":
// the stress loops skip nanosleep() entirely in that case.
// Alternative setting (1 ms of work per lock):
// static const struct timespec work_duration = { .tv_sec = 0, .tv_nsec = 1 * NSEC_PER_SEC / 1000 };
static const struct timespec work_duration = { .tv_sec = 0, .tv_nsec = 0 };

// Shared counter incremented by every worker while it holds the lock under test.
size_t counter = 0;

// Set to true (with an atomic store) to ask all workers to leave their loops.
bool stop_stress = false;
// ----------------------------------------------------------------------------
// SPINLOCK

// A minimal spinlock: a single flag operated on with the __atomic builtins.
// When SPINLOCK_VERIFY_AND_STATS is defined it also carries statistics and
// the thread id of the current holder, used for ownership verification.
typedef struct netdata_spinlock {
    bool locked;            // true while some thread holds the lock
#ifdef SPINLOCK_VERIFY_AND_STATS
    size_t spins;           // accumulated spin iterations of all lock() calls
    size_t sleeps;          // accumulated nanosleep() calls of all lock() calls
    pid_t locker_pid;       // thread id of the holder, 0 when unlocked
#endif
} SPINLOCK;

// Compound literal of an unlocked spinlock; unnamed members are zeroed.
#define NETDATA_SPINLOCK_INITIALIZER (SPINLOCK) { .locked = false }
// Resets *spinlock to the unlocked state described by
// NETDATA_SPINLOCK_INITIALIZER.
void __netdata_spinlock_init(SPINLOCK *spinlock) {
    *spinlock = NETDATA_SPINLOCK_INITIALIZER;
}
// Acquires the spinlock, blocking the caller until it succeeds.
//
// The lock is taken with an atomic test-and-set (acquire ordering). While it
// is held by someone else, the caller polls the flag with relaxed loads and
// only retries the test-and-set once the flag reads as clear. Every 8th poll
// the caller issues a 1 ns nanosleep() instead of spinning straight on.
//
// With SPINLOCK_VERIFY_AND_STATS defined, the function additionally aborts if
// the lock it just acquired still records an owner, records the caller as the
// owner, and adds this call's spin/sleep counts to the lock's statistics.
void __netdata_spinlock_lock(SPINLOCK *spinlock) {
    static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
    enum { SPINS_BEFORE_SLEEP = 8 };

    size_t spins = 0;
#ifdef SPINLOCK_VERIFY_AND_STATS
    size_t sleeps = 0;
#endif

    while(__atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE)) {
        // somebody else holds it: wait on plain loads until it looks free
        do {
#ifdef SPINLOCK_VERIFY_AND_STATS
            // spins keeps growing here so that the total can be reported
            if(unlikely((++spins % SPINS_BEFORE_SLEEP) == 0)) {
                ++sleeps;
                nanosleep(&ns, NULL);
            }
#else
            if(unlikely(++spins == SPINS_BEFORE_SLEEP)) {
                spins = 0;
                nanosleep(&ns, NULL);
            }
#endif
        } while(__atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED));
    }

#ifdef SPINLOCK_VERIFY_AND_STATS
    pid_t last_locker_pid = spinlock->locker_pid;
    if(last_locker_pid != 0) {
        printf("spinlock locker pid is %d, but expected it to be unlocked, my pid is %d\n", last_locker_pid, gettid());
        abort();
    }

    // we have the lock
    spinlock->locker_pid = gettid();
    spinlock->spins += spins;
    spinlock->sleeps += sleeps;
#endif
}
// Releases the spinlock by clearing its flag with release ordering.
//
// With SPINLOCK_VERIFY_AND_STATS defined, the function first aborts if the
// recorded owner is not the calling thread, and clears the recorded owner
// before the flag is released.
void __netdata_spinlock_unlock(SPINLOCK *spinlock) {
#ifdef SPINLOCK_VERIFY_AND_STATS
    pid_t last_locker_pid = spinlock->locker_pid;
    if(last_locker_pid != gettid()) {
        printf("Spinlock should be locked by my pid %d, but it is locked by pid %d\n", gettid(), last_locker_pid);
        abort();
    }

    spinlock->locker_pid = 0;
#endif

    __atomic_clear(&spinlock->locked, __ATOMIC_RELEASE);
}
SPINLOCK sp = NETDATA_SPINLOCK_INITIALIZER; | |
size_t stress_test_spinlock(size_t id) { | |
(void)id; | |
//printf(" >> Thread %zu started as tid %d\n", id, gettid()); | |
size_t count = 0; | |
while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) { | |
__netdata_spinlock_lock(&sp); | |
if(work_duration.tv_nsec || work_duration.tv_sec) | |
nanosleep(&work_duration, NULL); | |
counter++; | |
count++; | |
__netdata_spinlock_unlock(&sp); | |
} | |
return count; | |
} | |
// ---------------------------------------------------------------------------- | |
// PTHREAD MUTEX | |
pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; | |
size_t stress_test_mutex(size_t id) { | |
(void)id; | |
//printf(" >> Thread %zu started as tid %d\n", id, gettid()); | |
size_t count = 0; | |
while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) { | |
pthread_mutex_lock(&mutex); | |
if(work_duration.tv_nsec || work_duration.tv_sec) | |
nanosleep(&work_duration, NULL); | |
counter++; | |
count++; | |
pthread_mutex_unlock(&mutex); | |
} | |
return count; | |
} | |
// ---------------------------------------------------------------------------- | |
// PTHREAD RWLOCK | |
pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER; | |
size_t stress_test_rwlock(size_t id) { | |
(void)id; | |
//printf(" >> Thread %zu started as tid %d\n", id, gettid()); | |
size_t count = 0; | |
while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) { | |
pthread_rwlock_wrlock(&rwlock); | |
if(work_duration.tv_nsec || work_duration.tv_sec) | |
nanosleep(&work_duration, NULL); | |
counter++; | |
count++; | |
pthread_rwlock_unlock(&rwlock); | |
} | |
return count; | |
} | |
// ---------------------------------------------------------------------------- | |
// PTHREAD SPIN | |
pthread_spinlock_t pspinlock; | |
size_t stress_test_pspinlock(size_t id) { | |
(void)id; | |
//printf(" >> Thread %zu started as tid %d\n", id, gettid()); | |
size_t count = 0; | |
while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) { | |
pthread_spin_lock(&pspinlock); | |
if(work_duration.tv_nsec || work_duration.tv_sec) | |
nanosleep(&work_duration, NULL); | |
counter++; | |
count++; | |
pthread_spin_unlock(&pspinlock); | |
} | |
return count; | |
} | |
// ---------------------------------------------------------------------------- | |
// stress test controller | |
struct worker { | |
size_t (*callback)(size_t id); | |
pthread_t thread; | |
size_t id; | |
size_t count; | |
usec_t duration_ut; | |
double cpu_pc; | |
}; | |
void *run_worker(void *ptr) { | |
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); | |
struct worker *me = ptr; | |
struct rusage start_ru, end_ru; | |
usec_t start_ut = now_usec(CLOCK_MONOTONIC); | |
getrusage(RUSAGE_THREAD, &start_ru); | |
me->count = me->callback(me->id); | |
getrusage(RUSAGE_THREAD, &end_ru); | |
usec_t end_ut = now_usec(CLOCK_MONOTONIC); | |
me->duration_ut = end_ut - start_ut; | |
unsigned long long user_cpu = end_ru.ru_utime.tv_sec * 1000000ULL + end_ru.ru_utime.tv_usec - start_ru.ru_utime.tv_sec * 1000000ULL + start_ru.ru_utime.tv_usec; | |
unsigned long long system_cpu = end_ru.ru_stime.tv_sec * 1000000ULL + end_ru.ru_stime.tv_usec - start_ru.ru_stime.tv_sec * 1000000ULL + start_ru.ru_stime.tv_usec; | |
me->cpu_pc = (double)(user_cpu + system_cpu) * 100.0 / (double)me->duration_ut; | |
return me; | |
} | |
void run_test(size_t (*function)(size_t id), const char *name) { | |
int threads_num[] = { 1, 2, 3, 4, 8, 16, 24, 48, 96, 128 }; | |
int runs = sizeof(threads_num) / sizeof(int); | |
static const struct timespec ns = { .tv_sec = 5, .tv_nsec = 0 }; | |
printf("\n%s:\n", name); | |
for(int i = 0; i < runs ; i++) { | |
int threads = threads_num[i]; | |
struct worker workers[threads]; | |
memset(workers, 0, sizeof(workers)); | |
sp = NETDATA_SPINLOCK_INITIALIZER; | |
stop_stress = false; | |
counter = 0; | |
for(int p = 0; p < threads ;p++) { | |
struct worker *w = &workers[p]; | |
w->callback = function; | |
w->id = p; | |
int ret = pthread_create(&w->thread, NULL, *run_worker, (void *)w); | |
if(ret != 0) { | |
fprintf(stderr, "failed to create thread %d, pthread_create() failed with code %d\n", p, ret); | |
exit(1); | |
} | |
} | |
nanosleep(&ns, NULL); | |
__atomic_store_n(&stop_stress, true, __ATOMIC_RELAXED); | |
size_t total_count = 0, min = 0, max = 0, avg = 0, deviation = 0; | |
double total_cpu = 0.0; | |
usec_t duration_ut = ns.tv_sec * USEC_PER_SEC; | |
for(int p = 0; p < threads ;p++) { | |
struct worker *w = &workers[p]; | |
int ret = pthread_join(w->thread, NULL); | |
if(ret != 0) { | |
fprintf(stderr, "failed to join thread %d, pthread_join() failed with code %d\n", p, ret); | |
exit(1); | |
} | |
total_count += w->count; | |
total_cpu += w->cpu_pc; | |
if(w->duration_ut > duration_ut) | |
duration_ut = w->duration_ut; | |
if(!p) { | |
min = w->count; | |
max = w->count; | |
} | |
else { | |
if(w->count < min) | |
min = w->count; | |
if(w->count > max) | |
max = w->count; | |
} | |
} | |
avg = total_count / threads; | |
deviation = (max - min) * 100 / avg; | |
printf( "Run No %3d: %3d threads, locks %10zu (%10zu %s), " | |
#ifdef SPINLOCK_VERIFY_AND_STATS | |
"spins %10zu, sleeps %10zu, " | |
#endif | |
"rate %8.2f Mlocks/s, cpu %8.2f %%, deviation %5zu %%\n", | |
i + 1, threads, counter, total_count, (counter == total_count) ? " OK" : "ERROR", | |
#ifdef SPINLOCK_VERIFY_AND_STATS | |
sp.spins, sp.sleeps, | |
#endif | |
(double)total_count / (double)duration_ut, | |
total_cpu, | |
deviation | |
); | |
} | |
} | |
int main(int argc, char **argv) { | |
(void)argc; (void)argv; | |
pthread_spin_init(&pspinlock, PTHREAD_PROCESS_PRIVATE); | |
run_test(stress_test_spinlock, "SPINLOCK"); | |
run_test(stress_test_pspinlock, "PTHREAD SPIN"); | |
run_test(stress_test_mutex, "PTHREAD MUTEX"); | |
run_test(stress_test_rwlock, "PTHREAD RWLOCK"); | |
pthread_spin_destroy(&pspinlock); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
AMD EPYC 7453 28-Core Processor x 2 processors (not an idle machine)