spinlock vs mutex benchmark
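A micro-benchmark comparing four locking primitives under pure write contention: netdata's custom spinlock, pthread_spinlock_t, pthread_mutex_t, and pthread_rwlock_t (write-locked). For each primitive it runs 1 to 128 threads for 5 seconds per run; every thread repeatedly takes the lock, increments a shared counter, and releases it. Each run reports the total lock count, the aggregate lock rate in Mlocks/s, total CPU consumption, and the deviation between the busiest and least-busy thread.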
//
// compile with:
//   gcc -O2 -o spinlock spinlock.c -lpthread && ./spinlock
//
// verification and spinlock stats can be enabled with this:
//   gcc -O2 -DSPINLOCK_VERIFY_AND_STATS=1 -o spinlock spinlock.c -lpthread && ./spinlock
//
#define _GNU_SOURCE

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
#include <assert.h>
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

#define assert_with_message(x) for ( ; !(x) ; assert(x) )
pid_t gettid(void) {
    static __thread pid_t cached_tid = -1;

    if(unlikely(cached_tid == -1))
        cached_tid = (pid_t)syscall(SYS_gettid);

    return cached_tid;
}

#define USEC_PER_SEC  1000000ULL
#define NSEC_PER_SEC  1000000000ULL
#define NSEC_PER_USEC 1000ULL

typedef unsigned long long usec_t;

usec_t now_usec(clockid_t clk_id) {
    struct timespec ts = { 0, 0 };

    if(unlikely(clock_gettime(clk_id, &ts) == -1)) {
        printf("clock_gettime(%d, &timespec) failed.\n", clk_id);
        return 0;
    }

    return (usec_t)ts.tv_sec * USEC_PER_SEC + ((ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC);
}
// static const struct timespec work_duration = { .tv_sec = 0, .tv_nsec = 1 * NSEC_PER_SEC / 1000 };
static const struct timespec work_duration = { .tv_sec = 0, .tv_nsec = 0 };

size_t counter = 0;
bool stop_stress = false;
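// work_duration simulates work done while holding the lock; it is zero here,
// so the benchmark measures pure lock/unlock overhead. Uncommenting the 1 ms
// variant above turns this into a test of behavior with a held critical section.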
// ----------------------------------------------------------------------------
// SPINLOCK

typedef struct netdata_spinlock {
    bool locked;
#ifdef SPINLOCK_VERIFY_AND_STATS
    size_t spins;
    size_t sleeps;
    pid_t locker_pid;
#endif
} SPINLOCK;

#define NETDATA_SPINLOCK_INITIALIZER (SPINLOCK) { .locked = false }

void __netdata_spinlock_init(SPINLOCK *spinlock) {
    *spinlock = NETDATA_SPINLOCK_INITIALIZER;
}
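// __netdata_spinlock_lock() is a test-and-test-and-set (TTAS) loop: the outer
// __atomic_test_and_set() attempts to grab the lock with ACQUIRE ordering, and
// while the lock is busy the inner loop spins on relaxed loads, which keeps
// the cache line in a shared state instead of bouncing it between cores.
// After every 8 failed spins the thread nanosleep()s for 1 ns, which
// effectively yields the CPU to the scheduler, so waiters burn far less CPU
// than a pure busy-wait (compare with the PTHREAD SPIN results below).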
void __netdata_spinlock_lock(SPINLOCK *spinlock) {
    static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
    size_t spins = 0;

#ifdef SPINLOCK_VERIFY_AND_STATS
    size_t sleeps = 0;
#endif

    while(__atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE)) {
        do {
#ifdef SPINLOCK_VERIFY_AND_STATS
            if(unlikely((++spins % 8) == 0)) {
                ++sleeps;
                nanosleep(&ns, NULL);
            }
#else
            if(unlikely(++spins == 8)) {
                spins = 0;
                nanosleep(&ns, NULL);
            }
#endif
        } while(__atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED));
    }

#ifdef SPINLOCK_VERIFY_AND_STATS
    pid_t last_locker_pid = spinlock->locker_pid;
    if(last_locker_pid != 0) {
        printf("spinlock locker pid is %d, but expected it to be unlocked, my pid is %d\n", last_locker_pid, gettid());
        abort();
    }

    // we have the lock
    spinlock->locker_pid = gettid();
    spinlock->spins += spins;
    spinlock->sleeps += sleeps;
#endif
}
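// The RELEASE store in unlock pairs with the ACQUIRE in
// __netdata_spinlock_lock(), so everything written inside the critical
// section is visible to the next thread that acquires the lock.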
void __netdata_spinlock_unlock(SPINLOCK *spinlock) {
#ifdef SPINLOCK_VERIFY_AND_STATS
    pid_t last_locker_pid = spinlock->locker_pid;
    if(last_locker_pid != gettid()) {
        printf("Spinlock should be locked by my pid %d, but it is locked by pid %d\n", gettid(), last_locker_pid);
        abort();
    }

    spinlock->locker_pid = 0;
#endif

    __atomic_clear(&spinlock->locked, __ATOMIC_RELEASE);
}
SPINLOCK sp = NETDATA_SPINLOCK_INITIALIZER;

size_t stress_test_spinlock(size_t id) {
    (void)id;
    //printf(" >> Thread %zu started as tid %d\n", id, gettid());

    size_t count = 0;
    while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) {
        __netdata_spinlock_lock(&sp);

        if(work_duration.tv_nsec || work_duration.tv_sec)
            nanosleep(&work_duration, NULL);

        counter++;
        count++;
        __netdata_spinlock_unlock(&sp);
    }

    return count;
}
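// All four stress tests share the same shape: take the lock, optionally
// simulate work with nanosleep(work_duration), increment the shared `counter`
// and the thread-private `count`, release. The controller later checks that
// the sum of the private counts equals the shared counter, which verifies
// that the primitive actually provided mutual exclusion.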
// ----------------------------------------------------------------------------
// PTHREAD MUTEX

pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;

size_t stress_test_mutex(size_t id) {
    (void)id;
    //printf(" >> Thread %zu started as tid %d\n", id, gettid());

    size_t count = 0;
    while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) {
        pthread_mutex_lock(&mutex);

        if(work_duration.tv_nsec || work_duration.tv_sec)
            nanosleep(&work_duration, NULL);

        counter++;
        count++;
        pthread_mutex_unlock(&mutex);
    }

    return count;
}
// ----------------------------------------------------------------------------
// PTHREAD RWLOCK

pthread_rwlock_t rwlock = PTHREAD_RWLOCK_INITIALIZER;

size_t stress_test_rwlock(size_t id) {
    (void)id;
    //printf(" >> Thread %zu started as tid %d\n", id, gettid());

    size_t count = 0;
    while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) {
        pthread_rwlock_wrlock(&rwlock);

        if(work_duration.tv_nsec || work_duration.tv_sec)
            nanosleep(&work_duration, NULL);

        counter++;
        count++;
        pthread_rwlock_unlock(&rwlock);
    }

    return count;
}
// ----------------------------------------------------------------------------
// PTHREAD SPIN

pthread_spinlock_t pspinlock;

size_t stress_test_pspinlock(size_t id) {
    (void)id;
    //printf(" >> Thread %zu started as tid %d\n", id, gettid());

    size_t count = 0;
    while(!__atomic_load_n(&stop_stress, __ATOMIC_RELAXED)) {
        pthread_spin_lock(&pspinlock);

        if(work_duration.tv_nsec || work_duration.tv_sec)
            nanosleep(&work_duration, NULL);

        counter++;
        count++;
        pthread_spin_unlock(&pspinlock);
    }

    return count;
}
// ----------------------------------------------------------------------------
// stress test controller

struct worker {
    size_t (*callback)(size_t id);
    pthread_t thread;
    size_t id;
    size_t count;
    usec_t duration_ut;
    double cpu_pc;
};
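// RUSAGE_THREAD is Linux-specific: getrusage() then reports CPU time for the
// calling thread only, so each worker can compute its own user+system CPU
// and express it as a percentage of its wall-clock runtime.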
void *run_worker(void *ptr) {
    pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);

    struct worker *me = ptr;
    struct rusage start_ru, end_ru;

    usec_t start_ut = now_usec(CLOCK_MONOTONIC);
    getrusage(RUSAGE_THREAD, &start_ru);

    me->count = me->callback(me->id);

    getrusage(RUSAGE_THREAD, &end_ru);
    usec_t end_ut = now_usec(CLOCK_MONOTONIC);

    me->duration_ut = end_ut - start_ut;

    unsigned long long user_cpu   = (end_ru.ru_utime.tv_sec * 1000000ULL + end_ru.ru_utime.tv_usec) - (start_ru.ru_utime.tv_sec * 1000000ULL + start_ru.ru_utime.tv_usec);
    unsigned long long system_cpu = (end_ru.ru_stime.tv_sec * 1000000ULL + end_ru.ru_stime.tv_usec) - (start_ru.ru_stime.tv_sec * 1000000ULL + start_ru.ru_stime.tv_usec);

    me->cpu_pc = (double)(user_cpu + system_cpu) * 100.0 / (double)me->duration_ut;
    return me;
}
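// run_test() repeats the benchmark for 1..128 threads: it spawns the workers,
// lets them hammer the lock for 5 seconds, raises stop_stress, joins them,
// and reports the aggregate lock rate (Mlocks/s), total CPU, and the
// "deviation", i.e. (max - min) * 100 / avg of the per-thread lock counts,
// a rough fairness measure (0 % means all threads acquired the lock equally often).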
void run_test(size_t (*function)(size_t id), const char *name) {
    int threads_num[] = { 1, 2, 3, 4, 8, 16, 24, 48, 96, 128 };
    int runs = sizeof(threads_num) / sizeof(int);
    static const struct timespec ns = { .tv_sec = 5, .tv_nsec = 0 };

    printf("\n%s:\n", name);

    for(int i = 0; i < runs ; i++) {
        int threads = threads_num[i];

        struct worker workers[threads];
        memset(workers, 0, sizeof(workers));

        sp = NETDATA_SPINLOCK_INITIALIZER;
        stop_stress = false;
        counter = 0;

        for(int p = 0; p < threads ; p++) {
            struct worker *w = &workers[p];
            w->callback = function;
            w->id = p;

            int ret = pthread_create(&w->thread, NULL, run_worker, (void *)w);
            if(ret != 0) {
                fprintf(stderr, "failed to create thread %d, pthread_create() failed with code %d\n", p, ret);
                exit(1);
            }
        }

        nanosleep(&ns, NULL);
        __atomic_store_n(&stop_stress, true, __ATOMIC_RELAXED);

        size_t total_count = 0, min = 0, max = 0, avg = 0, deviation = 0;
        double total_cpu = 0.0;
        usec_t duration_ut = ns.tv_sec * USEC_PER_SEC;

        for(int p = 0; p < threads ; p++) {
            struct worker *w = &workers[p];

            int ret = pthread_join(w->thread, NULL);
            if(ret != 0) {
                fprintf(stderr, "failed to join thread %d, pthread_join() failed with code %d\n", p, ret);
                exit(1);
            }

            total_count += w->count;
            total_cpu += w->cpu_pc;

            if(w->duration_ut > duration_ut)
                duration_ut = w->duration_ut;

            if(!p) {
                min = w->count;
                max = w->count;
            }
            else {
                if(w->count < min)
                    min = w->count;

                if(w->count > max)
                    max = w->count;
            }
        }

        avg = total_count / threads;
        deviation = (max - min) * 100 / avg;

        printf( "Run No %3d: %3d threads, locks %10zu (%10zu %s), "
#ifdef SPINLOCK_VERIFY_AND_STATS
                "spins %10zu, sleeps %10zu, "
#endif
                "rate %8.2f Mlocks/s, cpu %8.2f %%, deviation %5zu %%\n",
                i + 1, threads, counter, total_count, (counter == total_count) ? " OK" : "ERROR",
#ifdef SPINLOCK_VERIFY_AND_STATS
                sp.spins, sp.sleeps,
#endif
                (double)total_count / (double)duration_ut,
                total_cpu,
                deviation
        );
    }
}
int main(int argc, char **argv) {
    (void)argc; (void)argv;

    pthread_spin_init(&pspinlock, PTHREAD_PROCESS_PRIVATE);

    run_test(stress_test_spinlock, "SPINLOCK");
    run_test(stress_test_pspinlock, "PTHREAD SPIN");
    run_test(stress_test_mutex, "PTHREAD MUTEX");
    run_test(stress_test_rwlock, "PTHREAD RWLOCK");

    pthread_spin_destroy(&pspinlock);
    return 0;
}
Results from a dual-socket AMD EPYC 7453 28-Core Processor (2 x 28 cores; not an idle machine):
SPINLOCK:
Run No 1: 1 threads, locks 1326569716 (1326569716 OK), rate 265.31 Mlocks/s, cpu 99.94 %, deviation 0 %
Run No 2: 2 threads, locks 1286590410 (1286590410 OK), rate 257.31 Mlocks/s, cpu 108.32 %, deviation 12 %
Run No 3: 3 threads, locks 1280099869 (1280099869 OK), rate 256.01 Mlocks/s, cpu 114.07 %, deviation 16 %
Run No 4: 4 threads, locks 1266448441 (1266448441 OK), rate 253.28 Mlocks/s, cpu 120.24 %, deviation 15 %
Run No 5: 8 threads, locks 1216662420 (1216662420 OK), rate 243.32 Mlocks/s, cpu 145.78 %, deviation 21 %
Run No 6: 16 threads, locks 1143276728 (1143276728 OK), rate 228.63 Mlocks/s, cpu 194.09 %, deviation 17 %
Run No 7: 24 threads, locks 1075294625 (1075294625 OK), rate 215.03 Mlocks/s, cpu 246.64 %, deviation 27 %
Run No 8: 48 threads, locks 906365377 ( 906365377 OK), rate 181.23 Mlocks/s, cpu 402.08 %, deviation 30 %
Run No 9: 96 threads, locks 639875022 ( 639875022 OK), rate 127.86 Mlocks/s, cpu 759.73 %, deviation 56 %
Run No 10: 128 threads, locks 509513168 ( 509513168 OK), rate 101.68 Mlocks/s, cpu 1065.38 %, deviation 65 %
PTHREAD SPIN:
Run No 1: 1 threads, locks 836470739 ( 836470739 OK), rate 167.29 Mlocks/s, cpu 99.97 %, deviation 0 %
Run No 2: 2 threads, locks 129801675 ( 129801675 OK), rate 25.96 Mlocks/s, cpu 199.67 %, deviation 48 %
Run No 3: 3 threads, locks 72699405 ( 72699405 OK), rate 14.54 Mlocks/s, cpu 299.75 %, deviation 24 %
Run No 4: 4 threads, locks 56771941 ( 56771941 OK), rate 11.35 Mlocks/s, cpu 399.80 %, deviation 61 %
Run No 5: 8 threads, locks 36946464 ( 36946464 OK), rate 7.39 Mlocks/s, cpu 799.49 %, deviation 147 %
Run No 6: 16 threads, locks 22213483 ( 22213483 OK), rate 4.44 Mlocks/s, cpu 1598.50 %, deviation 238 %
Run No 7: 24 threads, locks 18294873 ( 18294873 OK), rate 3.66 Mlocks/s, cpu 2397.35 %, deviation 201 %
Run No 8: 48 threads, locks 12448019 ( 12448019 OK), rate 2.49 Mlocks/s, cpu 4739.11 %, deviation 646 %
Run No 9: 96 threads, locks 6482798 ( 6482798 OK), rate 1.29 Mlocks/s, cpu 8355.59 %, deviation 288 %
Run No 10: 128 threads, locks 4678594 ( 4678594 OK), rate 0.93 Mlocks/s, cpu 8336.90 %, deviation 449 %
PTHREAD MUTEX:
Run No 1: 1 threads, locks 592234293 ( 592234293 OK), rate 118.44 Mlocks/s, cpu 99.91 %, deviation 0 %
Run No 2: 2 threads, locks 169372770 ( 169372770 OK), rate 33.87 Mlocks/s, cpu 190.04 %, deviation 34 %
Run No 3: 3 threads, locks 149135010 ( 149135010 OK), rate 29.83 Mlocks/s, cpu 277.78 %, deviation 16 %
Run No 4: 4 threads, locks 137290923 ( 137290923 OK), rate 27.46 Mlocks/s, cpu 372.87 %, deviation 17 %
Run No 5: 8 threads, locks 139647828 ( 139647828 OK), rate 27.93 Mlocks/s, cpu 766.50 %, deviation 14 %
Run No 6: 16 threads, locks 138530171 ( 138530171 OK), rate 27.70 Mlocks/s, cpu 1561.10 %, deviation 53 %
Run No 7: 24 threads, locks 135833156 ( 135833156 OK), rate 27.16 Mlocks/s, cpu 2341.83 %, deviation 41 %
Run No 8: 48 threads, locks 125167604 ( 125167604 OK), rate 25.03 Mlocks/s, cpu 4533.17 %, deviation 60 %
Run No 9: 96 threads, locks 122419292 ( 122419292 OK), rate 24.47 Mlocks/s, cpu 7532.81 %, deviation 44 %
Run No 10: 128 threads, locks 119881917 ( 119881917 OK), rate 23.95 Mlocks/s, cpu 7419.06 %, deviation 41 %
PTHREAD RWLOCK:
Run No 1: 1 threads, locks 577515489 ( 577515489 OK), rate 115.50 Mlocks/s, cpu 99.97 %, deviation 0 %
Run No 2: 2 threads, locks 70897773 ( 70897773 OK), rate 14.18 Mlocks/s, cpu 197.97 %, deviation 2 %
Run No 3: 3 threads, locks 100096016 ( 100096016 OK), rate 20.02 Mlocks/s, cpu 269.59 %, deviation 4 %
Run No 4: 4 threads, locks 73535306 ( 73535306 OK), rate 14.71 Mlocks/s, cpu 357.71 %, deviation 11 %
Run No 5: 8 threads, locks 65474954 ( 65474954 OK), rate 13.09 Mlocks/s, cpu 734.34 %, deviation 14 %
Run No 6: 16 threads, locks 62848407 ( 62848407 OK), rate 12.57 Mlocks/s, cpu 1508.38 %, deviation 25 %
Run No 7: 24 threads, locks 57507160 ( 57507160 OK), rate 11.50 Mlocks/s, cpu 2262.38 %, deviation 12 %
Run No 8: 48 threads, locks 56545326 ( 56545326 OK), rate 11.31 Mlocks/s, cpu 4475.63 %, deviation 40 %
Run No 9: 96 threads, locks 25148992 ( 25148992 OK), rate 5.02 Mlocks/s, cpu 7403.88 %, deviation 35 %
Run No 10: 128 threads, locks 15254280 ( 15254280 OK), rate 3.05 Mlocks/s, cpu 7664.81 %, deviation 65 %
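In short: the custom spinlock sustains over 100 Mlocks/s even at 128 threads while using by far the least CPU, because contended waiters back off with nanosleep() instead of burning cycles. pthread's spinlock collapses under contention (0.93 Mlocks/s at 128 threads while consuming roughly 83 CPU cores). The mutex and the rwlock land in between, with the rwlock degrading further at high thread counts.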