Created
August 22, 2018 18:04
-
-
Save munnellg/f655033dd97af442bc09804f07b90292 to your computer and use it in GitHub Desktop.
Benchmarking glibc's strlen function
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <math.h> | |
#include <time.h> | |
#include <errno.h> | |
#include <stdio.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#ifdef __MACH__ | |
#include <sys/time.h> | |
#include <mach/clock.h> | |
#include <mach/mach.h> | |
#endif | |
#define STR_MIN 8 // start with 8 bytes | |
#define STR_MAX (4 * 1024 * 1024) // work up to 4 megabytes | |
#define BUF_LEN (STR_MAX + 1) // size of the string buffer | |
#define MAX_ITR 1000 // we'll average time over 1000 iterations | |
static size_t | |
glibc_strlen (const char *str) | |
{ | |
const char *char_ptr; | |
const unsigned long int *longword_ptr; | |
unsigned long int longword, himagic, lomagic; | |
for (char_ptr = str; ((unsigned long int) char_ptr | |
& (sizeof (longword) - 1)) != 0; | |
++char_ptr) | |
if (*char_ptr == '\0') | |
return char_ptr - str; | |
longword_ptr = (unsigned long int *) char_ptr; | |
himagic = 0x80808080L; | |
lomagic = 0x01010101L; | |
if (sizeof (longword) > 4) { | |
himagic = ((himagic << 16) << 16) | himagic; | |
lomagic = ((lomagic << 16) << 16) | lomagic; | |
} | |
if (sizeof (longword) > 8) | |
abort (); | |
for (;;) { | |
longword = *longword_ptr++; | |
if (((longword - lomagic) & ~longword & himagic) != 0) { | |
const char *cp = (const char *) (longword_ptr - 1); | |
if (cp[0] == 0) | |
return cp - str; | |
if (cp[1] == 0) | |
return cp - str + 1; | |
if (cp[2] == 0) | |
return cp - str + 2; | |
if (cp[3] == 0) | |
return cp - str + 3; | |
if (sizeof (longword) > 4) { | |
if (cp[4] == 0) | |
return cp - str + 4; | |
if (cp[5] == 0) | |
return cp - str + 5; | |
if (cp[6] == 0) | |
return cp - str + 6; | |
if (cp[7] == 0) | |
return cp - str + 7; | |
} | |
} | |
} | |
} | |
static size_t | |
simple_strlen (const char *str) | |
{ | |
const char *p = str; | |
if (str == NULL) | |
return 0; | |
while (*p != '\0') | |
p++; | |
return p - str; | |
} | |
static double | |
get_time (void) | |
{ | |
#ifdef _WIN32 | |
return 0.0; | |
#else | |
struct timespec t; | |
static double prev_value = 0.0; | |
#if __MACH__ | |
clock_serv_t cclock; | |
mach_timespec_t mts; | |
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock); | |
int r = clock_get_time(cclock, &mts); | |
mach_port_deallocate(mach_task_self(), cclock); | |
t.tv_sec = mts.tv_sec; | |
t.tv_nsec = mts.tv_nsec; | |
#else | |
int r = clock_gettime(CLOCK_MONOTONIC, &t); | |
#endif | |
if (r < 0) { | |
fprintf(stderr, "%s\n", strerror(errno)); | |
return prev_value; | |
} | |
double ns = t.tv_nsec; | |
double s = ns * 0.000000001; | |
time_t tts = t.tv_sec; | |
s += difftime(tts, 0); | |
prev_value = s; | |
return s; | |
#endif | |
} | |
static void | |
gen_unitstr (char *strbuf, size_t i) | |
{ | |
char *units[] = { "B", "KB", "MB", "GB", "TB" }; | |
int unit = log2(i) / 10; | |
sprintf(strbuf, "%d%s", (int) (i / pow(1024, unit)), units[unit]); | |
} | |
static void | |
eval_strlen (const char *name, size_t (*func)(const char *s)) | |
{ | |
/* Use nudge to shove buffer off the word boundary */ | |
/* If the size of nudge is a multiple of 8 then buffer is word aligned */ | |
struct offset { char nudge[1]; char buffer[BUF_LEN]; } offset; | |
char *buffer = offset.buffer; | |
char unitstr[6]; | |
memset(buffer, 'x', sizeof(char) * BUF_LEN); | |
buffer[STR_MAX] = 0; | |
fprintf(stdout, "Beginning test: %s\n", name); | |
fprintf(stdout, "Buffer at address %p\n", buffer); | |
for (size_t i = STR_MIN; i <= STR_MAX; i <<= 1) { | |
buffer[i] = 0; | |
gen_unitstr(unitstr, i); | |
fprintf(stdout, "[len = %5s] ", unitstr); | |
fflush(stdout); | |
double start = get_time(); | |
for (int j = 0; j < MAX_ITR; j++ ) { | |
/* update prevents compiler from optimising the loop away */ | |
buffer[j % i] = rand() % 26 + 'a'; | |
size_t res = func(buffer); | |
if (res != i) { | |
fprintf(stderr, "\nERROR: %s : ", name); | |
fprintf(stderr, "result %lu, expected %lu\n", res, i); | |
return; | |
} | |
} | |
double end = get_time(); | |
fprintf(stdout, "avg %.5fs over %d iterations\n", (end - start) / MAX_ITR, MAX_ITR); | |
buffer[i] = 'x'; | |
} | |
} | |
int | |
main ( int argc, char *argv[] ) | |
{ | |
srand(time(NULL)); | |
eval_strlen("simple ", simple_strlen); | |
fprintf(stdout, "\n"); | |
eval_strlen("glibc ", glibc_strlen); | |
fprintf(stdout, "\n"); | |
eval_strlen("string.h", strlen); | |
return EXIT_SUCCESS; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Benchmarking refactored
https://godbolt.org/z/xabx11EqY
strlen()
would always be the fastest onesimple_strlen()
Try and reuse as much as possible. Code the solution that fulfills the requirements with the minimum amount of coding possible. Benchmark as you go. Modern compilers are powerfull and unpredictable.