Last active
November 13, 2024 16:18
-
-
Save jart/c0008dc266425c741c7ff8078d401699 to your computer and use it in GitHub Desktop.
Easy high performance chrome://tracing output for C++
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#define _GNU_SOURCE | |
#include <pthread.h> | |
#include <stdatomic.h> | |
#include <stdbool.h> | |
#include <stdio.h> | |
#include <threads.h> | |
#include <unistd.h> | |
// One recorded trace event. The saver prints every field of each event
// as one JSON object in the output array.
struct TraceEvent
{
    unsigned long long ts; // raw tick counter value captured when recorded
    int pid;               // process id reported for the event
    int tid;               // thread id reported for the event
    const char* name;      // event name; a null name marks an unused slot
    const char* cat;       // event category string
    char ph;               // phase character, 'B' for begin or 'E' for end
};
static int g_pid; | |
static atomic_bool g_oom; | |
static atomic_int g_count; | |
static thread_local int g_id; | |
static thread_local int g_ids; | |
static thread_local int g_tid; | |
static unsigned long g_start_rdtsc; | |
static struct TraceEvent g_events[1000000]; | |
/**
 * Reads the CPU's free-running tick counter.
 *
 * On x86-64 this is the 64-bit value of the RDTSC instruction. On AArch64
 * it is the virtual counter register (cntvct_el0) multiplied by a fixed
 * fudge factor of 48.
 * NOTE(review): the factor presumably scales the ARM counter to roughly
 * the same rate the saver assumes for x86 ticks -- confirm per machine.
 *
 * @return current tick count
 */
static unsigned long
rdtsc(void)
{
#if defined(__x86_64__)
    unsigned lo, hi;
    // RDTSC returns the low half in EAX and the high half in EDX.
    __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
    return (unsigned long)hi << 32 | lo;
#elif defined(__aarch64__)
    unsigned long ticks;
    __asm__ volatile("mrs %0, cntvct_el0" : "=r"(ticks));
    return ticks * 48; // the fudge factor
#else
#error "rdtsc() is only implemented for x86-64 and AArch64"
#endif
}
static int | |
llamafile_trace_oom(void) | |
{ | |
if (atomic_load_explicit(&g_oom, memory_order_relaxed)) | |
return -1; | |
if (atomic_exchange_explicit(&g_oom, true, memory_order_acq_rel)) | |
return -1; | |
fprintf(stderr, "warning: ran out of trace event memory\n"); | |
return -1; | |
} | |
static int | |
llamafile_trace_reserve(int count) | |
{ | |
int id = atomic_load_explicit(&g_count, memory_order_relaxed); | |
if (id + count > sizeof(g_events) / sizeof(*g_events)) | |
return llamafile_trace_oom(); | |
id = atomic_fetch_add_explicit(&g_count, count, memory_order_acq_rel); | |
if (id + count > sizeof(g_events) / sizeof(*g_events)) | |
return llamafile_trace_oom(); | |
return id; | |
} | |
static void | |
llamafile_trace_event(int id, const char* name, const char* cat, char ph) | |
{ | |
g_events[id].ts = rdtsc(); | |
g_events[id].pid = g_pid ? g_pid - 1 : getpid(); | |
g_events[id].tid = g_tid ? g_tid - 1 : gettid(); | |
g_events[id].name = name; | |
g_events[id].cat = cat; | |
g_events[id].ph = ph; | |
} | |
void | |
llamafile_trace_set_pid(int pid) | |
{ | |
g_pid = pid + 1; | |
} | |
void | |
llamafile_trace_set_tid(int tid) | |
{ | |
g_tid = tid + 1; | |
} | |
void | |
llamafile_trace_begin(const char* name) | |
{ | |
if (g_ids < 2) { | |
g_ids = 20; | |
g_id = llamafile_trace_reserve(g_ids); | |
if (g_id == -1) { | |
g_ids = 0; | |
return; | |
} | |
} | |
llamafile_trace_event(g_id++, name, "category", 'B'); | |
--g_ids; | |
} | |
void | |
llamafile_trace_end(const char* name) | |
{ | |
if (g_ids < 1) | |
return; | |
llamafile_trace_event(g_id++, name, "category", 'E'); | |
--g_ids; | |
} | |
static void | |
llamafile_trace_save(const char* filename) | |
{ | |
int count = atomic_load_explicit(&g_count, memory_order_relaxed); | |
if (!count) | |
return; | |
fprintf(stderr, "saving trace to %s...\n", filename); | |
FILE* file = fopen(filename, "w"); | |
if (!file) { | |
perror(filename); | |
return; | |
} | |
fprintf(file, "[\n"); | |
bool once = false; | |
for (int i = 0; i < count; i++) { | |
if (!g_events[i].name) | |
continue; | |
if (!once) { | |
once = true; | |
} else { | |
fputs(",\n", file); | |
} | |
fprintf(file, | |
"{\"name\": \"%s\", \"cat\": \"%s\", \"ph\": \"%c\", " | |
"\"ts\": %.3f, \"pid\": %d, \"tid\": %d}", | |
g_events[i].name, | |
g_events[i].cat, | |
g_events[i].ph, | |
(g_events[i].ts - g_start_rdtsc) / 3000., | |
g_events[i].pid, | |
g_events[i].tid); | |
} | |
fprintf(file, "\n]\n"); | |
fclose(file); | |
} | |
__attribute__((__constructor__)) static void | |
trace_startup(void) | |
{ | |
g_start_rdtsc = rdtsc(); | |
} | |
// Destructor: dumps the recorded events to "trace.json" in the current
// working directory.
__attribute__((__destructor__)) static void
trace_shutdown(void)
{
    llamafile_trace_save("trace.json"); // see chrome://tracing/
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#include "trace.h" | |
#include <assert.h> | |
#include <pthread.h> | |
#include <stdio.h> | |
#include <sys/resource.h> | |
#include <sys/time.h> | |
#include <time.h> | |
#define ITERATIONS 100000 | |
#define THREADS 30 | |
// Shared counter incremented by every worker; guarded by g_locker.
int g_chores;
// Mutex serializing updates to g_chores.
pthread_mutex_t g_locker = PTHREAD_MUTEX_INITIALIZER;
void* | |
worker(void* arg) | |
{ | |
llamafile_trace_begin("worker"); | |
for (int i = 0; i < ITERATIONS; ++i) { | |
pthread_mutex_lock(&g_locker); | |
++g_chores; | |
pthread_mutex_unlock(&g_locker); | |
} | |
llamafile_trace_end("worker"); | |
return 0; | |
} | |
/**
 * Subtracts two timevals.
 *
 * @param a minuend
 * @param b subtrahend
 * @return a - b; when a's microseconds are smaller than b's, one second
 *     is borrowed so the microsecond field stays non-negative
 */
struct timeval
tub(struct timeval a, struct timeval b)
{
    struct timeval diff;
    diff.tv_sec = a.tv_sec - b.tv_sec;
    diff.tv_usec = a.tv_usec - b.tv_usec;
    if (diff.tv_usec < 0) {
        // Borrow one second to bring the microseconds back into range.
        diff.tv_usec += 1000000;
        diff.tv_sec -= 1;
    }
    return diff;
}
/**
 * Converts a timeval to a microsecond count.
 *
 * The arithmetic is done in signed long so negative intervals do not
 * pass through an unsigned intermediate value.
 *
 * @param x time value to convert
 * @return x.tv_sec * 1000000 + x.tv_usec
 */
long
tomicros(struct timeval x)
{
    return (long)x.tv_sec * 1000000L + (long)x.tv_usec;
}
int | |
main() | |
{ | |
cpu_set_t x; | |
CPU_ZERO(&x); | |
CPU_SET(0, &x); | |
/* CPU_SET(1, &x); */ | |
sched_setaffinity(0, sizeof(x), &x); | |
struct timeval start; | |
gettimeofday(&start, 0); | |
pthread_t th[THREADS]; | |
for (int i = 0; i < THREADS; ++i) | |
pthread_create(&th[i], 0, worker, 0); | |
for (int i = 0; i < THREADS; ++i) | |
pthread_join(th[i], 0); | |
assert(g_chores == THREADS * ITERATIONS); | |
struct rusage ru; | |
struct timeval end; | |
gettimeofday(&end, 0); | |
getrusage(RUSAGE_SELF, &ru); | |
printf("%16ld us real\n" | |
"%16ld us user\n" | |
"%16ld us sys\n", | |
tomicros(tub(end, start)), | |
tomicros(ru.ru_utime), | |
tomicros(ru.ru_stime)); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment