Skip to content

Instantly share code, notes, and snippets.

@jart
Last active November 13, 2024 16:18
Show Gist options
  • Save jart/c0008dc266425c741c7ff8078d401699 to your computer and use it in GitHub Desktop.
Save jart/c0008dc266425c741c7ff8078d401699 to your computer and use it in GitHub Desktop.
Easy high performance chrome://tracing output for C++
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#define _GNU_SOURCE
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <threads.h>
#include <unistd.h>
// One recorded trace event; llamafile_trace_save() writes each used slot
// as one JSON object.
struct TraceEvent
{
// Raw rdtsc() reading taken when the event was recorded.
unsigned long long ts;
// Process id: the llamafile_trace_set_pid() override if set, else getpid().
int pid;
// Thread id: the llamafile_trace_set_tid() override if set, else gettid().
int tid;
// Event name. Only the pointer is stored (no copy), so the string must
// still be valid when the trace is saved. NULL marks an unused slot.
const char* name;
// Category string; like name, stored by pointer only.
const char* cat;
// Phase character written to the "ph" field: 'B' (begin) or 'E' (end).
char ph;
};
// Process id override stored as pid + 1; zero means "not set, use getpid()".
static int g_pid;
// Set the first time the out-of-space warning is printed, so it prints once.
static atomic_bool g_oom;
// Number of g_events slots handed out so far by llamafile_trace_reserve().
static atomic_int g_count;
// Per-thread: index of the next slot in this thread's reserved chunk.
static thread_local int g_id;
// Per-thread: how many slots remain in this thread's reserved chunk.
static thread_local int g_ids;
// Per-thread id override stored as tid + 1; zero means "use gettid()".
static thread_local int g_tid;
// rdtsc() reading captured by the constructor; subtracted from every event
// timestamp when the trace is written.
static unsigned long g_start_rdtsc;
// Fixed-capacity event storage. Being static it starts zeroed, so slots that
// were reserved but never written have a NULL name and are skipped on save.
static struct TraceEvent g_events[1000000];
// Reads a free-running hardware tick counter.
//
// On x86-64 this executes the `rdtsc` instruction and joins the two 32-bit
// halves it leaves in EDX:EAX into one 64-bit value. Every other target takes
// the #else path, which reads the AArch64 `cntvct_el0` register and scales
// it by 48.
// NOTE(review): the factor 48 is only described as a "fudge factor" in the
// original; presumably it approximates the x86 tick rate -- confirm.
static unsigned long
rdtsc(void)
{
#ifdef __x86_64__
    unsigned lo, hi;
    __asm__ volatile("rdtsc" : "=a"(lo), "=d"(hi));
    unsigned long ticks = hi;
    ticks <<= 32;
    ticks |= lo;
    return ticks;
#else
    unsigned long counter;
    __asm__ volatile("mrs %0, cntvct_el0" : "=r"(counter));
    return 48 * counter; // the fudge factor
#endif
}
// Reports that the event buffer is exhausted and returns -1.
//
// The warning is written to stderr only by the first caller: a cheap relaxed
// load filters the common "already reported" case, and the exchange decides
// the winner when several threads get here at the same time.
static int
llamafile_trace_oom(void)
{
    bool reported = atomic_load_explicit(&g_oom, memory_order_relaxed);
    if (!reported)
        reported = atomic_exchange_explicit(&g_oom, true, memory_order_acq_rel);
    if (!reported)
        fprintf(stderr, "warning: ran out of trace event memory\n");
    return -1;
}
static int
llamafile_trace_reserve(int count)
{
int id = atomic_load_explicit(&g_count, memory_order_relaxed);
if (id + count > sizeof(g_events) / sizeof(*g_events))
return llamafile_trace_oom();
id = atomic_fetch_add_explicit(&g_count, count, memory_order_acq_rel);
if (id + count > sizeof(g_events) / sizeof(*g_events))
return llamafile_trace_oom();
return id;
}
static void
llamafile_trace_event(int id, const char* name, const char* cat, char ph)
{
g_events[id].ts = rdtsc();
g_events[id].pid = g_pid ? g_pid - 1 : getpid();
g_events[id].tid = g_tid ? g_tid - 1 : gettid();
g_events[id].name = name;
g_events[id].cat = cat;
g_events[id].ph = ph;
}
// Overrides the process id recorded in subsequent events.
//
// The value is kept biased by one so that a stored zero can mean "no
// override"; llamafile_trace_event() removes the bias when it reads it.
void
llamafile_trace_set_pid(int pid)
{
    g_pid = 1 + pid;
}
// Overrides the thread id recorded in events from the calling thread.
//
// g_tid is thread-local, so the override applies only to the thread that
// calls this. The value is kept biased by one so that a stored zero can mean
// "no override".
void
llamafile_trace_set_tid(int tid)
{
    g_tid = 1 + tid;
}
// Records a 'B' (begin) event called `name` for the calling thread.
//
// Each thread writes into a private chunk of 20 slots. A new chunk is
// reserved whenever fewer than two slots remain -- presumably so that the
// matching llamafile_trace_end() still finds a free slot. A leftover slot in
// the old chunk is simply abandoned. If no chunk can be reserved the event is
// dropped and the thread is left with zero slots.
void
llamafile_trace_begin(const char* name)
{
    enum { kChunk = 20 };
    if (g_ids <= 1) {
        g_id = llamafile_trace_reserve(kChunk);
        if (g_id == -1) {
            g_ids = 0;
            return;
        }
        g_ids = kChunk;
    }
    llamafile_trace_event(g_id, name, "category", 'B');
    g_id += 1;
    g_ids -= 1;
}
// Records an 'E' (end) event called `name` for the calling thread.
//
// Unlike llamafile_trace_begin() this never reserves space: when the thread
// has no slot left in its current chunk, the event is silently dropped.
void
llamafile_trace_end(const char* name)
{
    if (g_ids >= 1) {
        llamafile_trace_event(g_id, name, "category", 'E');
        g_id += 1;
        g_ids -= 1;
    }
}
// Writes every recorded event to `filename` as a JSON array of objects with
// the fields name, cat, ph, ts, pid and tid.
//
// Nothing is written when no slots were ever reserved. Slots that were
// reserved but never filled (NULL name) are skipped. Timestamps are emitted
// relative to g_start_rdtsc and divided by 3000.
// NOTE(review): the 3000 divisor presumably converts ticks to microseconds
// assuming a ~3 GHz counter -- confirm for the target machine.
// Names and categories are printed verbatim with %s; strings containing
// quotes or backslashes would produce invalid JSON.
static void
llamafile_trace_save(const char* filename)
{
    const int capacity = (int)(sizeof(g_events) / sizeof(*g_events));
    int count = atomic_load_explicit(&g_count, memory_order_relaxed);
    // The reservation counter can end up past the end of the array when
    // reservations race near exhaustion; never index beyond g_events.
    if (count > capacity)
        count = capacity;
    if (count <= 0)
        return;
    fprintf(stderr, "saving trace to %s...\n", filename);
    FILE* file = fopen(filename, "w");
    if (!file) {
        perror(filename);
        return;
    }
    fprintf(file, "[\n");
    bool once = false;
    for (int i = 0; i < count; i++) {
        if (!g_events[i].name)
            continue;
        // Separator goes before every object except the first one written.
        if (!once) {
            once = true;
        } else {
            fputs(",\n", file);
        }
        fprintf(file,
                "{\"name\": \"%s\", \"cat\": \"%s\", \"ph\": \"%c\", "
                "\"ts\": %.3f, \"pid\": %d, \"tid\": %d}",
                g_events[i].name,
                g_events[i].cat,
                g_events[i].ph,
                (g_events[i].ts - g_start_rdtsc) / 3000.,
                g_events[i].pid,
                g_events[i].tid);
    }
    fprintf(file, "\n]\n");
    // Buffered output may only fail at flush time, so check both the stream
    // error flag and the result of fclose().
    int write_failed = ferror(file);
    if (fclose(file) != 0)
        write_failed = 1;
    if (write_failed)
        fprintf(stderr, "%s: error writing trace\n", filename);
}
// Constructor hook: captures the counter value that llamafile_trace_save()
// later subtracts from every event timestamp.
__attribute__((__constructor__)) static void
trace_startup(void)
{
    const unsigned long now = rdtsc();
    g_start_rdtsc = now;
}
// Destructor hook: dumps whatever was recorded to "trace.json" in the
// current working directory. The file can be loaded in chrome://tracing/.
__attribute__((__destructor__)) static void
trace_shutdown(void)
{
    static const char kTracePath[] = "trace.json";
    llamafile_trace_save(kTracePath);
}
// -*- mode:c;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*-
// vi: set et ft=c ts=4 sts=4 sw=4 fenc=utf-8 :vi
#include "trace.h"
#include <assert.h>
#include <pthread.h>
#include <stdio.h>
#include <sys/resource.h>
#include <sys/time.h>
#include <time.h>
#define ITERATIONS 100000
#define THREADS 30
// Shared counter incremented by every worker iteration; main() asserts it
// ends at THREADS * ITERATIONS.
int g_chores;
// Mutex the workers hold around each increment of g_chores.
pthread_mutex_t g_locker = PTHREAD_MUTEX_INITIALIZER;
// Thread entry point for the benchmark.
//
// Increments g_chores ITERATIONS times, taking g_locker around every
// increment, and wraps the whole loop in one "worker" begin/end trace pair.
// `arg` is unused and the return value is always a null pointer.
void*
worker(void* arg)
{
    (void)arg;
    llamafile_trace_begin("worker");
    int remaining = ITERATIONS;
    while (remaining-- > 0) {
        pthread_mutex_lock(&g_locker);
        g_chores += 1;
        pthread_mutex_unlock(&g_locker);
    }
    llamafile_trace_end("worker");
    return NULL;
}
// Returns a - b as a timeval, borrowing one second when the microsecond
// field of `a` is smaller than that of `b` so tv_usec stays non-negative.
struct timeval
tub(struct timeval a, struct timeval b)
{
    struct timeval diff;
    diff.tv_sec = a.tv_sec - b.tv_sec;
    diff.tv_usec = a.tv_usec - b.tv_usec;
    if (diff.tv_usec < 0) {
        diff.tv_usec += 1000000;
        diff.tv_sec -= 1;
    }
    return diff;
}
// Flattens a timeval into a single count of microseconds.
// The arithmetic is done in unsigned long and converted back to long.
long
tomicros(struct timeval x)
{
    unsigned long micros = x.tv_sec * 1000000ul;
    micros += x.tv_usec;
    return micros;
}
// Benchmark driver: pins the process to CPU 0, runs THREADS workers that
// contend on one mutex, then prints wall-clock, user and system time in
// microseconds.
//
// Returns 0 on success and 1 if not every worker thread could be started.
// NOTE(review): cpu_set_t / CPU_ZERO / CPU_SET / sched_setaffinity need
// _GNU_SOURCE and <sched.h>; confirm that trace.h or the build provides them.
int
main(void)
{
    cpu_set_t x;
    CPU_ZERO(&x);
    CPU_SET(0, &x);
    /* CPU_SET(1, &x); */
    // Pinning is best-effort: report a failure but keep going.
    if (sched_setaffinity(0, sizeof(x), &x) != 0)
        perror("sched_setaffinity");
    struct timeval start;
    gettimeofday(&start, 0);
    pthread_t th[THREADS];
    int started = 0;
    for (; started < THREADS; ++started) {
        int rc = pthread_create(&th[started], 0, worker, 0);
        if (rc != 0) {
            fprintf(stderr, "pthread_create failed with error %d\n", rc);
            break;
        }
    }
    // Join only the threads that were really created; joining an
    // uninitialised pthread_t is undefined behaviour.
    for (int i = 0; i < started; ++i)
        pthread_join(th[i], 0);
    if (started != THREADS)
        return 1;
    assert(g_chores == THREADS * ITERATIONS);
    struct rusage ru;
    struct timeval end;
    gettimeofday(&end, 0);
    getrusage(RUSAGE_SELF, &ru);
    printf("%16ld us real\n"
           "%16ld us user\n"
           "%16ld us sys\n",
           tomicros(tub(end, start)),
           tomicros(ru.ru_utime),
           tomicros(ru.ru_stime));
    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment