Skip to content

Instantly share code, notes, and snippets.

@davidlohr
Created April 23, 2026 20:03
Show Gist options
  • Select an option

  • Save davidlohr/01ad349da27e18dc6bff4d6481cd3c54 to your computer and use it in GitHub Desktop.

Select an option

Save davidlohr/01ad349da27e18dc6bff4d6481cd3c54 to your computer and use it in GitHub Desktop.
BPF script to profile io_uring workloads
#!/usr/bin/env bpftrace
/*
* io_uring workload insight.
*
* Gives a complete picture of io_uring activity during a workload:
* - ring lifecycle (create, cqring wait)
* - submission & completion volume and per-opcode breakdown
* - per-request latency (submit -> complete) per opcode
* - registration-path activity (buffers/files/etc.), with stacks
* - async-work offload and io_uring_enter traffic
* - fixed-buffer hot path (io_import_reg_buf) count
*
* Usage (as root):
* ./io_uring_register_insight.bt 2>&1 | tee io_uring.log
* (start workload in another terminal)
* (Ctrl-C when done)
*
* Per-second lines show spike behaviour; end-of-run summary shows totals,
* latency histograms, call-stack attribution, and opcode distributions.
*
*/
BEGIN
{
printf("Tracing io_uring. Ctrl-C to stop.\n");
printf("per-second: submit / complete / async-off | enter | per-buf / batch-reg\n\n");
}
tracepoint:io_uring:io_uring_create
{
@ring_create_total++;
printf("[lifecycle] ring_create pid=%d comm=%s fd=%d ctx=%p sq=%u cq=%u flags=0x%x\n",
pid, comm, args->fd, args->ctx,
args->sq_entries, args->cq_entries, args->flags);
}
tracepoint:io_uring:io_uring_cqring_wait
{
@cqring_wait_total++;
@cqring_wait_this_sec++;
@cqring_wait_min_events = hist(args->min_events);
}
tracepoint:io_uring:io_uring_submit_req
{
@submit_total++;
@submit_this_sec++;
@submit_opcode[args->opcode] = count();
@submit_by_pid[pid, comm] = count();
/* track per-request for submit->complete latency */
@req_submit_ns[args->req] = nsecs;
@req_submit_op[args->req] = args->opcode;
if (args->sq_thread) {
@submit_sqpoll++;
}
}
tracepoint:io_uring:io_uring_complete
{
@complete_total++;
@complete_this_sec++;
if (args->res >= 0) {
@complete_ok++;
} else {
@complete_err++;
}
if (@req_submit_ns[args->req]) {
$lat_ns = nsecs - @req_submit_ns[args->req];
$op = @req_submit_op[args->req];
@op_lat_us[$op] = hist($lat_ns / 1000);
delete(@req_submit_ns[args->req]);
delete(@req_submit_op[args->req]);
}
}
tracepoint:io_uring:io_uring_queue_async_work
{
@async_total++;
@async_this_sec++;
@async_opcode[args->opcode] = count();
}
/* io_uring_enter is the main syscall that pushes submissions + waits for CQEs */
tracepoint:syscalls:sys_enter_io_uring_enter
{
@enter_total++;
@enter_this_sec++;
@enter_by_pid[pid, comm] = count();
}
kprobe:io_import_reg_buf
{
@hot_import_reg_buf++;
}
tracepoint:io_uring:io_uring_register
{
@register_opcode[args->opcode] = count();
@register_total++;
}
kprobe:io_sqe_buffers_register
{
@batch_reg_total++;
@batch_reg_this_sec++;
@batch_reg_stacks[kstack(6)] = count();
@batch_reg_start[tid] = nsecs;
}
kretprobe:io_sqe_buffers_register
/@batch_reg_start[tid]/
{
@batch_reg_lat_us = hist((nsecs - @batch_reg_start[tid]) / 1000);
delete(@batch_reg_start[tid]);
}
kprobe:io_sqe_buffers_unregister
{
@batch_unreg_total++;
}
kprobe:io_sqe_buffer_register
{
@per_buf_total++;
@per_buf_this_sec++;
@per_buf_start[tid] = nsecs;
}
kretprobe:io_sqe_buffer_register
/@per_buf_start[tid]/
{
@per_buf_lat_us = hist((nsecs - @per_buf_start[tid]) / 1000);
delete(@per_buf_start[tid]);
}
interval:s:1
{
printf("[%3lld] sub=%-7lu cmp=%-7lu async=%-4lu ent=%-5lu wait=%-4lu | pbuf=%-4lu breg=%-3lu\n",
elapsed / 1000000000,
(uint64)@submit_this_sec,
(uint64)@complete_this_sec,
(uint64)@async_this_sec,
(uint64)@enter_this_sec,
(uint64)@cqring_wait_this_sec,
(uint64)@per_buf_this_sec,
(uint64)@batch_reg_this_sec);
@submit_this_sec = 0;
@complete_this_sec = 0;
@async_this_sec = 0;
@enter_this_sec = 0;
@cqring_wait_this_sec = 0;
@per_buf_this_sec = 0;
@batch_reg_this_sec = 0;
}
END
{
printf("\n=== Ring lifecycle ===\n");
printf("io_uring_create events: %lu\n", (uint64)@ring_create_total);
printf("io_uring_cqring_wait events: %lu\n", (uint64)@cqring_wait_total);
printf("\n=== SQE / CQE volume ===\n");
printf("io_uring_submit_req (total SQEs): %lu\n", (uint64)@submit_total);
printf(" of which submitted by sqpoll: %lu\n", (uint64)@submit_sqpoll);
printf("io_uring_complete (total CQEs): %lu\n", (uint64)@complete_total);
printf("io_uring_queue_async_work: %lu\n", (uint64)@async_total);
printf("sys_enter_io_uring_enter calls: %lu\n", (uint64)@enter_total);
printf("\n=== Fixed-buffer hot path (per-I/O) ===\n");
printf("io_import_reg_buf calls: %lu\n", (uint64)@hot_import_reg_buf);
printf("\n=== Registration ===\n");
printf("IORING_REGISTER_* syscalls total: %lu\n", (uint64)@register_total);
printf("io_sqe_buffers_register (batch): %lu\n", (uint64)@batch_reg_total);
printf("io_sqe_buffers_unregister: %lu\n", (uint64)@batch_unreg_total);
printf("io_sqe_buffer_register (per-iovec): %lu\n", (uint64)@per_buf_total);
printf("\n=== SQE opcode distribution (submit_req) ===\n");
print(@submit_opcode);
printf("\n=== Async-offloaded opcode distribution ===\n");
print(@async_opcode);
printf("\n=== Completion result sign breakdown ===\n");
printf("ok (res >= 0): %lu\n", (uint64)@complete_ok);
printf("err (res < 0): %lu\n", (uint64)@complete_err);
printf("\n=== Submit/complete per (pid, comm) ===\n");
print(@submit_by_pid);
printf("\n=== io_uring_enter per (pid, comm) ===\n");
print(@enter_by_pid);
printf("\n=== Per-opcode submit->complete latency (us) ===\n");
print(@op_lat_us);
printf("\n=== cqring_wait min_events distribution ===\n");
print(@cqring_wait_min_events);
printf("\n=== IORING_REGISTER_* opcodes seen ===\n");
print(@register_opcode);
printf("\n=== io_sqe_buffer_register latency (us) ===\n");
print(@per_buf_lat_us);
printf("\n=== io_sqe_buffers_register latency (us) ===\n");
print(@batch_reg_lat_us);
printf("\n=== io_sqe_buffers_register call stacks ===\n");
print(@batch_reg_stacks);
/* drop internal state */
clear(@req_submit_ns);
clear(@req_submit_op);
clear(@per_buf_start);
clear(@batch_reg_start);
clear(@submit_this_sec);
clear(@complete_this_sec);
clear(@async_this_sec);
clear(@enter_this_sec);
clear(@cqring_wait_this_sec);
clear(@per_buf_this_sec);
clear(@batch_reg_this_sec);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment