Skip to content

Instantly share code, notes, and snippets.

@davidlohr
Last active May 13, 2025 01:32
Show Gist options
  • Save davidlohr/91f737cc2e3da12a5053bdaf62fc12b7 to your computer and use it in GitHub Desktop.
Save davidlohr/91f737cc2e3da12a5053bdaf62fc12b7 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bpftrace
/*
* extfragsnoop Trace events that induce system external memory fragmentation.
* For Linux, uses bpftrace and eBPF.
*
* This traces when a fallback event occurs, particularly mixed pageblocks that
* occur as a consequence of pollution upon memory pressure.
*
* Unmovable memory is the enemy of a de-fragmenting process (compaction), so
* gather such allocations as well as situations when movable memory becomes
* unmovable: such as gup longterm pinning.
*
* USAGE: ./extfragsnoop.bt
*
* Copyright 2025 Davidlohr Bueso.
*/
#ifndef BPFTRACE_HAVE_BTF
#include <linux/hugetlb.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>
#endif
/*
 * Startup: print the banner and the column header for the per-event rows,
 * then populate the migratetype number -> name table that the probes use
 * to label their map keys and output.
 */
BEGIN
{
	printf("Tracing events that produce external memory fragmentation... Hit Ctrl-C to end.\n");
	printf("%-8s %-8s %-16s %16s %4s %16s %4s\n", "TIME", "PID", "COMM", "ALLOC-MT", "ORDER", "FALLBACK-MT", "ORDER");

	// see: /sys/kernel/debug/tracing/events/kmem/mm_page_alloc{_extfrag}/format
	//
	// Indices follow enum migratetype (include/linux/mmzone.h).
	// MIGRATE_HIGHATOMIC is defined as an alias of MIGRATE_PCPTYPES, so both
	// are 3 and the entries after it are NOT shifted by one.
	// NOTE: 4 and 5 assume a kernel built with CONFIG_CMA and
	// CONFIG_MEMORY_ISOLATION; without CONFIG_CMA, isolate would be 4.
	@migratetype[0] = "unmovable";
	@migratetype[1] = "movable";
	@migratetype[2] = "reclaimable";
	@migratetype[3] = "highatomic";
	@migratetype[4] = "cma";
	@migratetype[5] = "isolate";
}
/*
 * Record the order distribution of high-order page allocations, keyed by
 * the migratetype name.
 *
 * Order-0 (4Kb) allocations are filtered out by the predicate since they
 * are irrelevant here; for the same reason mm_page_alloc_zone_locked is not
 * traced. Filtering on costly orders (PAGE_ALLOC_COSTLY_ORDER=3) would be
 * more interesting still, but that can be deduced from the histograms as
 * long as everything above order-0 is captured.
 */
tracepoint:kmem:mm_page_alloc
/args->order > 0/
{
	$order = args->order;
	$mt = args->migratetype;

	@alloc_highorder[@migratetype[$mt]] = lhist($order, 0, 11, 1);

	/* Anything other than movable (1) is also tracked separately. */
	if ($mt != 1) {
		@alloc_highorder_unmovable[@migratetype[$mt]] = hist($order);
	}
}
/*
 * Count get_user_pages calls that request a long-term pin, keyed by the
 * calling command name and the number of pages requested.
 */
kprobe:__get_user_pages
{
	$nr_pages = arg2;
	$gup_flags = arg3;
	/* Long-term pin flag bit: pin lifetime is indefinite. */
	$longterm_bit = 0x100;

	if ($nr_pages > 0) {
		if ($gup_flags & $longterm_bit) {
			@gup_longterm_pin[comm, $nr_pages] = count();
		}
	}
}
/*
 * Fired when an allocation falls back to a free list of another
 * migratetype. Fallbacks smaller than a pageblock are histogrammed in
 * @fragment_pollute and printed as one row per event; fallbacks of at
 * least pageblock order are only histogrammed, in @nofragment_claim.
 */
tracepoint:kmem:mm_page_alloc_extfrag
{
	$alloc_mt = args->alloc_migratetype;
	$alloc_order = args->alloc_order;
	$fallback_mt = args->fallback_migratetype;
	$fallback_order = args->fallback_order;
	$pageblock_order = 9; // 2mb hugepage size on x86-64

	if ($fallback_order < $pageblock_order) {
		@fragment_pollute[@migratetype[$alloc_mt], @migratetype[$fallback_mt]] = lhist($fallback_order, 0, 11, 1);

		time("%H:%M:%S ");
		printf("%-8d %-16s ", pid, comm);
		// The order columns are 5 wide: the "ORDER" header label is 5
		// characters and overflows its %4s field, so a 4-wide value
		// would leave the columns after it one character out of line.
		printf("%16s %5d %16s %5d\n", @migratetype[$alloc_mt], $alloc_order, @migratetype[$fallback_mt], $fallback_order);
	} else {
		@nofragment_claim[@migratetype[$alloc_mt], @migratetype[$fallback_mt]] = lhist($fallback_order, 0, 11, 1);
	}
}
/*
 * On exit, drop the migratetype name table: it is only a lookup helper, and
 * clearing it keeps it out of the map dump bpftrace prints at termination.
 */
END
{
clear(@migratetype);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment