Last active
May 13, 2025 01:32
-
-
Save davidlohr/91f737cc2e3da12a5053bdaf62fc12b7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bpftrace | |
/*
 * extfragsnoop	Trace events that induce system external memory fragmentation.
 *		For Linux, uses bpftrace and eBPF.
 *
 * This traces when a fallback event occurs, particularly mixed pageblocks that
 * occur as a consequence of pollution upon memory pressure.
 *
 * Unmovable memory is the enemy of a de-fragmenting process (compaction), so
 * gather such allocations as well as situations when movable memory becomes
 * unmovable, such as gup longterm pinning.
 *
 * USAGE: ./extfragsnoop.bt
 *
 * Copyright 2025 Davidlohr Bueso.
 */
#ifndef BPFTRACE_HAVE_BTF | |
#include <linux/hugetlb.h> | |
#include <linux/gfp.h> | |
#include <linux/mmzone.h> | |
#endif | |
/*
 * Startup probe: print the banner and the column header for the per-event
 * lines, then build the table that turns numeric migratetype values into
 * readable names for the other probes.
 */
BEGIN
{
	printf("Tracing events that produce external memory fragmentation... Hit Ctrl-C to end.\n");
	printf("%-8s %-8s %-16s %16s %4s %16s %4s\n", "TIME", "PID", "COMM", "ALLOC-MT", "ORDER", "FALLBACK-MT", "ORDER");

	/*
	 * The tracepoints report migratetypes as plain integers, see:
	 * /sys/kernel/debug/tracing/events/kmem/mm_page_alloc{_extfrag}/format
	 *
	 * The names follow enum migratetype in include/linux/mmzone.h.
	 * MIGRATE_PCPTYPES is only a count of the pcp list types and
	 * MIGRATE_HIGHATOMIC is defined as an alias of it, so value 3 is
	 * "highatomic" and the entries after it are not shifted by one.
	 *
	 * NOTE(review): 4 and 5 assume a kernel built with both CONFIG_CMA
	 * and CONFIG_MEMORY_ISOLATION; without CONFIG_CMA, "isolate" would
	 * be 4 -- confirm against the running kernel's configuration.
	 */
	@migratetype[0] = "unmovable";
	@migratetype[1] = "movable";
	@migratetype[2] = "reclaimable";
	@migratetype[3] = "highatomic";
	@migratetype[4] = "cma";
	@migratetype[5] = "isolate";
}
/*
 * Histogram every page allocation above order-0, keyed by migratetype name.
 *
 * Order-0 (4Kb) allocations are irrelevant here, which is also why
 * mm_page_alloc_zone_locked is not traced. The more interesting filter in
 * this context would be the orders considered costly
 * (PAGE_ALLOC_COSTLY_ORDER=3), but that can still be deduced from the
 * histograms as long as everything larger than order-0 is recorded.
 */
tracepoint:kmem:mm_page_alloc
/args->order > 0/
{
	$order = args->order;
	$mt = args->migratetype;

	/* All high-order allocations, linear buckets of one order each. */
	@alloc_highorder[@migratetype[$mt]] = lhist($order, 0, 11, 1);

	/* 1 is the key labelled "movable"; everything else is tracked separately. */
	if ($mt != 1) {
		@alloc_highorder_unmovable[@migratetype[$mt]] = hist($order);
	}
}
/*
 * Count gup calls that request a longterm pin, per command name and number
 * of pages requested: such pins turn movable memory into unmovable memory.
 */
kprobe:__get_user_pages
{
	$nr_pages = arg2;
	$gup_flags = arg3;

	/*
	 * Flag bit marking a pin whose lifetime is indefinite.
	 * NOTE(review): presumably FOLL_LONGTERM; its numeric value is a
	 * kernel-internal detail, so verify 0x100 against the running
	 * kernel's headers.
	 */
	$longterm_bit = 0x100;

	if ($nr_pages > 0) {
		if (($gup_flags & $longterm_bit) != 0) {
			@gup_longterm_pin[comm, $nr_pages] = count();
		}
	}
}
/*
 * Fallback events: an allocation of one migratetype was served from a free
 * list of another migratetype.
 *
 * A fallback smaller than a pageblock is recorded in @fragment_pollute and
 * also printed as one line per event; a fallback of at least pageblock size
 * is only recorded, in @nofragment_claim.
 */
tracepoint:kmem:mm_page_alloc_extfrag
{
	$alloc_mt = args->alloc_migratetype;
	$alloc_order = args->alloc_order;
	$fallback_mt = args->fallback_migratetype;
	$fallback_order = args->fallback_order;

	/* Hard-coded: order 9 is the 2mb hugepage size on x86-64. */
	$pageblock_order = 9;

	if ($fallback_order < $pageblock_order) {
		@fragment_pollute[@migratetype[$alloc_mt], @migratetype[$fallback_mt]] = lhist($fallback_order, 0, 11, 1);

		time("%H:%M:%S ");
		printf("%-8d %-16s ", pid, comm);
		/*
		 * The order columns are 5 wide, not 4: the "ORDER" header
		 * label is 5 characters and overflows its %4s field, so the
		 * rows must use the same effective width to stay aligned.
		 */
		printf("%16s %5d %16s %5d\n", @migratetype[$alloc_mt], $alloc_order, @migratetype[$fallback_mt], $fallback_order);
	} else {
		@nofragment_claim[@migratetype[$alloc_mt], @migratetype[$fallback_mt]] = lhist($fallback_order, 0, 11, 1);
	}
}
/*
 * Exit probe: empty the migratetype name table. It is only a lookup helper,
 * and clearing it keeps it out of the maps bpftrace prints on exit.
 */
END
{
	clear(@migratetype);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment