Skip to content

Instantly share code, notes, and snippets.

@kris-jusiak
Last active June 29, 2025 23:05
Show Gist options
  • Save kris-jusiak/f0f1af971069b53cd646c313a46a5d65 to your computer and use it in GitHub Desktop.
Save kris-jusiak/f0f1af971069b53cd646c313a46a5d65 to your computer and use it in GitHub Desktop.
# Compile the example together with the prebuilt perf module (perf.pcm), then run the result.
${CXX} ${CXXFLAGS} -std=c++2c -g -O3 -I.. -mbmi2 -fprebuilt-module-path=. \
  $*.cpp perf.pcm -lLLVM-18 -lipt -o $*.out && ./$*.out
import perf;
// Example driver for the `perf` module: logs a description of the run, times
// a small fizz_buzz classifier under several alignment settings, then prints
// a report and draws three plots from the steady-clock measurements.
int main() {
  // Header record for the run; each entry pairs a label with a value obtained
  // from std::chrono or from perf::info.
  perf::log(perf::info::spec{{
    {"time", std::chrono::system_clock::now()},
    {"perf", perf::info::version()},
    {"sys", perf::info::sys::triple()},
    {"cxx", std::format("{}-{}", perf::info::compiler::name(), perf::info::compiler::version())},
    {"cpu", std::format("{} ({}:{})", perf::info::cpu::name(), perf::info::cpu::code_name(), perf::info::cpu::version())},
    {"iL1", perf::info::memory::icache()[perf::info::memory::level::L1]},
    {"dL1", perf::info::memory::dcache()[perf::info::memory::level::L1]},
  }});

  // One latency benchmark per alignment value. Each `.before` member is a
  // named callable that invokes perf::code::align<N>().
  // NOTE(review): presumably `.before` runs ahead of the measured call so the
  // benchmarked code starts at an N-byte boundary - confirm against perf docs.
  perf::runner bench{
    perf::bench::latency{.before = perf::named("align2", []{ perf::code::align<std::align_val_t{2}>(); })},
    perf::bench::latency{.before = perf::named("align4", []{ perf::code::align<std::align_val_t{4}>(); })},
    perf::bench::latency{.before = perf::named("align8", []{ perf::code::align<std::align_val_t{8}>(); })},
    perf::bench::latency{.before = perf::named("align16", []{ perf::code::align<std::align_val_t{16}>(); })},
    perf::bench::latency{.before = perf::named("align64", []{ perf::code::align<std::align_val_t{64}>(); })},
    perf::bench::latency{.before = perf::named("align128", []{ perf::code::align<std::align_val_t{128}>(); })},
    perf::bench::latency{.before = perf::named("align256", []{ perf::code::align<std::align_val_t{256}>(); })},
  };

  // Function under test: maps n to a label by divisibility. The 15 check must
  // come first, otherwise multiples of 15 would be caught by the 3 check.
  static auto fizz_buzz = [](int n) {
    if (n % 15 == 0) return "FizzBuzz";
    if (n % 3 == 0) return "Fizz";
    if (n % 5 == 0) return "Buzz";
    return "Unknown";
  };

  // Two fixed arguments, then a perf::data::unpredictable<int> argument.
  // NOTE(review): presumably the latter feeds varying inputs so the branches
  // above cannot be predicted - confirm against perf docs.
  bench(fizz_buzz, 3);
  bench(fizz_buzz, 5);
  bench(fizz_buzz, perf::data::unpredictable<int>{});

  // NOTE(review): `op` is not declared anywhere in this snippet; confirm which
  // perf name it is meant to refer to.
  perf::report(bench[perf::time::steady_clock/op, op]);

  // Bar chart, histogram and ECDF of the same steady-clock data.
  perf::plot::bar(bench[perf::time::steady_clock]);
  perf::plot::hist(bench[perf::time::steady_clock]);
  perf::plot::ecdf(bench[perf::time::steady_clock]);
}
name info                                                    
---- ------------------------------------------------------- 
time 2025-06-25 01:24:11.118005352 
sys  x86_64-pc-linux-gnu 
cxx  clang-21.0.0 
cpu  12th Gen Intel(R) Core(TM) i7-12650 (alderlake:6.154.3) 
iL1  32Kb (64b) 
dL1  48Kb/12 (64b) 
perf 0.0.0 

benchmark                     [1]           [2]
-----------------  --------------  ------------  
add(1,2)/latency   (12.23x)  1.06  (3.00x) 1.00  
sub(1,2)/latency   (13.02x)  1.00  (3.00x) 1.00  
mult(1,2)/latency   (4.32x)  3.01  (3.00x) 1.00  
div(1,2)/latency    (1.00x) 13.00  (1.00x) 3.00  

[1] median((stat.cycles/bench.operations)) 
[2] median((stat.instructions/bench.operations)) 

benchmark                   [1]           [2]
-----------------  ------------  ------------  
add(1,2)/latency   (1.06x) 0.94  (3.00x) 1.00  
sub(1,2)/latency   (1.00x) 1.00  (3.00x) 1.00  
mult(1,2)/latency  (3.02x) 0.33  (3.00x) 1.00  
div(1,2)/latency   (4.34x) 0.23  (1.00x) 3.00  

[1] median((stat.instructions/stat.cycles)) 
[2] median((stat.instructions/bench.operations)) 
benchmark                                           [1]            [2]          [3]
-----------------------------------------  ------------  -------------  -----------  
fizz_buzz(3)/latency/align2                (3.72x) 0.81  (1.00x) 76728  (1.00x) 100  
fizz_buzz(3)/latency/align4                (3.70x) 0.82  (1.03x) 74205  (1.00x) 100  
fizz_buzz(3)/latency/align8                (3.70x) 0.82  (1.05x) 73126  (1.00x) 100  
fizz_buzz(3)/latency/align16               (3.70x) 0.82  (1.01x) 75631  (1.00x) 100  
fizz_buzz(3)/latency/align64               (4.73x) 0.64  (1.06x) 72616  (1.00x) 100  
fizz_buzz(3)/latency/align128              (4.26x) 0.71  (1.09x) 70229  (1.00x) 100  
fizz_buzz(3)/latency/align256              (5.56x) 0.54  (1.04x) 73784  (1.00x) 100  
fizz_buzz(5)/latency/align2                (1.90x) 1.59  (1.11x) 69041  (1.00x) 100  
fizz_buzz(5)/latency/align4                (1.90x) 1.60  (1.11x) 69070  (1.00x) 100  
fizz_buzz(5)/latency/align8                (1.89x) 1.60  (1.11x) 69027  (1.00x) 100  
fizz_buzz(5)/latency/align16               (1.90x) 1.59  (1.11x) 69194  (1.00x) 100  
fizz_buzz(5)/latency/align64               (2.27x) 1.33  (1.12x) 68577  (1.00x) 100  
fizz_buzz(5)/latency/align128              (2.04x) 1.48  (1.12x) 68572  (1.00x) 100  
fizz_buzz(5)/latency/align256              (1.89x) 1.60  (1.12x) 68544  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align2    (1.03x) 2.93  (1.30x) 58875  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align4    (1.02x) 2.97  (1.26x) 60812  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align8    (1.01x) 3.00  (1.27x) 60642  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align16   (1.01x) 2.98  (1.33x) 57733  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align64   (1.09x) 2.76  (1.29x) 59523  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align128  (1.06x) 2.86  (1.34x) 57474  (1.00x) 100  
fizz_buzz(unpredictable)/latency/align256  (1.00x) 3.03  (1.30x) 59143  (1.00x) 100  

[1] median((time.steady_clock/bench.operations)) 
[2] bench.operations 
[3] bench.samples 
add(1,2)/latency:         sub(1,2)/latency:         mult(1,2)/latency:        div(1,2)/latency:
 [0] [1]                   [0] [1]                   [0] [1]                   [0] [1]                  
 -- ------------           -- ------------           -- -------------          -- ------------          
 1. add esi, edi           1. sub edi, esi           1. imul esi, edi          1. mov eax, edi          
                                                                               2. cdq                   
                                                                               3. idiv esi              
                                                                                                        
 [0] index                 [0] index                 [0] index                 [0] index                
 [1] mc.assembly # intel   [1] mc.assembly # intel   [1] mc.assembly # intel   [1] mc.assembly # intel  
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment