@Mic92 · Last active March 12, 2026

Benchmark: parallel addMultipleToStore for nix copy

Measures the performance impact of parallelizing LocalStore::addMultipleToStore (writing store paths to disk in parallel via a thread pool, then registering in a single SQLite transaction) vs the base sequential implementation.

Test closure: Firefox — ~372 paths, ~1.6GB total.

Setup

base: nix 2.35.0pre (sequential addMultipleToStore, commit 6d1c28f)
new:  nix 2.35.0pre (parallel addMultipleToStore, commit 89caa5e)

Machine A: UTM VM (aarch64-linux) on Apple M4 Max MacBook
           Virtual disk backed by host APFS/NVMe

Machine B: AWS EC2 (x86_64-linux)
           Intel Xeon Platinum 8488C, 64 vCPU, 247GB RAM

Results (20 iterations each, Welch's t-test)

Machine A: UTM VM (aarch64-linux, M4 Max)

Scenario                 Base (mean±sd)    New (mean±sd)     Speedup   p-value    Significant?
local→disk               1.413±0.026s      1.324±0.044s      +6.3%     9.3×10⁻⁹   YES
local→tmpfs              0.909±0.018s      0.949±0.027s      −4.2%     5×10⁻⁶     regression
cache.nixos.org→tmpfs    10.731±1.130s     10.148±1.063s     +5.4%     0.10       NO

Machine B: AWS EC2 (x86_64-linux, Xeon 8488C 64-core)

Scenario                        Base (mean±sd)   New (mean±sd)   Speedup   p-value   Significant?
local→disk                      1.217±0.042s     1.113±0.049s    +8.7%     <10⁻⁶     YES
local→tmpfs                     1.229±0.054s     1.111±0.048s    +10.0%    <10⁻⁶     YES
cache.nixos.org→tmpfs (xz)      5.190±0.109s     4.349±0.089s    +16.2%    <10⁻⁶     YES
cache.numtide.com→tmpfs (zstd)  2.096±0.485s     2.210±0.551s    −5.4%     0.49      NO

Interpretation

  • On disk: consistent 6–9% speedup across both machines. The parallel writes overlap I/O across hundreds of store paths.

  • On tmpfs: machine-dependent. The small UTM VM (few cores) shows thread pool overhead as a 4% regression. The 64-core AWS instance shows a 10% speedup — the parallelism amortizes the overhead.

  • From cache.nixos.org: the biggest win is on AWS (16% faster), where low latency to S3 means the local write/register path becomes the bottleneck. On the UTM VM, with higher network latency, the improvement is lost in jitter.

CPU profile (from perf report)

~50%  SHA256 hashing    NAR hash verification (CPU-bound floor)
~20%  restorePath       unpack NAR → filesystem tree
~10%  optimisePath_     hardlink dedup (re-hashes files)
~20%  everything else   sqlite, locks, metadata

SHA256 hashing is the hard floor — even with perfect parallelism, half the CPU time is spent verifying NAR hashes.

Compression: xz vs zstd (cache.nixos.org vs cache.numtide.com)

On AWS, downloading from a binary cache:

Cache              Compression   Base median   New median
cache.nixos.org    xz            5.190s        4.349s
cache.numtide.com  zstd          1.895s        2.025s

zstd is 2.6× faster end-to-end for the same closure. The perf profile for xz downloads shows 88% of CPU in lzma_decode — xz decompression is the dominant bottleneck, far larger than any local store write optimization.

With zstd, total time (~2s) approaches local-copy time (~1.1s), leaving only ~0.9s for network + decompression overhead vs ~4s with xz.

Usage

# Build both variants
nix build .#nix-cli -o result-new
nix build "git+file://$PWD?rev=<base-commit>#nix-cli" -o result-base

# Ensure firefox closure is in store
nix build nixpkgs#firefox --no-link --print-out-paths

# Run on disk (override BENCH_COPY_DIR, default is /dev/shm)
BENCH_COPY_DIR=~/.cache/bench-copy python3 bench-copy.py \
  --store-path /nix/store/...-firefox-... --iters 20

# Run on tmpfs
python3 bench-copy.py --store-path /nix/store/...-firefox-... --iters 20

# Run from binary cache
python3 bench-copy.py --store-path /nix/store/...-firefox-... --from-cache --iters 20
bench-copy.py

#!/usr/bin/env python3
"""Benchmark: nix copy from cache.nixos.org to a local chroot store.

Measures the real-world nix copy path: download NARs from a binary cache
and write them to a fresh local store. Compares base (sequential
addMultipleToStore) vs new (parallel addMultipleToStore).

Usage:
    python3 bench-copy.py [--iters 5] [--store-path /nix/store/...-firefox-...]
    python3 bench-copy.py [--iters 5]   # defaults to nixpkgs#firefox

Requires result-base/bin/nix and result-new/bin/nix to exist.
"""

from __future__ import annotations

import argparse
import json
import os
import shutil
import statistics
import subprocess
import sys
import time
from pathlib import Path

SCRATCH = Path(os.environ.get("BENCH_COPY_DIR", "/dev/shm/bench-copy"))

def welch_t_test(a: list[float], b: list[float]) -> float:
    """Two-sided Welch's t-test. Returns p-value.

    Uses the Welch-Satterthwaite approximation for degrees of freedom
    and a simple t-distribution CDF via the regularized incomplete beta
    function (continued fraction).
    """
    import math

    n1, n2 = len(a), len(b)
    m1, m2 = statistics.mean(a), statistics.mean(b)
    v1 = statistics.variance(a) if n1 > 1 else 0.0
    v2 = statistics.variance(b) if n2 > 1 else 0.0
    se = math.sqrt(v1 / n1 + v2 / n2)
    if se == 0:
        return 1.0
    t_stat = (m1 - m2) / se
    # Welch-Satterthwaite degrees of freedom
    num = (v1 / n1 + v2 / n2) ** 2
    denom = (v1 / n1) ** 2 / (n1 - 1) + (v2 / n2) ** 2 / (n2 - 1)
    if denom == 0:
        return 1.0
    df = num / denom
    # Two-sided p-value from t-distribution using regularized incomplete beta
    x = df / (df + t_stat**2)
    return _betai(df / 2.0, 0.5, x)

def _betai(a: float, b: float, x: float) -> float:
    """Regularized incomplete beta function I_x(a, b) via continued fraction."""
    import math

    if x < 0 or x > 1:
        return 0.0
    if x == 0 or x == 1:
        return x
    ln_beta = math.lgamma(a) + math.lgamma(b) - math.lgamma(a + b)
    front = math.exp(math.log(x) * a + math.log(1 - x) * b - ln_beta)
    # Lentz's continued fraction for I_x(a, b)
    if x < (a + 1) / (a + b + 2):
        return front * _betacf(a, b, x) / a
    else:
        return 1 - front * _betacf(b, a, 1 - x) / b

def _betacf(a: float, b: float, x: float) -> float:
    """Continued fraction for incomplete beta function (modified Lentz's method).

    Note: the denominators d and c may legitimately go negative; they are
    only floored in magnitude to avoid division by zero, never abs()'d,
    which would flip the sign of the result.
    """
    max_iter = 200
    eps = 1e-14
    tiny = 1e-30  # floor against division by zero, per modified Lentz
    qab = a + b
    qap = a + 1
    qam = a - 1
    c = 1.0
    d = 1 - qab * x / qap
    if abs(d) < tiny:
        d = tiny
    d = 1.0 / d
    h = d
    for m in range(1, max_iter + 1):
        m2 = 2 * m
        # Even step
        aa = m * (b - m) * x / ((qam + m2) * (a + m2))
        d = 1 + aa * d
        if abs(d) < tiny:
            d = tiny
        c = 1 + aa / c
        if abs(c) < tiny:
            c = tiny
        d = 1.0 / d
        h *= d * c
        # Odd step
        aa = -(a + m) * (qab + m) * x / ((a + m2) * (qap + m2))
        d = 1 + aa * d
        if abs(d) < tiny:
            d = tiny
        c = 1 + aa / c
        if abs(c) < tiny:
            c = tiny
        d = 1.0 / d
        delta = d * c
        h *= delta
        if abs(delta - 1) < eps:
            break
    return h

PERF = "nix run nixpkgs#linuxPackages.perf --".split()


def run(cmd: list[str]) -> subprocess.CompletedProcess[str]:
    return subprocess.run(cmd, text=True, capture_output=True, check=True)


def resolve_bin(name: str) -> Path:
    p = Path(name).resolve() / "bin" / "nix"
    if not p.is_file():
        sys.exit(f"ERROR: {p} not found")
    return p


def resolve_store_path(store_path: str | None) -> tuple[str, int]:
    """Return (store_path, closure_size)."""
    if store_path is None:
        print("Resolving nixpkgs#firefox store path...")
        result = run(
            ["nix", "build", "nixpkgs#firefox", "--no-link", "--print-out-paths"]
        )
        store_path = result.stdout.strip().splitlines()[-1]
    closure = run(["nix-store", "-qR", store_path])
    size = len(closure.stdout.strip().splitlines())
    return store_path, size


def fresh_dst(label: str) -> str:
    """Create a fresh empty chroot store dir. Removes any previous one."""
    d = SCRATCH / f"dest-{label}"
    if d.exists():
        subprocess.run(["chmod", "-R", "u+w", str(d)], check=False)
        shutil.rmtree(d)
    return str(d)

def time_copy(
    nix_bin: Path, store_path: str, label: str, from_cache: str | None = None
) -> float:
    """Copy closure to a fresh chroot store, return elapsed seconds."""
    dst = fresh_dst(label)
    cmd = [str(nix_bin), "copy", "--to", dst, "--no-check-sigs", store_path]
    if from_cache:
        cmd = [
            str(nix_bin),
            "copy",
            "--from",
            from_cache,
            "--to",
            dst,
            "--no-check-sigs",
            store_path,
        ]
    t0 = time.monotonic()
    subprocess.run(cmd, check=True, capture_output=True)
    elapsed = time.monotonic() - t0
    # Clean up — firefox closure is ~1.6GB, tmpfs is ~3.9GB
    fresh_dst(label)
    return elapsed

def perf_stat(
    nix_bin: Path, store_path: str, label: str, from_cache: str | None = None
) -> str:
    """Run perf stat on nix copy, return perf output."""
    dst = fresh_dst(f"perf-stat-{label}")
    cmd = [str(nix_bin), "copy", "--to", dst, "--no-check-sigs", store_path]
    if from_cache:
        cmd = [
            str(nix_bin),
            "copy",
            "--from",
            from_cache,
            "--to",
            dst,
            "--no-check-sigs",
            store_path,
        ]
    result = subprocess.run(
        [
            *PERF,
            "stat",
            "-e",
            "task-clock,context-switches,cpu-migrations,page-faults",
            *cmd,
        ],
        capture_output=True,
        text=True,
    )
    fresh_dst(f"perf-stat-{label}")
    return result.stderr

def perf_record(
    nix_bin: Path, store_path: str, label: str, from_cache: str | None = None
) -> Path:
    """Run perf record on nix copy."""
    dst = fresh_dst(f"perf-record-{label}")
    perf_dir = Path.home() / ".claude" / "outputs" / "bench-copy"
    perf_dir.mkdir(parents=True, exist_ok=True)
    data_path = perf_dir / f"perf-{label}.data"
    cmd = [str(nix_bin), "copy", "--to", dst, "--no-check-sigs", store_path]
    if from_cache:
        cmd = [
            str(nix_bin),
            "copy",
            "--from",
            from_cache,
            "--to",
            dst,
            "--no-check-sigs",
            store_path,
        ]
    subprocess.run(
        [
            *PERF,
            "record",
            "-g",
            "--call-graph",
            "dwarf",
            "-o",
            str(data_path),
            *cmd,
        ],
        capture_output=True,
    )
    fresh_dst(f"perf-record-{label}")
    return data_path

def print_stats(times: list[float]) -> None:
    med = statistics.median(times)
    mean = statistics.mean(times)
    mn = min(times)
    mx = max(times)
    stdev = statistics.stdev(times) if len(times) > 1 else 0
    print(
        f" min={mn:.3f}s median={med:.3f}s mean={mean:.3f}s"
        f" max={mx:.3f}s stdev={stdev:.3f}s"
    )

def main() -> None:
    parser = argparse.ArgumentParser(description="Benchmark nix copy (firefox)")
    parser.add_argument("--iters", type=int, default=5, help="timing iterations")
    parser.add_argument(
        "--store-path",
        type=str,
        default=None,
        help="store path to copy (default: nixpkgs#firefox)",
    )
    parser.add_argument(
        "--from-cache",
        type=str,
        nargs="?",
        const="https://cache.nixos.org",
        default=None,
        help="copy from binary cache (default: https://cache.nixos.org)",
    )
    args = parser.parse_args()

    SCRATCH.mkdir(parents=True, exist_ok=True)
    nix_base = resolve_bin("result-base")
    nix_new = resolve_bin("result-new")
    print(f"base: {nix_base}")
    print(f" new: {nix_new}")
    source = args.from_cache if args.from_cache else "local store"
    print(f"source: {source}")
    print()

    store_path, closure_size = resolve_store_path(args.store_path)
    print(f"closure: {closure_size} paths, root={store_path}")
    print()

    # --- Warmup ---
    print("--- Warmup run ---")
    time_copy(nix_base, store_path, "warmup", from_cache=args.from_cache)
    print()

    # --- Timing ---
    results: dict[str, list[float]] = {}
    for label, nix_bin in [("base", nix_base), ("new", nix_new)]:
        print(f"--- Timing: {label} ({args.iters} iterations) ---")
        times: list[float] = []
        for i in range(1, args.iters + 1):
            elapsed = time_copy(nix_bin, store_path, label, from_cache=args.from_cache)
            times.append(elapsed)
            print(f" run {i}: {elapsed:.3f}s")
        print_stats(times)
        results[label] = times
        print()

    # --- Summary ---
    base_med = statistics.median(results["base"])
    new_med = statistics.median(results["new"])
    speedup = base_med / new_med if new_med > 0 else float("inf")
    pct = (1 - new_med / base_med) * 100 if base_med > 0 else 0
    print(f"=== Speedup: {speedup:.2f}x ({pct:.1f}% faster, median) ===")
    print()

    # --- perf stat ---
    for label, nix_bin in [("base", nix_base), ("new", nix_new)]:
        print(f"=== perf stat: {label} ===")
        output = perf_stat(nix_bin, store_path, label, from_cache=args.from_cache)
        print(output)

    # --- perf record ---
    for label, nix_bin in [("base", nix_base), ("new", nix_new)]:
        print(f"=== perf record: {label} ===")
        data_path = perf_record(nix_bin, store_path, label, from_cache=args.from_cache)
        print(f" {data_path}")
        print(
            f" View with: nix run nixpkgs#linuxPackages.perf -- report -i {data_path}"
        )
        print()

    # --- Significance test (Welch's t-test, no scipy needed) ---
    if len(results.get("base", [])) >= 3 and len(results.get("new", [])) >= 3:
        base_t = results["base"]
        new_t = results["new"]
        p = welch_t_test(base_t, new_t)
        print(f"=== Welch's t-test: p={p:.6f} ===")
        if p < 0.05:
            print(" Result IS statistically significant (p < 0.05)")
        else:
            print(" Result is NOT statistically significant (p >= 0.05)")
        print()

    # --- Save raw data ---
    raw_path = SCRATCH / "results.json"
    raw_path.write_text(json.dumps(results, indent=2))
    print(f"Raw data: {raw_path}")
    print(f"Done. Results in {SCRATCH}/")


if __name__ == "__main__":
    main()