Skip to content

Instantly share code, notes, and snippets.

@zas
Created May 28, 2026 12:34
Show Gist options
  • Select an option

  • Save zas/2ebf350315237f769db25b7119fcaa51 to your computer and use it in GitHub Desktop.

Select an option

Save zas/2ebf350315237f769db25b7119fcaa51 to your computer and use it in GitHub Desktop.
Picard file loading benchmark script (PICARD-3281)
#!/bin/bash
# Benchmark file loading performance across Picard commits.
# Usage: ./scripts/bench_load.sh /path/to/music/directory ref1 [ref2 ...]
#
# Examples:
# ./scripts/bench_load.sh ~/Music f14c46c upstream/master HEAD
# ./scripts/bench_load.sh ~/Music f14c46c a25fffa~5 a25fffa
#
# Requirements: run from the picard source directory with a working venv.
# --- Setup: shell options, argument validation, run-wide globals -----------
# Globals defined here and read later in the script:
#   MUSIC_DIR       absolute path of the directory Picard is asked to load
#   CURRENT_BRANCH  ref the benchmarked source files are restored from
#   LOGDIR          temporary directory receiving one Picard log per ref

# Abort on the first unhandled command failure.
set -e

# Use "." as the decimal separator regardless of the user's locale, so the
# numeric formatting done later in the script is predictable.
LC_NUMERIC=C
export LC_NUMERIC

MUSIC_DIR="${1:?Usage: $0 /path/to/music/directory ref1 [ref2 ...]}"
shift

# Everything after the directory is a commit ref to benchmark.
if [ $# -lt 1 ]; then
  echo "Error: provide at least one commit ref" >&2
  exit 1
fi

# Validate before canonicalising: realpath can itself fail on a path whose
# parent directories do not exist, which under `set -e` would abort the
# script with realpath's raw error instead of the message below.
if [ ! -d "$MUSIC_DIR" ]; then
  echo "Error: $MUSIC_DIR is not a directory" >&2
  exit 1
fi
MUSIC_DIR=$(realpath "$MUSIC_DIR")

# The benchmark launches ./tagger.py and swaps files under ./picard, so it
# only works from the root of a Picard source checkout.
if [ ! -f tagger.py ] || [ ! -d picard ]; then
  echo "Error: run this script from the Picard source directory" >&2
  exit 1
fi

# Informational only: how many files with a known audio extension are there.
FILE_COUNT=$(
  find "$MUSIC_DIR" -type f \
    \( -name "*.flac" -o -name "*.mp3" -o -name "*.ogg" \
    -o -name "*.wav" -o -name "*.m4a" -o -name "*.wma" \) |
    wc -l
)

echo "=== Picard Load Benchmark ==="
echo "Directory: $MUSIC_DIR"
echo "Audio files: $FILE_COUNT"
echo ""

# Remember what is checked out now (prints "HEAD" when detached) so the
# benchmarked files can be restored from it after each run.
CURRENT_BRANCH=$(git rev-parse --abbrev-ref HEAD)

# Per-run logs are kept on purpose; the path is printed at the end of the run.
LOGDIR=$(mktemp -d)
#######################################
# Restore the files swapped in by run_test to their $CURRENT_BRANCH version.
# Best effort: failures are ignored so a benchmark run is never aborted by
# the clean-up step.
# Globals: CURRENT_BRANCH (read)
#######################################
_run_test_restore() {
  git checkout "$CURRENT_BRANCH" -- picard/tagger.py picard/cluster.py 2>/dev/null || true
  # Restored separately: git checkout updates nothing at all when one of
  # several pathspecs matches no file, so a branch without debug_opts.py
  # must not prevent the two files above from being restored.
  git checkout "$CURRENT_BRANCH" -- picard/debug_opts.py 2>/dev/null || true
}

#######################################
# Print the time of day, in milliseconds, of the first H:M:S,mmm timestamp
# found on the first line of a log file that contains a given fixed string.
# Prints nothing when no line matches or the matching line has no timestamp.
# Arguments: $1 - fixed string to look for, $2 - log file
#######################################
_run_test_timestamp_ms() {
  LC_ALL=C awk -v needle="$1" '
    index($0, needle) {
      if (match($0, /[0-9]+:[0-9]+:[0-9]+,[0-9]+/)) {
        split(substr($0, RSTART, RLENGTH), t, /[:,]/)
        printf "%d\n", ((t[1] * 60 + t[2]) * 60 + t[3]) * 1000 + t[4]
      }
      exit
    }
  ' "$2"
}

#######################################
# Benchmark file loading for one commit ref.
#
# Checks out picard/tagger.py and picard/cluster.py (plus picard/debug_opts.py
# when the ref has it) from the ref, starts Picard with a LOAD command for
# $MUSIC_DIR, waits for the end-of-load marker in the log, asks Picard to
# quit, prints the time between the "Adding files" line and the marker line,
# and finally restores the files from $CURRENT_BRANCH.
#
# Globals:   MUSIC_DIR, LOGDIR, CURRENT_BRANCH (read)
#            PICARD_BENCH_TIMEOUT (read, optional): seconds to wait for the
#            marker before giving up; defaults to 120.
# Arguments: $1 - commit ref to benchmark
# Outputs:   one result line on stdout: the ref followed by the duration, or
#            by TIMEOUT, FAILED or CHECKOUT FAILED
# Returns:   always 0, so a failing ref never stops the remaining benchmarks
#            when the script runs under `set -e`
#######################################
run_test() {
  local ref="$1"
  local short label log pid
  # Log text this script treats as "loading has finished".
  local marker="MainPanel sort=True"
  local timeout_s="${PICARD_BENCH_TIMEOUT:-120}"
  # The wait loop polls every half second, hence two ticks per second.
  local max_ticks=$((timeout_s * 2))
  local ticks=0
  local outcome="loaded"
  local start_ms end_ms

  short=$(git rev-parse --short "$ref" 2>/dev/null) || short="$ref"
  # A ref that could not be abbreviated may contain "/"; keep the log name flat.
  label=${short//\//_}
  log="$LOGDIR/${label}.log"

  printf " %-40s " "$ref ($short)"

  # Checkout the version to test. A bad ref is reported instead of letting
  # `set -e` terminate the whole script with its error message discarded.
  if ! git checkout "$ref" -- picard/tagger.py picard/cluster.py 2>/dev/null; then
    printf "CHECKOUT FAILED\n"
    _run_test_restore
    return 0
  fi
  # debug_opts.py does not exist in every ref; that is not an error.
  git checkout "$ref" -- picard/debug_opts.py 2>/dev/null || true

  # Empty the log first so a marker left by an earlier run of the same commit
  # cannot be mistaken for this run's result.
  : > "$log"

  # Start Picard
  python -u tagger.py --no-restore --debug --debug-opts=timings -e "LOAD $MUSIC_DIR" > "$log" 2>&1 &
  pid=$!

  # Wait for loading to complete, for Picard to die, or for the timeout.
  until grep -q -- "$marker" "$log" 2>/dev/null; do
    if ! kill -0 "$pid" 2>/dev/null; then
      # The process is gone; look once more in case the marker was written
      # between the check above and the process exiting.
      grep -q -- "$marker" "$log" 2>/dev/null || outcome="exited"
      break
    fi
    sleep 0.5
    ticks=$((ticks + 1))
    if [ "$ticks" -gt "$max_ticks" ]; then
      outcome="timeout"
      break
    fi
  done

  case "$outcome" in
    timeout)
      printf "TIMEOUT\n"
      # `wait` reports the killed process's non-zero status; it must not trip
      # `set -e` before the files are restored.
      kill "$pid" 2>/dev/null || true
      wait "$pid" 2>/dev/null || true
      _run_test_restore
      return 0
      ;;
    exited)
      # Picard ended without ever logging the marker: nothing to measure.
      wait "$pid" 2>/dev/null || true
      printf "FAILED\n"
      _run_test_restore
      return 0
      ;;
  esac

  sleep 0.5

  # Quit via remote command
  python tagger.py -e "QUIT" 2>/dev/null || true
  # Picard's exit status is irrelevant to the measurement.
  wait "$pid" 2>/dev/null || true

  # Extract timings
  start_ms=$(_run_test_timestamp_ms "Adding files" "$log") || true
  end_ms=$(_run_test_timestamp_ms "$marker" "$log") || true
  if [ -n "$start_ms" ] && [ -n "$end_ms" ]; then
    # Log timestamps carry no date, so a run that crosses midnight yields a
    # negative difference; add one day to correct it.
    LC_ALL=C awk -v s="$start_ms" -v e="$end_ms" \
      'BEGIN { d = e - s; if (d < 0) d += 86400000; printf "%6.1fs\n", d / 1000 }'
  else
    printf "FAILED\n"
  fi

  # Restore
  _run_test_restore
}
# Benchmark every requested ref in the order given on the command line, then
# tell the user where the per-ref Picard logs were written.
printf '%s\n' "Running benchmarks..." ""
for commit_ref; do
  run_test "$commit_ref"
done
printf '\n%s\n' "Logs: $LOGDIR"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment