Created
May 31, 2026 14:39
-
-
Save leogdion/8baa0baa33f7523c9383b7c92a2e390c to your computer and use it in GitHub Desktop.
Compare all MKV files in a directory to find the real movie among near-identical Blu-ray rip playlists
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# compare-mkv.sh — Compare all MKV files in a directory to find the real movie
# among near-identical Blu-ray rip playlists.
#
# Usage: ./compare-mkv.sh   (operates on the current directory)
#
# Requires: ffprobe, ffmpeg (from FFmpeg). On macOS: brew install ffmpeg
#
# For every *.mkv (matched case-insensitively) in the current directory, one
# row is printed with the duration, size, chapter count and an MD5 of the
# video stream. A file whose video MD5 equals that of an earlier file is
# flagged as a duplicate of it.
#
# Exit status: 1 if a required tool is missing or no MKV files are found.

set -euo pipefail

# Print a message to stderr and exit with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print one container-level ("format") field of a media file.
#   $1 - field name (e.g. duration, size)
#   $2 - path to the media file
# Prints nothing when ffprobe fails; callers validate the value.
probe_format_field() {
  ffprobe -v error -show_entries "format=$1" \
    -of default=nw=1:nk=1 "$2" 2>/dev/null || true
}

# Format a duration in seconds as H:MM:SS.
#   $1 - seconds, optionally with a fractional part (e.g. 7265.123000)
# Anything that is not a plain non-negative number (empty, "N/A", ...) is
# shown as 0:00:00 instead of breaking the arithmetic below.
format_duration() {
  local secs=${1%.*}
  [[ "$secs" =~ ^[0-9]+$ ]] || secs=0
  secs=$((10#$secs)) # force base 10 so leading zeros are not read as octal
  printf '%d:%02d:%02d' \
    "$((secs / 3600))" "$(((secs % 3600) / 60))" "$((secs % 60))"
}

# Format a byte count as GiB with two decimals.
#   $1 - size in bytes; non-numeric input is treated as 0
format_gib() {
  local bytes=$1
  [[ "$bytes" =~ ^[0-9]+$ ]] || bytes=0
  # The value is passed with -v rather than spliced into the awk program text.
  awk -v b="$bytes" 'BEGIN { printf "%.2f", b / 1073741824 }'
}

# Print the number of [CHAPTER] sections ffprobe reports for a file.
#   $1 - path to the media file
count_chapters() {
  # grep -c exits non-zero when the count is 0; that is not an error here.
  ffprobe -v error -show_chapters "$1" 2>/dev/null \
    | grep -c '\[CHAPTER\]' || true
}

# Print the MD5 of the video stream only (ignores audio/subtitle ordering
# differences). Prints nothing when ffmpeg cannot hash the file, e.g. when
# the file has no video stream.
#   $1 - path to the media file
video_md5() {
  local out
  # -nostdin keeps ffmpeg from reading the terminal/stdin while batch-looping.
  out=$(ffmpeg -nostdin -v error -i "$1" -map 0:v -f md5 - 2>/dev/null) \
    || return 0
  printf '%s\n' "${out#MD5=}"
}

main() {
  local tool
  for tool in ffprobe ffmpeg; do
    command -v "$tool" >/dev/null 2>&1 \
      || die "Error: $tool not found. Install FFmpeg (brew install ffmpeg)."
  done

  # nullglob: no match yields an empty array instead of the literal pattern.
  # nocaseglob: also pick up *.MKV and friends.
  local -a mkvs
  shopt -s nullglob nocaseglob
  mkvs=(./*.mkv)
  shopt -u nullglob nocaseglob

  if [[ ${#mkvs[@]} -eq 0 ]]; then
    die "No MKV files found in the current directory."
  fi

  printf '%-30s %12s %12s %9s %s\n' "FILE" "DURATION" "SIZE(GiB)" "CHAPTERS" "VIDEO MD5"
  printf '%s\n' "--------------------------------------------------------------------------------------------"

  # Hashes already seen and the first file that produced each one. Parallel
  # indexed arrays are used instead of an associative array so the script
  # does not depend on bash 4+ (macOS ships bash 3.2 as the system bash).
  local -a seen_md5 seen_name
  seen_md5=()
  seen_name=()
  local seen_count=0
  local f name dur_fmt size_gib chapters vmd5 md5_col dupe i

  for f in "${mkvs[@]}"; do
    name=${f##*/}
    dur_fmt=$(format_duration "$(probe_format_field duration "$f")")
    size_gib=$(format_gib "$(probe_format_field size "$f")")
    chapters=$(count_chapters "$f")
    vmd5=$(video_md5 "$f")

    dupe=""
    if [[ -z "$vmd5" ]]; then
      # No hash available: report it and keep the file out of the
      # duplicate bookkeeping so unrelated failures are not paired up.
      md5_col="n/a"
    else
      md5_col="${vmd5:0:12}..."
      for ((i = 0; i < seen_count; i++)); do
        if [[ "${seen_md5[i]}" == "$vmd5" ]]; then
          dupe=" <-- DUPLICATE of ${seen_name[i]}"
          break
        fi
      done
      if [[ -z "$dupe" ]]; then
        seen_md5[seen_count]=$vmd5
        seen_name[seen_count]=$name
        seen_count=$((seen_count + 1))
      fi
    fi

    printf '%-30s %12s %12s %9s %s%s\n' \
      "$name" "$dur_fmt" "$size_gib" "$chapters" "$md5_col" "$dupe"
  done

  printf '\n'
  printf '%s\n' \
    "Tip: the real feature is usually the longest, largest file with the" \
    "expected chapter count. Files sharing a video MD5 are identical features."
}

main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment