-
-
Save FurloSK/7f52303a10ab7478e3cddfe4bcc50881 to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
# Extract subtitles from each MKV/MP4/AVI/TS file in the given directory
# [updated 2024-01-09 by FurloSK]
# Permanent gist address: https://gist.github.com/FurloSK/7f52303a10ab7478e3cddfe4bcc50881
#
# ===== Usage =====
# extractSubtitles.sh [-i] [<fileOrDirectory>]
#   -i
#       Supplying this option will skip extraction and only print information about subtitles in file
#   <fileOrDirectory>
#       If a directory is given, will process all MKV/MP4/AVI/TS files in this directory (and subdirectories)
#       If a file is given, will process this single file
#       If the parameter is skipped altogether, will process current directory (and subdirectories)
#
# ===== Requirements =====
# bash (the script uses [[ ]] and $'\t', which plain POSIX sh such as dash does not support),
# MKVToolNix (mkvmerge, mkvextract) and Python (python3 is preferred, python is the fallback).
#
# ===== History =====
# Original version by ComputerNerdFromHell (site no longer working):
#   http://www.computernerdfromhell.com/blog/automatically-extract-subtitles-from-mkv
#   Archived here: https://web.archive.org/web/20181119144734/http://www.computernerdfromhell.com/blog/automatically-extract-subtitles-from-mkv/
# Resubmitted by nux:
#   https://askubuntu.com/questions/452268/extract-subtitle-from-mkv-files/452279#452279
# Completely rewritten and tweaked by FurloSK:
#   https://superuser.com/questions/1527829/extracting-subtitles-from-mkv-file/1649627#1649627
#   Permanent gist address: https://gist.github.com/FurloSK/7f52303a10ab7478e3cddfe4bcc50881
#
# =============================================================================
# Config part: this is the only thing you need to tweak
# MKVToolNix path - Leave empty if you have the tools added to $PATH.
# This is needed e.g. on macOS, if you just downloaded MKVToolNix app and dragged it to Applications folder.
# If the tools are not found at this location, the script falls back to looking them up in $PATH.
toolPath='/Applications/+ Moje/MKVToolNix.app/Contents/MacOS/'
# =============================================================================
# Start of script

# Python program that reads the JSON produced by `mkvmerge -J` on stdin and prints
# one line per subtitle track, fields delimited by tabulator:
#   trackID <tab> trackLanguage <tab> trackCodecID <tab> trackCodec
# It prints the single word "unsupported" when the input has no 'tracks' list (or is not JSON).
# Missing language / codec_id are replaced by "und" / "undefined" so that no field is ever
# empty (the shell reader below would otherwise merge adjacent tabs and shift the fields).
readonly LIST_SUBTITLE_TRACKS_PY='
import sys, json
try:
    js = json.load(sys.stdin)
except ValueError:
    js = {}
if "tracks" not in js:
    print("unsupported")
    sys.exit()
for track in js["tracks"]:
    if track["type"] == "subtitles":
        props = track.get("properties", {})
        print("\t".join([
            str(track["id"]),
            props.get("language") or "und",
            props.get("codec_id") or "undefined",
            track["codec"],
        ]))
'

# Print usage information to stderr.
usage() {
  printf 'Usage: %s [-i] [<fileOrDirectory>]\n' "${0##*/}" >&2
}

# Print the command to run for an MKVToolNix tool.
# Arguments: $1 - tool name (e.g. mkvmerge)
# Outputs:   "<toolPath>/<name>" when that file is executable, otherwise just "<name>"
#            so that the tool is looked up in $PATH.
resolve_tool() {
  local name=$1
  local dir=${toolPath%/}   # tolerate toolPath given with or without trailing slash
  if [[ -n "$toolPath" && -x "${dir}/${name}" ]]; then
    printf '%s\n' "${dir}/${name}"
  else
    printf '%s\n' "$name"
  fi
}

# Print the name of the first available Python interpreter (python3, then python).
# Returns: 0 when one was found, 1 otherwise (nothing is printed then).
find_python() {
  local candidate
  for candidate in python3 python; do
    if command -v "$candidate" >/dev/null 2>&1; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 1
}

# Optional: process only some types of subtitle tracks (according to codec ID).
# See codec types here (under header Subtitle Codec Mappings):
# https://datatracker.ietf.org/doc/html/draft-ietf-cellar-codec/#name-subtitle-codec-mappings
# E.g. to skip DVD subtitles, add S_VOBSUB to the pattern list below.
# Arguments: $1 - track codec ID
# Returns:   0 when the track should be skipped, 1 otherwise
is_unwanted_codec() {
  case "$1" in
    'unwantedCodecID_#1' | 'unwantedCodecID_#2') return 0 ;;
    *) return 1 ;;
  esac
}

# Print the output file extension for a subtitle codec ID.
# Arguments: $1 - track codec ID
subtitle_extension() {
  case "$1" in
    'S_TEXT/SSA') printf 'ssa\n' ;;
    'S_TEXT/ASS') printf 'ass\n' ;;
    'S_TEXT/USF') printf 'usf\n' ;;
    'S_TEXT/WEBVTT') printf 'vtt\n' ;;
    'S_DVBSUB') printf 'dvb\n' ;;
    'S_HDMV/PGS') printf 'sup\n' ;;
    # fallback to standard .srt file (S_VOBSUB files will still get their proper extension)
    *) printf 'srt\n' ;;
  esac
}

# List (and, unless told otherwise, extract) all subtitle tracks of one video file.
# Globals:   MKVMERGE_BIN, MKVEXTRACT_BIN, PYTHON_BIN, LIST_SUBTITLE_TRACKS_PY (read)
# Arguments: $1 - path to the video file
#            $2 - "true" to only print the tracks without extracting them (default: false)
# Outputs:   progress messages on stdout; subtitle files next to the video file
process_file() {
  local filename=$1
  local infoOnly=${2:-false}
  local fileBasename trackNumber trackLanguage trackCodecID trackCodec
  local extension outFilename result

  printf '\nProcessing file %s:\n' "$filename"

  # Get base file name (without extension)
  fileBasename=${filename%.*}

  # stdin of the tools is redirected from /dev/null so that they can never consume
  # the file list / track list the surrounding loops are reading.
  "$MKVMERGE_BIN" -J "$filename" </dev/null \
    | "$PYTHON_BIN" -c "$LIST_SUBTITLE_TRACKS_PY" \
    | while IFS=$'\t' read -r trackNumber trackLanguage trackCodecID trackCodec; do
        # if JSON tracks extraction failed, continue to next file
        if [[ "$trackNumber" == 'unsupported' ]]; then
          printf '%s\n' ' Unsupported file, skipping...'
          continue
        fi

        printf '%s\n' " Found subtitle track #${trackNumber}: $trackLanguage ($trackCodec, $trackCodecID)"

        # address missing ['properties']['codec_id'] in JSON
        if [[ "$trackCodecID" == 'undefined' ]]; then
          if [[ "$trackCodec" == 'DVBSUB' ]]; then
            # fix DVBSUB codec automatically
            trackCodecID='S_DVBSUB'
            printf '%s\n' " Warning: missing codec_id for $trackCodec track => corrected to $trackCodecID."
          else
            printf '%s\n' " Error: missing codec_id for $trackCodec track!"
          fi
        fi

        # if we are only printing tracks, not extracting them, continue
        if [[ "$infoOnly" == true ]]; then
          continue
        fi

        if is_unwanted_codec "$trackCodecID"; then
          printf '%s\n' " Unwanted codec ID $trackCodecID, skipping track..."
          continue
        fi

        # determine proper extension
        extension=$(subtitle_extension "$trackCodecID")

        # prepare output filename
        # (adding . [dot] between filename and language, so VLC will properly recognize the language)
        outFilename="${fileBasename} [#${trackNumber}].${trackLanguage}.${extension}"

        # extract track with language and track id
        printf '%s\n' " Extracting track to file ${outFilename}"
        printf '%s\n' " Executing command \"${MKVEXTRACT_BIN}\" tracks \"${filename}\" ${trackNumber}:\"${outFilename}\""
        result=$("$MKVEXTRACT_BIN" tracks "$filename" "${trackNumber}:${outFilename}" </dev/null)
        printf '%s\n' " > $result"
        printf '\n'
      done
}

# Entry point: parse the command line and process every matching video file.
# Arguments: [-i] [<fileOrDirectory>]  (see Usage at the top of the file)
# Returns:   0 on success, 1 when the given path does not exist, 2 on wrong usage
main() {
  # by default, process all files in local dir
  local DIR='.'
  local skipExtraction=false
  local tool filename

  # first parameter might be -i switch, which will only print subtitle tracks instead of extracting them
  if [[ "${1-}" == '-i' ]]; then
    skipExtraction=true
    shift
  fi
  # at most one file or directory may follow
  if (( $# > 1 )); then
    usage
    return 2
  fi
  if (( $# == 1 )); then
    DIR=$1
  fi
  if [[ ! -e "$DIR" ]]; then
    printf 'Error: %s does not exist\n' "$DIR" >&2
    return 1
  fi

  MKVMERGE_BIN=$(resolve_tool mkvmerge)
  MKVEXTRACT_BIN=$(resolve_tool mkvextract)
  # keep the historical "python" name when no interpreter is found, so the failure is visible per file
  PYTHON_BIN=$(find_python) || PYTHON_BIN='python'
  for tool in "$MKVMERGE_BIN" "$MKVEXTRACT_BIN" "$PYTHON_BIN"; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      printf 'Warning: %s was not found; check toolPath or your $PATH\n' "$tool" >&2
    fi
  done

  # Get all the video files in this dir and its subdirs.
  # NUL-delimited so that any file name (spaces, backslashes, newlines) survives intact.
  find "$DIR" -type f \( -iname '*.mkv' -o -iname '*.mp4' -o -iname '*.avi' -o -iname '*.ts' \) -print0 \
    | while IFS= read -r -d '' filename; do
        process_file "$filename" "$skipExtraction"
      done
}

main "$@"

#==========================================================================
# Lines below are from the original source by ComputerNerdFromHell.
# They are now all obsolete and redundant (kept just for reference)
# Extract the track to a .tmp file
#`"${toolPath}mkvextract" tracks "$filename" $trackNumber:"$subtitlename.srt.tmp" > /dev/null 2>&1`
#`chmod g+rw "$subtitlename.srt.tmp"`
# # Do a super-primitive language guess: ENGLISH
# langtest=`egrep -ic ' you | to | the ' "$subtitlename".srt.tmp`
# trimregex=""
#
# # Check if subtitle passes our language filter (10 or more matches)
# if [ $langtest -ge 10 ]; then
# # Regex to remove credits at the end of subtitles (read my reason why!)
# `sed 's/\r//g' < "$subtitlename.srt.tmp" \
# | sed 's/%/%%/g' \
# | awk '{if (a){printf("\t")};printf $0; a=1; } /^$/{print ""; a=0;}' \
# | grep -iv "$trimregex" \
# | sed 's/\t/\r\n/g' > "$subtitlename.srt"`
# `rm "$subtitlename.srt.tmp"`
# `chmod g+rw "$subtitlename.srt"`
# else
# # Not our desired language: add a number to the filename and keep anyway, just in case
# `mv "$subtitlename.srt.tmp" "$subtitlename.$tracknumber.srt" > /dev/null 2>&1`
# fi
Nice scripting Thnx!
For macOS users, there are 3 things to check to make this script work for you:
1 - Make sure the path to MKVToolNix is correct. In my case this worked (line 30):
toolPath='/Applications/MKVToolNix-79.0.app/Contents/MacOS/'
2 - Make sure python can be found. In my case I had to add the version number after python (line 62):
"${toolPath}mkvmerge" -J "$filename" | python3 -c
3 - When downloading the script there was (in my case) a line break that produced a Python 'syntax error' (line 62).
Make sure that the line break is after ' trackCodec; '
Then everything worked just fine!
For use with Ubuntu, change the first line to `#!/bin/bash`, comment out the `toolPath` line, and add the `-e` option to the first `echo` command.
This needs support for PGS added
On MacOS
toolPath="/Applications/$(ls /Applications | grep MKVToolNix)/Contents/MacOS/"
Nice, thanks.
`echo "\nProcessing file $filename:"` should be `echo -e "\nProcessing file $filename:"`.