Last active
May 16, 2025 13:36
-
-
Save natyusha/34c6b9418e3348f37f3bc0650d9282e2 to your computer and use it in GitHub Desktop.
This script takes a raw video file and muxes it together with another video file. It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os, re, sys, json, time, argparse, subprocess | |
r""" | |
Description: | |
- This script takes a raw video file and muxes it together with another video file | |
- It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay | |
Author: | |
- natyusha | |
Requirements: | |
- programs : python 3.7+, aegisub, aegisub-cli, mkvtoolnix, ffmpeg | |
- fonts : Lato-Bol.ttf, Lato-BolIta.ttf | |
- bash : grep, mv | |
- pip : audio-offset-finder | |
Usage: | |
- to auto merge create a "map.txt" file with the full path to all files in a single line separated by a pipe: "vid_new"|"vid_old"|title | |
- make sure there are no unicode characters present in the parent folder names of the video files | |
- entering "FORMAT" (without quotes) as "vid_new" will use the same video for the output as "vid_old" while applying the styling | |
- this can be used for raws and external subtitles by naming the subtitles the same as the raw with ".en" appended
- place the mapping file in the terminal's working directory and run the script | |
- the title is optional and will default to the title of the original video | |
- use the copy as path context menu command (on windows) for both sets of files then organise them accordingly | |
- the script will output to a "mux" subfolder of the working directory and use the filenames from the second half of the mapping | |
- subtitles will be copied over from the same location as the "vid_old" if they are named "original_filename.en.ass" | |
- subtitles will be extracted from the original file if loose ones aren't present and will follow the same naming scheme as above | |
- copied subtitles will be resampled to the upgraded file's resolution and the correct script properties will be set | |
- Note: non ".ass" / ".srt" subtitles aren't handled by this script as they require manual editing and aren't that common | |
Arguments: | |
- run "pairedmuxing.py -h" for info on how to format the mapping file and for more details on the arguments | |
- if you want to maintain the original subtitle styling pass the "-o" / "--original" flag
- if you want to change the language tag assigned to the audio pass "-l" / "--lang" followed by a language tag e.g. "-l eng"
- both are optional flags, so they can be given in any order
""" | |
# allow unicode characters in print by switching stdout to utf-8
sys.stdout.reconfigure(encoding='utf-8')
# failure marker: red terminal colour escape, the ⨯ glyph, then the reset escape
err = '\033[31m' '⨯' '\033[0m'
def print_f(text):
    """Print text and flush right away so the user sees progress immediately."""
    print(text, flush=True)
# function to check for a valid language tag argument
def language_tag(tag):
    """Validate a language tag given on the command line (argparse ``type=`` callback).

    Accepts two or three lowercase letters, optionally followed by a hyphen and
    two uppercase letters (e.g. "jpn", "en-US").

    tag     : the raw argument string
    returns : the tag unchanged when it is valid
    raises  : argparse.ArgumentTypeError when the tag does not have that shape
    """
    # fullmatch instead of match + "$": "$" also matches just before a trailing newline
    if not re.fullmatch(r'[a-z]{2,3}(-[A-Z]{2})?', tag):
        raise argparse.ArgumentTypeError(f'{err}Language must be a valid IETF tag')
    return tag
# check the arguments
parser = argparse.ArgumentParser(
    description=(
        'Mux + sync subtitles, attachments and chapters from one video container to another.\n'
        'This is achieved using a user populated "map.txt" file located in the script\'s working directory.\n'
        '\n'
        'map.txt details:\n'
        ' format {path to raw}|{path to original}|{mkv title override (optional)}\n'
        ' *enter "FORMAT" (w/o quotes) as {path to raw} to format the original only\n'
        ' *actual paths must be absolute and enclosed in double quotes'
    ),
    epilog='NOTE: This script may require editing of the "# path variables" section to function correctly.',
    formatter_class=argparse.RawTextHelpFormatter,
)
# options that take a value
parser.add_argument(
    '-l', '--lang', type=language_tag, default='jpn',
    help='an optional IETF language tag for the audio (defaults to "jpn")',
)
parser.add_argument(
    '-t', '--tags', type=str, default='',
    help='optional tags to append to the filename (comma separated)',
)
# on/off switches (all default to off)
parser.add_argument(
    '-a', '--attach', action='store_true', default=False,
    help='skips adding attachments (generally fonts) from the old file',
)
parser.add_argument(
    '-o', '--original', action='store_true', default=False,
    help='if you want to maintain the original subtitle styling',
)
parser.add_argument(
    '-s', '--slow', action='store_true', default=False,
    help='disables the 4min trim for audio offset calculation',
)
parser.add_argument(
    '-v', '--verbose', action='store_true', default=False,
    help='show console output for commands from mkvtoolnix',
)
args = parser.parse_args()
# path variables
mkvtoolnix = 'C:\\Program Files\\MKVToolNix\\'
aegisub_cli = 'C:\\Program Files\\Aegisub\\aegisub-cli.exe'
# built by concatenation: reusing the enclosing quote inside an f-string only parses on python 3.12+
attach_path = os.environ['USERPROFILE'] + '\\Documents\\refs\\'
output_path = '.\\mux\\'
subtitle = ''
# unwanted fonts
excluded_fonts = (
    'Lato-Bol.ttf', 'Lato-Bold.ttf', 'Lato-BolIta.ttf', 'Lato-BoldItalic.ttf',
    'Figtree-ExtraBold.ttf',
    'GandhiSans-Bold.otf', 'GandhiSans-BoldItalic.otf', 'GandhiSans-Regular.otf',
)
# command modifications (each fragment starts with a space so they can be appended to a command directly)
# mkvpropedit: mark the first video and audio track as default / not forced and tag both with --lang
mod_tracks = (
    f' --edit track:v1 --set flag-default=1 --set flag-forced=0 --set language="{args.lang}"'
    f' --edit track:a1 --set flag-default=1 --set flag-forced=0 --set language="{args.lang}"'
)
# mkvpropedit: attach the unified fonts; the paths are quoted because the user profile folder may contain spaces
add_fonts = (
    f' --add-attachment "{attach_path}Lato-Bol.ttf" --add-attachment "{attach_path}Lato-BolIta.ttf"'
    if not args.original else ''
)
# mkvpropedit: drop every font listed in excluded_fonts (skipped when the original styling is kept)
del_fonts = ''.join(f' --delete-attachment name:{font}' for font in excluded_fonts) if not args.original else ''
# filename suffix: one " [tag]" per comma separated entry of --tags
add_tags = ''.join(f' [{tag.strip()}]' for tag in args.tags.split(',')) if args.tags else ''
# mkvpropedit: remove attached jpeg / png images
del_images = ' --delete-attachment mime-type:image/jpeg --delete-attachment mime-type:image/png'
# mkvmerge: -M is added for the old file when --attach is given
del_attach = ' -M' if args.attach else ''
# audio-offset-finder: limit the analysis with --trim 240 unless --slow is given
enable_trim = '' if args.slow else ' --trim 240'
# mkvtoolnix: -q unless --verbose is given
quiet = '' if args.verbose else ' -q'
# subtitle regex
# alternation of the video extensions whose mapping entries may be renamed by mod_ext
vid_formats = '(' + '|'.join(('mkv', 'avi', 'mp4', 'mov', 'ogm', 'wmv', 'mpg', 'mpeg', 'mk3d', 'm4v')) + ')'
# extensions of the loose subtitle files looked for next to a video
sub_formats = ('en.ass', 'en.srt')
# zero-width position in front of the "PlayResX:" line
res_x_map = r'^(?=PlayResX:)'
# the "[V4+ Styles]" header plus its "Format:" line, captured as group 1
style_map = r'(^\[V4\+ Styles\]$\n^Format:.*$\n)'
# a whole style line named Default/Alternate/Thoughts/Top (optionally "_dvd") that uses one of the listed fonts
style_chk = r'^Style: (?:Default|Alternate|Thoughts|Top)(?:_dvd)?,(?:Lato|Figtree ExtraBold|Gandhi Sans),.*$\n'


def _style_pair(size, scale_x, outline, shadow, margins):
    """Return the "Default" + "Alternate" style lines for one resolution.

    The two lines only differ in name, outline colour and the italic flag;
    every argument is inserted into the line as text.
    """
    row = (
        'Style: {name},Lato,{size},&H00FFFFFF,&H00FFFFFF,{border},&H96000000,'
        '0,{italic},0,0,{scale_x},100,0,0,1,{outline},{shadow},2,{margins},1\n'
    )
    variants = (('Default', '&H00000000', '0'), ('Alternate', '&H00333333', '-1'))
    return ''.join(
        row.format(name=name, size=size, border=border, italic=italic,
                   scale_x=scale_x, outline=outline, shadow=shadow, margins=margins)
        for name, border, italic in variants
    )


# unified style lines, one pair per supported PlayResY value
style_480p = _style_pair('36', '84.375', '1.5', '1', '17,17,27')
style_720p = _style_pair('54', '100', '2.25', '1.5', '30,30,40')
style_1080p = _style_pair('81', '100', '3.375', '2.25', '45,45,60')
style_1280u = _style_pair('96', '84.375', '4', '2.7', '45,45,71')
# start of a dialogue line up to and including the comma after the end time, captured as group 1
line_chk = r'^(Dialogue: \d+,\d+:\d{2}:\d{2}\.\d{2},\d+:\d{2}:\d{2}\.\d{2},)'
# style names (with trailing fields) that get remapped onto the unified styles
line_def = r'(?:Default_dvd),'
line_ita = r'(?:Thoughts_dvd),'
line_top = r'(?:Top_dvd),,0,0,0,,'
# character class of the special characters that del_uni removes from file names
sub_unicode = '[․½→←⧵⁄꞉*?<>|“”]'
# undo special characters from windows filenames, convert backticks and remove trailing white-space
def undo_reserved(title):
    """Turn the look-alike characters used in windows filenames back into the reserved ones.

    Every key below is a regex pattern and every value a re.sub replacement
    template; they are applied to the title one after another, in order.

    title   : the title text (from the mapping file or the original container)
    returns : the title with the substitutions applied and trailing spaces / tabs removed
    """
    # NOTE(review): the stand-ins for * ? < > | are written as full-width escapes on the
    # assumption that this is what the author's file held. As plain ASCII patterns
    # '*' and '?' make re.sub raise "nothing to repeat" and '|' matches the empty
    # string between every character - confirm against the author's copy.
    # NOTE(review): in a replacement template only r'\\' collapses to one backslash;
    # r'\/', r'\*', r'\?', r'\|' and r'\"' keep their backslash in the result -
    # confirm that this is what the consuming command expects.
    reserved = {
        '⧵': r'\\',
        '⁄': r'\/',
        '꞉': ':',
        '\uff0a': r'\*',     # full-width asterisk
        '\uff1f': r'\?',     # full-width question mark
        '\uff1c': '<',       # full-width less-than sign
        '\uff1e': '>',       # full-width greater-than sign
        '\uff5c': r'\|',     # full-width vertical line
        '`': "'",
        '[“”]': r'\"',
        r'[ \t]+$': '',      # trailing white-space
    }
    for key, value in reserved.items():
        title = re.sub(key, value, title)
    return title
# remove special characters
def del_uni(file):
    """Return file with every character matched by sub_unicode deleted."""
    cleaned = re.sub(sub_unicode, '', file)
    return cleaned
# output elapsed since previous time.time()
def elapsed(start):
    """Return the time passed since start (an earlier time.time() value) as text.

    The difference is rounded to two decimals and suffixed with "s".
    """
    seconds = round(time.time() - start, 2)
    return f'{seconds}s'
# change extension of file mapping
def mod_ext(find, replace, file):
    """Swap the extension at the end of a double-quoted path.

    find    : regex for the current extension (without the dot), matched case-insensitively
    replace : text of the new extension (without the dot)
    file    : the path text; it is only changed when it ends in .<find>"
    returns : the path with the new extension, or the input unchanged when nothing matched
    """
    old_suffix = fr'\.{find}"$'
    new_suffix = f'.{replace}"'
    return re.sub(old_suffix, new_suffix, file, flags=re.I)
# parse captured stdout / stderr
def parse_std(capture):
    """Print the captured output of a finished command as branches of the progress tree.

    capture : object with bytes "stdout" / "stderr" attributes
              (what subprocess.run(..., capture_output=True) returns)
    returns : None; nothing is printed when no output is left to show

    stdout lines are prefixed with the plain branch marker, stderr lines with
    the red error marker. Bytes that are not valid utf-8 are replaced instead
    of raising.
    """
    sections = []
    # the text is decoded outside the f-strings: quotes and backslashes inside an
    # f-string expression only parse on python 3.12+
    if capture.stdout:
        text = capture.stdout.decode('utf-8', errors='replace').strip('\r\n')
        sections.append('│├─' + text.replace('\r\n', '\r\n│├─'))
    if capture.stderr:
        text = capture.stderr.decode('utf-8', errors='replace').strip('\r\n')
        sections.append(f'│├{err}' + text.replace('\r\n', f'\r\n│├{err}'))
    # collecting the parts in a list allows stderr-only output (the old "None +=" raised
    # TypeError) and keeps stderr from being glued onto the last stdout line
    out = '\r\n'.join(sections)
    # remove messages warning about removing attachments
    out = re.sub(r'^│├─Warning: No attachment matched the spec.*$\n?', '', out, flags=re.M)
    if out:
        print_f(out)
# search for subtitles which contain (or don't) the specified text [flags: L = exclusion / l = inclusion]
def grep(txt_search, flags):
    """Run a recursive grep over the "*.ass" files below output_path.

    txt_search : the pattern handed to grep
    flags      : extra single-letter grep flags appended to "-r"
    returns    : grep's stdout decoded as utf-8, with every "/" removed, split on "\n"
                 (so the list can contain empty strings)
    """
    command = f'grep -r{flags} --include="*.ass" "{txt_search}" {output_path}'
    listing = subprocess.run(command, capture_output=True).stdout.decode('utf-8')
    return listing.replace('/', '').split('\n')
# replace text in the subtitles using regex
def replace(file, find, replace, log=True):
    """Rewrite a utf-8 text file in place with a multi-line regex substitution.

    file    : path of the file to edit
    find    : regex pattern (applied with re.M, so ^ and $ work per line)
    replace : re.sub replacement template
    log     : when true the file path is printed as a tree branch first
    """
    with open(file, 'r+', encoding='utf8') as handle:
        if log:
            print_f(f'│├─{file}')
        updated = re.sub(find, replace, handle.read(), flags=re.M)
        # rewind, overwrite from the start and cut off whatever is left of the old content
        handle.seek(0)
        handle.write(updated)
        handle.truncate()
print_f('\n ###############################################')
print_f( ' ## Paired Muxing Script for Quality Upgrades ##')
print_f( ' ###############################################')
# read the mapping file: one mapping per line, fields separated by a pipe
try:
    with open('map.txt', encoding='utf8') as map_file:
        # blank lines (e.g. the one after a trailing newline) are skipped so they don't
        # turn into a mapping without any paths
        mappings = [line.split('|') for line in map_file.read().split('\n') if line.strip()]
    # every mapping needs at least the new and the old video path
    if not mappings or any(len(fields) < 2 for fields in mappings):
        raise ValueError('"map.txt" is empty or has a line with fewer than two fields')
except (OSError, ValueError):  # missing / unreadable file, undecodable text or malformed content
    print_f(f'\n{err}Aborting: "map.txt" not found or malformed')
    sys.exit(1)
print_f('\n ####### Video Multiplexing & Formatting #######')
# NOTE(review): the indentation of this section was reconstructed from the statement order - confirm the nesting
for mapping in mappings:
    # strip special characters from file names for aegisub_cli to function
    for i in range(2):
        cleaned = del_uni(mapping[i])
        if mapping[i] != cleaned:
            if os.path.isfile(mapping[i].strip('"')):
                subprocess.run(f'mv {mapping[i]} {cleaned}')
            # rename loose subtitles that sit next to the video in the same way
            for s in sub_formats:
                loose_sub = mod_ext(vid_formats, s, mapping[i])
                if os.path.isfile(loose_sub.strip('"')):
                    subprocess.run(f'mv {loose_sub} {del_uni(loose_sub)}')
            mapping[i] = cleaned
    vid_new, vid_old = mapping[0], mapping[1]
    chapters, subtitle, sub_ext, offset = None, '', '', 0
    # output names are derived from the old file; the paths keep their surrounding double quotes.
    # the pieces are computed outside the f-strings because reusing the enclosing quote
    # inside an f-string only parses on python 3.12+
    mkv_name = os.path.basename(mod_ext(vid_formats, 'mkv', vid_old))
    mkv_pre = f'"{output_path}{mkv_name}'
    mkv_out = f'{mkv_pre[:-5]}{add_tags}{mkv_pre[-5:]}'  # the tags go in front of the final .mkv"
    sub_out = mod_ext(vid_formats, 'en.ass', mkv_out)
    old_name = os.path.basename(vid_old).strip('"')
    print_f(f'\n╭{old_name}')
    # check if there is a new file or the old one is being formatted
    if vid_new.lower() == 'format':
        print_f('├┬Marking the Old Video for Formatting...')
        vid_new = vid_old
        print_f('│╰─Completed')
    else:
        # convert wmv to mkv for mkvmerge compatibility
        if vid_new.lower().endswith('.wmv"'):
            print_f('├┬Converting .wmv to .mkv...')
            start = time.time()
            vid_mkv = mod_ext('wmv', 'mkv', vid_new)
            subprocess.run(f'ffmpeg -v quiet -y -i {vid_new} -c:v copy -c:a copy {vid_mkv}')
            vid_new = vid_mkv
            print_f(f'│╰─Completed in {elapsed(start)}')
        # determine the audio offset in seconds by using the first 4 minutes of audio (increase trim for slightly higher accuracy and much slower parsing)
        start = time.time()
        print_f('├┬Determining Audio Offset...')
        try:
            probe = subprocess.run(f'audio-offset-finder --find-offset-of {vid_old} --within {vid_new}{enable_trim} --json', capture_output=True)
            offset = json.loads(probe.stdout)
            offset = offset['time_offset'] if offset else 0
        except Exception:  # tool missing, no / invalid json or no "time_offset" key
            print_f(f'│{err}─Failed! Unable to Parse Video')
            sys.exit(1)
        print_f(f'│╰─Completed in {elapsed(start)} [{round(offset,3)}s]')
    # grab the file information of the original file in json format and store it in a variable
    print_f('├┬Parsing Track Info & MultiPlexing...')
    identify = json.loads(subprocess.run(f'{mkvtoolnix}mkvmerge.exe -J {vid_old}', capture_output=True, universal_newlines=True, encoding='utf-8').stdout)
    sub_track = None
    # if available add the title from the mapping otherwise use the one from the original file to ensure titles from the new file aren't used
    # (a blank third field counts as "not available" so it doesn't suppress the fallback)
    if len(mapping) > 2 and mapping[2].strip():
        title = mapping[2]
    else:
        title = identify['container']['properties'].get('title')
    mod_title = f' --edit info --set title="{undo_reserved(title)}"' if title else ''
    # parse the identification variable to determine the track id of any .ass or .srt subtitles (if present)
    try:
        # when several subtitle tracks match, the last one in the list is used
        for track in identify['tracks']:
            if track['codec'] == 'SubStationAlpha':
                sub_ext, sub_track = 'ass', str(track['id'])
            elif track['codec'] == 'SubRip/SRT':
                sub_ext, sub_track = 'srt', str(track['id'])
        if sub_ext:
            subtitle = mod_ext(vid_formats, f'en.{sub_ext}', vid_old)
    except Exception:  # unexpected identification layout; reported and the mux carries on without subtitles
        print_f(f'│{err}─Failed! Unable to Parse Video')
    # parse the identification variable to determine if there are single chapter files present
    try:
        chapters = identify['chapters'][0]['num_entries']
    except (KeyError, IndexError, TypeError):
        chapters = None
    sync_chapter = f'--chapter-sync {int(offset * 1000)}' if chapters and chapters > 1 else '--no-chapters'  # only add chapters if there is more than one
    new_chapters = '' if sync_chapter == '--no-chapters' else ' --no-chapters'  # use chapters from the new file if the old one doesn't have more than one
    # mux the attachments and chapters (with offset) from the original file into the new file
    start = time.time()
    merge = subprocess.run(f'{mkvtoolnix}mkvmerge.exe{quiet} -o {mkv_out} -S{new_chapters} {vid_new} -A -D -S{del_attach} {sync_chapter} {vid_old}', capture_output=True)
    parse_std(merge)
    print_f(f'│╰─Completed in {elapsed(start)}')
    # remove any tags, set the video+audio tracks to japanese (or the language from the arguments), set the title, and edit the attachments of the muxed file
    print_f('├┬Editing Properties...')
    start = time.time()
    propedit = subprocess.run(f'{mkvtoolnix}mkvpropedit.exe{quiet} {mkv_out} -t all:{mod_tracks}{del_fonts}{add_fonts}{mod_title}{del_images}', capture_output=True)
    parse_std(propedit)
    print_f(f'│╰─Completed in {elapsed(start)}')
    # extract the subtitle track from the original file and name it the same as the original file with ".en" appended
    start = time.time()
    if sub_track:
        print_f('├┬Extracting Subtitles...')
        sub_target = subtitle.lstrip('"')  # the opening quote moves in front of the track id
        extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} {vid_old} tracks "{sub_track}:{sub_target}', capture_output=True)
        parse_std(extract)
        sub_name = os.path.basename(subtitle).strip('"')
        print_f(f'│├─{sub_name}')
        print_f(f'│╰─Completed in {elapsed(start)}')
    else:  # check for external subtitles if there are no sub tracks
        for s in sub_formats:
            candidate = mod_ext(vid_formats, s, vid_old)
            if os.path.isfile(candidate.strip('"')):
                print_f('├┬External Subtitles Found...')
                subtitle = candidate
                sub_name = os.path.basename(subtitle).strip('"')
                print_f(f'│├─{sub_name}')
                print_f(f'│╰─Completed in {elapsed(start)}')
                # stop at the first hit: carrying on would let a loose .en.srt take over
                # and the conversion below would then overwrite the loose .en.ass
                break
    # convert any .srt subtitle to .ass
    if subtitle.endswith('en.srt"'):
        print_f('├┬Converting SubRip/SRT to SubStationAlpha...')
        start = time.time()
        converted = subtitle.replace('.en.srt', '.en.ass')
        subprocess.run(f'ffmpeg -v quiet -y -i {subtitle} {converted}')
        subtitle = converted
        print_f(f'│╰─Completed in {elapsed(start)}')
    # resample the subtitle file resolution to match the output file
    if os.path.isfile(subtitle.strip('"')):
        print_f('├┬Resampling & Offsetting Subtitles...')
        start = time.time()
        # NOTE(review): only the file name of the subtitle (not its folder) is handed to aegisub-cli - confirm that this is intended
        sub_base = os.path.basename(subtitle)
        subprocess.run(f'{aegisub_cli} --loglevel 2 --video {mkv_out} "{sub_base} {sub_out} tool/resampleres')  # this will fail if the file paths contain any unicode
        # apply the audio offset to the resampled file
        subprocess.run(f'ffmpeg -v quiet -y -itsoffset {offset} -i {sub_out} temp.ass')
        subprocess.run(f'mv temp.ass {sub_out}')
        print_f(f'│╰─Completed in {elapsed(start)}')
    print_f('╰─Paired Muxing Complete!')
print_f('\n ##### Subtitle Script Properties & Styles #####')
start = time.time()
print_f('\n╭SubStation Alpha Operations')
# NOTE(review): the indentation of this section was reconstructed; the clean-up pass is assumed
# to run even when --original is given - confirm
if not args.original:
    # insert subtitle properties required for the unified style to remain consistent
    print_f('├┬Inserting ScaledBorderAndShadow / WrapStyle...')
    for file in grep('ScaledBorderAndShadow:', 'L'):  # files without the property: add it in front of PlayResX
        if file: replace(file, res_x_map, 'ScaledBorderAndShadow: yes\n')
    for file in grep('ScaledBorderAndShadow: no', 'l'):
        if file: replace(file, r'^ScaledBorderAndShadow: no', 'ScaledBorderAndShadow: yes')
    for file in grep('WrapStyle:', 'L'):  # files without the property: add it in front of PlayResX
        if file: replace(file, res_x_map, 'WrapStyle: 0\n')
    for file in grep(r'WrapStyle: [1-9]', 'l'):
        if file: replace(file, r'^WrapStyle: [1-9]', 'WrapStyle: 0')
    print_f('│╰─Done')
    # insert SakuraCircle styled fonts after checking for dupes
    print_f('├┬Inserting Unified Font Styles...')
    for file in grep('', 'l'):  # the empty pattern lists every .ass file
        if file:
            replace(file, style_chk, '', False)  # drop style lines that would clash with the unified ones
            replace(file, fr'{line_chk}{line_def}', r'\1Default,', False)
            replace(file, fr'{line_chk}{line_ita}', r'\1Alternate,', False)
            replace(file, fr'{line_chk}{line_top}', r'\1Default,,0,0,0,,{\\an8}', False)
    # add the style pair matching the script's PlayResY right below the styles header
    for res_y, style in (('480', style_480p), ('720', style_720p), ('1080', style_1080p), ('1280', style_1280u)):
        for file in grep(f'PlayResY: {res_y}', 'l'):
            if file: replace(file, style_map, fr'\1{style}')
    print_f('│╰─Done')
# regex for fixing common subtitle script errors
print_f('├┬Cleaning the Script...')
for file in grep('[Script Info]', 'l'):
    if file:
        replace(file, r'--'  , '—'  , False)  # Convert double hyphen to single long hyphen
        replace(file, r'’'   , "'"  , False)  # Convert curly to straight single quotes
        replace(file, r'“|”' , '"'  , False)  # Convert double curly quotes to straight double quotes
        replace(file, r"''"  , '"'  , False)  # Convert double single quotes to single double quote
        replace(file, r'…'   , '...', False)  # Convert ellipses to periods
        # spelled " {2}" so the pattern really is two spaces: as a one-space pattern the call changed nothing
        replace(file, r' {2}', ' '  , False)  # Convert double spaces to single spaces
        # Fix Aegisub Font Size Conversions
        replace(file, r'(?!^Style: .+?,.*,)100\.039(?=,100,)', '100', False)
        # Replace lowercase l when it should be an uppercase I (caused by old OCR techniques)
        replace(file, r"""(?<!\d )(?:(?<=[}\.," -])|(?<=\\[nN]))l(?=[\., ]|[fnst] |'[md]|'ll|'ve|t's|t'll|sn't|-l|nside|dea|ntro)""" , 'I')
print_f('│╰─Done')
print_f(f'╰─Completed in {elapsed(start)}')
print_f('\n ############# All Tasks Complete! #############')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment