Skip to content

Instantly share code, notes, and snippets.

@natyusha
Last active May 16, 2025 13:36
Show Gist options
  • Save natyusha/34c6b9418e3348f37f3bc0650d9282e2 to your computer and use it in GitHub Desktop.
Save natyusha/34c6b9418e3348f37f3bc0650d9282e2 to your computer and use it in GitHub Desktop.
This script takes a raw video file and muxes it together with another video file. It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay.
#!/usr/bin/env python3
import os, re, sys, json, time, argparse, subprocess
r"""
Description:
- This script takes a raw video file and muxes it together with another video file
- It will retain chapters/subtitles/attachments from the old file and sync them if there is a delay
Author:
- natyusha
Requirements:
- programs : python 3.7+, aegisub, aegisub-cli, mkvtoolnix, ffmpeg
- fonts : Lato-Bol.ttf, Lato-BolIta.ttf
- bash : grep, mv
- pip : audio-offset-finder
Usage:
- to auto merge create a "map.txt" file with the full path to all files in a single line separated by a pipe: "vid_new"|"vid_old"|title
- make sure there are no unicode characters present in the parent folder names of the video files
- entering "FORMAT" (without quotes) as "vid_new" will use the same video for the output as "vid_old" while applying the styling
- this can be used for raws and external subtitles by naming the subtitles the same as the raw with ".en" appended
- place the mapping file in the terminal's working directory and run the script
- the title is optional and will default to the title of the original video
- use the copy as path context menu command (on windows) for both sets of files then organise them accordingly
- the script will output to a "mux" subfolder of the working directory and use the filenames from the second half of the mapping
- subtitles will be copied over from the same location as the "vid_old" if they are named "original_filename.en.ass"
- subtitles will be extracted from the original file if loose ones aren't present and will follow the same naming scheme as above
- copied subtitles will be resampled to the upgraded file's resolution and the correct script properties will be set
- Note: non ".ass" / ".srt" subtitles aren't handled by this script as they require manual editing and aren't that common
Arguments:
- run "pairedmuxing.py -h" for info on how to format the mapping file and for more details on the arguments
- if you want to maintain the original subtitle styling pass the "-o" / "--original" flag
- if you want to change the language tag assigned to the audio pass "-l" / "--lang" followed by a language tag e.g. "-l eng"
- the flags are independent of each other and can be given in any order
"""
# switch stdout to UTF-8 so the tree-drawing and other non-ASCII characters used in the progress output can be printed
sys.stdout.reconfigure(encoding='utf-8') # allow unicode characters in print
# failure marker used in messages: the "⨯" character wrapped in the ANSI escape codes for red text and reset
err = '\033[31m⨯\033[0m' # use the red terminal colour for ⨯
def print_f(text):
    """Print *text* and flush stdout straight away so the user sees progress immediately."""
    print(text, flush=True)
def language_tag(tag):
    """Validate a language tag given on the command line (argparse ``type=`` callback).

    Accepts two or three lowercase letters, optionally followed by a hyphen
    and two uppercase letters (e.g. "jpn", "en", "pt-BR").

    Returns the tag unchanged when it is valid.
    Raises argparse.ArgumentTypeError otherwise.
    """
    # fullmatch instead of match + '$': '$' also matches just before a trailing
    # newline, which let values such as "jpn\n" through
    if not re.fullmatch(r'[a-z]{2,3}(-[A-Z]{2})?', tag):
        raise argparse.ArgumentTypeError(f'{err}Language must be a valid IETF tag')
    return tag
# build the command line interface and parse the arguments
parser = argparse.ArgumentParser(
    description=(
        'Mux + sync subtitles, attachments and chapters from one video container to another.\n'
        'This is achieved using a user populated "map.txt" file located in the script\'s working directory.\n\n'
        'map.txt details:\n'
        ' format {path to raw}|{path to original}|{mkv title override (optional)}\n'
        ' *enter "FORMAT" (w/o quotes) as {path to raw} to format the original only\n'
        ' *actual paths must be absolute and enclosed in double quotes'
    ),
    epilog='NOTE: This script may require editing of the "# path variables" section to function correctly.',
    formatter_class=argparse.RawTextHelpFormatter,
)
# options are registered in this order so the generated help lists them the same way;
# "store_true" switches default to False without needing an explicit default
_option_table = (
    ('-l', '--lang',     dict(type=language_tag, default='jpn', help='an optional IETF language tag for the audio (defaults to "jpn")')),
    ('-a', '--attach',   dict(action='store_true', help='skips adding attachments (generally fonts) from the old file')),
    ('-t', '--tags',     dict(type=str, default='', help='optional tags to append to the filename (comma separated)')),
    ('-o', '--original', dict(action='store_true', help='if you want to maintain the original subtitle styling')),
    ('-s', '--slow',     dict(action='store_true', help='disables the 4min trim for audio offset calculation')),
    ('-v', '--verbose',  dict(action='store_true', help='show console output for commands from mkvtoolnix')),
)
for _short, _long, _settings in _option_table:
    parser.add_argument(_short, _long, **_settings)
args = parser.parse_args()
# path variables (edit these to match the local installation)
mkvtoolnix = 'C:\\Program Files\\MKVToolNix\\'               # folder holding mkvmerge / mkvpropedit / mkvextract
aegisub_cli = 'C:\\Program Files\\Aegisub\\aegisub-cli.exe'
# folder the replacement fonts are attached from; built by concatenation because re-using the
# same quote character inside an f-string expression only parses on python 3.12+, and falling
# back to the home directory when USERPROFILE is not set instead of raising KeyError
attach_path = (os.environ.get('USERPROFILE') or os.path.expanduser('~')) + '\\Documents\\refs\\'
output_path = '.\\mux\\'                                     # muxed files and their subtitles are written here
subtitle = ''                                                # quoted path of the subtitle currently being handled
# unwanted fonts (deleted from the muxed file by attachment name unless --original is given)
excluded_fonts = ('Lato-Bol.ttf', 'Lato-Bold.ttf', 'Lato-BolIta.ttf', 'Lato-BoldItalic.ttf', 'Figtree-ExtraBold.ttf', 'GandhiSans-Bold.otf', 'GandhiSans-BoldItalic.otf', 'GandhiSans-Regular.otf')
# command modifications: option fragments spliced into the mkvmerge / mkvpropedit / audio-offset-finder commands
mod_tracks = f' --edit track:v1 --set flag-default=1 --set flag-forced=0 --set language="{args.lang}" --edit track:a1 --set flag-default=1 --set flag-forced=0 --set language="{args.lang}"'
# the font paths are quoted so a profile folder containing spaces stays a single argument
add_fonts = f' --add-attachment "{attach_path}Lato-Bol.ttf" --add-attachment "{attach_path}Lato-BolIta.ttf"' if not args.original else ''
del_fonts = ''.join(f' --delete-attachment name:{font}' for font in excluded_fonts) if not args.original else ''
# one " [tag]" per comma separated entry; blank entries (e.g. a trailing comma) are skipped
add_tags = ''.join(f' [{tag.strip()}]' for tag in args.tags.split(',') if tag.strip())
del_images = ' --delete-attachment mime-type:image/jpeg --delete-attachment mime-type:image/png'
del_attach = ' -M' if args.attach else ''
enable_trim = ' --trim 240' if not args.slow else ''
quiet = ' -q' if not args.verbose else ''
# subtitle regex
# alternation of video extensions; mod_ext() drops it into a regex to match the extension of a quoted path
vid_formats = '(mkv|avi|mp4|mov|ogm|wmv|mpg|mpeg|mk3d|m4v)'
# subtitle suffixes looked for next to a video, in this order
sub_formats = ('en.ass', 'en.srt')
# zero-width match at the start of a line that begins with "PlayResX:" (replace() applies re.M); used to insert lines before it
res_x_map = r'^(?=PlayResX:)'
# captures the "[V4+ Styles]" header together with the "Format:" line that follows it
style_map = r'(^\[V4\+ Styles\]$\n^Format:.*$\n)'
# matches a whole Style line named Default/Alternate/Thoughts/Top (optionally suffixed "_dvd") that uses Lato, Figtree ExtraBold or Gandhi Sans
style_chk = r'^Style: (?:Default|Alternate|Thoughts|Top)(?:_dvd)?,(?:Lato|Figtree ExtraBold|Gandhi Sans),.*$\n'
# Default + Alternate style lines inserted after the styles header of scripts with PlayResY 480 / 720 / 1080 / 1280 respectively
style_480p = 'Style: Default,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,84.375,100,0,0,1,1.5,1,2,17,17,27,1\nStyle: Alternate,Lato,36,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,84.375,100,0,0,1,1.5,1,2,17,17,27,1\n'
style_720p = 'Style: Default,Lato,54,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,2.25,1.5,2,30,30,40,1\nStyle: Alternate,Lato,54,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,2.25,1.5,2,30,30,40,1\n'
style_1080p = 'Style: Default,Lato,81,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,100,100,0,0,1,3.375,2.25,2,45,45,60,1\nStyle: Alternate,Lato,81,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,100,100,0,0,1,3.375,2.25,2,45,45,60,1\n'
style_1280u = 'Style: Default,Lato,96,&H00FFFFFF,&H00FFFFFF,&H00000000,&H96000000,0,0,0,0,84.375,100,0,0,1,4,2.7,2,45,45,71,1\nStyle: Alternate,Lato,96,&H00FFFFFF,&H00FFFFFF,&H00333333,&H96000000,0,-1,0,0,84.375,100,0,0,1,4,2.7,2,45,45,71,1\n'
# captures the start of a Dialogue line up to and including the comma after the end time
line_chk = r'^(Dialogue: \d+,\d+:\d{2}:\d{2}\.\d{2},\d+:\d{2}:\d{2}\.\d{2},)'
# style-name fields appended to line_chk to find dialogue lines using the "_dvd" styles
line_def = r'(?:Default_dvd),'
line_ita = r'(?:Thoughts_dvd),'
line_top = r'(?:Top_dvd),,0,0,0,,'
# character class of the characters del_uni() removes from file mappings
sub_unicode = '[․½→←⧵⁄꞉*?<>|“”]'
# undo special characters from windows filenames, convert backticks and remove trailing white-space
# takes the title text and returns it after applying every substitution in the table below, in dict order
def undo_reserved(title):
# each key is handed to re.sub as a regex pattern and each value as its replacement template
# NOTE(review): as written the keys '*' and '?' are not valid patterns on their own (re.sub raises re.error),
# '|' matches the empty string, and '<' / '>' map to themselves; presumably these keys were full-width
# look-alike characters that were normalised to ASCII somewhere along the way - confirm against the original file
# NOTE(review): in a replacement template r'\\' becomes a single backslash, while escapes of non-letters such as
# r'\/' or r'\"' are left alone and keep their backslash - confirm that output is what the title needs
reserved = { '⧵': r'\\', '⁄': r'\/', '꞉': ':', '*': r'\*', '?': r'\?', '<': '<', '>': '>', '|': r'\|', '`': "'", '[“”]': r'\"', r'[ \t]+$': '' }
for key, value in reserved.items(): title = re.sub(key, value, title)
return title
def del_uni(file):
    """Return *file* with every character matched by ``sub_unicode`` removed."""
    cleaned = re.sub(sub_unicode, '', file)
    return cleaned
def elapsed(start):
    """Return the time passed since *start* (a ``time.time()`` value), rounded to 2 decimals, as e.g. "1.23s"."""
    seconds = round(time.time() - start, 2)
    return f'{seconds}s'
def mod_ext(find, replace, file):
    """Swap the extension of a double-quoted file mapping.

    *find* is a regex fragment for the current extension (without the dot),
    *replace* the new extension text and *file* the quoted path. Only an
    extension directly followed by the closing quote at the end of the string
    is replaced, matched case-insensitively; anything else is returned as is.
    """
    extension = re.compile(fr'\.{find}"$', re.I)
    return extension.sub(f'.{replace}"', file)
def parse_std(capture):
    """Print the captured output of a finished command as branches of the progress tree.

    *capture* is the result of ``subprocess.run(..., capture_output=True)``.
    Every stdout line is prefixed with "│├─" and every stderr line with "│├"
    followed by the ``err`` marker. mkvtoolnix warnings about attachment specs
    that matched nothing are dropped. Nothing is printed when no text remains.
    """
    sections = []
    if capture.stdout:
        text = capture.stdout.decode('utf-8').strip('\r\n')
        sections.append('│├─' + text.replace('\r\n', '\r\n│├─'))
    if capture.stderr:
        # collected separately so stderr-only output no longer fails on "None += str"
        text = capture.stderr.decode('utf-8').strip('\r\n')
        sections.append(f'│├{err}' + text.replace('\r\n', f'\r\n│├{err}'))
    # stderr starts on its own line instead of being glued to the last stdout line
    out = '\r\n'.join(sections)
    # remove messages warning about removing attachments
    out = re.sub(r'^│├─Warning: No attachment matched the spec.*$\n?', '', out, flags=re.M)
    if out:
        print_f(out)
def grep(txt_search, flags):
    """Run a recursive grep over the ".ass" files below ``output_path`` and return its output as a list of lines.

    *flags* is appended to "-r": "l" lists the files that contain *txt_search*,
    "L" lists the files that do not. Every forward slash is removed from the
    listing before it is split on newlines, so the list can contain empty entries.
    """
    command = f'grep -r{flags} --include="*.ass" "{txt_search}" {output_path}'
    finished = subprocess.run(command, capture_output=True)
    listing = finished.stdout.decode('utf-8')
    return listing.replace('/', '').split('\n')
def replace(file, find, replace, log=True):
    """Rewrite *file* in place with every multi-line regex match of *find* substituted by *replace*.

    The file is read and written as UTF-8. When *log* is true the file name is
    printed as a branch of the progress tree before the substitution is made.
    """
    with open(file,'r+', encoding='utf8') as handle:
        if log:
            print_f(f'│├─{file}' )
        updated = re.sub(find, replace, handle.read(), flags=re.M)
        # rewind, overwrite from the start and cut off whatever is left of the old content
        handle.seek(0)
        handle.write(updated)
        handle.truncate()
print_f('\n ###############################################')
print_f( ' ## Paired Muxing Script for Quality Upgrades ##')
print_f( ' ###############################################')
# read "map.txt" from the working directory into a list of [vid_new, vid_old(, title)] lists
try:
    # "utf-8-sig" reads plain UTF-8 as well as files saved with a byte order mark
    with open('map.txt', encoding='utf-8-sig') as map_file:
        # blank lines (e.g. from a trailing newline) are skipped rather than becoming one-field mappings
        mappings = [[field.strip() for field in line.split('|')] for line in map_file.read().splitlines() if line.strip()]
    # every entry needs at least the two file fields that the muxing loop indexes
    if not mappings or any(len(fields) < 2 for fields in mappings):
        raise ValueError('"map.txt" is empty or malformed')
except (OSError, ValueError):
    print_f(f'\n{err}Aborting: "map.txt" not found or malformed')
    sys.exit(1)
print_f('\n ####### Video Multiplexing & Formatting #######')
# For every mapping: clean the file names, work out the audio offset between the two videos, mux the
# new video with the attachments / chapters of the old one, edit the track properties, and produce a
# resampled + offset ".en.ass" subtitle next to the output file. All paths in a mapping are expected
# to be wrapped in double quotes, which is why the commands below splice them in unquoted.
for mapping in mappings:
    # strip special characters from file names for aegisub_cli to function
    for i in range(2):
        cleaned = del_uni(mapping[i])
        if mapping[i] != cleaned:
            if os.path.isfile(mapping[i].strip('"')):
                subprocess.run(f'mv {mapping[i]} {cleaned}')
            # rename any loose subtitles that sit next to the video in the same way
            for s in sub_formats:
                loose_sub = mod_ext(vid_formats, s, mapping[i])
                if os.path.isfile(loose_sub.strip('"')):
                    subprocess.run(f'mv {loose_sub} {del_uni(loose_sub)}')
            mapping[i] = cleaned
    vid_new, vid_old, chapters, subtitle, sub_ext, offset = mapping[0], mapping[1], None, '', '', 0
    # output name: the old file's name with an .mkv extension inside the output folder (still quoted)
    mkv_name = os.path.basename(mod_ext(vid_formats, 'mkv', vid_old))
    mkv_pre = f'"{output_path}{mkv_name}'
    # the optional tags go in front of the trailing '.mkv"'
    mkv_out = f'{mkv_pre[:-5]}{add_tags}{mkv_pre[-5:]}'
    sub_out = mod_ext(vid_formats, 'en.ass', mkv_out)
    print_f('\n' + os.path.basename(vid_old).strip('"'))
    # check if there is a new file or the old one is being formatted
    if vid_new.lower() == 'format':
        print_f('├┬Marking the Old Video for Formatting...')
        vid_new = vid_old
        print_f('│╰─Completed')
    else:
        # convert wmv to mkv for mkvmerge compatibility
        if vid_new.lower().endswith('.wmv"'):
            print_f('├┬Converting .wmv to .mkv...')
            start = time.time()
            vid_mkv = mod_ext('wmv', 'mkv', vid_new)
            subprocess.run(f'ffmpeg -v quiet -y -i {vid_new} -c:v copy -c:a copy {vid_mkv}')
            vid_new = vid_mkv
            print_f(f'│╰─Completed in {elapsed(start)}')
        # determine the audio offset in seconds by using the first 4 minutes of audio (increase trim for slightly higher accuracy and much slower parsing)
        start = time.time()
        print_f('├┬Determining Audio Offset...')
        try:
            offset = json.loads(subprocess.run(f'audio-offset-finder --find-offset-of {vid_old} --within {vid_new}{enable_trim} --json', capture_output=True).stdout)
            offset = offset['time_offset'] if offset else 0
        except (OSError, ValueError, KeyError, TypeError):
            # the tool could not be started or did not return the expected json
            print_f(f'│{err}─Failed! Unable to Parse Video')
            sys.exit(1)
        print_f(f'│╰─Completed in {elapsed(start)} [{round(offset,3)}s]')
    # grab the file information of the original file in json format and store it in a variable
    print_f('├┬Parsing Track Info & MultiPlexing...')
    identify = json.loads(subprocess.run(f'{mkvtoolnix}mkvmerge.exe -J {vid_old}', capture_output=True, universal_newlines=True, encoding='utf-8').stdout)
    sub_track = None
    # if available add the title from the mapping otherwise use the one from the original file to ensure titles from the new file aren't used
    # (a blank override counts as "not given"; a container without properties simply yields no title)
    title = mapping[2] if len(mapping) > 2 else ''
    if not title:
        title = identify.get('container', {}).get('properties', {}).get('title')
    mod_title = f' --edit info --set title="{undo_reserved(title)}"' if title else ''
    # parse the identification variable to determine the track id of any .ass or .srt subtitles (if present)
    # when several subtitle tracks qualify the last one listed is used
    try:
        for track in identify['tracks']:
            if track['codec'] == 'SubStationAlpha':
                sub_ext, sub_track = 'ass', str(track['id'])
            elif track['codec'] == 'SubRip/SRT':
                sub_ext, sub_track = 'srt', str(track['id'])
        if sub_ext:
            subtitle = mod_ext(vid_formats, f'en.{sub_ext}', vid_old)
    except (KeyError, TypeError):
        # reported, but the mux itself is still attempted
        print_f(f'│{err}─Failed! Unable to Parse Video')
    # parse the identification variable to determine if there are single chapter files present
    try:
        chapters = identify['chapters'][0]['num_entries']
    except (KeyError, IndexError, TypeError):
        chapters = None
    sync_chapter = f'--chapter-sync {int(offset * 1000)}' if chapters and chapters > 1 else '--no-chapters' # only add chapters if there is more than one
    new_chapters = '' if sync_chapter == '--no-chapters' else ' --no-chapters' # use chapters from the new file if the old one doesn't have more than one
    # mux the attachments and chapters (with offset) from the original file into the new file
    start = time.time()
    merge = subprocess.run(f'{mkvtoolnix}mkvmerge.exe{quiet} -o {mkv_out} -S{new_chapters} {vid_new} -A -D -S{del_attach} {sync_chapter} {vid_old}', capture_output=True)
    parse_std(merge)
    print_f(f'│╰─Completed in {elapsed(start)}')
    # remove any tags, set the video+audio tracks to japanese (or the language from the arguments), set the title, and swap the fonts / drop images for the muxed file
    print_f('├┬Editing Properties...')
    start = time.time()
    propedit = subprocess.run(f'{mkvtoolnix}mkvpropedit.exe{quiet} {mkv_out} -t all:{mod_tracks}{del_fonts}{add_fonts}{mod_title}{del_images}', capture_output=True)
    parse_std(propedit)
    print_f(f'│╰─Completed in {elapsed(start)}')
    # extract the subtitle track from the original file and name it the same as the original file with ".en" appended
    start = time.time()
    if sub_track:
        print_f('├┬Extracting Subtitles...')
        # the opening quote moves in front of the track id: "id:path"
        sub_target = subtitle.lstrip('"')
        extract = subprocess.run(f'{mkvtoolnix}mkvextract.exe{quiet} {vid_old} tracks "{sub_track}:{sub_target}', capture_output=True)
        parse_std(extract)
        print_f('│├─' + os.path.basename(subtitle).strip('"'))
        print_f(f'│╰─Completed in {elapsed(start)}')
    else: # check for external subtitles if there are no sub tracks
        for s in sub_formats:
            candidate = mod_ext(vid_formats, s, vid_old)
            if os.path.isfile(candidate.strip('"')):
                print_f('├┬External Subtitles Found...')
                subtitle = candidate
                print_f('│├─' + os.path.basename(subtitle).strip('"'))
                print_f(f'│╰─Completed in {elapsed(start)}')
                # stop at the first hit: otherwise an .en.srt found afterwards would be picked instead
                # and its conversion below would overwrite the existing .en.ass
                break
    # convert any .srt subtitle to .ass
    if subtitle.endswith('en.srt"'):
        print_f('├┬Converting SubRip/SRT to SubStationAlpha...')
        start = time.time()
        sub_ass = subtitle.replace('.en.srt', '.en.ass')
        subprocess.run(f'ffmpeg -v quiet -y -i {subtitle} {sub_ass}')
        subtitle = sub_ass
        print_f(f'│╰─Completed in {elapsed(start)}')
    # resample the subtitle file resolution to match the output file
    if os.path.isfile(subtitle.strip('"')):
        print_f('├┬Resampling & Offsetting Subtitles...')
        start = time.time()
        subprocess.run(f'{aegisub_cli} --loglevel 2 --video {mkv_out} "{os.path.basename(subtitle)} {sub_out} tool/resampleres') # this will fail if the file paths contain any unicode
        # apply the audio offset to the resampled file
        subprocess.run(f'ffmpeg -v quiet -y -itsoffset {offset} -i {sub_out} temp.ass')
        subprocess.run(f'mv temp.ass {sub_out}')
        print_f(f'│╰─Completed in {elapsed(start)}')
    print_f('╰─Paired Muxing Complete!')
# Second pass: after all files are muxed, edit the ".ass" subtitles found under the output folder in place.
# grep(pattern, 'l') lists the files that contain the pattern, grep(pattern, 'L') the files that do not;
# replace() then rewrites each listed file.
# NOTE(review): the indentation of this section is not visible here, so it cannot be told from these lines
# whether "Cleaning the Script" below belongs to the "if not args.original:" branch - confirm before re-indenting
print_f('\n ##### Subtitle Script Properties & Styles #####')
start = time.time()
print_f('\n╭SubStation Alpha Operations')
if not args.original:
# insert subtitle properties required for the unified style to remain consistent
print_f(f'├┬Inserting ScaledBorderAndShadow / WrapStyle...')
# scripts without the property get it inserted in front of the "PlayResX:" line
for file in grep('ScaledBorderAndShadow:', 'L'):
if file: replace(file, res_x_map, 'ScaledBorderAndShadow: yes\n')
# scripts that set it to "no" are switched to "yes"
for file in grep('ScaledBorderAndShadow: no', 'l'):
if file: replace(file, r'^ScaledBorderAndShadow: no', 'ScaledBorderAndShadow: yes')
# same treatment for WrapStyle: insert it when missing, reset 1-9 to 0
for file in grep('WrapStyle:', 'L'):
if file: replace(file, res_x_map, 'WrapStyle: 0\n')
for file in grep(r'WrapStyle: [1-9]', 'l'):
if file: replace(file, r'^WrapStyle: [1-9]', 'WrapStyle: 0')
print_f(f'│╰─Done')
# insert SakuraCircle styled fonts after checking for dupes
print_f(f'├┬Inserting Unified Font Styles...')
# an empty pattern matches every file: drop the Style lines matched by style_chk and point dialogue
# lines using the "_dvd" styles at Default / Alternate (Top_dvd becomes Default with an {\an8} tag)
for file in grep('', 'l'):
if file:
replace(file, style_chk, '', False)
replace(file, fr'{line_chk}{line_def}', r'\1Default,', False)
replace(file, fr'{line_chk}{line_ita}', r'\1Alternate,', False)
replace(file, fr'{line_chk}{line_top}', r'\1Default,,0,0,0,,{\\an8}', False)
# insert the style pair that matches the script's PlayResY right after the styles header
for file in grep('PlayResY: 480', 'l'):
if file: replace(file, style_map, fr'\1{style_480p}')
for file in grep('PlayResY: 720', 'l'):
if file: replace(file, style_map, fr'\1{style_720p}')
for file in grep('PlayResY: 1080', 'l'):
if file: replace(file, style_map, fr'\1{style_1080p}')
for file in grep('PlayResY: 1280', 'l'):
if file: replace(file, style_map, fr'\1{style_1280u}')
print_f(f'│╰─Done')
# regex for fixing common subtitle script errors
print_f(f'├┬Cleaning the Script...')
# NOTE(review): grep reads '[Script Info]' as a bracket expression (any one of those characters), not as the
# literal section header - it presumably still selects every script, but confirm that is the intent
for file in grep('[Script Info]', 'l'):
if file:
replace(file, r'--' , '—' , False) # Convert double hyphen to single long hyphen
replace(file, r'’' , "'" , False) # Convert curly to straight single quotes
replace(file, r'“|”', '"' , False) # Convert doubly curly quotes to single double quotes
replace(file, r"''" , '"' , False) # Convert double single quotes to single double quote
replace(file, r'…' , '...', False) # Convert ellipses to periods
# NOTE(review): as written both the pattern and the replacement are a single space, so this call changes
# nothing; presumably the pattern held two spaces - confirm against the original file
replace(file, r' ' , ' ' , False) # Convert double spaces to single spaces
# Fix Aegisub Font Size Conversions
replace(file, r'(?!^Style: .+?,.*,)100\.039(?=,100,)', '100', False)
# Replace lowercase l when it should be an uppercase I (caused by old OCR techniques)
# this is the only call in the loop that keeps logging on, so each cleaned file is listed once
replace(file, r"""(?<!\d )(?:(?<=[}\.," -])|(?<=\\[nN]))l(?=[\., ]|[fnst] |'[md]|'ll|'ve|t's|t'll|sn't|-l|nside|dea|ntro)""" , 'I')
print_f(f'│╰─Done')
print_f(f'╰─Completed in {elapsed(start)}')
print_f('\n ############# All Tasks Complete! #############')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment