Last active
August 14, 2018 07:56
-
-
Save fireattack/1b80b247e38d74be792f6c4c38129a71 to your computer and use it in GitHub Desktop.
Converts WebVTT (.vtt) subtitles to SubRip (.srt). Requires Python 3.6+ and the chardet package.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse | |
import datetime | |
import re | |
from os.path import exists, splitext, join | |
from os import walk | |
import chardet | |
# A WebVTT timestamp is "[hh:]mm:ss.ttt": the hours field is optional and may
# have more than two digits.
_VTT_TIMESTAMP = r'(?:(\d{2,}):)?(\d{2}):(\d{2})\.(\d{3})'

# A cue timing line is "<start> --> <end>", optionally followed by cue
# settings (e.g. "align:start position:0%"), which SubRip has no place for.
_CUE_TIMING_RE = re.compile(
    r'[ \t]*' + _VTT_TIMESTAMP + r'[ \t]+-->[ \t]+' + _VTT_TIMESTAMP
)


def _srt_timestamp(hours, minutes, seconds, millis):
    """Format one timestamp the SubRip way: ``hh:mm:ss,ttt``.

    ``hours`` is None when the WebVTT timestamp omitted the hours field;
    SubRip always needs it, so it is filled in as ``00``.
    """
    return f'{hours or "00"}:{minutes}:{seconds},{millis}'


def _vtt_lines_to_srt(lines):
    """Convert the lines of a WebVTT file into the lines of a SubRip file.

    Everything before the first cue timing line (the ``WEBVTT`` header and
    whatever follows it) is dropped. Each cue timing line is replaced by a
    running cue number plus a SubRip timing line; every other line after the
    first cue is copied through unchanged.
    """
    srt_lines = []
    cue_number = 0
    for line in lines:
        timing = _CUE_TIMING_RE.match(line)
        if timing:
            cue_number += 1
            start = _srt_timestamp(*timing.group(1, 2, 3, 4))
            end = _srt_timestamp(*timing.group(5, 6, 7, 8))
            # Rebuilding the line (instead of patching "." -> ",") also
            # drops trailing WebVTT cue settings.
            srt_lines.append(f'{cue_number}\n{start} --> {end}\n')
        elif cue_number:
            # Only keep lines once the first cue has been seen.
            srt_lines.append(line)
    return srt_lines


def _find_vtt_files(folder):
    """Return the paths of all ``.vtt`` files (any case) under ``folder``."""
    found = []
    for root, _dirs, files in walk(folder):
        for name in files:
            if splitext(name)[1].lower() == '.vtt':
                found.append(join(root, name))
    return found


def _detect_encoding(raw):
    """Pick the text encoding to use for the raw bytes of a subtitle file.

    WebVTT is specified as UTF-8, so a strict UTF-8 decode is tried first
    ("utf-8-sig" also swallows an optional BOM). Only when that fails is the
    encoding guessed with chardet; its guess may be None, in which case
    ``open`` falls back to the platform default, as before.
    """
    try:
        raw.decode('utf-8-sig')
    except UnicodeDecodeError:
        return chardet.detect(raw)['encoding']
    return 'utf-8-sig'


def main(argv=None):
    """Convert one WebVTT file, or every ``.vtt`` file in a folder, to SRT.

    Each output file is written next to its input, with the extension
    replaced by ``.srt``, encoded as UTF-8.

    Args:
        argv: Command-line arguments (without the program name). Defaults to
            None, which makes argparse read ``sys.argv``.

    Returns:
        0 if the given path does not exist, otherwise None.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("filename")
    parser.add_argument("-f", "--folder", action="store_true")
    args = parser.parse_args(argv)

    if not exists(args.filename):
        print('Input file missing!')
        return 0

    if args.folder:
        vtt_files = _find_vtt_files(args.filename)
    else:
        vtt_files = [args.filename]

    for vtt_file in vtt_files:
        with open(vtt_file, 'rb') as f:
            raw = f.read()
        # Re-open in text mode so newline handling matches normal text I/O.
        with open(vtt_file, encoding=_detect_encoding(raw)) as f:
            lines = f.readlines()

        srt_file = f'{splitext(vtt_file)[0]}.srt'
        with open(srt_file, 'w', encoding='utf-8') as f:
            f.writelines(_vtt_lines_to_srt(lines))
    return None
if __name__ == "__main__":
    # Run the converter only when executed as a script, not when imported.
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment