Last active
December 30, 2023 15:31
-
-
Save Red-Satori/f0a39aab395b55fc97d008278ded7cd1 to your computer and use it in GitHub Desktop.
Count TFM occurrences in a file and print them
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Search one .srt subtitle file for a string and print every matching line.

Usage: python3 search_one_file.py <srt_file_path> <search_string>

For each subtitle text line containing the search string (case-insensitive,
matched literally) the script prints the start time of the enclosing cue, the
tail of the previous text line for context, and the line itself with the
match wrapped in <b>...</b>. A summary with the list of timestamps follows.
"""
import re
import sys


def _collect_matches(lines, search_string):
    """Return ``(report_lines, timestamps)`` for every line containing the string.

    ``lines`` is any iterable of raw subtitle lines (newlines included).
    The search string is matched literally and case-insensitively; one entry
    is produced per matching line, not per occurrence.
    """
    # Escape once so characters such as '+', '.', '(' are matched literally,
    # for both line selection and highlighting.
    pattern = re.compile(re.escape(search_string), re.IGNORECASE)
    highlighted = f'<b>{search_string}</b>'

    report_lines = []
    timestamps = []
    prev_line = ''
    target_ts = ''
    for line in lines:
        if '-->' in line:
            # "00:00:01,000 --> 00:00:04,000": keep the start time without
            # milliseconds. Splitting on '-->' first keeps this correct even
            # when the start time has no comma.
            start = line.split('-->', 1)[0].strip()
            target_ts = start.split(',', 1)[0]
        elif pattern.search(line):
            # A function replacement keeps backslashes in the search string
            # from being interpreted as group references.
            modified_line = pattern.sub(lambda _match: highlighted, line)
            # Show the tail of the previous text line as context, unless the
            # previous line was the cue's timing line.
            if '-->' in prev_line:
                prev_line_partial = ''
            else:
                prev_line_partial = prev_line[-10:].strip()
            report_lines.append(
                "\n@" + target_ts + " ==> '..." + prev_line_partial + " "
                + modified_line.strip() + "...'"
            )
            timestamps.append(target_ts)
        prev_line = line
    return report_lines, timestamps


def main(argv=None):
    """Run the search described by ``argv`` and print the report.

    ``argv`` defaults to ``sys.argv`` and must be
    ``[program, srt_file_path, search_string]``.
    Returns the process exit code: 0 on success, 1 on a usage error, a
    non-.srt path, or a missing file.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 3:
        print("Usage: python3 search_one_file.py <srt_file_path> <search_string>")
        return 1

    srt_file_path = argv[1]
    search_string = argv[2]

    if not srt_file_path.lower().endswith('.srt'):
        print("Error: The provided file is not an .srt file.")
        return 1

    try:
        with open(srt_file_path, 'r', encoding='utf-8') as srt_file:
            lines_to_print, timestamps = _collect_matches(srt_file, search_string)
    except FileNotFoundError:
        print(f"Error: File '{srt_file_path}' not found.")
        return 1

    total = len(timestamps)

    # Every entry starts with a blank-line separator; drop it on the first
    # entry so it sits directly under the header.
    if lines_to_print and lines_to_print[0].startswith('\n'):
        lines_to_print[0] = lines_to_print[0][1:]
    lines_to_print.insert(0, f"<h4>'{search_string}'</h4><h4>Total: {total}</h4>")

    for line in lines_to_print:
        print(line)

    print("\n\n")
    print(f"Total of {total} '{search_string}':")
    for timestamp in timestamps:
        print(timestamp)
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment