Created
December 30, 2023 15:07
-
-
Save Red-Satori/58283215c7670eab63791241ba37e476 to your computer and use it in GitHub Desktop.
Search TFM subtitle files using Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import re | |
import sys | |
# Substring a file name must contain (compared case-insensitively) to be considered.
FILE_MATCH = "tfm-"
# Extensions a candidate file name may end with: subtitle/text files plus their
# possible '.mp3' / '.mp4' companions.
# NOTE: the misspelled name is kept on purpose; other code in this file refers to it.
FILE_EXTENSTIONS = ('.srt', '.md', '.txt', '.mp3', '.mp4')
# Directory that is scanned: the working directory at the time the script starts.
DIRECTORY = os.getcwd()

# Everything after the script name is treated as a search term.
string_args = sys.argv[1:]

# Only abort when executed as a script. Exiting unconditionally at module level
# would terminate any program that merely imports this file.
if __name__ == "__main__" and not string_args:
    print("You must pass search args", file=sys.stderr)
    sys.exit(1)
def highlight_match(line, match):
    """Return *line* with every case-insensitive occurrence of *match* coloured green.

    Args:
        line: The text to decorate.
        match: The literal search term (not a regular expression).

    Returns:
        A copy of *line* in which each occurrence is wrapped in the ANSI
        sequences ``\\x1b[32m`` (green) and ``\\x1b[37m`` (white). The matched
        text keeps the casing it has in *line*. If *match* is empty, *line*
        is returned unchanged.
    """
    # An empty pattern matches between every character, which would scatter
    # colour codes through the whole line.
    if not match:
        return line
    # A callable replacement is used instead of a template string so that
    # (a) the original casing of the matched text is preserved, and
    # (b) backslashes in the term are not parsed as regex replacement escapes.
    return re.sub(
        re.escape(match),
        lambda found: f"\x1b[32m{found.group(0)}\x1b[37m",
        line,
        flags=re.IGNORECASE,
    )
def search_files_for_string(string_args:[]): | |
matching_files = [] | |
for filename in os.listdir(DIRECTORY): | |
if any(filename.lower().endswith(ext) for ext in FILE_EXTENSTIONS) and FILE_MATCH.lower() in filename.lower(): | |
matching_files.append(filename) | |
# matching_files.sort() # Sort the matching files alphabetically | |
matching_files.sort(reverse=True) # Sort the matching files in reverse order | |
for filename in matching_files: | |
file_path = os.path.join(DIRECTORY, filename) | |
base_name, _ = os.path.splitext(filename) # Get the filename without extension | |
file_ext = os.path.splitext(filename)[1] | |
if file_ext != ".srt" and file_ext != ".txt": | |
continue | |
# Check if an accompanying audio/video file exists | |
audio_video_file = None | |
video_audio_ext = None | |
if os.path.isfile(os.path.join(DIRECTORY, base_name + '.mp3')): | |
audio_video_file = base_name + '.mp3' # Use MP3 if it exists | |
video_audio_ext = ".mp3" | |
elif os.path.isfile(os.path.join(DIRECTORY, base_name + '.mp4')): | |
audio_video_file = base_name + '.mp4' # Use MP4 if MP3 is not found | |
video_audio_ext = ".mp4" | |
start_times = [] # List to store start times for each match instance | |
first_match = True | |
with open(file_path, 'r', encoding='utf-8') as file: | |
lines = file.read().split('\n') | |
string_match_found = False # Flag to track if a string match is found | |
for i, line in enumerate(lines): | |
if all(arg.lower() in line.lower() for arg in string_args): | |
string_match_found = True | |
start_idx = max(i - 1, 0) | |
end_idx = min(i + 1, len(lines) - 1) | |
if first_match: | |
print("\n\n\x1b[90m" + "-"*30 + "\x1b[37m") | |
first_match = False | |
print(f"\nMatch found in '{filename}':") | |
for j in range(start_idx, end_idx + 1): | |
line_to_print = lines[j] | |
if j == i: | |
for arg in string_args: | |
line_to_print = highlight_match(line_to_print, arg) | |
# Check for timestamp format (HH:MM:SS,mmm --> HH:MM:SS,mmm) | |
timestamp_match = re.search(r'(\d{2}:\d{2}:\d{2},\d{3}) --> (\d{2}:\d{2}:\d{2},\d{3})', line_to_print) | |
if timestamp_match: | |
start_time = timestamp_match.group(1).replace(",", ".") | |
start_times.append(start_time) | |
print(line_to_print) | |
# Print accompanying audio/video files only if a string match is found | |
if string_match_found: | |
if audio_video_file: | |
print(f"\x1b[33mAudio/Video File Found: {audio_video_file}\x1b[37m") | |
# Generate FFmpeg trim commands for audio/video files | |
for start_time in start_times: | |
output_file_name_time = start_time.replace('.', '-').replace(':', '-') | |
output_file_name = f"{base_name}-{output_file_name_time}-{string_args[0].replace(' ', '-')}{video_audio_ext}" | |
ffmpeg_trim_command = f"ffmpeg -y -hide_banner -ss {start_time} -i \"{audio_video_file}\" -t 10 -c copy \"{output_file_name}\"" | |
print(f"\nFFmpeg Trim Command for '{audio_video_file}':") | |
print(f"\x1b[32m{ffmpeg_trim_command}\x1b[37m") | |
print("\x1b[90m" + "-"*30 + "\x1b[37m") # print closing border | |
if __name__ == "__main__":
    # Run the search with the terms taken from the command line.
    search_files_for_string(string_args)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment