The script will transcribe the audio and create an SRT file with the same name as the input file (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.
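The transcription step can also be reused from another Python script rather than via the command line. Below is a minimal sketch, assuming the gist has been saved as whisperpy.py somewhere on the import path; the input file name lecture.mp4 is a placeholder, not part of the gist.

from whisperpy import transcribe_audio

# Loads the Whisper "large" model, transcribes the (hypothetical) local recording,
# writes ./lecture.srt in the current directory, and returns that path.
srt_path = transcribe_audio("lecture.mp4")
print("SRT file created:", srt_path)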
# Usage
#   python3 whisperpy.py [filename_input or youtube url]
# The script will transcribe the audio and create an SRT file with the same name as the input file
# (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.
import sys
import whisper
import hashlib
from pytube import YouTube
from datetime import timedelta
import os

def download_video(url):
    """Download a YouTube video and return its local file name and title."""
    print("Start downloading", url)
    yt = YouTube(url)
    # Hash the video title so the downloaded file gets a safe, unique name
    hash_file = hashlib.md5()
    hash_file.update(yt.title.encode())
    file_name = f'{hash_file.hexdigest()}.mp4'
    # Download the first available stream into the current directory
    yt.streams.first().download("", file_name)
    print("Downloaded to", file_name)
    return {
        "file_name": file_name,
        "title": yt.title
    }

def transcribe_audio(path):
    model = whisper.load_model("large")
    print("Whisper model loaded.")
    if path.startswith("https://www.youtube.com"):
        video = download_video(path)
    else:
        video = {"file_name": path, "title": os.path.splitext(os.path.basename(path))[0]}
    segments = model.transcribe(video["file_name"])['segments']
    # Name the SRT file after the input file (or the YouTube video title)
    short_filename = video["title"]
    srtFilename = os.path.join(".", f"{short_filename}.srt")
    with open(srtFilename, 'w', encoding='utf-8') as srtFile:
        for segment in segments:
            startTime = str(0) + str(timedelta(seconds=int(segment['start']))) + ',000'
            endTime = str(0) + str(timedelta(seconds=int(segment['end']))) + ',000'
            text = segment['text'].strip()
            segmentId = segment['id'] + 1
            print(f"Processing segment: {segmentId}")
            print(f"Segment text: {text}")
            srtFile.write(f"{segmentId}\n{startTime} --> {endTime}\n{text}\n\n")
    return srtFilename

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python whisperpy.py [system path of webm or mp4 file OR YouTube URL]")
        sys.exit(1)
    input_path = sys.argv[1]
    try:
        result = transcribe_audio(input_path)
        print("SRT file created:", result)
    except Exception as e:
        print("Error:", str(e))

# This script is derived from https://github.com/openai/whisper/discussions/98#discussioncomment-4583593
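The startTime/endTime values above are truncated to whole seconds, so every subtitle boundary ends in ,000. If sub-second timing is wanted, a helper along the lines of the sketch below could replace those two lines; the helper name format_srt_timestamp is not part of the original gist.

def format_srt_timestamp(seconds):
    # Render a float offset in seconds as an SRT timestamp, HH:MM:SS,mmm
    millis = int(round(seconds * 1000))
    hours, millis = divmod(millis, 3_600_000)
    minutes, millis = divmod(millis, 60_000)
    secs, millis = divmod(millis, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# For example:
#   startTime = format_srt_timestamp(segment['start'])
#   endTime = format_srt_timestamp(segment['end'])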