The script will transcribe the audio and create an SRT file with the same name as the input file (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.
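The transcription step can also be reused from another Python script rather than via the command line. Below is a minimal sketch, assuming the gist has been saved as whisperpy.py somewhere on the import path; the input file name lecture.mp4 is a placeholder, not part of the gist.

from whisperpy import transcribe_audio

# Loads the Whisper "large" model, transcribes the (hypothetical) local recording,
# writes ./lecture.srt in the current directory, and returns that path.
srt_path = transcribe_audio("lecture.mp4")
print("SRT file created:", srt_path)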
# Usage
#   python3 whisperpy.py [filename_input or youtube url]
# The script will transcribe the audio and create an SRT file with the same name as the input file
# (or YouTube video title) in the current directory. The name of the SRT file will be printed as the output.
import sys
import whisper
import hashlib
from pytube import YouTube
from datetime import timedelta
import os

def download_video(url):
    """Download a YouTube video and return its local file name and title."""
    print("Start downloading", url)
    yt = YouTube(url)
    # Hash the video title so the downloaded file gets a safe, unique name
    hash_file = hashlib.md5()
    hash_file.update(yt.title.encode())
    file_name = f'{hash_file.hexdigest()}.mp4'
    # Download the first available stream into the current directory
    yt.streams.first().download("", file_name)
    print("Downloaded to", file_name)
    return {
        "file_name": file_name,
        "title": yt.title
    }

def transcribe_audio(path):
    model = whisper.load_model("large")
    print("Whisper model loaded.")
    if path.startswith("https://www.youtube.com"):
        video = download_video(path)
    else:
        video = {"file_name": path, "title": os.path.splitext(os.path.basename(path))[0]}
    segments = model.transcribe(video["file_name"])['segments']
    # Name the SRT file after the input file (or the YouTube video title)
    short_filename = video["title"]
    srtFilename = os.path.join(".", f"{short_filename}.srt")
    with open(srtFilename, 'w', encoding='utf-8') as srtFile:
        for segment in segments:
            startTime = str(0) + str(timedelta(seconds=int(segment['start']))) + ',000'
            endTime = str(0) + str(timedelta(seconds=int(segment['end']))) + ',000'
            text = segment['text'].strip()
            segmentId = segment['id'] + 1
            print(f"Processing segment: {segmentId}")
            print(f"Segment text: {text}")
            srtFile.write(f"{segmentId}\n{startTime} --> {endTime}\n{text}\n\n")
    return srtFilename

if __name__ == '__main__':
    if len(sys.argv) != 2:
        print("Usage: python whisperpy.py [system path of webm or mp4 file OR YouTube URL]")
        sys.exit(1)
    input_path = sys.argv[1]
    try:
        result = transcribe_audio(input_path)
        print("SRT file created:", result)
    except Exception as e:
        print("Error:", str(e))

# This script is derived from https://github.com/openai/whisper/discussions/98#discussioncomment-4583593
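The startTime/endTime values above are truncated to whole seconds, so every subtitle boundary ends in ,000. If sub-second timing is wanted, a helper along the lines of the sketch below could replace those two lines; the helper name format_srt_timestamp is not part of the original gist.

def format_srt_timestamp(seconds):
    # Render a float offset in seconds as an SRT timestamp, HH:MM:SS,mmm
    millis = int(round(seconds * 1000))
    hours, millis = divmod(millis, 3_600_000)
    minutes, millis = divmod(millis, 60_000)
    secs, millis = divmod(millis, 1_000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

# For example:
#   startTime = format_srt_timestamp(segment['start'])
#   endTime = format_srt_timestamp(segment['end'])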