|
import whisper |
|
import ffmpeg |
|
import os |
|
import time |
|
import threading |
|
|
|
def extract_audio(video_file_path, audio_file_path):
    """Extract the audio of a video file into a separate audio file via ffmpeg.

    Args:
        video_file_path: Path of the input video handed to ffmpeg.
        audio_file_path: Path of the audio file ffmpeg writes. An existing
            file at this path is overwritten.
    """
    # Without overwrite_output=True, ffmpeg asks for confirmation (or aborts
    # when stdin is not interactive) if the output file already exists, which
    # happens on every rerun that reuses the same audio path.
    ffmpeg.input(video_file_path).output(audio_file_path).run(
        overwrite_output=True
    )
|
|
|
def transcribe_video(video_file_path):
    """Transcribe a video with Whisper and return a timestamped transcript.

    The audio is first extracted to ``audio.wav`` (relative path), then
    transcribed with the Whisper ``"base"`` model. While the transcription
    runs, a background thread prints a progress estimate every 5 seconds.

    Args:
        video_file_path: Path of the video file to transcribe.

    Returns:
        A string containing one ``[hh:mm:ss - hh:mm:ss] text`` entry per
        transcribed segment, each entry followed by a blank line.
    """
    audio_file_path = "audio.wav"
    extract_audio(video_file_path, audio_file_path)

    model = whisper.load_model("base")

    # Extract the duration of the audio file
    probe = ffmpeg.probe(audio_file_path)
    duration = float(probe['format']['duration'])

    # An Event (instead of a plain flag plus sleep) lets the progress thread
    # wake up immediately when transcription ends.
    transcription_done = threading.Event()

    def print_progress():
        """Print a progress estimate until the done event is set."""
        start_time = time.time()
        while not transcription_done.is_set():
            elapsed_time = time.time() - start_time
            # This is only an estimate: elapsed wall-clock time relative to
            # the audio length, capped at 100%.
            if duration > 0:
                progress = min(elapsed_time / duration, 1.0) * 100
            else:
                # Guard against division by zero for empty audio.
                progress = 100.0
            print(f"Transcription progress: {progress:.2f}%")
            # Update every 5 seconds, returning early once done.
            transcription_done.wait(5)

    # Start a thread to print progress
    progress_thread = threading.Thread(target=print_progress)
    progress_thread.start()

    # Start transcription
    print("Starting transcription...")
    try:
        result = model.transcribe(audio_file_path)
    finally:
        # Always stop the reporter, even if transcription raises; otherwise
        # the non-daemon thread would keep the process alive forever.
        transcription_done.set()
        progress_thread.join()
    print("Transcription completed.")

    def format_timestamp(seconds):
        """Format a number of seconds as ``hh:mm:ss`` (fraction dropped)."""
        minutes, secs = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return f"{hours:02}:{minutes:02}:{secs:02}"

    # Format the transcription with timestamps in hh:mm:ss format
    entries = []
    for segment in result['segments']:
        start = format_timestamp(segment['start'])
        end = format_timestamp(segment['end'])
        text = segment['text'].strip()
        entries.append(f"[{start} - {end}] {text}\n\n")

    return "".join(entries)
|
|
|
# Call the function for each video segment (if split) or single video file
video_files = ["video.mkv"]  # List all segment files or use ["video.mkv"] if not split

# Transcripts are collected in order and joined once at the end.
transcriptions = []

for i, video_file in enumerate(video_files):
    transcription = transcribe_video(video_file)
    transcriptions.append(transcription)
    # Save each file's transcript right away so finished work survives a
    # failure on a later file. UTF-8 is explicit because the locale default
    # encoding may be unable to represent non-ASCII transcript text.
    with open(f"partial_transcript_{i}.txt", "w", encoding="utf-8") as file:
        file.write(transcription)

full_transcription = "".join(transcriptions)

# Write the full transcription to a file
with open("transcript.txt", "w", encoding="utf-8") as file:
    file.write(full_transcription)

print("Transcription completed and saved to transcript.txt")