Created
July 21, 2025 21:41
-
-
Save AshtonIzmev/80a31843e29e7931d405c3b2f97abdc7 to your computer and use it in GitHub Desktop.
Whisper STT snippet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #%pip install openai pydub | |
| from openai import OpenAI | |
| import os | |
| import subprocess | |
| from pydub import AudioSegment | |
| import math | |
# Prefer the OPENAI_API_KEY environment variable so a real key never has to be
# written into the snippet; the original placeholder is kept as the fallback
# for anyone who edits it in place.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "sk-proj-XXX"))
def transcribe_audio(file_path):
    """Transcribe one audio file with the ``whisper-1`` model.

    Args:
        file_path: Path of the audio file to upload.

    Returns:
        Whatever ``client.audio.transcriptions.create`` returns for the file.
    """
    # The context manager closes the handle even when the request raises;
    # the previous bare open() leaked one file descriptor per call.
    with open(file_path, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
def convert_mp3_to_ogg(mp3_path, output_dir):
    """Re-encode the MP3 at ``mp3_path`` as an OGG file inside ``output_dir``.

    ``output_dir`` is created if it does not exist. The output file is named
    after the source file with its last dot-suffix (if any) dropped and
    ``.ogg`` appended.

    Returns:
        The path of the OGG file that was written.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Only the final dot-suffix is removed, so "a.b.mp3" becomes "a.b.ogg".
    stem, *_ = os.path.basename(mp3_path).rsplit('.', 1)
    ogg_path = os.path.join(output_dir, f"{stem}.ogg")

    # Decoding and encoding are delegated to pydub.
    AudioSegment.from_mp3(mp3_path).export(ogg_path, format="ogg")
    print(f"Converted {mp3_path} to {ogg_path}")
    return ogg_path
def split_audio_into_chunks(ogg_path, chunk_length_minutes, output_dir):
    """Split an OGG file into consecutive chunks of at most the given length.

    Args:
        ogg_path: Path of the OGG file to split.
        chunk_length_minutes: Maximum duration of each chunk, in minutes.
            Must be greater than zero.
        output_dir: Directory that receives the chunk files; created if
            missing.

    Returns:
        The chunk file paths in playback order, named
        ``<base>_chunk<N>.ogg`` with ``N`` starting at 1. The list is empty
        when the audio has zero length.

    Raises:
        ValueError: If ``chunk_length_minutes`` is not greater than zero.
    """
    # Validate before touching the filesystem: a zero length used to surface
    # as a ZeroDivisionError after the audio was already decoded, and a
    # negative length silently produced no chunks at all.
    if chunk_length_minutes <= 0:
        raise ValueError(
            f"chunk_length_minutes must be > 0, got {chunk_length_minutes!r}"
        )

    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_ogg(ogg_path)
    total_ms = len(audio)  # the slicing below treats positions as milliseconds
    chunk_length_ms = chunk_length_minutes * 60 * 1000
    total_chunks = math.ceil(total_ms / chunk_length_ms)

    base_name = os.path.basename(ogg_path).rsplit('.', 1)[0]
    chunks = []

    for i in range(total_chunks):
        start_ms = i * chunk_length_ms
        # The final chunk is clamped to the end of the audio.
        end_ms = min((i + 1) * chunk_length_ms, total_ms)
        chunk_path = os.path.join(output_dir, f"{base_name}_chunk{i+1}.ogg")
        audio[start_ms:end_ms].export(chunk_path, format="ogg")
        chunks.append(chunk_path)
        print(f"Created chunk {i+1}/{total_chunks}: {chunk_path}")

    return chunks
# Process the MP3 file
mp3_path = os.path.expanduser("~/Downloads/output_audio.mp3")  # Expand the tilde in the path
ogg_directory = "ogg/xx"
ogg_path = convert_mp3_to_ogg(mp3_path, "ogg")
chunks = split_audio_into_chunks(ogg_path, 15, ogg_directory)
print(f"Audio file split into {len(chunks)} chunks")

# Transcribe exactly the chunks produced above, in playback order.
# Re-listing the directory with os.listdir() returned entries in arbitrary
# order and also picked up stale .ogg files left by earlier runs.
transcriptions = {}
for file_path in chunks:
    filename = os.path.basename(file_path)
    print(f"Processing {filename}")
    transcription = transcribe_audio(file_path)
    # Keyed by bare file name, as before.
    transcriptions[filename] = transcription
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment