Skip to content

Instantly share code, notes, and snippets.

@AshtonIzmev
Created July 21, 2025 21:41
Show Gist options
  • Select an option

  • Save AshtonIzmev/80a31843e29e7931d405c3b2f97abdc7 to your computer and use it in GitHub Desktop.

Select an option

Save AshtonIzmev/80a31843e29e7931d405c3b2f97abdc7 to your computer and use it in GitHub Desktop.
Whisper STT snippet
#%pip install openai
from openai import OpenAI
import os
import subprocess
from pydub import AudioSegment
import math
# Take the API key from the OPENAI_API_KEY environment variable instead of
# embedding it in the source, where it leaks as soon as the file is shared.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
def transcribe_audio(file_path):
    """Transcribe one audio file with the ``whisper-1`` model.

    Args:
        file_path: Path of the audio file to send for transcription.

    Returns:
        The object returned by ``client.audio.transcriptions.create``.
    """
    # Open the file in a context manager so the handle is released even when
    # the request raises; previously the handle was never closed.
    with open(file_path, "rb") as audio_file:
        return client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
        )
def convert_mp3_to_ogg(mp3_path, output_dir):
    """Re-encode an MP3 file as OGG inside ``output_dir``.

    ``output_dir`` is created when it does not exist. The output file keeps
    the input's file name with the text after its last dot (if any) dropped
    and ``.ogg`` appended.

    Returns the path of the OGG file that was written.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Derive "<name>.ogg" from the source file name.
    source_name = os.path.basename(mp3_path)
    stem = source_name.rsplit('.', 1)[0]
    target = os.path.join(output_dir, stem + ".ogg")

    # Decode the MP3 and write it back out as OGG in one step.
    AudioSegment.from_mp3(mp3_path).export(target, format="ogg")
    print(f"Converted {mp3_path} to {target}")
    return target
def split_audio_into_chunks(ogg_path, chunk_length_minutes, output_dir):
    """Cut an OGG file into consecutive pieces of ``chunk_length_minutes``.

    Every piece is exported to ``output_dir`` (created when missing) as
    ``<name>_chunk<k>.ogg`` with ``k`` counting from 1; the final piece
    holds whatever audio is left over.

    Returns the list of exported paths, in chunk order.
    """
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_ogg(ogg_path)
    total_ms = len(audio)

    # Work in milliseconds, the unit used for slicing the segment below.
    step_ms = chunk_length_minutes * 60 * 1000
    piece_count = math.ceil(total_ms / step_ms)

    stem = os.path.basename(ogg_path).rsplit('.', 1)[0]

    written = []
    for number in range(1, piece_count + 1):
        begin = (number - 1) * step_ms
        # Clamp the last piece to the end of the recording.
        finish = min(number * step_ms, total_ms)
        destination = os.path.join(output_dir, f"{stem}_chunk{number}.ogg")
        audio[begin:finish].export(destination, format="ogg")
        written.append(destination)
        print(f"Created chunk {number}/{piece_count}: {destination}")
    return written
# Process the MP3 file: convert it to OGG, split it into 15-minute chunks,
# then transcribe each chunk.
mp3_path = os.path.expanduser("~/Downloads/output_audio.mp3")  # Expand the tilde in the path
ogg_directory = "ogg/xx"
ogg_path = convert_mp3_to_ogg(mp3_path, "ogg")
chunks = split_audio_into_chunks(ogg_path, 15, ogg_directory)
print(f"Audio file split into {len(chunks)} chunks")

# Transcribe exactly the chunks produced above, in playback order.
# Re-listing the directory would return files in arbitrary order and would
# also pick up stale .ogg files left behind by earlier runs.
transcriptions = {}  # chunk file name -> transcription result
for file_path in chunks:
    filename = os.path.basename(file_path)
    print(f"Processing {filename}")
    transcription = transcribe_audio(file_path)
    transcriptions[filename] = transcription
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment