Created
April 28, 2025 12:39
-
-
Save danielres/4fd183ab759ff90914ee7b04b80f3726 to your computer and use it in GitHub Desktop.
Whisper transcriber (speech to text)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# transcribe_chunks.sh — splits audio, uploads each chunk separately
# usage: transcribe_chunks.sh "input audio file"
# Requires OPENAI_API_KEY in environment.

# Strict mode: abort on unhandled command failure (-e), on expansion of an
# unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Print invocation help and abort.
# Outputs: two usage lines on stderr (this is only reached on a bad
#          invocation, so the text is a diagnostic, not program output).
# Returns: never returns; exits the script with status 1.
usage() {
  {
    echo "Usage: $(basename "$0") \"input audio file\""
    echo "Requires OPENAI_API_KEY environment variable to be set."
  } >&2
  exit 1
}
# ── validate invocation ─────────────────────────────────────────────────────
# Exactly one positional argument (the audio file) is accepted.
[[ $# -eq 1 ]] || usage

# The API key must be present and non-empty; ${VAR:-} keeps `set -u` from
# aborting before the friendlier message can be printed.
[[ -n "${OPENAI_API_KEY:-}" ]] || {
  echo "Missing OPENAI_API_KEY env var" >&2
  exit 2
}

# The argument must name an existing regular file.
in="$1"
[[ -f "$in" ]] || {
  echo "No such file: $in" >&2
  exit 3
}
# ── prepare ────────────────────────────────────────────────────────────────
# Print path $1 with the final extension of its last component removed.
# Only the file name is examined, so a dot in a directory name
# ("./dir.v1/audio") or a leading dot ("dir/.hidden") is left alone.
strip_extension() {
  local path=$1
  local name=${path##*/}
  if [[ "$name" == ?*.* ]]; then
    printf '%s\n' "${path%.*}"
  else
    printf '%s\n' "$path"
  fi
}

# Prefix for the per-chunk transcript files, written next to the input file.
base="$(strip_extension "$in")"

# Scratch directory for the audio chunks; removed on every exit path.
workdir="$(mktemp -d)"
trap 'rm -rf -- "$workdir"' EXIT
# ── split into ~9 minute chunks ─────────────────────────────────────────────
# 540-second segments, stream-copied (no re-encode) into numbered files in
# the scratch directory.
# NOTE(review): `-c copy` into .mp3 segments presumably requires the input to
# already be MP3-encoded — confirm before feeding other formats.
echo "Splitting into chunks..."
ffmpeg -loglevel error -i "$in" -f segment -segment_time 540 -c copy \
  "$workdir/chunk-%03d.mp3"

chunks=("$workdir"/chunk-*.mp3)
# Without nullglob an unmatched glob is kept as the literal pattern, so the
# array always has at least one element; check that the first entry is a real
# file instead of counting elements.
[[ -e "${chunks[0]}" ]] || {
  echo "Splitting failed." >&2
  exit 4
}
# ── transcribe each chunk into its own file ─────────────────────────────────
# Upload one audio chunk to the transcription endpoint and save the text reply.
# Globals:   OPENAI_API_KEY (read)
# Arguments: $1 - path of the audio chunk
#            $2 - path of the transcript file to write
# Returns:   0 on success; 5 if the chunk exceeds the 25 MiB limit checked
#            here; otherwise curl's non-zero exit status.
transcribe_chunk() {
  local chunk=$1 out=$2
  local -r max_size=$((25 * 1024 * 1024))
  local size rc=0

  # wc -c is portable (stat's --printf is GNU-only); the arithmetic expansion
  # strips the padding some wc implementations emit.
  size=$(($(wc -c <"$chunk")))
  if ((size > max_size)); then
    echo "Error: chunk $chunk still too large (${size} bytes)." >&2
    return 5
  fi

  # --fail makes curl exit non-zero on an HTTP error status instead of saving
  # the error body as if it were a transcript.
  curl -sS --fail https://api.openai.com/v1/audio/transcriptions \
    -H "Authorization: Bearer $OPENAI_API_KEY" \
    -F model=whisper-1 \
    -F response_format=text \
    -F file=@"$chunk" \
    --output "$out" || rc=$?

  if ((rc != 0)); then
    # Do not leave a partial or bogus transcript behind.
    rm -f -- "$out"
    echo "Error: transcription request for $chunk failed (curl exit $rc)." >&2
    return "$rc"
  fi
}

echo "Transcribing ${#chunks[@]} chunks..."
n=1
for chunk in "${chunks[@]}"; do
  # Transcripts are numbered from 001 in chunk order: <base>-NNN.txt
  printf -v suf "%03d" "$n"
  out="${base}-${suf}.txt"
  transcribe_chunk "$chunk" "$out" || exit $?
  echo "Chunk $n transcribed to $out"
  # Plain assignment: unlike ((n++)), its exit status can never trip set -e.
  n=$((n + 1))
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment