Skip to content

Instantly share code, notes, and snippets.

@wooddar
Created December 3, 2024 17:40
Show Gist options
  • Save wooddar/d8d04120dd1d966316aa792d791f0dab to your computer and use it in GitHub Desktop.
Random script to transcribe and summarize long voicenotes
import whisper
import argparse
from openai import OpenAI
"""
Transcribes and summarizes audio files (voice notes) using Whisper and GPT-4.
Examples:
# Full pipeline - transcribe and summarize
python transcribe_voicenote.py audio.m4a
# Skip transcription, generate summary from existing transcription file
python transcribe_voicenote.py audio.m4a --skip-transcribe
# Only transcribe, skip summary
python transcribe_voicenote.py audio.m4a --skip-summary
# Use custom output file paths
python transcribe_voicenote.py audio.m4a --transcription-file trans.txt --summary-file sum.txt
"""
def parse_args(argv=None):
    """Parse command-line arguments for the voicenote pipeline.

    Args:
        argv: Optional list of argument strings. Defaults to None, in which
            case argparse falls back to sys.argv[1:] — so existing callers
            (main()) are unaffected, while tests can inject arguments.

    Returns:
        argparse.Namespace with attributes: audio_file, skip_transcribe,
        skip_summary, transcription_file, summary_file.
    """
    parser = argparse.ArgumentParser(
        description='Transcribe and summarize audio voice notes.'
    )
    parser.add_argument('audio_file', type=str, help='Path to audio file')
    parser.add_argument('--skip-transcribe', action='store_true',
                        help='Skip transcription step')
    parser.add_argument('--skip-summary', action='store_true',
                        help='Skip summary step')
    parser.add_argument('--transcription-file', type=str,
                        default='voice_note.txt',
                        help='Output path for the transcription text')
    parser.add_argument('--summary-file', type=str,
                        default='voicenote_summary.txt',
                        help='Output path for the summary text')
    return parser.parse_args(argv)
def transcribe(audio_file, output_file, model_name="large-v2"):
    """Transcribe an audio file with Whisper and write the text to disk.

    Args:
        audio_file: Path to the audio file to transcribe.
        output_file: Path the transcription text is written to (UTF-8).
        model_name: Whisper model to load. Defaults to "large-v2", matching
            the original hard-coded choice, so existing callers are unchanged.

    Returns:
        The transcription text, with ". "-delimited sentence breaks turned
        into newlines for readability.
    """
    print('Loading transcription model')
    model = whisper.load_model(model_name)
    result = model.transcribe(audio_file)
    # One sentence per line makes the raw transcript easier to scan.
    text = result['text'].replace('. ', '.\n')
    print(f'Writing transcription to {output_file}')
    # Explicit UTF-8 so non-ASCII transcripts don't depend on the platform
    # default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(text)
    return text
def summarize(text, output_file):
    """Summarize a voicenote transcript with GPT-4o and write it to disk.

    Args:
        text: The transcript text to summarize.
        output_file: Path the summary is written to (UTF-8).

    Returns:
        The summary text produced by the model.

    Note:
        Requires OpenAI credentials in the environment (OPENAI_API_KEY) —
        OpenAI() picks them up implicitly.
    """
    print('Simplifying')
    client = OpenAI()
    print('Sending to GPT-4o')
    chat_completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You simplify voicenote transcripts. You should mention the general things it talks about/tone, assuming the voicenote is addressed to the user 'you'. You should highlight any points that the user needs to respond to in markdown bullet points after your brief voicenote summary. You should assume that the user is not able to hear the voicenote. Think carefully about where it looks like there has been mistakes in the transcription. You should be as brief as possible."},
            {"role": "user", "content": text}
        ]
    )
    summary = chat_completion.choices[0].message.content
    print(f'Writing summary to {output_file}')
    # Explicit UTF-8 so the summary round-trips regardless of platform
    # default encoding.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(summary)
    print(f'===Summary===\n{summary}')
    return summary
def main():
    """Run the transcribe/summarize pipeline driven by CLI flags.

    Transcribes the audio file unless --skip-transcribe is set (in which
    case the transcript is read from --transcription-file), then summarizes
    unless --skip-summary is set.
    """
    args = parse_args()
    text = None
    if not args.skip_transcribe:
        # Print only when we actually transcribe; the original printed this
        # unconditionally, which was misleading with --skip-transcribe.
        print(f'Transcribing {args.audio_file}')
        text = transcribe(args.audio_file, args.transcription_file)
    if not args.skip_summary:
        if text is None:
            # Transcription was skipped: load a previously saved transcript.
            # UTF-8 to match how transcribe() writes the file.
            with open(args.transcription_file, encoding='utf-8') as f:
                text = f.read()
        summarize(text, args.summary_file)
# Script entry point: run the pipeline only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment