Random script to transcribe and summarize long voicenotes
"""
Transcribes and summarizes audio files (voice notes) using Whisper and GPT-4o.

Examples:

    # Full pipeline - transcribe and summarize
    python transcribe_voicenote.py audio.m4a

    # Skip transcription, generate summary from existing transcription file
    python transcribe_voicenote.py audio.m4a --skip-transcribe

    # Only transcribe, skip summary
    python transcribe_voicenote.py audio.m4a --skip-summary

    # Use custom output file paths
    python transcribe_voicenote.py audio.m4a --transcription-file trans.txt --summary-file sum.txt
"""
import argparse

import whisper
from openai import OpenAI

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('audio_file', type=str, help='Path to audio file')
    parser.add_argument('--skip-transcribe', action='store_true', help='Skip transcription step')
    parser.add_argument('--skip-summary', action='store_true', help='Skip summary step')
    parser.add_argument('--transcription-file', type=str, default='voice_note.txt', help='Output path for the transcription')
    parser.add_argument('--summary-file', type=str, default='voicenote_summary.txt', help='Output path for the summary')
    return parser.parse_args()

def transcribe(audio_file, output_file):
    print('Loading transcription model')
    model = whisper.load_model("large-v2")
    result = model.transcribe(audio_file)
    # Put each sentence on its own line so the transcript is easier to read
    text = result['text'].replace('. ', '.\n')
    print(f'Writing transcription to {output_file}')
    with open(output_file, 'w') as f:
        f.write(text)
    return text

def summarize(text, output_file):
    print('Simplifying')
    client = OpenAI()
    print('Sending to GPT-4o')
    chat_completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You simplify voicenote transcripts. You should mention the general things it talks about/tone, assuming the voicenote is addressed to the user 'you'. You should highlight any points that the user needs to respond to in markdown bullet points after your brief voicenote summary. You should assume that the user is not able to hear the voicenote. Think carefully about where it looks like there have been mistakes in the transcription. You should be as brief as possible."},
            {"role": "user", "content": text}
        ]
    )
    summary = chat_completion.choices[0].message.content
    print(f'Writing summary to {output_file}')
    with open(output_file, 'w') as f:
        f.write(summary)
    print(f'===Summary===\n{summary}')
    return summary

def main():
    args = parse_args()
    text = None
    if not args.skip_transcribe:
        print(f'Transcribing {args.audio_file}')
        text = transcribe(args.audio_file, args.transcription_file)
    if not args.skip_summary:
        if text is None:
            # No fresh transcription; reuse the existing transcription file
            with open(args.transcription_file) as f:
                text = f.read()
        summarize(text, args.summary_file)

if __name__ == "__main__":
    main()
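
The two functions can also be imported and called directly instead of going through the CLI. The sketch below assumes the gist is saved as transcribe_voicenote.py on the import path, that the openai-whisper and openai packages (plus ffmpeg, which Whisper needs to decode audio) are installed, and that OPENAI_API_KEY is set in the environment; the file names meeting.m4a, meeting_transcript.txt, and meeting_summary.txt are placeholders.

# Minimal sketch: call the gist's functions from another script.
from transcribe_voicenote import transcribe, summarize

text = transcribe("meeting.m4a", "meeting_transcript.txt")  # runs Whisper large-v2
summary = summarize(text, "meeting_summary.txt")            # summarizes the transcript with GPT-4o
print(summary)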