Random script to transcribe and summarize long voicenotes
"""
Transcribes and summarizes audio files (voice notes) using Whisper and GPT-4o.

Examples:

    # Full pipeline - transcribe and summarize
    python transcribe_voicenote.py audio.m4a

    # Skip transcription, generate summary from existing transcription file
    python transcribe_voicenote.py audio.m4a --skip-transcribe

    # Only transcribe, skip summary
    python transcribe_voicenote.py audio.m4a --skip-summary

    # Use custom output file paths
    python transcribe_voicenote.py audio.m4a --transcription-file trans.txt --summary-file sum.txt
"""
import argparse

import whisper
from openai import OpenAI

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('audio_file', type=str, help='Path to audio file')
    parser.add_argument('--skip-transcribe', action='store_true', help='Skip transcription step')
    parser.add_argument('--skip-summary', action='store_true', help='Skip summary step')
    parser.add_argument('--transcription-file', type=str, default='voice_note.txt', help='Output path for the transcription')
    parser.add_argument('--summary-file', type=str, default='voicenote_summary.txt', help='Output path for the summary')
    return parser.parse_args()

def transcribe(audio_file, output_file):
    print('Loading transcription model')
    model = whisper.load_model("large-v2")
    result = model.transcribe(audio_file)
    # Put each sentence on its own line so the transcript is easier to read
    text = result['text'].replace('. ', '.\n')
    print(f'Writing transcription to {output_file}')
    with open(output_file, 'w') as f:
        f.write(text)
    return text

def summarize(text, output_file):
    print('Simplifying')
    client = OpenAI()
    print('Sending to GPT-4o')
    chat_completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": "You simplify voicenote transcripts. You should mention the general things it talks about/tone, assuming the voicenote is addressed to the user 'you'. You should highlight any points that the user needs to respond to in markdown bullet points after your brief voicenote summary. You should assume that the user is not able to hear the voicenote. Think carefully about where it looks like there have been mistakes in the transcription. You should be as brief as possible."},
            {"role": "user", "content": text}
        ]
    )
    summary = chat_completion.choices[0].message.content
    print(f'Writing summary to {output_file}')
    with open(output_file, 'w') as f:
        f.write(summary)
    print(f'===Summary===\n{summary}')
    return summary

def main():
    args = parse_args()
    text = None
    if not args.skip_transcribe:
        print(f'Transcribing {args.audio_file}')
        text = transcribe(args.audio_file, args.transcription_file)
    if not args.skip_summary:
        if text is None:
            # No fresh transcription; reuse the existing transcription file
            with open(args.transcription_file) as f:
                text = f.read()
        summarize(text, args.summary_file)

if __name__ == "__main__":
    main()
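
The two functions can also be imported and called directly instead of going through the CLI. The sketch below assumes the gist is saved as transcribe_voicenote.py on the import path, that the openai-whisper and openai packages (plus ffmpeg, which Whisper needs to decode audio) are installed, and that OPENAI_API_KEY is set in the environment; the file names meeting.m4a, meeting_transcript.txt, and meeting_summary.txt are placeholders.

# Minimal sketch: call the gist's functions from another script.
from transcribe_voicenote import transcribe, summarize

text = transcribe("meeting.m4a", "meeting_transcript.txt")  # runs Whisper large-v2
summary = summarize(text, "meeting_summary.txt")            # summarizes the transcript with GPT-4o
print(summary)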