kublermdk · January 28, 2025 15:53
diff --git a/basic-pipe-whisper.py b/basic-pipe-whisper.py
 ## Converts Speech to Text using the OpenAI Whisper AI model ("openai/whisper-large-v3", ~3GB) via the Hugging Face pipeline
 ## NOTE: You'll want to update the file name (e.g. "TheUnaccountabilityMachine.mp3") to whatever mp3, flac or wav file you want processed.

 ## -- Install the requirements: install Python 3 and pip, then run:
 # pip install torch transformers librosa soundfile ffmpeg

 ## -- Run in Windows (after updating the filenames in the code below) using:
 # cd C:\workspace\whisper-huggingface\
 # python basic-pipe-whisper.py

 import os,sys
 import subprocess
 import glob
 import json
 from os import path
 from datetime import datetime
 from transformers import pipeline

 # Audio file to transcribe. The output files reuse its base name, so this is
 # the only place that needs editing to process a different recording.
 audio_file = "TheUnaccountabilityMachine.mp3"
 output_base = path.splitext(audio_file)[0]

 now = datetime.now()
 print("The Starting Time is: ", now)

 pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v3")

 # NB: We have return_timestamps=True to support audio longer than 30s, as it's then split into chunks via the Hugging Face pipeline
 result = pipe(audio_file, return_timestamps=True)

 print ("-------------------")
 print(result)
 print ("-------------------")

 # Save the full pipeline result as JSON. The `with` block guarantees the
 # file is flushed and closed even if serialisation fails.
 with open(output_base + '.json', 'w', encoding='utf-8') as json_file:
     json_file.write(json.dumps(result))

 # Save just the plain text version, without the rest of the result (such as
 # the timestamps requested above). The text is written as-is rather than via
 # json.dumps, which would wrap it in quotes and escape non-ASCII characters.
 # UTF-8 is explicit so non-ASCII text does not fail on the Windows default
 # code page.
 with open(output_base + '.txt', 'w', encoding='utf-8') as text_file:
     text_file.write(result['text'])

 print("Converted and Saved to file")
 print("Finished at ", datetime.now())
	## Converts Speech to Text using the OpenAI Whisper AI model ("openai/whisper-large-v3", ~3GB) via the Hugging Face pipeline
	## NOTE: You'll want to update the file name (e.g. "TheUnaccountabilityMachine.mp3") to whatever mp3, flac or wav file you want processed.

	## -- Install the requirements: install Python 3 and pip, then run:
	# pip install torch transformers librosa soundfile ffmpeg

	## -- Run in Windows (after updating the filenames in the code below) using:
	# cd C:\workspace\whisper-huggingface\
	# python basic-pipe-whisper.py

	import os,sys
	import subprocess
	import glob
	import json
	from os import path
	from datetime import datetime
	from transformers import pipeline

	# Audio file to transcribe. The output files reuse its base name, so this is
	# the only place that needs editing to process a different recording.
	audio_file = "TheUnaccountabilityMachine.mp3"
	output_base = path.splitext(audio_file)[0]

	now = datetime.now()
	print("The Starting Time is: ", now)

	pipe = pipeline("automatic-speech-recognition", "openai/whisper-large-v3")

	# NB: We have return_timestamps=True to support audio longer than 30s, as it's then split into chunks via the Hugging Face pipeline
	result = pipe(audio_file, return_timestamps=True)

	print ("-------------------")
	print(result)
	print ("-------------------")

	# Save the full pipeline result as JSON. The `with` block guarantees the
	# file is flushed and closed even if serialisation fails.
	with open(output_base + '.json', 'w', encoding='utf-8') as json_file:
		json_file.write(json.dumps(result))

	# Save just the plain text version, without the rest of the result (such as
	# the timestamps requested above). The text is written as-is rather than via
	# json.dumps, which would wrap it in quotes and escape non-ASCII characters.
	# UTF-8 is explicit so non-ASCII text does not fail on the Windows default
	# code page.
	with open(output_base + '.txt', 'w', encoding='utf-8') as text_file:
		text_file.write(result['text'])

	print("Converted and Saved to file")
	print("Finished at ", datetime.now())