ScriptAutomate · March 23, 2023 21:45
diff --git a/output-example.log b/output-example.log
 [00:00.000 --> 00:17.320]  I'm currently testing Whisper on my desktop as a way to eventually share with the linking
 [00:17.320 --> 00:20.080]  your thinking community.
 [00:20.080 --> 00:28.720]  This is just a way to show different timestamps and general display ability and seeing how
 [00:28.720 --> 00:34.000]  this could look as an easy note taking solution.
 [00:34.000 --> 00:38.240]  I for one think this is a rather cool piece of software that I could simply run from my
 [00:38.240 --> 00:40.960]  terminal of my Linux desktop.
diff --git a/setup-whisper.sh b/setup-whisper.sh
 # Set up an isolated Python environment for OpenAI Whisper.
 # Prerequisite: pyenv must already be set up, including the
 # pyenv-virtualenv plugin, which provides the `pyenv virtualenv`
 # and `pyenv activate` subcommands used below.
 # - https://github.com/pyenv/pyenv

 # Create dir for whisper work.
 # Stop if the directory cannot be entered; otherwise .python-version
 # would be written into whatever directory the shell is currently in.
 # `return` handles the case where this file is sourced, `exit` the
 # case where it is executed as a script.
 mkdir -p ~/git/whisper
 cd ~/git/whisper || return 1 2>/dev/null || exit 1
 # Use Python <3.11 because whisper isn't supported in
 # Python 3.11.x just yet
 echo '3.10.10' > .python-version
 # --skip-existing makes re-running this script safe when 3.10.10
 # has already been installed.
 pyenv install --skip-existing 3.10.10
 pyenv virtualenv 3.10.10 whisper
 pyenv activate whisper

 # Upgrade pip, install whisper
 pip install -U pip wheel
 pip install openai-whisper
diff --git a/transcribe.sh b/transcribe.sh
 # Prerequisites:
 # - the current directory is ~/git/whisper
 # - setup-whisper.sh has already been used
 # - ffmpeg is installed
 # cd ~/git/whisper
 # pyenv activate whisper

 # PulseAudio device to record from. To find the available sound devices
 # on Linux, list the pulse audio sources first:
 # -> ffmpeg -sources pulse
 # then put the chosen microphone name in place of the placeholder below.
 mic_device='<name-of-my-microphone-device>'
 recording='audio.mp3'
 transcript='transcription.log'

 # Turn on the microphone and record; press Ctrl+C when done recording.
 # The recording is written to "audio.mp3".
 ffmpeg -f pulse -i "$mic_device" "$recording"

 # Transcribe the audio into text, printing it and saving a copy via tee.
 # The small model is used because the author's machine has 4GB VRAM.
 # See readme: https://github.com/openai/whisper
 whisper "$recording" --model small --language English | tee "$transcript"
	[00:00.000 --> 00:17.320] I'm currently testing Whisper on my desktop as a way to eventually share with the linking
	[00:17.320 --> 00:20.080] your thinking community.
	[00:20.080 --> 00:28.720] This is just a way to show different timestamps and general display ability and seeing how
	[00:28.720 --> 00:34.000] this could look as an easy note taking solution.
	[00:34.000 --> 00:38.240] I for one think this is a rather cool piece of software that I could simply run from my
	[00:38.240 --> 00:40.960] terminal of my Linux desktop.
	# Set up an isolated Python environment for OpenAI Whisper.
	# Prerequisite: pyenv must already be set up, including the
	# pyenv-virtualenv plugin, which provides the `pyenv virtualenv`
	# and `pyenv activate` subcommands used below.
	# - https://github.com/pyenv/pyenv

	# Create dir for whisper work.
	# Stop if the directory cannot be entered; otherwise .python-version
	# would be written into whatever directory the shell is currently in.
	# `return` handles the case where this file is sourced, `exit` the
	# case where it is executed as a script.
	mkdir -p ~/git/whisper
	cd ~/git/whisper || return 1 2>/dev/null || exit 1
	# Use Python <3.11 because whisper isn't supported in
	# Python 3.11.x just yet
	echo '3.10.10' > .python-version
	# --skip-existing makes re-running this script safe when 3.10.10
	# has already been installed.
	pyenv install --skip-existing 3.10.10
	pyenv virtualenv 3.10.10 whisper
	pyenv activate whisper

	# Upgrade pip, install whisper
	pip install -U pip wheel
	pip install openai-whisper
	# Expects to be in ~/git/whisper
	# Expects setup-whisper.sh has already been used
	# Expects ffmpeg to be installed
	# cd ~/git/whisper
	# pyenv activate whisper

	# This line just turns on the microphone to record; then press Ctrl+C when done recording.
	# It outputs a recording file called "audio.mp3"
	# To find available sound devices, first run this command on Linux to list pulse audio devices:
	# -> ffmpeg -sources pulse
	# Then put the microphone you want to use in place of '<name-of-my-microphone-device>'
	ffmpeg -f pulse -i '<name-of-my-microphone-device>' audio.mp3

	# Transcribe the audio into text
	# The small model is used because the author's machine has 4GB VRAM
	# See readme: https://github.com/openai/whisper
	# The pipe must be unescaped: written as \| the shell would hand "|",
	# "tee" and "transcription.log" to whisper as literal arguments
	# instead of piping the transcript through tee.
	whisper audio.mp3 --model small --language English | tee transcription.log