Skip to content

Instantly share code, notes, and snippets.

@andrewnc
Created October 11, 2024 00:35
Show Gist options
  • Save andrewnc/ec92950fa352739250760e0240a34174 to your computer and use it in GitHub Desktop.
#!/bin/bash
# talk.sh — ask a local Ollama model a question and speak the answer.
#
# Prerequisites:
#   ollama run llama3.2:1b      # model served on localhost:11434
#   chmod +x talk.sh
# Usage:
#   ./talk.sh "Your question here"
set -euo pipefail

# Require the question as the first argument.
if [ -z "${1:-}" ]; then
  echo "Usage: ./talk.sh 'Your question here'" >&2
  exit 1
fi
QUESTION="$1"

# System prompt tuned for text-to-speech output.
SYSTEM_PROMPT="You are a helpful AI assistant whose output is piped to a text to speech engine. Use '...' for natural pauses where appropriate in your answers."

# Build the request body with jq so quotes/newlines/backslashes in the
# question cannot break the JSON. (The original spliced "$QUESTION" into a
# JSON string literal, which is an injection bug for any input containing
# a double quote.)
PAYLOAD=$(jq -n \
  --arg model "llama3.2:1b" \
  --arg system "$SYSTEM_PROMPT" \
  --arg user "$QUESTION" \
  '{model: $model, stream: false,
    messages: [{role: "system", content: $system},
               {role: "user",   content: $user}]}')

# Query the local Ollama chat endpoint and extract the reply text.
RESPONSE=$(curl -s http://localhost:11434/api/chat -d "$PAYLOAD" | jq -r '.message.content')

# Bail out if the model returned nothing (server down, model missing, ...).
if [ -z "$RESPONSE" ]; then
  echo "Error: No response from the model." >&2
  exit 1
fi

# Speak the response with edge-tts, then play it.
# mktemp already creates a unique file from the XXXXXX template, so the
# original's pre-delete of the literal path "/tmp/temp_audioXXXXXX.mp3"
# was a no-op; a trap now guarantees cleanup on every exit path.
tempfile=$(mktemp /tmp/temp_audioXXXXXX.mp3)
trap 'rm -f -- "$tempfile"' EXIT
uvx edge-tts --text "$RESPONSE" --write-media "$tempfile" && afplay "$tempfile"
@andrewnc
Copy link
Author

# ollama run llama3.2:1b
# chmod +x talk.sh
# ./talk.sh

# Record the user's spoken question and transcribe it.
# Waits for the 's' key, records with sox until ~1 second of silence,
# transcribes via Whisper, and stores the one-line transcript in the
# global QUESTION variable. Exits the script if nothing was recognized.
listen_to_question() {
  local key question

  # NOTE: the original prompt claimed hold-to-talk ("release to stop"),
  # but sox's silence filter actually stops on a pause — say the prompt
  # honestly so users don't sit there holding the key.
  echo "Press 's' to start speaking. Recording stops automatically after a pause."

  while true; do
    # Wait for a single silent keypress (no echo, no Enter needed).
    read -r -n 1 -s key
    if [ "$key" = "s" ]; then
      echo "Recording... stop speaking to finish."

      # silence 1 0.1 1%  -> start recording once sound is detected
      # 1 1.0 1%          -> stop after ~1.0s below the 1% threshold
      sox -d /tmp/question.wav silence 1 0.1 1% 1 1.0 1%

      # Whisper names the output after the input file: /tmp/question.txt
      echo "Processing the audio with Whisper..."
      whisper /tmp/question.wav --model tiny --language en --output_format txt --output_dir /tmp/ 2>&1

      question=$(cat /tmp/question.txt)

      if [ -z "$question" ]; then
        echo "Error: No input from speech recognition." >&2
        exit 1
      fi

      # Trim leading/trailing whitespace and fold newlines to spaces so
      # the transcript is a single line (one pipeline instead of two).
      question=$(echo "$question" \
        | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' \
        | tr '\n' ' ')

      echo "You said: $question"
      QUESTION="$question"
      break
    fi
  done
}

# Capture the spoken question (sets the global QUESTION variable).
listen_to_question

# System prompt tuned for text-to-speech output.
# (The original string ended with a stray apostrophe — "...text to speech'" —
# which would have been read aloud; removed.)
SYSTEM_PROMPT="You are a helpful AI assistant whose output is piped to a text to speech engine. Use '...' for natural pauses where appropriate in your answers. No Yapping. Do not use any formatting or bullet points because they don't work in text to speech"

# Build the request body with jq so quotes/newlines in the transcript
# cannot break the JSON. (Splicing "$QUESTION" into a JSON string literal
# is an injection bug for any input containing a double quote — likely
# for free-form speech transcripts.)
PAYLOAD=$(jq -n \
  --arg model "llama3.2:1b" \
  --arg system "$SYSTEM_PROMPT" \
  --arg user "$QUESTION" \
  '{model: $model, stream: false,
    messages: [{role: "system", content: $system},
               {role: "user",   content: $user}]}')

# Query the local Ollama chat endpoint and extract the reply text.
RESPONSE=$(curl -s http://localhost:11434/api/chat -d "$PAYLOAD" | jq -r '.message.content')

# Bail out if the model returned nothing.
if [ -z "$RESPONSE" ]; then
  echo "Error: No response from the model." >&2
  exit 1
fi

# Convert the response to speech and play it.
# mktemp generates a unique name from the XXXXXX template, so the
# original's pre-delete of the literal "/tmp/temp_audioXXXXXX.mp3" path
# was a no-op; a trap now guarantees the temp file is removed on any exit.
tempfile=$(mktemp /tmp/temp_audioXXXXXX.mp3)
trap 'rm -f -- "$tempfile"' EXIT
uvx edge-tts --text "$RESPONSE" --write-media "$tempfile" --pitch=-5Hz --voice en-US-RogerNeural && afplay "$tempfile"

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment