Skip to content

Instantly share code, notes, and snippets.

@vijinho
Created January 7, 2025 16:25
Show Gist options
  • Save vijinho/50553202da24cfa73347b65f7df74057 to your computer and use it in GitHub Desktop.
Save vijinho/50553202da24cfa73347b65f7df74057 to your computer and use it in GitHub Desktop.
Wrapper for Coqui TTS (https://github.com/coqui-ai/TTS) that takes a model ID and a vocoder ID, synthesizes the given text to a WAV file, and converts that WAV file to MP3.
#!/bin/bash
# Lookup tables translating the numeric IDs accepted on the command line
# into the model / vocoder names handed to the `tts` program.
declare -A model_ids
model_ids[12]="tts_models/en/ljspeech/tacotron2-DDC_ph"
model_ids[15]="tts_models/en/ljspeech/tacotron2-DCA"

declare -A vocoder_ids
vocoder_ids[4]="vocoder_models/en/ljspeech/multiband-melgan"
vocoder_ids[6]="vocoder_models/en/ljspeech/univnet"
#######################################
# Print the command-line help text and terminate the script.
# Globals:   none
# Arguments: none
# Outputs:   help text on stdout
# Returns:   never returns; always exits with status 1
#######################################
usage() {
  # -m and -v are shown without brackets because the script rejects a
  # missing or unknown model/vocoder ID.
  cat <<EOF
Usage: $0 -m <model_id> -v <vocoder_id> [-t <text>] [-f <output_file>] [-p]
Options:
-t <text> : The text to speak. Default: a built-in test sentence.
-m <model_id> : Model ID (12 or 15). Required.
-v <vocoder_id> : Vocoder ID (4 or 6). Required.
-f <output_file>: Path to save the output file. Default: /tmp/tts_m<model_id>_v<vocoder_id>-YYYYMMDD-HHMMSS.wav
                  The WAV file is then converted to an MP3 with the same base name and the WAV file is deleted.
-p : Play the generated sound file with aplay (for audio playback).
-h, --help : Display this help message.
Examples:
$0 -m 12 -v 4 -f /home/user/output.wav
$0 -t "Hello world" -m 15 -v 6 -p
EOF
  exit 1
}
# Option defaults. out_path and pipe_out are initialised explicitly so that
# same-named variables inherited from the environment cannot leak in.
model_id=""
vocoder_id=""
out_path=""
pipe_out=false
text="TESTING. This is a test because no text was specified."

#######################################
# Parse the command-line options into the global settings above.
# Globals:   text, model_id, vocoder_id, out_path, pipe_out (written)
# Arguments: the script's command-line arguments ("$@")
# Outputs:   a diagnostic on stderr for an unknown option or a missing
#            option argument
# Returns:   0 when parsing succeeds; on -h or a bad option it calls
#            usage, which exits the script
#######################################
parse_options() {
  local opt
  local OPTIND=1
  # The leading ':' selects getopts' silent error reporting: an unknown
  # option yields opt='?' and a missing argument yields opt=':', both with
  # the offending option letter in OPTARG. Without it the ':' arm can never
  # match and OPTARG is empty in the '?' arm.
  while getopts ":t:m:v:f:ph" opt; do
    case "$opt" in
      t) text=$OPTARG ;;
      m) model_id=$OPTARG ;;
      v) vocoder_id=$OPTARG ;;
      f) out_path=$OPTARG ;;
      p) pipe_out=true ;;
      h) usage ;;
      \?)
        echo "Invalid option: -$OPTARG" >&2
        usage
        ;;
      :)
        echo "Invalid option: -$OPTARG requires an argument" >&2
        usage
        ;;
    esac
  done
}

parse_options "$@"
#######################################
# Ensure the selected model and vocoder IDs exist in the lookup tables.
# Globals:   model_id, vocoder_id, model_ids, vocoder_ids (read)
# Arguments: none
# Outputs:   an error message on stderr when an ID is missing or unknown
# Returns:   0 when both IDs are valid; otherwise calls usage, which exits
#######################################
validate_ids() {
  # The emptiness test comes first and short-circuits the lookup: an empty
  # key is not a valid array subscript, so it must never reach the lookup.
  if [[ -z "$model_id" || -z "${model_ids[$model_id]:-}" ]]; then
    echo "Invalid model ID: $model_id. Valid models: ${!model_ids[*]}" >&2
    usage
  fi
  if [[ -z "$vocoder_id" || -z "${vocoder_ids[$vocoder_id]:-}" ]]; then
    echo "Invalid vocoder ID: $vocoder_id. Valid vocoders: ${!vocoder_ids[*]}" >&2
    usage
  fi
}

validate_ids
#######################################
# Decide where the synthesised WAV file is written.
# Globals:   out_path (read/written); model_id, vocoder_id, TEMP, TMPDIR (read)
# Arguments: none
# Outputs:   an error message on stderr when the target directory is unusable
# Returns:   0 on success; calls usage (which exits) when the directory of a
#            user-supplied path is missing or not writable
#######################################
resolve_out_path() {
  local out_dir tmp_dir

  if [[ -z "$out_path" ]]; then
    # TEMP is not guaranteed to be set; without a fallback the default
    # would expand to "/tts_..." in the filesystem root.
    tmp_dir=${TEMP:-${TMPDIR:-/tmp}}
    out_path="${tmp_dir%/}/tts_m${model_id}_v${vocoder_id}-$(date "+%Y%m%d-%H%M%S").wav"
    return 0
  fi

  # The conversion step names the MP3 by swapping the extension of out_path
  # for ".mp3". A path that already ends in ".mp3" would make the WAV input
  # and the MP3 output the same file, so synthesise to a ".wav" sibling.
  if [[ "$out_path" == *.[mM][pP]3 ]]; then
    out_path="${out_path%.*}.wav"
  fi

  # Ensure the provided path is writable
  out_dir=$(dirname -- "$out_path")
  if [[ ! -d "$out_dir" || ! -w "$out_dir" ]]; then
    echo "Invalid output directory: $out_dir" >&2
    usage
  fi
}

resolve_out_path
# Retrieve the corresponding model and vocoder names
model_name="${model_ids[$model_id]}"
vocoder_name="${vocoder_ids[$vocoder_id]}"

# Build the command as an array instead of a string passed to eval: every
# value stays a single argument, so text containing quotes, "$(...)" or
# backticks is handed to tts verbatim and is never re-parsed by the shell.
tts_command=(
  tts
  --text "$text"
  --model_name "$model_name"
  --vocoder_name "$vocoder_name"
  --out_path "$out_path"
)

# Log the command shell-quoted (%q) so the printed line can be copy-pasted.
printf 'Executing:'
printf ' %q' "${tts_command[@]}"
printf '\n'

# Stop here when synthesis fails; later steps need the generated WAV file.
if ! "${tts_command[@]}"; then
  echo "Error: 'tts' failed; no audio file was generated." >&2
  exit 1
fi
# Convert the WAV file to MP3
if ! command -v ffmpeg &>/dev/null; then
  echo "Error: 'ffmpeg' is not installed. Please install it to convert the audio wav file to mp3." >&2
  exit 1
fi

# The MP3 sits next to the WAV: same path with the extension swapped.
OUTPUT_FILE="${out_path%.*}.mp3"

# Only report success (and delete the WAV) when ffmpeg actually succeeded.
if ! ffmpeg -i "$out_path" -b:a 128k "$OUTPUT_FILE"; then
  echo "Error: ffmpeg failed to convert $out_path to $OUTPUT_FILE; the WAV file was kept." >&2
  exit 1
fi

# Optional playback (-p). aplay is given the WAV rather than the MP3, so
# playback has to happen before the WAV file is removed.
if [[ "$pipe_out" == true ]]; then
  if command -v aplay &>/dev/null; then
    aplay "$out_path" || echo "Warning: playback with aplay failed." >&2
  else
    echo "Warning: 'aplay' is not installed; skipping playback." >&2
  fi
fi

rm -f -- "$out_path"
echo "File converted to mp3 and original WAV file deleted. Output file: $OUTPUT_FILE"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment