Created
January 7, 2025 16:25
-
-
Save vijinho/50553202da24cfa73347b65f7df74057 to your computer and use it in GitHub Desktop.
Wrapper for TTS https://github.com/coqui-ai/TTS that takes model and vocoder ids and outputs a wav or mp3
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# Lookup tables that translate the short numeric IDs accepted by the -m and
# -v options into the full model / vocoder names handed to the `tts` CLI.
# Keys are the IDs; values are the names.
declare -A model_ids=(
  [12]="tts_models/en/ljspeech/tacotron2-DDC_ph"
  [15]="tts_models/en/ljspeech/tacotron2-DCA"
)
declare -A vocoder_ids=(
  [4]="vocoder_models/en/ljspeech/multiband-melgan"
  [6]="vocoder_models/en/ljspeech/univnet"
)
#######################################
# Print the help text and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 1).
# Outputs:   the help text, on stdout when the status is 0 (help was asked
#            for) and on stderr otherwise (help follows an error).
# Returns:   never; always exits.
#######################################
usage() {
  local status=${1:-1}
  local fd=2
  if [[ "$status" == 0 ]]; then
    fd=1
  fi
  cat >&"$fd" <<EOF
Usage: $0 [-t <text>] -m <model_id> -v <vocoder_id> [-f <output_file>] [-p]
Options:
  -t <text>        : The text to speak
  -m <model_id>    : Model ID (12 or 15).
  -v <vocoder_id>  : Vocoder ID (4 or 6).
  -f <output_file> : Path to save the output file. Default: /tmp/tts_m<model_id>_v<vocoder_id>-YYYYMMDD-HHMMSS.wav
                     The WAV is converted to an MP3 with the same base name and then deleted.
  -p               : Play the generated sound file with aplay (for audio playback).
  -h               : Display this help message.
Examples:
  $0 -m 12 -v 4 -f /home/user/output.wav
  $0 -t "Hello world" -m 15 -v 6 -p
EOF
  exit "$status"
}
# Option defaults. out_path and pipe_out are initialised explicitly so that
# same-named variables inherited from the environment cannot leak in.
model_id=""
vocoder_id=""
out_path=""
pipe_out=false
text="TESTING. This is a test because no text was specified."

#######################################
# Parse the command-line flags into the option globals.
# Globals:   text, model_id, vocoder_id, out_path, pipe_out (written)
# Arguments: the script's positional parameters
# Outputs:   error messages on stderr for unknown flags / missing arguments
# Notes:     calls usage (which exits) on -h and on any parsing error.
#######################################
parse_args() {
  # OPTIND is local so the function can be called more than once.
  local opt OPTARG OPTIND=1
  # The leading ':' selects getopts' silent mode: a missing argument is then
  # reported as ':' and an unknown flag as '?', with the offending flag
  # character stored in OPTARG. Without it the ':' arm is never reached and
  # OPTARG is empty in the '?' arm.
  while getopts ":t:m:v:f:ph" opt; do
    case "$opt" in
      t) text=$OPTARG ;;
      m) model_id=$OPTARG ;;
      v) vocoder_id=$OPTARG ;;
      f) out_path=$OPTARG ;;
      p) pipe_out=true ;;
      h) usage 0 ;;
      \?)
        echo "Invalid option: -$OPTARG" 1>&2
        usage
        ;;
      :)
        echo "Invalid option: -$OPTARG requires an argument" 1>&2
        usage
        ;;
    esac
  done
}
parse_args "$@"
# Validate model_id and vocoder_id against the lookup tables.
# The emptiness test comes first (and short-circuits) because expanding an
# associative array with an empty key makes bash report "bad array subscript"
# instead of simply yielding an empty string.
if [[ -z "$model_id" || -z "${model_ids[$model_id]}" ]]; then
  echo "Invalid model ID: $model_id. Valid models: ${!model_ids[*]}" >&2
  usage
fi
if [[ -z "$vocoder_id" || -z "${vocoder_ids[$vocoder_id]}" ]]; then
  echo "Invalid vocoder ID: $vocoder_id. Valid vocoders: ${!vocoder_ids[*]}" >&2
  usage
fi
# Decide where the WAV file is written.
if [[ -z "$out_path" ]]; then
  # No -f given: build a timestamped name in the temporary directory.
  # $TEMP is honoured for backward compatibility, but it is usually unset on
  # Linux, which previously put the file directly under "/"; fall back to
  # $TMPDIR and finally /tmp.
  tmp_root="${TEMP:-${TMPDIR:-/tmp}}"
  out_path="${tmp_root%/}/tts_m${model_id}_v${vocoder_id}-$(date "+%Y%m%d-%H%M%S").wav"
else
  # -f given: the containing directory must exist and be writable.
  out_dir=$(dirname -- "$out_path")
  if [[ ! -d "$out_dir" || ! -w "$out_dir" ]]; then
    echo "Invalid output directory: $out_dir" >&2
    usage
  fi
fi
# Retrieve the corresponding model and vocoder names
model_name="${model_ids[$model_id]}"
vocoder_name="${vocoder_ids[$vocoder_id]}"
# Execute the TTS command.
# The command is held in an array and run directly rather than being built
# as a string and passed to eval: with eval, any quote, backtick or $(...)
# in the user-supplied text was re-interpreted by the shell.
tts_command=(
  tts
  --text "$text"
  --model_name "$model_name"
  --vocoder_name "$vocoder_name"
  --out_path "$out_path"
)
# %q prints each argument shell-quoted, so the logged line is copy-pasteable.
printf 'Executing:'
printf ' %q' "${tts_command[@]}"
printf '\n'
if ! "${tts_command[@]}"; then
  echo "Error: 'tts' failed; no audio was generated." >&2
  exit 1
fi
# Convert the WAV file to MP3 (and optionally play it first).
if ! command -v ffmpeg &>/dev/null; then
  echo "Error: 'ffmpeg' is not installed. Please install it to convert the audio wav file to mp3." >&2
  exit 1
fi
# Playback happens before the conversion because the WAV is deleted
# afterwards and aplay is given the WAV rather than the MP3.
# NOTE(review): assumes aplay cannot decode MP3 - confirm against aplay(1).
if [ "$pipe_out" = true ]; then
  if command -v aplay &>/dev/null; then
    aplay "$out_path"
  else
    echo "Warning: 'aplay' is not installed; skipping playback." >&2
  fi
fi
# Derive the MP3 name from the file name only, so that a dot in a directory
# name (e.g. /tmp/my.dir/out) is not mistaken for the file's extension.
if [[ "${out_path##*/}" == *.* ]]; then
  OUTPUT_FILE="${out_path%.*}.mp3"
else
  OUTPUT_FILE="${out_path}.mp3"
fi
# ffmpeg cannot read from and write to the same file.
if [[ "$OUTPUT_FILE" == "$out_path" ]]; then
  echo "Error: cannot convert $out_path in place; pass -f a path that does not end in .mp3. The WAV data was kept at $out_path." >&2
  exit 1
fi
if ffmpeg -i "$out_path" -b:a 128k "$OUTPUT_FILE"; then
  if rm -- "$out_path"; then
    echo "File converted to mp3 and original WAV file deleted. Output file: $OUTPUT_FILE"
  else
    echo "File converted to mp3 but the original WAV file could not be deleted. Output file: $OUTPUT_FILE" >&2
  fi
else
  echo "Error: ffmpeg failed to convert $out_path; the WAV file was kept." >&2
  exit 1
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment