Skip to content

Instantly share code, notes, and snippets.

@JupyterJones
Created May 23, 2025 19:25
Show Gist options
  • Save JupyterJones/3bf75177a9da4683841104f12228fb78 to your computer and use it in GitHub Desktop.
Save JupyterJones/3bf75177a9da4683841104f12228fb78 to your computer and use it in GitHub Desktop.
Convert text to audio using the Balacoon TTS library.
from balacoon_tts import TTS
import soundfile as sf
import numpy as np
import traceback
import os
import subprocess
import re
from icecream import ic
import sys
# >>> ============================================================ <<<
# >>> IMPORTANT: SET YOUR BALACOON MODEL ADDON FILE PATH HERE <<<
# Path to the Balacoon ".addon" model file; it is handed to TTS() when this
# module is loaded, so it must point at an existing file on this machine.
BALACOON_MODEL_PATH = "/home/jack/hidden/en_us_cmuartic_jets_cpu.addon" # EDIT THIS
# >>> ============================================================ <<<
# Placeholders for the shared TTS engine and the chosen speaker; both are
# reassigned by the module-level initialization code further down.
tts_instance = None
selected_speaker = None
# Output directory for generated audio; a relative path, so it resolves
# against the current working directory of the process.
AUDIO_FOLDER = 'static/new_audio'
# ---------------- SAFE FILENAME FUNCTION -------------------------
def safe_filename(text, count):
    """Build a filesystem-safe base filename from the first five words of *text*.

    The words are joined with underscores, every character that is not a
    word character, whitespace or a hyphen is dropped, spaces and hyphens
    become underscores, and the result is capped at 50 characters.

    Args:
        text: Source text whose leading words name the file.
        count: Sequence number of the entry; used only for the fallback name.

    Returns:
        The cleaned name, or ``entry_<count>_audio`` when nothing usable is
        left (empty text, or leading words made only of punctuation).
    """
    first_words = "_".join(text.split()[:5])
    # Remove unsafe characters
    cleaned = re.sub(r'[^\w\s-]', '', first_words)
    cleaned = cleaned.replace(" ", "_").replace("-", "_")[:50]
    # Punctuation-only words leave just the joining underscores behind; such a
    # name carries no information, so treat it the same as an empty result.
    if not cleaned.strip("_"):
        cleaned = f"entry_{count}_audio"
    return cleaned
# ---------------- INITIALIZE TTS ----------------------------------
# Runs when the module is loaded: builds the shared TTS instance from
# BALACOON_MODEL_PATH and picks the speaker used by generate_audio_file().
ic(f"Loading Balacoon model from: {BALACOON_MODEL_PATH}")
tts_instance = TTS(BALACOON_MODEL_PATH)
ic("Balacoon TTS instance created.")
supported_speakers = tts_instance.get_speakers()
ic(supported_speakers)
# Third speaker from the end of the list; per the original author's note this
# is a pleasant male voice.
# NOTE(review): presumably get_speakers() returns a list; indexing with -3
# would raise IndexError if it holds fewer than three entries — confirm.
selected_speaker = supported_speakers[-3]
ic(f"Automatically selected speaker: '{selected_speaker}'")
# ---------------- FUNCTION TO GENERATE AUDIO --------------------
def generate_audio_file(text, count, filename):
    """Synthesize *text* to speech and store it as an MP3 inside AUDIO_FOLDER.

    The text is synthesized with the module-level TTS instance and speaker,
    written to an intermediate WAV file, converted to MP3 by running
    ``ffmpeg`` as a subprocess, and the WAV file is then deleted.

    Args:
        text: Text to speak. ``*`` characters are removed and ``…`` is
            replaced by ``", "`` before synthesis.
        count: Identifier of the entry; used only in error messages.
        filename: Base name (without extension) for the output files.

    Returns:
        A ``(path, error)`` tuple: ``("audio/<filename>.mp3", None)`` on
        success, ``(None, message)`` on failure. Exceptions raised during
        generation are caught, logged and reported through ``message``
        instead of propagating to the caller.
    """
    if not tts_instance or not selected_speaker:
        ic("Error: Balacoon TTS instance or speaker not available.")
        return None, "Balacoon TTS not initialized correctly."

    wav_path = None
    try:
        os.makedirs(AUDIO_FOLDER, exist_ok=True)
        base_filename = f"{filename}"
        wav_path = os.path.join(AUDIO_FOLDER, base_filename + ".wav")
        mp3_path = os.path.join(AUDIO_FOLDER, base_filename + ".mp3")
        ic(wav_path, mp3_path)

        text = text.replace("*", "").replace("…", ", ")
        if not text.strip():
            # Nothing left to speak; report it without invoking the engine.
            error_message = f"Error generating audio for ID {count}: text is empty"
            ic(error_message)
            return None, error_message

        ic("Synthesizing WAV...")
        audio_data = tts_instance.synthesize(text=text, speaker=selected_speaker)
        ic(audio_data.shape, audio_data.dtype)

        # Save WAV
        # NOTE(review): the sample rate is hard-coded; confirm it matches the
        # rate of the model configured in BALACOON_MODEL_PATH.
        balacoon_samplerate = 24000
        sf.write(wav_path, audio_data, balacoon_samplerate)
        ic(f"WAV saved: {wav_path}")

        # Convert to MP3 using ffmpeg
        ic("Converting to MP3...")
        command = [
            "ffmpeg",
            "-y",  # overwrite an existing MP3 without prompting
            "-i", wav_path,
            "-codec:a", "libmp3lame",
            "-qscale:a", "2",
            mp3_path,
        ]
        # check=True turns a non-zero ffmpeg exit status into an exception,
        # which the handler below converts into an error result.
        subprocess.run(command, check=True)
        ic(f"MP3 created: {mp3_path}")

        # NOTE(review): the returned path starts with "audio/" although the
        # file is written under AUDIO_FOLDER — confirm this is the intended
        # mapping for whoever consumes the return value.
        static_path = os.path.join("audio", filename + ".mp3").replace("\\", "/")
        return static_path, None
    except Exception as e:
        error_message = f"Error generating audio for ID {count}: {e}"
        ic(error_message)
        traceback.print_exc()
        return None, error_message
    finally:
        # Remove the .wav to save space. Doing this in `finally` also cleans
        # up after a failed synthesis or conversion, and a failed delete no
        # longer turns an otherwise successful run into an error.
        if wav_path is not None and os.path.exists(wav_path):
            try:
                os.remove(wav_path)
                ic(f"Deleted temp WAV: {wav_path}")
            except OSError as cleanup_error:
                ic(f"Could not delete temp WAV {wav_path}: {cleanup_error}")
# ------------------- READ TEXT FILE ------------------------------
def read_text(text_file):
    """Read *text_file* and generate one audio file per block of text.

    The file is read as UTF-8 and split into blocks wherever three
    consecutive newline characters occur. Each non-blank block is named with
    safe_filename() and passed to generate_audio_file().

    Args:
        text_file: Path of the text file to convert.

    Returns:
        None. Results are reported through the logging done by
        generate_audio_file().
    """
    with open(text_file, 'r', encoding='utf-8') as f:
        content = f.read()

    used_names = set()
    # `count` is the 1-based position of the block in the file, so numbering
    # stays tied to the source text even when blank blocks are skipped.
    for count, text in enumerate(content.split("\n\n\n"), start=1):
        if not text.strip():
            # Trailing separators or extra blank lines produce empty blocks;
            # there is nothing to synthesize for them.
            continue
        ic(count, text[:60])
        filename = safe_filename(text, count)
        if filename in used_names:
            # Blocks that open with the same five words would otherwise
            # overwrite each other's MP3 (ffmpeg runs with -y).
            filename = f"{filename}_{count}"
        used_names.add(filename)
        ic(f"Using filename: {filename}")
        generate_audio_file(text, count, filename)
# ------------------- MAIN EXECUTION BLOCK -----------------------
if __name__ == '__main__':
    # The script takes the path of the text file to convert as its only
    # argument, e.g. a file under 'static/TXT/'.
    if len(sys.argv) < 2:
        # Exit with a readable usage message instead of a bare IndexError.
        sys.exit(f"Usage: {sys.argv[0]} <text_file>")
    text_file = sys.argv[1]
    read_text(text_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment