Skip to content

Instantly share code, notes, and snippets.

@thewh1teagle
Last active July 3, 2025 02:41
Show Gist options
  • Save thewh1teagle/04252cbcf5cf32f395f5fc0498f8fee2 to your computer and use it in GitHub Desktop.
How to run old saspeech Hebrew model (old)
# Create an isolated Python 3.10 virtual environment with uv
uv venv -p3.10
# mecab: presumably required by the TTS package at runtime -- installed first
brew install mecab
# Install the forked Coqui TTS (shenberg/TTS) in editable mode, plus gdown
git clone https://github.com/shenberg/TTS
uv pip install -e TTS
uv pip install gdown
# Fetch the acoustic-model / vocoder checkpoints and configs from Google Drive
uvx gdown 1dExa0AZqmyjz8rSZz1noyQY9aF7dR8ew
uvx gdown 1eK1XR_ZwuUy4yWh80nui-q5PBifJsYfy
uvx gdown 1XdmRRHjZ_eZOFKoAQgQ8wivrLDJnNDkh
uvx gdown 1An6cTCYkxXWhagIJe3NGkoP8n2CQWQ-3
# Arrange the downloads into the directory layout the CLI flags below expect
mkdir tts_model hifigan_model
mv saspeech_nikud_7350.pth config_overflow.json tts_model/
mv checkpoint_500000.pth config_hifigan.json hifigan_model/

# Synthesize one test utterance; empty CUDA_VISIBLE_DEVICES forces CPU
CUDA_VISIBLE_DEVICES= uv run tts --text "שָׁלוֹם וּבְרָכָה נִפָּרֵד בְּשִׂמְחָה מִמֻּמֵּן" \
        --model_path tts_model/saspeech_nikud_7350.pth \
        --config_path tts_model/config_overflow.json \
        --vocoder_path hifigan_model/checkpoint_500000.pth \
        --vocoder_config_path hifigan_model/config_hifigan.json \
        --out_path test.wav

Then use:

"""Batch-synthesize Hebrew speech with the local saspeech checkpoint.

The model is constructed a single time and then reused, so only the
first generation pays the model-loading cost.
"""
import sys
import os

# Make the locally cloned fork importable before importing TTS below.
sys.path.append('./TTS')

from TTS.api import TTS

# Checkpoint/config locations produced by the setup commands above.
_MODEL_FILES = dict(
    model_path="tts_model/saspeech_nikud_7350.pth",
    config_path="tts_model/config_overflow.json",
    vocoder_path="hifigan_model/checkpoint_500000.pth",
    vocoder_config_path="hifigan_model/config_hifigan.json",
)

# Load model once
print("Loading TTS model...")
tts = TTS(gpu=False, **_MODEL_FILES)
print("Model loaded!")

# Niqqud-annotated Hebrew test sentence
text = "שָׁלוֹם וּבְרָכָה נִפָּרֵד בְּשִׂמְחָה מִמֻּמֵּן"

# Generate several files in a row (fast after the first load)
for n in range(3):
    print(f"Generating speech {n+1}...")
    tts.tts_to_file(text=text, file_path=f"test_{n+1}.wav")
    print(f"Saved to test_{n+1}.wav")
"""
uv venv -p3.10
uv pip insatll numpy==1.26.4 soundfile transformers
uv pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
"""
from transformers import VitsModel, AutoTokenizer
import torch
import soundfile as sf
# Load the model and tokenizer
model = VitsModel.from_pretrained("facebook/mms-tts-heb")
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-heb")
# Define the text
text = "שלום עולם! אני מדבר ישירות מהמחשב"
# Tokenize the text
inputs = tokenizer(text, return_tensors="pt")
# Generate the speech waveform
with torch.no_grad():
output = model(**inputs).waveform
# Convert the output tensor to a numpy array and ensure it's in the correct shape
output_np = output.squeeze().cpu().numpy()
# Save the waveform to a WAV file
sf.write("audio.wav", output_np, samplerate=model.config.sampling_rate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment