Last active
January 9, 2025 17:34
-
-
Save Spiritdude/5258f3e2829537572e733781eb984017 to your computer and use it in GitHub Desktop.
Kokoro ONNX TTS Server (OpenAI compatible)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
Description:
   Implements OpenAI Speech API backend:
      https://platform.openai.com/docs/api-reference/audio/createSpeech

Client-side:
   from openai import OpenAI
   import pyaudio

   player_stream = pyaudio.PyAudio().open(format=pyaudio.paInt16, channels=1, rate=24000, output=True)
   # the OpenAI client insists on an API key; this server never checks it
   client = OpenAI(base_url="http://localhost:7008/v1", api_key="unused")
   with client.audio.speech.with_streaming_response.create(
      model="kokoro",   # required by the client, ignored by this server
      voice=vid,
      input=txt
   ) as response:
      for chunk in response.iter_bytes(chunk_size=1024):
         player_stream.write(chunk)

   or use it with open-webui (Admin -> Settings -> Audio -> TTS Engine: OpenAI, TTS URL: http://localhost:7008/v1)

History:
   2025/01/09: start
'''
import io
import os

from dotenv import load_dotenv
from flask import Flask, Response, jsonify, request

# Statement order is kept from the original script: the .env file is loaded
# and the Flask app is created before the audio libraries are imported.
load_dotenv()
app = Flask(__name__)

import soundfile as sf
from kokoro_onnx import Kokoro

# One shared synthesizer for the whole process, built from the model and
# voice files named below (relative paths).
kokoro = Kokoro("kokoro-v0_19.onnx", "voices.json")

# Voice identifiers reported by the model; the request handler checks the
# requested voice against this collection.
voices = kokoro.get_voices()
print("available voices:", list(voices))
@app.route('/v1/audio/speech', methods=['POST'])
def text_to_speech():
    """Synthesize speech for an OpenAI-style ``/v1/audio/speech`` request.

    Reads these keys from the JSON request body:

    * ``input`` -- text to speak (required, non-empty string).
    * ``voice`` -- voice id; anything not in ``voices`` falls back to ``'af'``.
    * ``speed`` -- playback speed multiplier, default ``1.0``.
    * ``language`` -- language code handed to Kokoro, default ``'en-us'``.
    * ``response_format`` -- ``'pcm'`` for headerless 16-bit little-endian
      samples, otherwise a soundfile format name such as ``'wav'`` (the
      default) or ``'flac'``.
    * ``model`` -- accepted but ignored.

    Returns:
        The encoded audio as the response body on success, or a JSON object
        ``{"error": ...}`` with HTTP status 400 on any failure.
    """
    # silent=True turns a missing or malformed JSON body into None, so the
    # caller gets the explicit message below instead of a parser exception.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'request body must be a JSON object'}), 400

    text = data.get('input')
    if not isinstance(text, str) or not text.strip():
        return jsonify({'error': "'input' must be a non-empty string"}), 400

    lang = data.get('language', 'en-us')

    fmt = str(data.get('response_format') or 'wav').lower()
    # fmt ends up in the Content-Type and Content-Disposition headers, so
    # restrict it to plain alphanumerics.
    if not fmt.isalnum():
        return jsonify({'error': f'unsupported response_format: {fmt!r}'}), 400

    voice = data.get('voice', 'af')
    if not isinstance(voice, str) or voice not in voices:
        voice = 'af'

    speed = data.get('speed')
    try:
        speed = 1.0 if speed is None else float(speed)
    except (TypeError, ValueError):
        return jsonify({'error': "'speed' must be a number"}), 400

    try:
        samples, sample_rate = kokoro.create(text, voice=voice, speed=speed, lang=lang)
        audio_buffer = io.BytesIO()
        if fmt == 'pcm':
            # soundfile cannot infer a container for a file-like object, so
            # the headerless RAW format has to be named explicitly together
            # with its sample encoding.
            sf.write(audio_buffer, samples, sample_rate,
                     format='RAW', subtype='PCM_16', endian='LITTLE')
        else:
            sf.write(audio_buffer, samples, sample_rate, format=fmt.upper())
    except Exception as e:
        # Kept as a catch-all returning 400, as the endpoint always has, so
        # existing clients see the same status for synthesis/encoding errors.
        return jsonify({'error': str(e)}), 400

    # 'audio/mpeg' is the registered MIME type for MP3 data.
    mimetype = 'audio/mpeg' if fmt == 'mp3' else f'audio/{fmt}'
    # Send the bytes rather than the BytesIO object: iterating a file object
    # would split the binary payload at newline bytes.
    return Response(
        audio_buffer.getvalue(),
        mimetype=mimetype,
        headers={"Content-Disposition": f"attachment;filename=audio.{fmt}"},
    )
if __name__ == '__main__':
    # The server listens on all interfaces, so the Werkzeug interactive
    # debugger must not be enabled unconditionally: anyone who can reach the
    # port could use it to execute code. Debug mode is opt-in through the
    # FLASK_DEBUG environment variable, which may also be set in the .env
    # file loaded at import time. Any value other than empty/0/false/no
    # enables it.
    debug_flag = os.environ.get('FLASK_DEBUG', '')
    debug_enabled = bool(debug_flag) and debug_flag.lower() not in ('0', 'false', 'no')
    app.run(debug=debug_enabled, port=7008, host='0.0.0.0')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment