Created
April 22, 2023 10:17
-
-
Save rumbu13/ef85148016853d5f63250e4ccd6b0353 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, Response, request, jsonify | |
from tempfile import NamedTemporaryFile | |
from faster_whisper import WhisperModel | |
import re | |
import time | |
import os | |
from statistics import mean | |
import logging | |
from unidecode import unidecode | |
import multiprocessing | |
# Flask application object; the route below is registered on it.
app = Flask(__name__)

# Loaded WhisperModel instances keyed by model name, so a model that has
# already been constructed is reused by later requests.
models = {}

# Emit INFO-level records through Flask's logger and note module start-up.
app.logger.setLevel(logging.INFO)
app.logger.info("Started")
def str2bool(v):
    """Interpret a textual flag as a boolean.

    The comparison is case-insensitive and ignores surrounding whitespace.

    Args:
        v: The flag text, e.g. a query-string value.

    Returns:
        True when ``v`` is one of "yes", "true", "on", "y" or "1";
        False for every other string (including the empty string).
    """
    # "1" and stray whitespace are common in hand-written query strings;
    # without them `?flag=1` or `?flag=yes%20` would silently read as False.
    return v.strip().lower() in ("yes", "true", "on", "y", "1")
@app.route('/api/speech-to-text', methods=['POST'])
def handler():
    """Transcribe the audio sent in the request body and return JSON.

    Query parameters:
        model: Whisper model name passed to ``WhisperModel`` (default "tiny").
        beamSize: Beam size passed to ``transcribe`` (default 5).
        language: Language code passed to ``transcribe`` (default "ro").
        stripAccents: Textual boolean; when true the cleaned text is passed
            through ``unidecode`` (default "no").

    The raw request body is written to a temporary ``.wav`` file, which is
    handed to the model by path and removed afterwards.

    Returns:
        A JSON response with the keys ``text`` (lower-cased, punctuation
        removed), ``original_text``, ``transcribe_seconds``, ``likelihood``
        and the echoed ``model``, ``beamSize``, ``language`` and
        ``stripAccents`` settings.
    """
    # remote_addr identifies the client; request.host would be this
    # server's own host name, which is not what the message claims to log.
    app.logger.info("Incoming connection from %s", request.remote_addr)

    modelName = request.args.get('model', 'tiny')
    beamSize = int(request.args.get('beamSize', 5))
    language = request.args.get('language', 'ro')
    stripAccents = str2bool(request.args.get('stripAccents', "no"))

    # Construct each model only on first use and keep it in the module cache.
    if modelName not in models:
        app.logger.warning("Model %s was not cached", modelName)
        models[modelName] = WhisperModel(
            modelName,
            device="cpu",
            compute_type="int8",
            cpu_threads=multiprocessing.cpu_count(),
        )
    model = models[modelName]

    data = request.get_data()

    # delete=False because the file is closed before the model reopens it by
    # name; removal is therefore done explicitly in the ``finally`` below so
    # the file does not leak when writing or transcription raises.
    temp = NamedTemporaryFile(suffix='.wav', delete=False)
    try:
        with temp:
            app.logger.info("Saving audio stream to %s", temp.name)
            temp.write(data)

        start_time = time.perf_counter()
        iterable, _ = model.transcribe(
            temp.name,
            language=language,
            beam_size=beamSize,
            temperature=0,
            without_timestamps=True,
        )
        app.logger.info("Processing audio stream")
        # Consuming the iterable is kept inside the timed region.
        segments = list(iterable)
        end_time = time.perf_counter()
    finally:
        os.remove(temp.name)

    app.logger.info("Audio stream converted to text in %f seconds", end_time - start_time)

    if segments:
        # NOTE(review): newer faster-whisper releases may expose this field
        # as ``avg_logprob`` -- confirm against the installed version.
        average = mean(segment.avg_log_prob for segment in segments)
    else:
        # mean() raises StatisticsError on empty input (e.g. no speech was
        # recognised); use -1 so the reported likelihood below becomes 0.
        average = -1.0

    text = " ".join(segment.text for segment in segments)
    # Replace each run of the listed punctuation (plus trailing spaces) with
    # a single space, then trim and lower-case.
    clean_text = re.sub(r"[,.;@#?!&$]+\ *", " ", text).strip().lower()
    if stripAccents:
        clean_text = unidecode(clean_text)

    return jsonify(
        text=clean_text,
        original_text=text,
        transcribe_seconds=(end_time - start_time),
        likelihood=1 + average,
        model=modelName,
        beamSize=beamSize,
        language=language,
        stripAccents=stripAccents,
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment