livekit/turn-detector test
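A standalone script that downloads the quantized end-of-utterance (EOU) model (model_q8.onnx, revision v1.2.0) from the livekit/turn-detector repo on Hugging Face, renders a short chat context with the model's chat template, and prints the probability that the user's last utterance is complete.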
import onnxruntime as ort
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download, errors

# Constants
HG_MODEL = "livekit/turn-detector"
ONNX_FILENAME = "model_q8.onnx"
MODEL_REVISION = "v1.2.0"
MAX_HISTORY = 4
MAX_HISTORY_TOKENS = 512

# Ensure the model file exists locally, downloading it if necessary
try:
    model_path = hf_hub_download(
        repo_id=HG_MODEL,
        filename=ONNX_FILENAME,
        subfolder="onnx",
        revision=MODEL_REVISION,
        local_files_only=False,  # Set to True if you've already downloaded it
    )
except errors.LocalEntryNotFoundError:
    raise RuntimeError(
        f"Could not find {ONNX_FILENAME}. Make sure the model is available on Hugging Face."
    )

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    HG_MODEL,
    revision=MODEL_REVISION,
    truncation_side="left",
)

# Load ONNX model
session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])

# Chat context
chat_ctx = [
    # {"role": "user", "content": "Hello how are you"},
    # {"role": "assistant", "content": "I'm doing well, thank you! How can I assist you today"},
    {"role": "user", "content": "I need help with my printer. "},
]
chat_ctx = chat_ctx[-MAX_HISTORY:]


def format_chat_ctx(chat_ctx: list[dict]) -> str:
    # Drop empty messages, then render the conversation with the chat template
    new_chat_ctx = [msg for msg in chat_ctx if msg["content"]]
    convo_text = tokenizer.apply_chat_template(
        new_chat_ctx,
        add_generation_prompt=False,
        add_special_tokens=False,
        tokenize=False,
    )
    # Remove the EOU token from the current utterance
    ix = convo_text.rfind("<|im_end|>")
    return convo_text[:ix] if ix != -1 else convo_text


# Format chat context
text = format_chat_ctx(chat_ctx)

# Tokenize input
inputs = tokenizer(
    text,
    add_special_tokens=False,
    return_tensors="np",  # the ONNX runtime expects NumPy inputs
    max_length=MAX_HISTORY_TOKENS,
    truncation=True,
)

# Run inference
outputs = session.run(None, {"input_ids": inputs["input_ids"]})
eou_probability = outputs[0][0]  # Extract the end-of-utterance probability

# Output result
print(f"End-of-Utterance Probability: {eou_probability}")