Skip to content

Instantly share code, notes, and snippets.

@ldenoue
Created June 18, 2025 15:52
Show Gist options
  • Save ldenoue/e0440834cb3c894874f8f3a89c33066a to your computer and use it in GitHub Desktop.
livekit/turn-detector test
import os
import onnxruntime as ort
import numpy as np
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download, errors
# Constants
HG_MODEL = "livekit/turn-detector"  # Hugging Face repo id for the turn-detector model
ONNX_FILENAME = "model_q8.onnx"  # quantized (int8) ONNX export, fetched from the repo's "onnx" subfolder
MODEL_REVISION = "v1.2.0"  # pin a specific model revision for reproducibility
MAX_HISTORY = 4  # keep only the last N chat messages as context
MAX_HISTORY_TOKENS = 512  # tokenizer truncation limit (left-truncated, so the newest tokens survive)
# Ensure the ONNX model file exists locally, downloading it from the Hub if needed.
try:
    model_path = hf_hub_download(
        repo_id=HG_MODEL,
        filename=ONNX_FILENAME,
        subfolder="onnx",
        revision=MODEL_REVISION,
        local_files_only=False,  # Set to True if you've already downloaded it
    )
except errors.LocalEntryNotFoundError as e:
    # Chain the original error so the underlying cause stays visible in the traceback.
    raise RuntimeError(
        f"Could not find {ONNX_FILENAME}. Make sure the model is available on Hugging Face."
    ) from e
# Tokenizer must match the pinned model revision; left-side truncation keeps
# the most recent tokens when the history exceeds MAX_HISTORY_TOKENS.
tokenizer = AutoTokenizer.from_pretrained(
    HG_MODEL, revision=MODEL_REVISION, truncation_side="left"
)

# CPU-only ONNX Runtime session for the quantized turn-detector model.
session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
# Conversation history fed to the turn detector (oldest message first).
# Uncomment the earlier turns to experiment with a longer context.
chat_ctx = [
    #{"role": "user", "content": "Hello how are you"},
    #{"role": "assistant", "content": "I'm doing well, thank you! How can I assist you today"},
    {"role": "user", "content": "I need help with my printer. "},
]
# Only the most recent MAX_HISTORY messages are relevant to the model.
chat_ctx = chat_ctx[-MAX_HISTORY:]
def format_chat_ctx(chat_ctx: list[dict]) -> str:
    """Render the chat history into the model's prompt string.

    Empty-content messages are dropped, the remaining messages are run
    through the tokenizer's chat template (untokenized), and the trailing
    ``<|im_end|>`` end-of-utterance marker is stripped so the model can
    score whether the current utterance is actually finished.

    Args:
        chat_ctx: Messages with ``role`` and ``content`` keys.

    Returns:
        The formatted conversation text, without the final EOU token.
    """
    new_chat_ctx = []
    for msg in chat_ctx:
        content = msg["content"]
        if not content:
            continue  # skip empty turns; they carry no signal
        new_chat_ctx.append(msg)
    convo_text = tokenizer.apply_chat_template(
        new_chat_ctx,
        add_generation_prompt=False,
        add_special_tokens=False,
        tokenize=False,
    )
    # Remove the EOU token from current utterance
    ix = convo_text.rfind("<|im_end|>")
    text = convo_text[:ix] if ix != -1 else convo_text
    return text
# Build the prompt from the chat history.
text = format_chat_ctx(chat_ctx)

# Tokenize for ONNX Runtime: NumPy tensors, left-truncated to the token budget.
inputs = tokenizer(
    text,
    add_special_tokens=False,
    return_tensors="np",  # ONNX requires NumPy format
    max_length=MAX_HISTORY_TOKENS,
    truncation=True,
)

# Single forward pass; the first output holds the end-of-utterance score.
outputs = session.run(None, {"input_ids": inputs["input_ids"]})
eou_probability = outputs[0][0]  # Extract probability

print(f"End-of-Utterance Probability: {eou_probability}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment