Skip to content

Instantly share code, notes, and snippets.

@ldenoue
Created June 18, 2025 15:52
Show Gist options
  • Save ldenoue/e0440834cb3c894874f8f3a89c33066a to your computer and use it in GitHub Desktop.
livekit/turn-detector test
import os
import onnxruntime as ort
import numpy as np
from transformers import AutoTokenizer
from huggingface_hub import hf_hub_download, errors
# Constants
HG_MODEL = "livekit/turn-detector"  # Hugging Face repo id for the turn-detector model
ONNX_FILENAME = "model_q8.onnx"  # quantized (int8) ONNX export, fetched from the repo's "onnx" subfolder
MODEL_REVISION = "v1.2.0"  # pin a specific model revision for reproducibility
MAX_HISTORY = 4  # keep only the last N chat messages as context
MAX_HISTORY_TOKENS = 512  # tokenizer truncation limit (left-truncated, so the newest tokens survive)
# Ensure the ONNX model file exists locally, downloading it from the Hub if needed.
try:
    model_path = hf_hub_download(
        repo_id=HG_MODEL,
        filename=ONNX_FILENAME,
        subfolder="onnx",
        revision=MODEL_REVISION,
        local_files_only=False,  # Set to True if you've already downloaded it
    )
except errors.LocalEntryNotFoundError as e:
    # Chain the original error so the underlying cause stays visible in the traceback.
    raise RuntimeError(
        f"Could not find {ONNX_FILENAME}. Make sure the model is available on Hugging Face."
    ) from e
# Tokenizer must match the pinned model revision; left-side truncation keeps
# the most recent tokens when the history exceeds MAX_HISTORY_TOKENS.
tokenizer = AutoTokenizer.from_pretrained(
    HG_MODEL, revision=MODEL_REVISION, truncation_side="left"
)

# CPU-only ONNX Runtime session for the quantized turn-detector model.
session = ort.InferenceSession(model_path, providers=["CPUExecutionProvider"])
# Conversation history fed to the turn detector (oldest message first).
# Uncomment the earlier turns to experiment with a longer context.
chat_ctx = [
    #{"role": "user", "content": "Hello how are you"},
    #{"role": "assistant", "content": "I'm doing well, thank you! How can I assist you today"},
    {"role": "user", "content": "I need help with my printer. "},
]
# Only the most recent MAX_HISTORY messages are relevant to the model.
chat_ctx = chat_ctx[-MAX_HISTORY:]
def format_chat_ctx(chat_ctx: list[dict]) -> str:
    """Render the chat history into the model's prompt string.

    Empty-content messages are dropped, the remaining messages are run
    through the tokenizer's chat template (untokenized), and the trailing
    ``<|im_end|>`` end-of-utterance marker is stripped so the model can
    score whether the current utterance is actually finished.

    Args:
        chat_ctx: Messages with ``role`` and ``content`` keys.

    Returns:
        The formatted conversation text, without the final EOU token.
    """
    new_chat_ctx = []
    for msg in chat_ctx:
        content = msg["content"]
        if not content:
            continue  # skip empty turns; they carry no signal
        new_chat_ctx.append(msg)
    convo_text = tokenizer.apply_chat_template(
        new_chat_ctx,
        add_generation_prompt=False,
        add_special_tokens=False,
        tokenize=False,
    )
    # Remove the EOU token from current utterance
    ix = convo_text.rfind("<|im_end|>")
    text = convo_text[:ix] if ix != -1 else convo_text
    return text
# Build the prompt from the chat history.
text = format_chat_ctx(chat_ctx)

# Tokenize for ONNX Runtime: NumPy tensors, left-truncated to the token budget.
inputs = tokenizer(
    text,
    add_special_tokens=False,
    return_tensors="np",  # ONNX requires NumPy format
    max_length=MAX_HISTORY_TOKENS,
    truncation=True,
)

# Single forward pass; the first output holds the end-of-utterance score.
outputs = session.run(None, {"input_ids": inputs["input_ids"]})
eou_probability = outputs[0][0]  # Extract probability

print(f"End-of-Utterance Probability: {eou_probability}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment