Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python3
"""Run vLLM ``benchmark_serving`` scenarios against local OpenAI-compatible servers.
Originally derived from ``script_run_jio.sh``. For each scenario triple in ``TRIPLES``,
starts one ``benchmark_serving.py`` subprocess per port in parallel (thread pool).
"""
from __future__ import annotations
import argparse
#!/usr/bin/env python3
"""Launch four furiosa-llm Docker containers on npu:0..3 with non-conflicting host ports.
Uses ``docker run -d --rm`` so all four can run at once. Host ports:
instance i -> 8000+i on host and inside the container.
Requires HF_TOKEN in the environment if the image needs it (same as your shell).
After each ``docker run``, the script optionally polls ``GET http://<ready-host>:<port><ready-path>``
until it receives HTTP 2xx (default path ``/v1/models``), then continues with the next container.
# https://qwen.ai/blog?id=qwen3-vl-embedding
from pudb import set_trace
from furiosa_llm import LLM, PoolingParams
import numpy as np
import torch
queries = ["A woman playing with her dog on a beach at sunset.",
"Pet owner training dog outdoors near water.",
"Woman surfing on waves during a sunny day.",
import onnxruntime as ort
import numpy as np
import torch
print(f"onnx runtime version:{ort.__version__}")
# Check if PyTorch can see an available GPU \
if torch.cuda.is_available():
import argparse
import os
import onnx
import onnx_safetensors
INPUT_DIR = "onnx-files"  # input directory: the ONNX model files to process must be placed here
OUTPUT_DIR = "safetensors-files"  # output directory: generated files are written here (presumably safetensors conversions — TODO confirm)
@aurotripathy
aurotripathy / resnet-on-onnxruntime.py
Last active March 13, 2026 01:17
resnet on onnx runtime
import onnxruntime # to inference ONNX models, we use the ONNX Runtime
import onnx
from onnx import numpy_helper
import urllib.request
import json
import time
import torch
import numpy as np
import onnxruntime
import numpy as np
from ultralytics import YOLO
# Load a pretrained YOLOv8-nano checkpoint (fetched/located by the ultralytics library)
model = YOLO('yolov8n.pt')
# Export the loaded model to ONNX format so it can be run with ONNX Runtime
model.export(format='onnx')
import onnx
import onnx_tool
# Select the ONNX model file to inspect.
# Alternative models (switch by uncommenting one of these):
# model_path = "resnet34_1_3_416_640.onnx"
# model_path = "pointpillar_custom.onnx"
model_path = "detr_1_3_512_512.onnx"  # presumably a DETR detector with 1x3x512x512 input (per filename — TODO confirm)
print(f"Model: {model_path}")
# Use onnx.load to get the model proto object
#!/bin/bash
# Abort with a usage message unless exactly two positional arguments were supplied.
[ "$#" -eq 2 ] || {
    echo "Usage: $0 <results_suffix> <port_num>"
    exit 1
}
# First argument: suffix for the results; second argument: port number.
results_suffix="$1"
port_num="$2"
@aurotripathy
aurotripathy / flowsettings.py
Last active June 4, 2025 21:15
Kotaemon flowsettings.py file for configuring the project
import os
from importlib.metadata import version
from inspect import currentframe, getframeinfo
from pathlib import Path
from decouple import config
from ktem.utils.lang import SUPPORTED_LANGUAGE_MAP
from theflow.settings.default import * # noqa
# Capture the current stack frame at module import time (presumably to derive this file's path/line info — TODO confirm against the lines that follow)
cur_frame = currentframe()