Skip to content

Instantly share code, notes, and snippets.

@ngxson
Created May 29, 2026 19:33
Show Gist options
  • Select an option

  • Save ngxson/d46506aab7e4a6b0f0df9f843d721cd6 to your computer and use it in GitHub Desktop.

Select an option

Save ngxson/d46506aab7e4a6b0f0df9f843d721cd6 to your computer and use it in GitHub Desktop.
llama.cpp test timing http header
#!/usr/bin/env python3
import http.client
import json
import time
HOST = "127.0.0.1"
PORT = 8080
URL = "/v1/chat/completions"

# Streaming chat-completion request with a long prompt ("Hi " repeated 1000
# times) and a short reply (max_tokens=10).
# NOTE(review): presumably the long prompt is there so that the delay between
# sending the request and receiving the headers / first chunk is large enough
# to observe -- confirm against the server behavior being tested.
payload = json.dumps({
    "model": "gpt-3.5-turbo",
    "stream": True,
    "max_tokens": 10,
    "messages": [
        {"role": "user", "content": "Hi " * 1000},
    ],
})

# Reference point for ts(). main() resets it right before the request is sent.
t0 = time.monotonic()


def ts():
    """Return the time elapsed since ``t0`` formatted as e.g. ``"1.234s"``."""
    return f"{time.monotonic() - t0:.3f}s"


def describe_line(line):
    """Format one stripped, non-empty line of the response body for printing.

    Lines starting with ``data: `` are reported as ``CHUNK: <payload>`` (the
    ``[DONE]`` payload included); anything else is reported as ``LINE: <text>``.
    Bytes that are not valid UTF-8 are replaced rather than raising.

    Args:
        line: The line as ``bytes``, without surrounding whitespace.

    Returns:
        The text to print after the timestamp.
    """
    prefix = b"data: "
    if line.startswith(prefix):
        return f"CHUNK: {line[len(prefix):].decode(errors='replace')}"
    return f"LINE: {line.decode(errors='replace')}"


def iter_stream_lines(stream):
    """Yield each non-blank line of *stream*, stripped, as soon as it is read.

    ``readline()`` returns when a newline has been received or the stream
    ends, so a final line that is not newline-terminated is still yielded
    instead of being dropped.

    Args:
        stream: Any binary stream with a ``readline()`` method that returns
            ``b""`` at end of stream (e.g. ``http.client.HTTPResponse``).

    Yields:
        ``bytes`` lines with leading/trailing whitespace removed.
    """
    for raw in iter(stream.readline, b""):
        line = raw.strip()
        if line:
            yield line


def main():
    """Send the request and print status, headers and body lines with timestamps."""
    global t0

    conn = http.client.HTTPConnection(HOST, PORT)
    # Start the clock immediately before the request goes out.
    t0 = time.monotonic()
    try:
        conn.request("POST", URL, body=payload, headers={
            "Content-Type": "application/json",
        })
        response = conn.getresponse()

        print(f"[{ts()}] STATUS: {response.status} {response.reason}")
        print(f"[{ts()}] HEADERS:")
        for name, value in response.getheaders():
            print(f"[{ts()}] {name}: {value}")

        print(f"[{ts()}] --- streaming chunks ---")
        for line in iter_stream_lines(response):
            print(f"[{ts()}] {describe_line(line)}")
    finally:
        # Release the socket even if the request or the stream fails.
        conn.close()
    print(f"[{ts()}] done")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment