johnandersen777 · May 16, 2026 20:16
diff --git a/QWEN_3_6.md b/QWEN_3_6.md
diff --git a/qwen_example.py b/qwen_example.py
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.11"
 # dependencies = [
 #   "openai>=1.54",
 # ]
 # ///
 """Async chat with tool calls against local Qwen3.6-MTP via llama-server.

 Run: ./qwen_example.py
 """
 import asyncio
 import json
 import random

 from openai import AsyncOpenAI

 # Base URL handed to the OpenAI client in run(); a local address.
 BASE_URL = "http://127.0.0.1:12434/v1"
 # Model name sent with every chat-completion request.
 MODEL = "qwen3.6-mtp"

 # Tool definitions passed as ``tools=`` to the chat-completion call.
 # A single function, ``get_weather``, with a required ``city`` string and an
 # optional ``units`` enum.
 TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string", "description": "City name"},
                    "units": {
                        "type": "string",
                        "enum": ["celsius", "fahrenheit"],
                        "default": "fahrenheit",
                    },
                },
                "required": ["city"],
            },
        },
    }
 ]


 def fake_weather(city: str, units: str = "fahrenheit") -> dict:
    """Return a made-up weather report for *city*.

    The temperature is a random integer: 45-85 when *units* is
    "fahrenheit", 7-30 for any other value.  The conditions are always
    "partly cloudy", and *city* and *units* are echoed back unchanged.
    """
    if units == "fahrenheit":
        low, high = 45, 85
    else:
        low, high = 7, 30
    return {
        "city": city,
        "temp": random.randint(low, high),
        "units": units,
        "conditions": "partly cloudy",
    }


 # Dispatch table: tool name requested by the model -> callable that runs it.
 TOOL_IMPL = {
    "get_weather": lambda **kwargs: fake_weather(**kwargs),
 }


 def _invoke_tool(name: str, args: object) -> dict:
    """Run the tool registered as *name* in ``TOOL_IMPL`` with keyword *args*.

    Problems caused by the model's request (unknown tool, arguments that are
    not a JSON object, wrong parameter names) are returned as
    ``{"error": ...}`` so they can be sent back to the model instead of
    raising.
    """
    impl = TOOL_IMPL.get(name)
    if impl is None:
        return {"error": f"unknown tool: {name}"}
    if not isinstance(args, dict):
        return {"error": "tool arguments must be a JSON object"}
    try:
        return impl(**args)
    except TypeError as exc:
        # Raised for missing or unexpected parameters (and also for a
        # TypeError inside the tool itself); either way the model is told.
        return {"error": f"invalid arguments for {name}: {exc}"}


 async def run(prompt: str) -> str:
    """Answer *prompt*, letting the model call the tools in ``TOOLS``.

    The conversation is sent to the chat-completions endpoint up to four
    times.  Whenever a reply contains tool calls, each call is executed
    through ``TOOL_IMPL`` and its JSON-encoded result is appended as a
    ``tool`` message before the next round.  A tool call that cannot be
    executed is answered with an ``{"error": ...}`` result rather than
    aborting the conversation.

    Args:
        prompt: The user message that starts the conversation.

    Returns:
        The content of the first reply that has no tool calls ("" when that
        content is empty), or "(max tool-call iterations exceeded)" when all
        four rounds ended in tool calls.
    """
    messages = [
        {"role": "system", "content": "You are concise. Use tools when needed. /no_think"},
        {"role": "user", "content": prompt},
    ]

    # ``async with`` closes the client's HTTP connections on every exit path.
    async with AsyncOpenAI(base_url=BASE_URL, api_key="not-needed") as client:
        for _ in range(4):
            resp = await client.chat.completions.create(
                model=MODEL,
                messages=messages,
                tools=TOOLS,
                tool_choice="auto",
                max_tokens=512,
                extra_body={"chat_template_kwargs": {"enable_thinking": False}},
            )
            msg = resp.choices[0].message
            # Record the assistant turn before the tool results that answer it.
            messages.append(msg.model_dump(exclude_none=True))

            if not msg.tool_calls:
                return msg.content or ""

            for call in msg.tool_calls:
                name = call.function.name
                try:
                    args = json.loads(call.function.arguments or "{}")
                except json.JSONDecodeError as exc:
                    result = {"error": f"tool arguments are not valid JSON: {exc}"}
                else:
                    print(f"→ tool {name}({args})")
                    result = _invoke_tool(name, args)
                print(f"← {result}")
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": call.id,
                        "content": json.dumps(result),
                    }
                )

    return "(max tool-call iterations exceeded)"


 async def main() -> None:
    """Ask the demo weather question and print the assistant's answer."""
    question = "What's the weather in Portland, OR right now? Use fahrenheit."
    reply = await run(question)
    print("\n=== assistant ===", reply, sep="\n")


 # Run the demo only when this file is executed as a script.
 if __name__ == "__main__":
    asyncio.run(main())
diff --git a/qwen_json.py b/qwen_json.py
 #!/usr/bin/env -S uv run --script
 # /// script
 # requires-python = ">=3.11"
 # dependencies = [
 #   "snoop",
 #   "openai>=1.54",
 #   "pydantic>=2.7",
 # ]
 # ///
 """Qwen3.6-MTP: tool calls + Pydantic structured output in one async flow.

 Flow:
  1. Model calls get_weather tool (function calling).
  2. We return tool result.
  3. Final turn forces JSON schema response matching TripPlan pydantic model.

 Run: ./qwen_json.py
 """
 import sys
 import asyncio
 import json
 import random

 import snoop
 from openai import AsyncOpenAI
 from pydantic import BaseModel, Field

 # Base URL handed to the OpenAI client in plan_trip(); a local address.
 BASE_URL = "http://127.0.0.1:12434/v1"
 # Model name sent with every chat-completion request.
 MODEL = "qwen3.6-mtp"

 # Request-body extra that sets the chat template's ``enable_thinking`` flag
 # to False.  Currently unused: both ``extra_body=NO_THINK`` arguments in
 # plan_trip() are commented out.
 NO_THINK = {"chat_template_kwargs": {"enable_thinking": False}}


 # One entry of TripPlan.activities.
 class Activity(BaseModel):
    name: str
    indoor: bool
    # Validated to lie between 0.5 and 12, inclusive.
    duration_hours: float = Field(ge=0.5, le=12)


 # Shape of the final answer: plan_trip() sends this model's JSON schema as the
 # response format and validates the model's reply against it.
 class TripPlan(BaseModel):
    city: str
    temp_f: int
    conditions: str
    summary: str
    # Validated to contain between 2 and 4 activities.
    activities: list[Activity] = Field(min_length=2, max_length=4)


 # Tool definitions passed as ``tools=`` to the chat-completion call.
 # A single function, ``get_weather``, with a required ``city`` string and an
 # optional ``units`` enum.
 TOOLS = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get current weather for a city.",
            "parameters": {
                "type": "object",
                "properties": {
                    "city": {"type": "string"},
                    "units": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                },
                "required": ["city"],
            },
        },
    }
 ]


 def fake_weather(city: str, units: str = "fahrenheit") -> dict:
    """Return a made-up weather report for *city*.

    The temperature is a random integer: 45-85 when *units* is
    "fahrenheit", 7-30 for any other value.  The conditions are always
    "light rain", and *city* and *units* are echoed back unchanged.
    """
    if units == "fahrenheit":
        low, high = 45, 85
    else:
        low, high = 7, 30
    return {
        "city": city,
        "temp": random.randint(low, high),
        "units": units,
        "conditions": "light rain",
    }


 # Dispatch table: tool name requested by the model -> callable that runs it.
 TOOL_IMPL = {
    "get_weather": lambda **kwargs: fake_weather(**kwargs),
 }


 def _invoke_tool(name: str, args: object) -> dict:
    """Run the tool registered as *name* in ``TOOL_IMPL`` with keyword *args*.

    Problems caused by the model's request (unknown tool, arguments that are
    not a JSON object, wrong parameter names) are returned as
    ``{"error": ...}`` so they can be sent back to the model instead of
    raising.
    """
    impl = TOOL_IMPL.get(name)
    if impl is None:
        return {"error": f"unknown tool: {name}"}
    if not isinstance(args, dict):
        return {"error": "tool arguments must be a JSON object"}
    try:
        return impl(**args)
    except TypeError as exc:
        # Raised for missing or unexpected parameters (and also for a
        # TypeError inside the tool itself); either way the model is told.
        return {"error": f"invalid arguments for {name}: {exc}"}


 async def plan_trip(city: str) -> TripPlan:
    """Plan a day in *city* and return it as a validated ``TripPlan``.

    Phase 1 runs up to four chat rounds with ``TOOLS`` enabled; every tool
    call in a reply is executed through ``TOOL_IMPL`` and answered with a
    ``tool`` message.  The phase ends at the first reply without tool calls
    or after the fourth round.  A tool call that cannot be executed is
    answered with an ``{"error": ...}`` result rather than aborting.

    Phase 2 asks for the final answer with a JSON-schema response format
    built from ``TripPlan`` and validates the reply.

    Args:
        city: City name interpolated into the user prompt.

    Returns:
        The ``TripPlan`` parsed from the model's final message.

    Raises:
        Exception: Whatever ``TripPlan.model_validate_json`` raises on an
            invalid reply; the error and the raw completion are dumped with
            ``snoop.pp`` before it is re-raised.
    """
    messages = [
        {
            "role": "system",
            "content": (
                "You plan day trips. First call get_weather, then produce a "
                "TripPlan JSON object matching the schema exactly."
            ),
        },
        {"role": "user", "content": f"Plan a day in {city}. Use fahrenheit."},
    ]

    # ``async with`` closes the client's HTTP connections on every exit path.
    async with AsyncOpenAI(base_url=BASE_URL, api_key="not-needed") as client:
        for _ in range(4):
            resp = await client.chat.completions.create(
                model=MODEL,
                messages=messages,
                tools=TOOLS,
                tool_choice="auto",
                # max_tokens=512,
                # extra_body=NO_THINK,
            )
            msg = resp.choices[0].message
            # Record the assistant turn before the tool results that answer it.
            messages.append(msg.model_dump(exclude_none=True))

            if not msg.tool_calls:
                break

            for call in msg.tool_calls:
                name = call.function.name
                try:
                    args = json.loads(call.function.arguments or "{}")
                except json.JSONDecodeError as exc:
                    result = {"error": f"tool arguments are not valid JSON: {exc}"}
                else:
                    print(f"→ tool {name}({args})", file=sys.stderr)
                    result = _invoke_tool(name, args)
                print(f"← {result}", file=sys.stderr)
                messages.append(
                    {
                        "role": "tool",
                        "tool_call_id": call.id,
                        "content": json.dumps(result),
                    }
                )

        messages.append(
            {
                "role": "user",
                "content": "Now emit the final TripPlan JSON. JSON only, no prose.",
            }
        )

        final = await client.chat.completions.create(
            model=MODEL,
            messages=messages,
            # max_tokens=600,
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": "TripPlan",
                    "strict": True,
                    "schema": TripPlan.model_json_schema(),
                },
            },
            # extra_body=NO_THINK,
        )

    # Empty content falls back to "{}" so validation reports the missing fields.
    raw = final.choices[0].message.content or "{}"
    try:
        return TripPlan.model_validate_json(raw)
    except Exception as error:
        # Dump the error and the full completion for debugging, then propagate.
        snoop.pp("error", error, final)
        raise


 async def main() -> None:
    """Plan a day in Portland, OR and print the plan as indented JSON."""
    trip = await plan_trip("Portland, OR")
    rendered = trip.model_dump_json(indent=2)
    print(rendered)


 # Run the demo only when this file is executed as a script.
 if __name__ == "__main__":
    asyncio.run(main())
	#!/usr/bin/env -S uv run --script
	# /// script
	# requires-python = ">=3.11"
	# dependencies = [
	# "openai>=1.54",
	# ]
	# ///
	"""Async chat with tool calls against local Qwen3.6-MTP via llama-server.

	Run: ./qwen_example.py
	"""
	import asyncio
	import json
	import random

	from openai import AsyncOpenAI

	# Base URL handed to the OpenAI client in run(); a local address.
	BASE_URL = "http://127.0.0.1:12434/v1"
	# Model name sent with every chat-completion request.
	MODEL = "qwen3.6-mtp"

	# Tool definitions passed as ``tools=`` to the chat-completion call.
	TOOLS = [
	    {
	        "type": "function",
	        "function": {
	            "name": "get_weather",
	            "description": "Get current weather for a city.",
	            "parameters": {
	                "type": "object",
	                "properties": {
	                    "city": {"type": "string", "description": "City name"},
	                    "units": {
	                        "type": "string",
	                        "enum": ["celsius", "fahrenheit"],
	                        "default": "fahrenheit",
	                    },
	                },
	                "required": ["city"],
	            },
	        },
	    }
	]


	def fake_weather(city: str, units: str = "fahrenheit") -> dict:
	    """Return a made-up weather report for *city*.

	    The temperature is a random integer: 45-85 when *units* is
	    "fahrenheit", 7-30 for any other value.  The conditions are always
	    "partly cloudy", and *city* and *units* are echoed back unchanged.
	    """
	    temp = random.randint(45, 85) if units == "fahrenheit" else random.randint(7, 30)
	    return {"city": city, "temp": temp, "units": units, "conditions": "partly cloudy"}


	# Dispatch table: tool name requested by the model -> callable that runs it.
	# The callable must accept keyword arguments because run() invokes it as
	# ``TOOL_IMPL[name](**args)``.
	TOOL_IMPL = {"get_weather": lambda **kw: fake_weather(**kw)}


	def _invoke_tool(name: str, args: object) -> dict:
	    """Run the tool registered as *name* in ``TOOL_IMPL`` with keyword *args*.

	    Problems caused by the model's request (unknown tool, arguments that are
	    not a JSON object, wrong parameter names) are returned as
	    ``{"error": ...}`` so they can be sent back to the model instead of
	    raising.
	    """
	    impl = TOOL_IMPL.get(name)
	    if impl is None:
	        return {"error": f"unknown tool: {name}"}
	    if not isinstance(args, dict):
	        return {"error": "tool arguments must be a JSON object"}
	    try:
	        return impl(**args)
	    except TypeError as exc:
	        # Raised for missing or unexpected parameters (and also for a
	        # TypeError inside the tool itself); either way the model is told.
	        return {"error": f"invalid arguments for {name}: {exc}"}


	async def run(prompt: str) -> str:
	    """Answer *prompt*, letting the model call the tools in ``TOOLS``.

	    The conversation is sent to the chat-completions endpoint up to four
	    times.  Whenever a reply contains tool calls, each call is executed
	    through ``TOOL_IMPL`` and its JSON-encoded result is appended as a
	    ``tool`` message before the next round.  A tool call that cannot be
	    executed is answered with an ``{"error": ...}`` result rather than
	    aborting the conversation.

	    Args:
	        prompt: The user message that starts the conversation.

	    Returns:
	        The content of the first reply that has no tool calls ("" when that
	        content is empty), or "(max tool-call iterations exceeded)" when all
	        four rounds ended in tool calls.
	    """
	    messages = [
	        {"role": "system", "content": "You are concise. Use tools when needed. /no_think"},
	        {"role": "user", "content": prompt},
	    ]

	    # ``async with`` closes the client's HTTP connections on every exit path.
	    async with AsyncOpenAI(base_url=BASE_URL, api_key="not-needed") as client:
	        for _ in range(4):
	            resp = await client.chat.completions.create(
	                model=MODEL,
	                messages=messages,
	                tools=TOOLS,
	                tool_choice="auto",
	                max_tokens=512,
	                extra_body={"chat_template_kwargs": {"enable_thinking": False}},
	            )
	            msg = resp.choices[0].message
	            # Record the assistant turn before the tool results that answer it.
	            messages.append(msg.model_dump(exclude_none=True))

	            if not msg.tool_calls:
	                return msg.content or ""

	            for call in msg.tool_calls:
	                name = call.function.name
	                try:
	                    args = json.loads(call.function.arguments or "{}")
	                except json.JSONDecodeError as exc:
	                    result = {"error": f"tool arguments are not valid JSON: {exc}"}
	                else:
	                    print(f"→ tool {name}({args})")
	                    result = _invoke_tool(name, args)
	                print(f"← {result}")
	                messages.append(
	                    {
	                        "role": "tool",
	                        "tool_call_id": call.id,
	                        "content": json.dumps(result),
	                    }
	                )

	    return "(max tool-call iterations exceeded)"


	async def main() -> None:
	    """Ask the demo weather question and print the assistant's answer."""
	    answer = await run("What's the weather in Portland, OR right now? Use fahrenheit.")
	    print("\n=== assistant ===")
	    print(answer)


	# Run the demo only when this file is executed as a script.
	if __name__ == "__main__":
	    asyncio.run(main())
No results found