Created March 27, 2025 19:19
test-qwen-omni.ipynb
{ | |
"cells": [ | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "09ebfb6e", | |
"cell_type": "code", | |
"source": "import os\nimport torch\n\nfrom transformers import Qwen2_5OmniProcessor, AutoTokenizer\nfrom vllm import LLM, SamplingParams\nfrom qwen_omni_utils import process_mm_info", | |
"execution_count": 1, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "/usr/local/lib/python3.10/dist-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n from .autonotebook import tqdm as notebook_tqdm\n" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "INFO 03-27 19:18:16 [__init__.py:239] Automatically detected platform cuda.\n" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "2025-03-27 19:18:16,503\tINFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "fb39dcce", | |
"cell_type": "code", | |
"source": "from vllm import LLM, SamplingParams\nfrom vllm.assets.audio import AudioAsset\nfrom vllm.assets.image import ImageAsset\nfrom vllm.assets.video import VideoAsset\nfrom vllm.utils import FlexibleArgumentParser\n\nfrom typing import NamedTuple\n\n\nclass QueryResult(NamedTuple):\n inputs: dict\n limit_mm_per_prompt: dict[str, int]\n\n\n# NOTE: The default `max_num_seqs` and `max_model_len` may result in OOM on\n# lower-end GPUs.\n# Unless specified, these settings have been tested to work on a single L4.\n\ndefault_system = (\n \"You are Qwen, a virtual human developed by the Qwen Team, Alibaba \"\n \"Group, capable of perceiving auditory and visual inputs, as well as \"\n \"generating text and speech.\")\n\n\ndef get_mixed_modalities_query() -> QueryResult:\n question = (\"What is recited in the audio? \"\n \"What is the content of this image? Why is this video funny?\")\n prompt = (f\"<|im_start|>system\\n{default_system}<|im_end|>\\n\"\n \"<|im_start|>user\\n<|audio_bos|><|AUDIO|><|audio_eos|>\"\n \"<|vision_bos|><|IMAGE|><|vision_eos|>\"\n \"<|vision_bos|><|VIDEO|><|vision_eos|>\"\n f\"{question}<|im_end|>\\n\"\n f\"<|im_start|>assistant\\n\")\n return QueryResult(\n inputs={\n \"prompt\": prompt,\n \"multi_modal_data\": {\n \"audio\":\n AudioAsset(\"mary_had_lamb\").audio_and_sample_rate,\n \"image\":\n ImageAsset(\"cherry_blossom\").pil_image.convert(\"RGB\"),\n \"video\":\n VideoAsset(name=\"sample_demo_1.mp4\",\n num_frames=16).np_ndarrays,\n },\n },\n limit_mm_per_prompt={\n \"audio\": 1,\n \"image\": 1,\n \"video\": 1\n },\n )\n\n\ndef get_use_audio_in_video_query() -> QueryResult:\n question = (\"Describe the content of the video, \"\n \"then convert what the baby say into text.\")\n prompt = (f\"<|im_start|>system\\n{default_system}<|im_end|>\\n\"\n \"<|im_start|>user\\n<|vision_bos|><|VIDEO|><|vision_eos|>\"\n f\"{question}<|im_end|>\\n\"\n f\"<|im_start|>assistant\\n\")\n asset = VideoAsset(name=\"sample_demo_1.mp4\", num_frames=16)\n audio = asset.get_audio(sampling_rate=16000)\n return QueryResult(\n inputs={\n \"prompt\": prompt,\n \"multi_modal_data\": {\n \"video\": asset.np_ndarrays,\n \"audio\": audio,\n },\n \"mm_processor_kwargs\": {\n \"use_audio_in_video\": True,\n },\n },\n limit_mm_per_prompt={\n \"audio\": 1,\n \"video\": 1\n },\n )\n\n\ndef get_multi_audios_query() -> QueryResult:\n question = \"Are these two audio clips the same?\"\n prompt = (f\"<|im_start|>system\\n{default_system}<|im_end|>\\n\"\n \"<|im_start|>user\\n<|audio_bos|><|AUDIO|><|audio_eos|>\"\n \"<|audio_bos|><|AUDIO|><|audio_eos|>\"\n f\"{question}<|im_end|>\\n\"\n f\"<|im_start|>assistant\\n\")\n return QueryResult(\n inputs={\n \"prompt\": prompt,\n \"multi_modal_data\": {\n \"audio\": [\n AudioAsset(\"winning_call\").audio_and_sample_rate,\n AudioAsset(\"mary_had_lamb\").audio_and_sample_rate,\n ],\n },\n },\n limit_mm_per_prompt={\n \"audio\": 2,\n },\n )\n\n\nquery_map = {\n \"mixed_modalities\": get_mixed_modalities_query,\n \"use_audio_in_video\": get_use_audio_in_video_query,\n \"multi_audios\": get_multi_audios_query,\n}", | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "0a8445fc", | |
"cell_type": "code", | |
"source": "# vLLM engine v1 not supported yet\nos.environ['VLLM_USE_V1'] = '0'", | |
"execution_count": 3, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "0bda2af1", | |
"cell_type": "code", | |
"source": "MODEL_PATH = \"Qwen/Qwen2.5-Omni-7B\"", | |
"execution_count": 4, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "dbc30e27", | |
"cell_type": "code", | |
"source": "tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)", | |
"execution_count": 5, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"scrolled": true, | |
"trusted": true | |
}, | |
"id": "0b380f10", | |
"cell_type": "code", | |
"source": "llm = LLM(\n model=MODEL_PATH, trust_remote_code=True, gpu_memory_utilization=0.9,\n tensor_parallel_size=torch.cuda.device_count(),\n limit_mm_per_prompt={'image': 1, 'video': 1, 'audio': 1},\n seed=1234,\n)", | |
"execution_count": 6, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "INFO 03-27 19:09:31 [config.py:588] This model supports multiple tasks: {'embed', 'score', 'reward', 'generate', 'classify'}. Defaulting to 'generate'.\nINFO 03-27 19:09:31 [llm_engine.py:241] Initializing a V0 LLM engine (v0.1.dev5432+gf8668bf.d20250327) with config: model='Qwen/Qwen2.5-Omni-7B', speculative_config=None, tokenizer='Qwen/Qwen2.5-Omni-7B', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, kv_cache_dtype=auto, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='xgrammar', reasoning_backend=None), observability_config=ObservabilityConfig(show_hidden_metrics=False, otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=1234, served_model_name=Qwen/Qwen2.5-Omni-7B, num_scheduler_steps=1, multi_step_stream_outputs=True, enable_prefix_caching=None, chunked_prefill_enabled=False, use_async_output_proc=True, disable_mm_preprocessor_cache=False, mm_processor_kwargs=None, pooler_config=None, compilation_config={\"splitting_ops\":[],\"compile_sizes\":[],\"cudagraph_capture_sizes\":[256,248,240,232,224,216,208,200,192,184,176,168,160,152,144,136,128,120,112,104,96,88,80,72,64,56,48,40,32,24,16,8,4,2,1],\"max_capture_size\":256}, use_cached_outputs=False, \nINFO 03-27 19:09:33 [cuda.py:292] Using Flash Attention backend.\nINFO 03-27 19:09:33 [parallel_state.py:954] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, TP rank 0\nINFO 03-27 19:09:33 [model_runner.py:1118] Starting to load model Qwen/Qwen2.5-Omni-7B...\nINFO 03-27 19:09:34 [config.py:3276] cudagraph sizes specified by model runner [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256] is overridden by config [256, 128, 2, 1, 4, 136, 8, 144, 16, 152, 24, 160, 32, 168, 40, 176, 48, 184, 56, 192, 64, 200, 72, 208, 80, 216, 88, 120, 224, 96, 232, 104, 240, 112, 248]\nINFO 03-27 19:09:34 [weight_utils.py:265] Using model weights format ['*.safetensors']\n" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "Loading safetensors checkpoint shards: 0% Completed | 0/5 [00:00<?, ?it/s]\nLoading safetensors checkpoint shards: 20% Completed | 1/5 [00:01<00:05, 1.28s/it]\nLoading safetensors checkpoint shards: 40% Completed | 2/5 [00:02<00:03, 1.18s/it]\nLoading safetensors checkpoint shards: 60% Completed | 3/5 [00:03<00:02, 1.19s/it]\nLoading safetensors checkpoint shards: 80% Completed | 4/5 [00:04<00:00, 1.06it/s]\nLoading safetensors checkpoint shards: 100% Completed | 5/5 [00:04<00:00, 1.55it/s]\nLoading safetensors checkpoint shards: 100% Completed | 5/5 [00:04<00:00, 1.17it/s]\n" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "INFO 03-27 19:09:38 [loader.py:447] Loading weights took 4.28 seconds\n" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "\n" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "INFO 03-27 19:09:39 [model_runner.py:1154] Model loading took 15.7177 GB and 5.092847 seconds\n" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.50, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "WARNING 03-27 19:09:41 [model_runner.py:1319] Computed max_num_seqs (min(256, 32768 // 33518)) to be less than 1. Setting it to the minimum value of 1.\nWARNING 03-27 19:09:48 [profiling.py:222] The sequence length used for profiling (max_num_batched_tokens / max_num_seqs = 32768) is too short to hold the multi-modal embeddings in the worst case (33518 tokens in total, out of which {'audio': 750, 'image': 16384, 'video': 16384} are reserved for multi-modal embeddings). This may cause certain multi-modal inputs to fail during inference, even when the input text is short. To avoid this, you should increase `max_model_len`, reduce `max_num_seqs`, and/or reduce `mm_counts`.\nINFO 03-27 19:09:51 [worker.py:267] Memory profiling takes 12.07 seconds\nINFO 03-27 19:09:51 [worker.py:267] the current vLLM instance can use total_gpu_memory (79.25GiB) x gpu_memory_utilization (0.90) = 71.33GiB\nINFO 03-27 19:09:51 [worker.py:267] model weights take 15.72GiB; non_torch_memory takes 0.09GiB; PyTorch activation peak memory takes 4.35GiB; the rest of the memory reserved for KV Cache is 51.17GiB.\nINFO 03-27 19:09:52 [executor_base.py:111] # cuda blocks: 59879, # CPU blocks: 4681\nINFO 03-27 19:09:52 [executor_base.py:116] Maximum concurrency for 32768 tokens per request: 29.24x\nINFO 03-27 19:09:59 [model_runner.py:1464] Capturing cudagraphs for decoding. This may lead to unexpected consequences if the model is not static. To run the model in eager mode, set 'enforce_eager=True' or use '--enforce-eager' in the CLI. If out-of-memory error occurs during cudagraph capture, consider decreasing `gpu_memory_utilization` or switching to eager mode. You can also reduce the `max_num_seqs` as needed to decrease memory usage.\n" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "Capturing CUDA graph shapes: 100%|█████████████████████████████████████████| 35/35 [00:24<00:00, 1.42it/s]" | |
}, | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "INFO 03-27 19:10:23 [model_runner.py:1606] Graph capturing finished in 25 secs, took 0.37 GiB\nINFO 03-27 19:10:23 [llm_engine.py:447] init engine (profile, create kv cache, warmup model) took 44.42 seconds\n" | |
}, | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "66e33498", | |
"cell_type": "code", | |
"source": "model = llm.llm_engine.model_executor.driver_worker.worker.model_runner.model", | |
"execution_count": 7, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"scrolled": true, | |
"trusted": true | |
}, | |
"id": "4000cfbc", | |
"cell_type": "code", | |
"source": "for n,p in model.named_parameters():\n if p.dtype != torch.bfloat16: print(n,p.dtype)", | |
"execution_count": 8, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "dd87387e", | |
"cell_type": "code", | |
"source": "query_map.keys()", | |
"execution_count": 9, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "dict_keys(['mixed_modalities', 'use_audio_in_video', 'multi_audios'])" | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "e588a814", | |
"cell_type": "code", | |
"source": "# query_result = query_map['mixed_modalities']()\nquery_result = query_map['mixed_modalities']()", | |
"execution_count": 13, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "01e38925", | |
"cell_type": "code", | |
"source": "query_result.inputs.keys()", | |
"execution_count": 14, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "dict_keys(['prompt', 'multi_modal_data'])" | |
}, | |
"execution_count": 14, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "d3fca219", | |
"cell_type": "code", | |
"source": "print(query_result.inputs['prompt'])", | |
"execution_count": 15, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "<|im_start|>system\nYou are Qwen, a virtual human developed by the Qwen Team, Alibaba Group, capable of perceiving auditory and visual inputs, as well as generating text and speech.<|im_end|>\n<|im_start|>user\n<|audio_bos|><|AUDIO|><|audio_eos|><|vision_bos|><|IMAGE|><|vision_eos|><|vision_bos|><|VIDEO|><|vision_eos|>What is recited in the audio? What is the content of this image? Why is this video funny?<|im_end|>\n<|im_start|>assistant\n\n" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "0f85c18e", | |
"cell_type": "code", | |
"source": "sampling_params = SamplingParams(temperature=0.01, max_tokens=128)", | |
"execution_count": 17, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "7a20eff5", | |
"cell_type": "code", | |
"source": "outputs = llm.generate(query_result.inputs, sampling_params=sampling_params); output", | |
"execution_count": 18, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "\rProcessed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]" | |
}, | |
{ | |
"ename": "RuntimeError", | |
"evalue": "Input type (float) and bias type (c10::BFloat16) should be the same", | |
"output_type": "error", | |
"traceback": [ | |
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | |
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", | |
"Cell \u001b[0;32mIn[18], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[43mllm\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgenerate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery_result\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minputs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msampling_params\u001b[49m\u001b[43m)\u001b[49m; output\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/utils.py:1074\u001b[0m, in \u001b[0;36mdeprecate_kwargs.<locals>.wrapper.<locals>.inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 1067\u001b[0m msg \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m \u001b[39m\u001b[38;5;132;01m{\u001b[39;00madditional_message\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1069\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1070\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m(msg),\n\u001b[1;32m 1071\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m3\u001b[39m, \u001b[38;5;66;03m# The inner function takes up one level\u001b[39;00m\n\u001b[1;32m 1072\u001b[0m )\n\u001b[0;32m-> 1074\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/llm.py:465\u001b[0m, in \u001b[0;36mLLM.generate\u001b[0;34m(self, prompts, sampling_params, prompt_token_ids, use_tqdm, lora_request, prompt_adapter_request, guided_options_request, priority)\u001b[0m\n\u001b[1;32m 455\u001b[0m sampling_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_default_sampling_params()\n\u001b[1;32m 457\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_and_add_requests(\n\u001b[1;32m 458\u001b[0m prompts\u001b[38;5;241m=\u001b[39mparsed_prompts,\n\u001b[1;32m 459\u001b[0m params\u001b[38;5;241m=\u001b[39msampling_params,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 462\u001b[0m guided_options\u001b[38;5;241m=\u001b[39mguided_options_request,\n\u001b[1;32m 463\u001b[0m priority\u001b[38;5;241m=\u001b[39mpriority)\n\u001b[0;32m--> 465\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43muse_tqdm\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muse_tqdm\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 466\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mengine_class\u001b[38;5;241m.\u001b[39mvalidate_outputs(outputs, RequestOutput)\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/entrypoints/llm.py:1375\u001b[0m, in \u001b[0;36mLLM._run_engine\u001b[0;34m(self, use_tqdm)\u001b[0m\n\u001b[1;32m 1373\u001b[0m total_out_toks \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1374\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mllm_engine\u001b[38;5;241m.\u001b[39mhas_unfinished_requests():\n\u001b[0;32m-> 1375\u001b[0m step_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mllm_engine\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstep\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1376\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m output \u001b[38;5;129;01min\u001b[39;00m step_outputs:\n\u001b[1;32m 1377\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m output\u001b[38;5;241m.\u001b[39mfinished:\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/engine/llm_engine.py:1434\u001b[0m, in \u001b[0;36mLLMEngine.step\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1430\u001b[0m execute_model_req\u001b[38;5;241m.\u001b[39masync_callback \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39masync_callbacks[\n\u001b[1;32m 1431\u001b[0m virtual_engine]\n\u001b[1;32m 1433\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1434\u001b[0m outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_executor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1435\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1436\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_skip_scheduling_next_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n\u001b[1;32m 1437\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InputProcessingError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m 1438\u001b[0m \u001b[38;5;66;03m# The input for this request cannot be processed, so we must\u001b[39;00m\n\u001b[1;32m 1439\u001b[0m \u001b[38;5;66;03m# abort it. If there are remaining requests in the batch that\u001b[39;00m\n\u001b[1;32m 1440\u001b[0m \u001b[38;5;66;03m# have been scheduled, they will be retried on the next step.\u001b[39;00m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/executor/executor_base.py:139\u001b[0m, in \u001b[0;36mExecutorBase.execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mexecute_model\u001b[39m(\n\u001b[1;32m 137\u001b[0m \u001b[38;5;28mself\u001b[39m, execute_model_req: ExecuteModelRequest\n\u001b[1;32m 138\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Optional[List[Union[SamplerOutput, PoolerOutput]]]:\n\u001b[0;32m--> 139\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcollective_rpc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mexecute_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 140\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mexecute_model_req\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 141\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output[\u001b[38;5;241m0\u001b[39m]\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/executor/uniproc_executor.py:56\u001b[0m, in \u001b[0;36mUniProcExecutor.collective_rpc\u001b[0;34m(self, method, timeout, args, kwargs)\u001b[0m\n\u001b[1;32m 54\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m kwargs \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 55\u001b[0m kwargs \u001b[38;5;241m=\u001b[39m {}\n\u001b[0;32m---> 56\u001b[0m answer \u001b[38;5;241m=\u001b[39m \u001b[43mrun_method\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdriver_worker\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [answer]\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/utils.py:2260\u001b[0m, in \u001b[0;36mrun_method\u001b[0;34m(obj, method, args, kwargs)\u001b[0m\n\u001b[1;32m 2258\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 2259\u001b[0m func \u001b[38;5;241m=\u001b[39m partial(method, obj) \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[0;32m-> 2260\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/worker/worker_base.py:420\u001b[0m, in \u001b[0;36mLocalOrDistributedWorkerBase.execute_model\u001b[0;34m(self, execute_model_req)\u001b[0m\n\u001b[1;32m 415\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 416\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config\u001b[38;5;241m.\u001b[39mcollect_model_execute_time):\n\u001b[1;32m 417\u001b[0m orig_model_execute_time \u001b[38;5;241m=\u001b[39m intermediate_tensors\u001b[38;5;241m.\u001b[39mtensors\u001b[38;5;241m.\u001b[39mget(\n\u001b[1;32m 418\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel_execute_time\u001b[39m\u001b[38;5;124m\"\u001b[39m, torch\u001b[38;5;241m.\u001b[39mtensor(\u001b[38;5;241m0\u001b[39m))\u001b[38;5;241m.\u001b[39mitem()\n\u001b[0;32m--> 420\u001b[0m output \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmodel_runner\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 421\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 422\u001b[0m \u001b[43m \u001b[49m\u001b[43mkv_caches\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkv_cache\u001b[49m\u001b[43m[\u001b[49m\u001b[43mworker_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvirtual_engine\u001b[49m\u001b[43m]\u001b[49m\n\u001b[1;32m 423\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkv_cache\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 424\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 425\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_steps\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 426\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 427\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 429\u001b[0m model_execute_time \u001b[38;5;241m=\u001b[39m time\u001b[38;5;241m.\u001b[39mperf_counter() \u001b[38;5;241m-\u001b[39m start_time\n\u001b[1;32m 430\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m get_pp_group()\u001b[38;5;241m.\u001b[39mis_last_rank:\n\u001b[1;32m 431\u001b[0m \u001b[38;5;66;03m# output is IntermediateTensors\u001b[39;00m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py:116\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 113\u001b[0m \u001b[38;5;129m@functools\u001b[39m\u001b[38;5;241m.\u001b[39mwraps(func)\n\u001b[1;32m 114\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mdecorate_context\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m 115\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 116\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/worker/model_runner.py:1778\u001b[0m, in \u001b[0;36mModelRunner.execute_model\u001b[0;34m(self, model_input, kv_caches, intermediate_tensors, num_steps, **kwargs)\u001b[0m\n\u001b[1;32m 1775\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m bypass_model_exec:\n\u001b[1;32m 1776\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m set_forward_context(model_input\u001b[38;5;241m.\u001b[39mattn_metadata,\n\u001b[1;32m 1777\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvllm_config, virtual_engine):\n\u001b[0;32m-> 1778\u001b[0m hidden_or_intermediate_states \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_executable\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1779\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_ids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1780\u001b[0m \u001b[43m \u001b[49m\u001b[43mpositions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmodel_input\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minput_positions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1781\u001b[0m \u001b[43m \u001b[49m\u001b[43mintermediate_tensors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mintermediate_tensors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1782\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mMultiModalKwargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mas_kwargs\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmulti_modal_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1783\u001b[0m \u001b[43m \u001b[49m\u001b[43mdevice\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1784\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mseqlen_agnostic_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1785\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mmodel_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1786\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1788\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1789\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobservability_config\u001b[38;5;241m.\u001b[39mcollect_model_forward_time):\n\u001b[1;32m 1790\u001b[0m model_forward_end\u001b[38;5;241m.\u001b[39mrecord()\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/qwen2_5_omni_thinker.py:786\u001b[0m, in \u001b[0;36mQwen2_5OmniThinkerForConditionalGeneration.forward\u001b[0;34m(self, input_ids, positions, intermediate_tensors, inputs_embeds, **kwargs)\u001b[0m\n\u001b[1;32m 783\u001b[0m \u001b[38;5;66;03m# NOTE: In v1, inputs_embeds is always generated at model runner, this\u001b[39;00m\n\u001b[1;32m 784\u001b[0m \u001b[38;5;66;03m# condition is for v0 compatibility.\u001b[39;00m\n\u001b[1;32m 785\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m inputs_embeds \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 786\u001b[0m multimodal_embeddings \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_multimodal_embeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 787\u001b[0m inputs_embeds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_input_embeddings(input_ids,\n\u001b[1;32m 788\u001b[0m multimodal_embeddings)\n\u001b[1;32m 789\u001b[0m input_ids \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/qwen2_5_omni_thinker.py:742\u001b[0m, in \u001b[0;36mQwen2_5OmniThinkerForConditionalGeneration.get_multimodal_embeddings\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 739\u001b[0m multimodal_embeddings: List[Tuple[NestedTensors, \u001b[38;5;28mstr\u001b[39m]] \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m 741\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m audio_input \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 742\u001b[0m audio_embeds \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_audio_input\u001b[49m\u001b[43m(\u001b[49m\u001b[43maudio_input\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 743\u001b[0m multimodal_embeddings\u001b[38;5;241m.\u001b[39mappend((audio_embeds, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maudio\u001b[39m\u001b[38;5;124m\"\u001b[39m))\n\u001b[1;32m 744\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m image_input \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/vllm/model_executor/models/qwen2_5_omni_thinker.py:635\u001b[0m, in \u001b[0;36mQwen2_5OmniConditionalGenerationMixin._process_audio_input\u001b[0;34m(self, audio_input, audio_hashes, cached_audio_features)\u001b[0m\n\u001b[1;32m 629\u001b[0m audio_feature_lengths \u001b[38;5;241m=\u001b[39m audio_feature_lengths\u001b[38;5;241m.\u001b[39msqueeze(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 631\u001b[0m audio_feat_lengths, audio_output_lengths \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maudio_tower\u001b[38;5;241m.\u001b[39m_get_feat_extract_output_lengths(\n\u001b[1;32m 633\u001b[0m audio_feature_lengths))\n\u001b[0;32m--> 635\u001b[0m audio_outputs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maudio_tower\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 636\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_features\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 637\u001b[0m \u001b[43m \u001b[49m\u001b[43mfeature_lens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudio_feature_lengths\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 638\u001b[0m \u001b[43m \u001b[49m\u001b[43maftercnn_lens\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maudio_feat_lengths\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 639\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 640\u001b[0m audio_features \u001b[38;5;241m=\u001b[39m audio_outputs\u001b[38;5;241m.\u001b[39mlast_hidden_state\n\u001b[1;32m 641\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m audio_features\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/transformers/models/qwen2_5_omni/modeling_qwen2_5_omni.py:1071\u001b[0m, in \u001b[0;36mQwen2_5OmniAudioEncoder.forward\u001b[0;34m(self, input_features, feature_lens, aftercnn_lens, head_mask, output_attentions, output_hidden_states, return_dict)\u001b[0m\n\u001b[1;32m 1066\u001b[0m each_audio_split_list \u001b[38;5;241m=\u001b[39m input_features[\n\u001b[1;32m 1067\u001b[0m :, feature_lens_accum[index_] : feature_lens_accum[index_ \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 1068\u001b[0m ]\u001b[38;5;241m.\u001b[39msplit(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mn_window \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m2\u001b[39m, dim\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 1070\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m each_audio_split \u001b[38;5;129;01min\u001b[39;00m each_audio_split_list:\n\u001b[0;32m-> 1071\u001b[0m each_split_embed \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mfunctional\u001b[38;5;241m.\u001b[39mgelu(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv1\u001b[49m\u001b[43m(\u001b[49m\u001b[43meach_audio_split\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1072\u001b[0m each_split_embed \u001b[38;5;241m=\u001b[39m nn\u001b[38;5;241m.\u001b[39mfunctional\u001b[38;5;241m.\u001b[39mgelu(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mconv2(each_split_embed))\u001b[38;5;241m.\u001b[39mtranspose_(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m 1074\u001b[0m embed_pos \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpositional_embedding(each_split_embed\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;241m.\u001b[39mto(each_split_embed\u001b[38;5;241m.\u001b[39mdtype)\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1739\u001b[0m, in \u001b[0;36mModule._wrapped_call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1737\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_compiled_call_impl(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;66;03m# type: ignore[misc]\u001b[39;00m\n\u001b[1;32m 1738\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1739\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_call_impl\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py:1750\u001b[0m, in \u001b[0;36mModule._call_impl\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1745\u001b[0m \u001b[38;5;66;03m# If we don't have any hooks, we want to skip the rest of the logic in\u001b[39;00m\n\u001b[1;32m 1746\u001b[0m \u001b[38;5;66;03m# this function, and just call forward.\u001b[39;00m\n\u001b[1;32m 1747\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_forward_pre_hooks\n\u001b[1;32m 1748\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_backward_pre_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_backward_hooks\n\u001b[1;32m 1749\u001b[0m \u001b[38;5;129;01mor\u001b[39;00m _global_forward_hooks \u001b[38;5;129;01mor\u001b[39;00m _global_forward_pre_hooks):\n\u001b[0;32m-> 1750\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mforward_call\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1752\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1753\u001b[0m called_always_called_hooks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mset\u001b[39m()\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py:375\u001b[0m, in \u001b[0;36mConv1d.forward\u001b[0;34m(self, input)\u001b[0m\n\u001b[1;32m 374\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mforward\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;28minput\u001b[39m: Tensor) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Tensor:\n\u001b[0;32m--> 375\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_conv_forward\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbias\u001b[49m\u001b[43m)\u001b[49m\n", | |
"File \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py:370\u001b[0m, in \u001b[0;36mConv1d._conv_forward\u001b[0;34m(self, input, weight, bias)\u001b[0m\n\u001b[1;32m 358\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mzeros\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m 359\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m F\u001b[38;5;241m.\u001b[39mconv1d(\n\u001b[1;32m 360\u001b[0m F\u001b[38;5;241m.\u001b[39mpad(\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28minput\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reversed_padding_repeated_twice, mode\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpadding_mode\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgroups,\n\u001b[1;32m 369\u001b[0m )\n\u001b[0;32m--> 370\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mF\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconv1d\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 371\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43minput\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweight\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mbias\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mstride\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpadding\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdilation\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroups\u001b[49m\n\u001b[1;32m 372\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", | |
"\u001b[0;31mRuntimeError\u001b[0m: Input type (float) and bias type (c10::BFloat16) should be the same" | |
] | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "4e9b3410", | |
"cell_type": "code", | |
"source": "%debug", | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": "> \u001b[0;32m/usr/local/lib/python3.10/dist-packages/torch/nn/modules/conv.py\u001b[0m(370)\u001b[0;36m_conv_forward\u001b[0;34m()\u001b[0m\n\u001b[0;32m 368 \u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroups\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 369 \u001b[0;31m )\n\u001b[0m\u001b[0;32m--> 370 \u001b[0;31m return F.conv1d(\n\u001b[0m\u001b[0;32m 371 \u001b[0;31m \u001b[0minput\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweight\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbias\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstride\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpadding\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdilation\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mgroups\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 372 \u001b[0;31m )\n\u001b[0m\nipdb> input.dtype, weight.dtype, bias.dtype\n(torch.float32, torch.bfloat16, torch.bfloat16)\nipdb> exit\n" | |
} | |
] | |
}, | |
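{ | |
"metadata": {}, | |
"id": "added-dtype-note", | |
"cell_type": "markdown", | |
"source": "The debugger shows `input.dtype == torch.float32` while the audio tower's conv weight and bias are `torch.bfloat16`. The next cell is a standalone sketch (plain PyTorch, not vLLM's actual code path; the layer sizes are made up) that reproduces the same mismatch and shows the cast that avoids it." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "added-dtype-sketch", | |
"cell_type": "code", | |
"source": "# Standalone sketch of the dtype mismatch seen in %debug above (not vLLM's\n# actual code path; the layer sizes are made up for illustration).\nimport torch\nimport torch.nn as nn\n\nconv = nn.Conv1d(128, 1280, kernel_size=3, padding=1).to(torch.bfloat16)\nfeatures = torch.randn(1, 128, 100)  # float32, like the unconverted audio features\n\ntry:\n    conv(features)  # float32 input into bfloat16 weights/bias raises a RuntimeError\nexcept RuntimeError as e:\n    print(e)\n\n# Casting the input to the module's dtype avoids the error.\nprint(conv(features.to(torch.bfloat16)).dtype)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |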
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "a1a98a8c", | |
"cell_type": "code", | |
"source": "prompt = \"\"\"<|im_start|>system\nYou are Qwen, a virtual human developed by the Qwen Team, Alibaba Group.<|im_end|>\n<|im_start|>user\nHello how are you?<|im_end|>\n<|im_start|>assistant\n\"\"\"", | |
"execution_count": 24, | |
"outputs": [] | |
}, | |
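{ | |
"metadata": {}, | |
"id": "added-template-note", | |
"cell_type": "markdown", | |
"source": "As a cross-check, the same conversation can be rendered through the tokenizer's chat template instead of hand-writing the special tokens. This is a sketch: it assumes the checkpoint ships a chat template, and the rendered string may differ slightly from the prompt above." | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "added-template-sketch", | |
"cell_type": "code", | |
"source": "# Sketch: render the same conversation with the tokenizer's chat template\n# (assumes the checkpoint ships one; the result may differ slightly from the\n# hand-written prompt above).\nmessages = [\n    {\"role\": \"system\", \"content\": \"You are Qwen, a virtual human developed by the Qwen Team, Alibaba Group.\"},\n    {\"role\": \"user\", \"content\": \"Hello how are you?\"},\n]\nprint(tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True))", | |
"execution_count": null, | |
"outputs": [] | |
}, | |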
{ | |
"metadata": { | |
"scrolled": true, | |
"trusted": true | |
}, | |
"id": "6c3d1a54", | |
"cell_type": "code", | |
"source": "output = llm.generate([prompt], sampling_params=sampling_params)\nrt = output[0].outputs[0]; rt.text", | |
"execution_count": 25, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": "\n\nProcessed prompts: 0%| | 0/1 [00:00<?, ?it/s, est. speed input: 0.00 toks/s, output: 0.00 toks/s]\u001b[A\u001b[A\n\nProcessed prompts: 100%|█| 1/1 [00:01<00:00, 1.65s/it, est. speed input: 21.82 toks/s, output: 77.57 toks/\u001b[A\u001b[A\n" | |
}, | |
{ | |
"data": { | |
"text/plain": "''" | |
}, | |
"execution_count": 25, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "e76c7b57", | |
"cell_type": "code", | |
"source": "rt.token_ids[:16]", | |
"execution_count": 27, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "(151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872,\n 151872)" | |
}, | |
"execution_count": 27, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "7242f455", | |
"cell_type": "code", | |
"source": "tokenizer.convert_ids_to_tokens([151872])", | |
"execution_count": 28, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "[None]" | |
}, | |
"execution_count": 28, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
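{ | |
"metadata": {}, | |
"id": "added-vocab-note", | |
"cell_type": "markdown", | |
"source": "`convert_ids_to_tokens` returning `None` suggests that id 151872 falls outside the ids this tokenizer knows about (a model's output head can be larger than the tokenizer vocabulary). A quick sanity check, reusing `tokenizer` and `rt` from the cells above:" | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "added-vocab-check", | |
"cell_type": "code", | |
"source": "# Sketch: compare the generated ids against the largest id the tokenizer knows\n# about (reuses `tokenizer` and `rt` from the cells above).\nmax_known_id = max(tokenizer.get_vocab().values())\nprint(max_known_id, max(rt.token_ids), max(rt.token_ids) > max_known_id)", | |
"execution_count": null, | |
"outputs": [] | |
}, | |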
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "c8262072", | |
"cell_type": "code", | |
"source": "import vllm", | |
"execution_count": 29, | |
"outputs": [] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "1d56bced", | |
"cell_type": "code", | |
"source": "vllm.__version__", | |
"execution_count": 31, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": "'0.1.dev5432+gf8668bf.d20250327'" | |
}, | |
"execution_count": 31, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
] | |
}, | |
{ | |
"metadata": { | |
"trusted": true | |
}, | |
"id": "c07ab150", | |
"cell_type": "code", | |
"source": "", | |
"execution_count": null, | |
"outputs": [] | |
} | |
], | |
"metadata": { | |
"gist": { | |
"id": "", | |
"data": { | |
"description": "test-qwen-omni.ipynb", | |
"public": true | |
} | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3 (ipykernel)", | |
"language": "python" | |
}, | |
"language_info": { | |
"name": "python", | |
"version": "3.10.12", | |
"mimetype": "text/x-python", | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"pygments_lexer": "ipython3", | |
"nbconvert_exporter": "python", | |
"file_extension": ".py" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 5 | |
} |