./llama-server -m ../qwen3.6-q8_0.gguf -np 1 --chat-template-kwargs "{\"preserve_thinking\": true}"
code_python pred= 192 draft= 0 acc= 0 rate=n/a tok/s=7.0
code_cpp pred= 192 draft= 0 acc= 0 rate=n/a tok/s=7.3
explain_concept pred= 192 draft= 0 acc= 0 rate=n/a tok/s=7.3
summarize pred= 53 draft= 0 acc= 0 rate=n/a tok/s=7.1
qa_factual pred= 177 draft= 0 acc= 0 rate=n/a tok/s=7.0
translation pred= 22 draft= 0 acc= 0 rate=n/a tok/s=7.7
creative_short pred= 192 draft= 0 acc= 0 rate=n/a tok/s=7.1
stepwise_math pred= 192 draft= 0 acc= 0 rate=n/a tok/s=7.2
long_code_review pred= 192 draft= 0 acc= 0 rate=n/a tok/s=7.0
Aggregate: {
"n_requests": 9,
"total_predicted": 1404,
"total_draft": 0,
"total_draft_accepted": 0,
"aggregate_accept_rate": null,
"wall_s_total": 201.07
}
./llama-server -m ../qwen3.6-q8_0-mtp.gguf -np 1 --chat-template-kwargs "{\"preserve_thinking\": true}" --spec-type mtp --spec-draft-n-max 3
code_python pred= 192 draft= 153 acc= 139 rate=0.908 tok/s=21.6
code_cpp pred= 192 draft= 176 acc= 132 rate=0.750 tok/s=18.7
explain_concept pred= 192 draft= 191 acc= 126 rate=0.660 tok/s=16.3
summarize pred= 55 draft= 51 acc= 37 rate=0.726 tok/s=17.9
qa_factual pred= 177 draft= 174 acc= 118 rate=0.678 tok/s=16.5
translation pred= 22 draft= 24 acc= 13 rate=0.542 tok/s=13.9
creative_short pred= 192 draft= 200 acc= 123 rate=0.615 tok/s=15.8
stepwise_math pred= 192 draft= 171 acc= 133 rate=0.778 tok/s=19.3
long_code_review pred= 192 draft= 179 acc= 131 rate=0.732 tok/s=18.0
Aggregate: {
"n_requests": 9,
"total_predicted": 1406,
"total_draft": 1319,
"total_draft_accepted": 952,
"aggregate_accept_rate": 0.7218,
"wall_s_total": 83.8
}
./llama-server -m ../qwen3.6-q8_0-mtp.gguf -np 1 --chat-template-kwargs "{\"preserve_thinking\": true}" --spec-type mtp --spec-draft-n-max 2
code_python pred= 192 draft= 134 acc= 123 rate=0.918 tok/s=17.4
code_cpp pred= 192 draft= 145 acc= 118 rate=0.814 tok/s=16.5
explain_concept pred= 192 draft= 148 acc= 116 rate=0.784 tok/s=16.1
summarize pred= 55 draft= 44 acc= 32 rate=0.727 tok/s=15.6
qa_factual pred= 192 draft= 132 acc= 125 rate=0.947 tok/s=18.2
translation pred= 22 draft= 18 acc= 12 rate=0.667 tok/s=15.2
creative_short pred= 192 draft= 149 acc= 116 rate=0.778 tok/s=16.1
stepwise_math pred= 192 draft= 139 acc= 121 rate=0.871 tok/s=17.2
long_code_review pred= 192 draft= 153 acc= 114 rate=0.745 tok/s=15.6
Aggregate: {
"n_requests": 9,
"total_predicted": 1421,
"total_draft": 1062,
"total_draft_accepted": 877,
"aggregate_accept_rate": 0.8258,
"wall_s_total": 90.44
}
llama-server -m ../qwen3.6/Qwen3.6-27B-Q8_0.gguf -hfd unsloth/Qwen3.5-0.8B-GGUF:Q8_0 --spec-draft-n-max 16 -np 1 --chat-template-kwargs "{\"preserve_thinking\": true}"
code_python pred= 192 draft= 188 acc= 156 rate=0.830 tok/s=26.4
code_cpp pred= 192 draft= 201 acc= 126 rate=0.627 tok/s=16.8
explain_concept pred= 192 draft= 263 acc= 112 rate=0.426 tok/s=12.7
summarize pred= 57 draft= 63 acc= 39 rate=0.619 tok/s=16.9
qa_factual pred= 192 draft= 178 acc= 177 rate=0.994 tok/s=47.7
translation pred= 23 draft= 18 acc= 15 rate=0.833 tok/s=18.7
creative_short pred= 192 draft= 189 acc= 120 rate=0.635 tok/s=15.4
stepwise_math pred= 192 draft= 190 acc= 148 rate=0.779 tok/s=22.3
long_code_review pred= 192 draft= 207 acc= 120 rate=0.580 tok/s=14.5
Aggregate: {
"n_requests": 9,
"total_predicted": 1424,
"total_draft": 1497,
"total_draft_accepted": 1013,
"aggregate_accept_rate": 0.6767,
"wall_s_total": 81.39
}
llama-server -m ../qwen3.6/Qwen3.6-27B-Q8_0.gguf -hfd unsloth/Qwen3.5-0.8B-GGUF:Q8_0 --spec-draft-n-max 64 -np 1 --chat-template-kwargs "{\"preserve_thinking\": true}"
code_python pred= 192 draft= 174 acc= 159 rate=0.914 tok/s=27.2
code_cpp pred= 192 draft= 138 acc= 120 rate=0.870 tok/s=15.0
explain_concept pred= 192 draft= 170 acc= 101 rate=0.594 tok/s=11.4
summarize pred= 55 draft= 48 acc= 36 rate=0.750 tok/s=14.6
qa_factual pred= 177 draft= 126 acc= 106 rate=0.841 tok/s=13.9
translation pred= 22 draft= 13 acc= 13 rate=1.000 tok/s=16.5
creative_short pred= 192 draft= 136 acc= 104 rate=0.765 tok/s=12.8
stepwise_math pred= 192 draft= 172 acc= 147 rate=0.855 tok/s=22.0
long_code_review pred= 192 draft= 160 acc= 111 rate=0.694 tok/s=13.0
Aggregate: {
"n_requests": 9,
"total_predicted": 1406,
"total_draft": 1137,
"total_draft_accepted": 897,
"aggregate_accept_rate": 0.7889,
"wall_s_total": 97.13
}
привет