@ubergarm
Created July 12, 2025 23:20
Comparing evshiron+triton-cpu vs mainline casting (with or without triton-cpu) of DeepSeek fp8 safetensors to bf16 GGUF with ik/llama.cpp forks.
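
Both conversion paths ultimately perform the same underlying cast: as I understand the DeepSeek checkpoint layout, each fp8 weight is stored as float8_e4m3fn alongside a float32 `weight_scale_inv` tensor holding one scale per 128x128 block, and the converter multiplies each block by its scale before writing bf16. Below is a minimal PyTorch sketch of that block-wise dequantization; it is not the code from either fork, and the tensor names follow the DeepSeek checkpoint convention.

```python
# Sketch only: block-wise fp8 -> bf16 dequantization as the DeepSeek fp8 checkpoints define it
# (weight: float8_e4m3fn, scale_inv: float32 with one scale per 128x128 block).
# Neither fork necessarily implements it exactly this way.
import torch

def dequant_fp8_to_bf16(weight: torch.Tensor, scale_inv: torch.Tensor,
                        block: int = 128) -> torch.Tensor:
    w = weight.to(torch.float32)                      # fp8 e4m3 -> fp32
    rows, cols = w.shape
    # expand each per-block scale over its 128x128 tile, trimming partial edge blocks
    s = scale_inv.repeat_interleave(block, dim=0)[:rows]
    s = s.repeat_interleave(block, dim=1)[:, :cols]
    return (w * s).to(torch.bfloat16)                 # the bf16 data written into the GGUF
```

The triton-cpu dependency presumably only changes how fast this multiply runs on CPU; the point of the dumps below is to check that the resulting GGUFs come out the same either way.
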
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
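
The corresponding dump for the other conversion path follows and is line-for-line identical. Rather than eyeballing two long dumps, the same check can be scripted with gguf-py's GGUFReader; a rough sketch (the file paths are placeholders, not the actual filenames from this run):

```python
# Rough sketch: compare tensor names, shapes, and dtypes of the first split produced by
# each conversion path. Paths are placeholders, not the real filenames used here.
from gguf import GGUFReader

def tensor_summary(path: str) -> dict:
    reader = GGUFReader(path)
    return {t.name: (tuple(int(d) for d in t.shape), t.tensor_type.name)
            for t in reader.tensors}

a = tensor_summary("evshiron-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")
b = tensor_summary("mainline-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")

for name in sorted(set(a) | set(b)):
    if a.get(name) != b.get(name):
        print(f"mismatch: {name}: {a.get(name)} vs {b.get(name)}")
```
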
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
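
Matching metadata doesn't by itself show that the two casting paths wrote the same bytes, so a follow-up check could hash each tensor's raw data in both splits. Again a hedged sketch with placeholder paths:

```python
# Sketch: hash the raw bf16 payload of every tensor in the first split from each path
# and report whether they are bit-identical. Placeholder paths, not the actual files.
import hashlib
from gguf import GGUFReader

def tensor_hashes(path: str) -> dict:
    reader = GGUFReader(path)
    return {t.name: hashlib.sha256(t.data.tobytes()).hexdigest() for t in reader.tensors}

a = tensor_hashes("evshiron-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")
b = tensor_hashes("mainline-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")

if a == b:
    print("all 79 tensors in this split are bit-identical")
else:
    for name in a:
        if a[name] != b.get(name):
            print(f"differs: {name}")
```
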