Created July 12, 2025 23:20
Comparing the evshiron+triton-cpu cast against mainline casting (with or without triton-cpu) of DeepSeek fp8 safetensors to bf16 GGUF, using the ik/llama.cpp forks.
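Both routes perform the same underlying numerical operation: upcasting DeepSeek's block-scaled fp8 e4m3 weights to bf16. Below is a minimal sketch of that dequantization, assuming the DeepSeek checkpoint layout in which each fp8 `weight` tensor ships with a companion `weight_scale_inv` tensor of per-128x128-block scales; it illustrates the math, not the exact code path of either fork.

```python
# Hedged sketch: upcast a DeepSeek fp8 e4m3 weight to bf16 using its
# per-128x128-block scales (the `weight_scale_inv` tensor in the checkpoint).
import torch

def fp8_block_dequant(weight_fp8: torch.Tensor,
                      scale_inv: torch.Tensor,
                      block: int = 128) -> torch.Tensor:
    """Dequantize fp8 -> bf16 by broadcasting each block scale over its tile."""
    w = weight_fp8.to(torch.float32)          # fp8 e4m3 -> fp32 for the multiply
    rows, cols = w.shape
    # Expand each (i, j) block scale across its 128x128 tile, trimming any
    # padding when the dimensions are not multiples of the block size.
    s = scale_inv.repeat_interleave(block, dim=0)[:rows]
    s = s.repeat_interleave(block, dim=1)[:, :cols]
    return (w * s).to(torch.bfloat16)

# Toy usage with fabricated data (shapes match e.g. blk.0.ffn_down.weight):
w8 = torch.randn(7168, 18432).to(torch.float8_e4m3fn)   # fake fp8 weight
s = torch.rand(7168 // 128, 18432 // 128)                # fake block scales
w16 = fp8_block_dequant(w8, s)                           # -> torch.bfloat16
```

If both conversion routes implement this faithfully, the resulting bf16 GGUFs should agree, which is what the two dumps below are checking.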
First gguf-dump output (per the description above, the evshiron+triton-cpu cast; split 1 of 30):
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
Second gguf-dump output (the mainline cast; split 1 of 30). It matches the first dump line for line:
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
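Since gguf-dump only shows metadata, shapes, and dtypes, two matching dumps don't by themselves prove the tensor payloads are bit-identical. Below is a hedged sketch of a byte-level check using the gguf Python package (pip install gguf); the file paths are placeholders, not the actual paths from this comparison.

```python
# Sketch: compare two GGUF split files tensor-by-tensor.
# Paths are hypothetical stand-ins for the two converted models.
import numpy as np
from gguf import GGUFReader

a = GGUFReader("evshiron-cast/DeepSeek-R1-0528-BF16-00001-of-00030.gguf")
b = GGUFReader("mainline-cast/DeepSeek-R1-0528-BF16-00001-of-00030.gguf")

for ta, tb in zip(a.tensors, b.tensors):
    # ReaderTensor.data is a memory-mapped numpy view of the raw tensor bytes.
    same = ta.name == tb.name and np.array_equal(
        np.asarray(ta.data), np.asarray(tb.data))
    print(f"{'OK  ' if same else 'DIFF'} {ta.name}")
```

Running this over all 30 split files would confirm whether the two casting routes agree beyond what the dumps above can show.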