@ubergarm
Created July 12, 2025 23:20
Comparing evshiron+triton-cpu vs mainline casting (with or without triton-cpu) of DeepSeek fp8 safetensors to bf16 GGUF with ik/llama.cpp forks.
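
Both conversion paths ultimately perform the same underlying cast: as I understand the DeepSeek checkpoint layout, each fp8 weight is stored as float8_e4m3fn alongside a float32 `weight_scale_inv` tensor holding one scale per 128x128 block, and the converter multiplies each block by its scale before writing bf16. Below is a minimal PyTorch sketch of that block-wise dequantization; it is not the code from either fork, and the tensor names follow the DeepSeek checkpoint convention.

```python
# Sketch only: block-wise fp8 -> bf16 dequantization as the DeepSeek fp8 checkpoints define it
# (weight: float8_e4m3fn, scale_inv: float32 with one scale per 128x128 block).
# Neither fork necessarily implements it exactly this way.
import torch

def dequant_fp8_to_bf16(weight: torch.Tensor, scale_inv: torch.Tensor,
                        block: int = 128) -> torch.Tensor:
    w = weight.to(torch.float32)                      # fp8 e4m3 -> fp32
    rows, cols = w.shape
    # expand each per-block scale over its 128x128 tile, trimming partial edge blocks
    s = scale_inv.repeat_interleave(block, dim=0)[:rows]
    s = s.repeat_interleave(block, dim=1)[:, :cols]
    return (w * s).to(torch.bfloat16)                 # the bf16 data written into the GGUF
```

The triton-cpu dependency presumably only changes how fast this multiply runs on CPU; the point of the dumps below is to check that the resulting GGUFs come out the same either way.
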
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
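
The corresponding dump for the other conversion path follows and is line-for-line identical. Rather than eyeballing two long dumps, the same check can be scripted with gguf-py's GGUFReader; a rough sketch (the file paths are placeholders, not the actual filenames from this run):

```python
# Rough sketch: compare tensor names, shapes, and dtypes of the first split produced by
# each conversion path. Paths are placeholders, not the real filenames used here.
from gguf import GGUFReader

def tensor_summary(path: str) -> dict:
    reader = GGUFReader(path)
    return {t.name: (tuple(int(d) for d in t.shape), t.tensor_type.name)
            for t in reader.tensors}

a = tensor_summary("evshiron-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")
b = tensor_summary("mainline-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")

for name in sorted(set(a) | set(b)):
    if a.get(name) != b.get(name):
        print(f"mismatch: {name}: {a.get(name)} vs {b.get(name)}")
```
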
* File is LITTLE endian, script is running on a LITTLE endian host.
* Dumping 51 key/value pair(s)
1: UINT32 | 1 | GGUF.version = 3
2: UINT64 | 1 | GGUF.tensor_count = 79
3: UINT64 | 1 | GGUF.kv_count = 48
4: STRING | 1 | general.architecture = 'deepseek2'
5: STRING | 1 | general.type = 'model'
6: STRING | 1 | general.name = 'DeepSeek R1 0528'
7: STRING | 1 | general.version = '0528'
8: STRING | 1 | general.basename = 'DeepSeek-R1'
9: STRING | 1 | general.size_label = '256x21B'
10: UINT32 | 1 | deepseek2.block_count = 61
11: UINT32 | 1 | deepseek2.context_length = 163840
12: UINT32 | 1 | deepseek2.embedding_length = 7168
13: UINT32 | 1 | deepseek2.feed_forward_length = 18432
14: UINT32 | 1 | deepseek2.attention.head_count = 128
15: UINT32 | 1 | deepseek2.attention.head_count_kv = 128
16: FLOAT32 | 1 | deepseek2.rope.freq_base = 10000.0
17: FLOAT32 | 1 | deepseek2.attention.layer_norm_rms_epsilon = 9.999999974752427e-07
18: UINT32 | 1 | deepseek2.expert_used_count = 8
19: UINT32 | 1 | general.file_type = 32
20: UINT32 | 1 | deepseek2.leading_dense_block_count = 3
21: UINT32 | 1 | deepseek2.vocab_size = 129280
22: UINT32 | 1 | deepseek2.attention.q_lora_rank = 1536
23: UINT32 | 1 | deepseek2.attention.kv_lora_rank = 512
24: UINT32 | 1 | deepseek2.attention.key_length = 192
25: UINT32 | 1 | deepseek2.attention.value_length = 128
26: UINT32 | 1 | deepseek2.expert_feed_forward_length = 2048
27: UINT32 | 1 | deepseek2.expert_count = 256
28: UINT32 | 1 | deepseek2.expert_shared_count = 1
29: FLOAT32 | 1 | deepseek2.expert_weights_scale = 2.5
30: BOOL | 1 | deepseek2.expert_weights_norm = True
31: UINT32 | 1 | deepseek2.expert_gating_func = 2
32: UINT32 | 1 | deepseek2.rope.dimension_count = 64
33: STRING | 1 | deepseek2.rope.scaling.type = 'yarn'
34: FLOAT32 | 1 | deepseek2.rope.scaling.factor = 40.0
35: UINT32 | 1 | deepseek2.rope.scaling.original_context_length = 4096
36: FLOAT32 | 1 | deepseek2.rope.scaling.yarn_log_multiplier = 0.10000000149011612
37: STRING | 1 | tokenizer.ggml.model = 'gpt2'
38: STRING | 1 | tokenizer.ggml.pre = 'deepseek-v3'
39: [STRING] | 129280 | tokenizer.ggml.tokens
40: [INT32] | 129280 | tokenizer.ggml.token_type
41: [STRING] | 127741 | tokenizer.ggml.merges
42: UINT32 | 1 | tokenizer.ggml.bos_token_id = 0
43: UINT32 | 1 | tokenizer.ggml.eos_token_id = 1
44: UINT32 | 1 | tokenizer.ggml.padding_token_id = 1
45: BOOL | 1 | tokenizer.ggml.add_bos_token = True
46: BOOL | 1 | tokenizer.ggml.add_eos_token = False
47: STRING | 1 | tokenizer.chat_template = '{% if not add_generation_prompt is defined %}{% set add_gene'
48: UINT32 | 1 | general.quantization_version = 2
49: UINT16 | 1 | split.no = 0
50: UINT16 | 1 | split.count = 30
51: INT32 | 1 | split.tensors.count = 1147
* Dumping 79 tensor(s)
1: 926679040 | 7168, 129280, 1, 1 | BF16 | token_embd.weight
2: 7168 | 7168, 1, 1, 1 | F32 | blk.0.attn_norm.weight
3: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.0.ffn_down.weight
4: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_gate.weight
5: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.0.ffn_up.weight
6: 7168 | 7168, 1, 1, 1 | F32 | blk.0.ffn_norm.weight
7: 512 | 512, 1, 1, 1 | F32 | blk.0.attn_kv_a_norm.weight
8: 4128768 | 7168, 576, 1, 1 | BF16 | blk.0.attn_kv_a_mqa.weight
9: 16777216 | 512, 32768, 1, 1 | BF16 | blk.0.attn_kv_b.weight
10: 8388608 | 128, 65536, 1, 1 | BF16 | blk.0.attn_k_b.weight
11: 8388608 | 512, 16384, 1, 1 | BF16 | blk.0.attn_v_b.weight
12: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.0.attn_output.weight
13: 1536 | 1536, 1, 1, 1 | F32 | blk.0.attn_q_a_norm.weight
14: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.0.attn_q_a.weight
15: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.0.attn_q_b.weight
16: 7168 | 7168, 1, 1, 1 | F32 | blk.1.attn_norm.weight
17: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.1.ffn_down.weight
18: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_gate.weight
19: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.1.ffn_up.weight
20: 7168 | 7168, 1, 1, 1 | F32 | blk.1.ffn_norm.weight
21: 512 | 512, 1, 1, 1 | F32 | blk.1.attn_kv_a_norm.weight
22: 4128768 | 7168, 576, 1, 1 | BF16 | blk.1.attn_kv_a_mqa.weight
23: 16777216 | 512, 32768, 1, 1 | BF16 | blk.1.attn_kv_b.weight
24: 8388608 | 128, 65536, 1, 1 | BF16 | blk.1.attn_k_b.weight
25: 8388608 | 512, 16384, 1, 1 | BF16 | blk.1.attn_v_b.weight
26: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.1.attn_output.weight
27: 1536 | 1536, 1, 1, 1 | F32 | blk.1.attn_q_a_norm.weight
28: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.1.attn_q_a.weight
29: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.1.attn_q_b.weight
30: 7168 | 7168, 1, 1, 1 | F32 | blk.2.attn_norm.weight
31: 132120576 | 18432, 7168, 1, 1 | BF16 | blk.2.ffn_down.weight
32: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_gate.weight
33: 132120576 | 7168, 18432, 1, 1 | BF16 | blk.2.ffn_up.weight
34: 7168 | 7168, 1, 1, 1 | F32 | blk.2.ffn_norm.weight
35: 512 | 512, 1, 1, 1 | F32 | blk.2.attn_kv_a_norm.weight
36: 4128768 | 7168, 576, 1, 1 | BF16 | blk.2.attn_kv_a_mqa.weight
37: 16777216 | 512, 32768, 1, 1 | BF16 | blk.2.attn_kv_b.weight
38: 8388608 | 128, 65536, 1, 1 | BF16 | blk.2.attn_k_b.weight
39: 8388608 | 512, 16384, 1, 1 | BF16 | blk.2.attn_v_b.weight
40: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.2.attn_output.weight
41: 1536 | 1536, 1, 1, 1 | F32 | blk.2.attn_q_a_norm.weight
42: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.2.attn_q_a.weight
43: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.2.attn_q_b.weight
44: 256 | 256, 1, 1, 1 | F32 | blk.3.exp_probs_b.bias
45: 1835008 | 7168, 256, 1, 1 | F32 | blk.3.ffn_gate_inp.weight
46: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.3.ffn_down_shexp.weight
47: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_gate_shexp.weight
48: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.3.ffn_up_shexp.weight
49: 512 | 512, 1, 1, 1 | F32 | blk.3.attn_kv_a_norm.weight
50: 4128768 | 7168, 576, 1, 1 | BF16 | blk.3.attn_kv_a_mqa.weight
51: 16777216 | 512, 32768, 1, 1 | BF16 | blk.3.attn_kv_b.weight
52: 8388608 | 128, 65536, 1, 1 | BF16 | blk.3.attn_k_b.weight
53: 8388608 | 512, 16384, 1, 1 | BF16 | blk.3.attn_v_b.weight
54: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.3.attn_output.weight
55: 1536 | 1536, 1, 1, 1 | F32 | blk.3.attn_q_a_norm.weight
56: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.3.attn_q_a.weight
57: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.3.attn_q_b.weight
58: 7168 | 7168, 1, 1, 1 | F32 | blk.3.attn_norm.weight
59: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.3.ffn_down_exps.weight
60: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_gate_exps.weight
61: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.3.ffn_up_exps.weight
62: 7168 | 7168, 1, 1, 1 | F32 | blk.3.ffn_norm.weight
63: 256 | 256, 1, 1, 1 | F32 | blk.4.exp_probs_b.bias
64: 1835008 | 7168, 256, 1, 1 | F32 | blk.4.ffn_gate_inp.weight
65: 14680064 | 2048, 7168, 1, 1 | BF16 | blk.4.ffn_down_shexp.weight
66: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_gate_shexp.weight
67: 14680064 | 7168, 2048, 1, 1 | BF16 | blk.4.ffn_up_shexp.weight
68: 512 | 512, 1, 1, 1 | F32 | blk.4.attn_kv_a_norm.weight
69: 4128768 | 7168, 576, 1, 1 | BF16 | blk.4.attn_kv_a_mqa.weight
70: 16777216 | 512, 32768, 1, 1 | BF16 | blk.4.attn_kv_b.weight
71: 8388608 | 128, 65536, 1, 1 | BF16 | blk.4.attn_k_b.weight
72: 8388608 | 512, 16384, 1, 1 | BF16 | blk.4.attn_v_b.weight
73: 117440512 | 16384, 7168, 1, 1 | BF16 | blk.4.attn_output.weight
74: 1536 | 1536, 1, 1, 1 | F32 | blk.4.attn_q_a_norm.weight
75: 11010048 | 7168, 1536, 1, 1 | BF16 | blk.4.attn_q_a.weight
76: 37748736 | 1536, 24576, 1, 1 | BF16 | blk.4.attn_q_b.weight
77: 7168 | 7168, 1, 1, 1 | F32 | blk.4.attn_norm.weight
78: 3758096384 | 2048, 7168, 256, 1 | BF16 | blk.4.ffn_down_exps.weight
79: 3758096384 | 7168, 2048, 256, 1 | BF16 | blk.4.ffn_gate_exps.weight
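
Matching metadata doesn't by itself show that the two casting paths wrote the same bytes, so a follow-up check could hash each tensor's raw data in both splits. Again a hedged sketch with placeholder paths:

```python
# Sketch: hash the raw bf16 payload of every tensor in the first split from each path
# and report whether they are bit-identical. Placeholder paths, not the actual files.
import hashlib
from gguf import GGUFReader

def tensor_hashes(path: str) -> dict:
    reader = GGUFReader(path)
    return {t.name: hashlib.sha256(t.data.tobytes()).hexdigest() for t in reader.tensors}

a = tensor_hashes("evshiron-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")
b = tensor_hashes("mainline-bf16/DeepSeek-R1-0528-00001-of-00030.gguf")

if a == b:
    print("all 79 tensors in this split are bit-identical")
else:
    for name in a:
        if a[name] != b.get(name):
            print(f"differs: {name}")
```
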