Creating Sailor2 imatrix for llama.cpp quantization
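The transcript below covers three steps: cloning the model from Hugging Face, converting it to a bf16 GGUF with llama.cpp's convert_hf_to_gguf.py, and quantizing the result to Q4_K_M with llama-quantize.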
git clone https://huggingface.co/sail/Sailor2-1B-Chat
Cloning into 'Sailor2-1B-Chat'...
remote: Enumerating objects: 39, done.
remote: Counting objects: 100% (36/36), done.
remote: Compressing objects: 100% (36/36), done.
remote: Total 39 (delta 14), reused 0 (delta 0), pack-reused 3 (from 1)
Unpacking objects: 100% (39/39), 2.02 MiB | 2.26 MiB/s, done.
Filtering content: 100% (2/2), 1.85 GiB | 14.92 MiB/s, done.
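The "Filtering content" line above is Git LFS pulling the 1.85 GiB of weights. convert_hf_to_gguf.py ships with llama.cpp; a minimal setup sketch, assuming a llama.cpp checkout (the pip command and export are assumptions, not part of the original log; $HF is the directory holding the clone, matching the output paths later in the log):

# sketch: install the converter's Python dependencies from a llama.cpp checkout
pip install -r llama.cpp/requirements.txt
export HF=/Users/Shared/Public/huggingface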
./convert_hf_to_gguf.py --outfile $HF/Sailor2-1B-Chat_bf16.gguf --outtype bf16 $HF/Sailor2-1B-Chat
INFO:hf-to-gguf:Loading model: Sailor2-1B-Chat
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'
INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {896, 151936}
INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {896, 151936}
INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}
INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}
INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}
INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:blk.0.attn_k.bias, torch.bfloat16 --> F32, shape = {128}
INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}
INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}
INFO:hf-to-gguf:blk.0.attn_q.bias, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}
INFO:hf-to-gguf:blk.0.attn_v.bias, torch.bfloat16 --> F32, shape = {128}
INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}
[... the same 12 tensor lines repeat for blk.1 through blk.47 ...]
INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:Set meta model
INFO:hf-to-gguf:Set model parameters
INFO:hf-to-gguf:gguf: context length = 32768
INFO:hf-to-gguf:gguf: embedding length = 896
INFO:hf-to-gguf:gguf: feed forward length = 4864
INFO:hf-to-gguf:gguf: head count = 14
INFO:hf-to-gguf:gguf: key-value head count = 2
INFO:hf-to-gguf:gguf: rope theta = 1000000.0
INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06
INFO:hf-to-gguf:gguf: file type = 32
INFO:hf-to-gguf:Set model tokenizer
INFO:gguf.vocab:Adding 151387 merge(s).
INFO:gguf.vocab:Setting special token type eos to 151645
INFO:gguf.vocab:Setting special token type pad to 151643
INFO:gguf.vocab:Setting special token type bos to 151643
INFO:gguf.vocab:Setting add_bos_token to False
INFO:gguf.vocab:Setting chat_template to {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
You are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>
' }}{% endif %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}
INFO:hf-to-gguf:Set model quantization version
INFO:gguf.gguf_writer:Writing the following files:
INFO:gguf.gguf_writer:/Users/Shared/Public/huggingface/Sailor2-1B-Chat_bf16.gguf: n_tensors = 579, total_size = 2.0G
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Writing: 100%|██████████████████████████████████████| 1.98G/1.98G [00:03<00:00, 515Mbyte/s]
INFO:hf-to-gguf:Model successfully exported to /Users/Shared/Public/huggingface/Sailor2-1B-Chat_bf16.gguf
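The gist's title refers to an importance matrix (imatrix), which llama.cpp computes from the full-precision GGUF with its llama-imatrix tool before quantizing. The captured log goes straight to llama-quantize without passing --imatrix, so the step below is only a sketch; calibration.txt stands in for whatever calibration corpus is used and is not from the original log:

# sketch: compute an importance matrix over a calibration corpus
llama-imatrix -m $HF/Sailor2-1B-Chat_bf16.gguf -f calibration.txt -o Sailor2-1B-Chat.imatrix
# the result would then be supplied to the quantizer:
# llama-quantize --imatrix Sailor2-1B-Chat.imatrix Sailor2-1B-Chat_bf16.gguf ./Sailor2-1B-Chat_Q4_K_M.gguf Q4_K_M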
llama-quantize Sailor2-1B-Chat_bf16.gguf ./Sailor2-1B-Chat_Q4_K_M.gguf Q4_K_M
main: build = 4419 (46e3556e)
main: built with Apple clang version 16.0.0 (clang-1600.0.26.4) for arm64-apple-darwin24.1.0
main: quantizing 'Sailor2-1B-Chat_bf16.gguf' to './Sailor2-1B-Chat_Q4_K_M.gguf' as Q4_K_M
llama_model_loader: loaded meta data with 33 key-value pairs and 579 tensors from Sailor2-1B-Chat_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = qwen2
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Sailor2 1B Chat
llama_model_loader: - kv 3: general.finetune str = Chat
llama_model_loader: - kv 4: general.basename str = Sailor2
llama_model_loader: - kv 5: general.size_label str = 1B
llama_model_loader: - kv 6: general.license str = apache-2.0
llama_model_loader: - kv 7: general.base_model.count u32 = 1
llama_model_loader: - kv 8: general.base_model.0.name str = Sailor2 1B
llama_model_loader: - kv 9: general.base_model.0.organization str = Sail
llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/sail/Sailor2-1B
llama_model_loader: - kv 11: general.tags arr[str,6] = ["multilingual", "sea", "sailor", "sf...
llama_model_loader: - kv 12: general.languages arr[str,12] = ["en", "zh", "id", "th", "vi", "ms", ...
llama_model_loader: - kv 13: qwen2.block_count u32 = 48
llama_model_loader: - kv 14: qwen2.context_length u32 = 32768
llama_model_loader: - kv 15: qwen2.embedding_length u32 = 896
llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 4864
llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 14
llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2
llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000
llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 21: general.file_type u32 = 32
llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2
llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645
llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643
llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643
llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 31: tokenizer.chat_template str = {% for message in messages %}{% if lo...
llama_model_loader: - kv 32: general.quantization_version u32 = 2
llama_model_loader: - type f32: 241 tensors
llama_model_loader: - type bf16: 338 tensors
[ 1/ 579] output.weight - [ 896, 151936, 1, 1], type = bf16, converting to q8_0 .. size = 259.66 MiB -> 137.94 MiB
[ 2/ 579] output_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB
[ 3/ 579] token_embd.weight - [ 896, 151936, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 896 x 151936 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 259.66 MiB -> 89.26 MiB
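Why the fallback: k-quants such as q4_K and q6_K pack each tensor row into super-blocks of 256 weights, so the row length must be divisible by 256. This model's embedding width is 896 = 3 × 256 + 128, so llama-quantize substitutes the nearest formats without that constraint (q5_0, or q8_0 where q6_K was requested), as the messages below show.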
[ 4/ 579] blk.0.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 5/ 579] blk.0.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 6/ 579] blk.0.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 7/ 579] blk.0.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 8/ 579] blk.0.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 9/ 579] blk.0.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 10/ 579] blk.0.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 11/ 579] blk.0.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 12/ 579] blk.0.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 13/ 579] blk.0.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 14/ 579] blk.0.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 15/ 579] blk.0.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
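Note: the per-layer mix is consistent with a Q4_K_M recipe, where llama.cpp deliberately spends extra bits on attn_v and ffn_down in some layers. In the blocks below, those two tensors target q6_K (attn_v then falls back to q8_0 for the row-width reason above) in blocks 0-5 and every third block after that (8, 11, 14, ...), and target q4_K elsewhere. That pattern matches the use_more_bits() layer rule in llama.cpp's quantizer; a hedged reconstruction for the 48 layers here (verify against your llama.cpp checkout):

def use_more_bits(i_layer: int, n_layer: int) -> bool:
    # extra bits for the first and last n/8 layers, plus every 3rd in between
    return (i_layer < n_layer // 8
            or i_layer >= 7 * n_layer // 8
            or (i_layer - n_layer // 8) % 3 == 2)

print([i for i in range(48) if use_more_bits(i, 48)])
# -> [0, 1, 2, 3, 4, 5, 8, 11, 14, ..., 41, 42, 43, 44, 45, 46, 47]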
[ 16/ 579] blk.1.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 17/ 579] blk.1.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 18/ 579] blk.1.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 19/ 579] blk.1.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 20/ 579] blk.1.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 21/ 579] blk.1.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 22/ 579] blk.1.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 23/ 579] blk.1.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 24/ 579] blk.1.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 25/ 579] blk.1.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 26/ 579] blk.1.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 27/ 579] blk.1.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
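Note: the MiB figures can be sanity-checked from the standard ggml block layouts: bf16 is 2 bytes/weight, q5_0 stores 32 weights in 22 bytes, q8_0 stores 32 in 34 bytes, q4_K stores 256 in 144 bytes, and q6_K stores 256 in 210 bytes. A quick check that reproduces the sizes logged above:

BYTES_PER_WEIGHT = {"bf16": 2.0, "q5_0": 22/32, "q8_0": 34/32,
                    "q4_K": 144/256, "q6_K": 210/256}

def mib(n_weights, fmt):
    return n_weights * BYTES_PER_WEIGHT[fmt] / 2**20

n = 896 * 4864                                     # ffn tensors
print(f"{mib(n, 'bf16'):.2f} -> {mib(n, 'q6_K'):.2f} MiB")   # 8.31 -> 3.41
print(f"{mib(n, 'bf16'):.2f} -> {mib(n, 'q5_0'):.2f} MiB")   # 8.31 -> 2.86
n = 896 * 151936                                   # token_embd / output
print(f"{mib(n, 'bf16'):.2f} -> {mib(n, 'q8_0'):.2f} MiB")   # 259.66 -> 137.94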
[ 28/ 579] blk.2.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 29/ 579] blk.2.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 30/ 579] blk.2.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 31/ 579] blk.2.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 32/ 579] blk.2.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 33/ 579] blk.2.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 34/ 579] blk.2.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 35/ 579] blk.2.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 36/ 579] blk.2.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 37/ 579] blk.2.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 38/ 579] blk.2.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 39/ 579] blk.2.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 40/ 579] blk.3.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 41/ 579] blk.3.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 42/ 579] blk.3.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 43/ 579] blk.3.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 44/ 579] blk.3.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 45/ 579] blk.3.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 46/ 579] blk.3.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 47/ 579] blk.3.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 48/ 579] blk.3.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 49/ 579] blk.3.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 50/ 579] blk.3.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 51/ 579] blk.3.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 52/ 579] blk.4.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 53/ 579] blk.4.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 54/ 579] blk.4.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 55/ 579] blk.4.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 56/ 579] blk.4.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 57/ 579] blk.4.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 58/ 579] blk.4.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 59/ 579] blk.4.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 60/ 579] blk.4.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 61/ 579] blk.4.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 62/ 579] blk.4.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 63/ 579] blk.4.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 64/ 579] blk.5.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 65/ 579] blk.5.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 66/ 579] blk.5.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 67/ 579] blk.5.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 68/ 579] blk.5.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 69/ 579] blk.5.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 70/ 579] blk.5.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 71/ 579] blk.5.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 72/ 579] blk.5.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 73/ 579] blk.5.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 74/ 579] blk.5.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 75/ 579] blk.5.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 76/ 579] blk.6.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 77/ 579] blk.6.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 78/ 579] blk.6.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 79/ 579] blk.6.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 80/ 579] blk.6.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 81/ 579] blk.6.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 82/ 579] blk.6.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 83/ 579] blk.6.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 84/ 579] blk.6.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 85/ 579] blk.6.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 86/ 579] blk.6.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 87/ 579] blk.6.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 88/ 579] blk.7.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 89/ 579] blk.7.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 90/ 579] blk.7.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 91/ 579] blk.7.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 92/ 579] blk.7.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 93/ 579] blk.7.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 94/ 579] blk.7.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 95/ 579] blk.7.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 96/ 579] blk.7.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 97/ 579] blk.7.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 98/ 579] blk.7.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 99/ 579] blk.7.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 100/ 579] blk.8.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 101/ 579] blk.8.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 102/ 579] blk.8.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 103/ 579] blk.8.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 104/ 579] blk.8.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 105/ 579] blk.8.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 106/ 579] blk.8.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 107/ 579] blk.8.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 108/ 579] blk.8.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 109/ 579] blk.8.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 110/ 579] blk.8.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 111/ 579] blk.8.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 112/ 579] blk.9.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 113/ 579] blk.9.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 114/ 579] blk.9.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 115/ 579] blk.9.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 116/ 579] blk.9.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 117/ 579] blk.9.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 118/ 579] blk.9.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 119/ 579] blk.9.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 120/ 579] blk.9.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 121/ 579] blk.9.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 122/ 579] blk.9.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 123/ 579] blk.9.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 124/ 579] blk.10.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 125/ 579] blk.10.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 126/ 579] blk.10.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 127/ 579] blk.10.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 128/ 579] blk.10.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 129/ 579] blk.10.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 130/ 579] blk.10.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 131/ 579] blk.10.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 132/ 579] blk.10.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 133/ 579] blk.10.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 134/ 579] blk.10.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 135/ 579] blk.10.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 136/ 579] blk.11.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 137/ 579] blk.11.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 138/ 579] blk.11.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 139/ 579] blk.11.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 140/ 579] blk.11.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 141/ 579] blk.11.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 142/ 579] blk.11.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 143/ 579] blk.11.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 144/ 579] blk.11.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 145/ 579] blk.11.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 146/ 579] blk.11.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 147/ 579] blk.11.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 148/ 579] blk.12.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 149/ 579] blk.12.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 150/ 579] blk.12.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 151/ 579] blk.12.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 152/ 579] blk.12.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 153/ 579] blk.12.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 154/ 579] blk.12.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 155/ 579] blk.12.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 156/ 579] blk.12.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 157/ 579] blk.12.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 158/ 579] blk.12.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 159/ 579] blk.12.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 160/ 579] blk.13.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 161/ 579] blk.13.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 162/ 579] blk.13.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 163/ 579] blk.13.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 164/ 579] blk.13.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 165/ 579] blk.13.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 166/ 579] blk.13.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 167/ 579] blk.13.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 168/ 579] blk.13.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 169/ 579] blk.13.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 170/ 579] blk.13.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 171/ 579] blk.13.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 172/ 579] blk.14.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 173/ 579] blk.14.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 174/ 579] blk.14.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 175/ 579] blk.14.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 176/ 579] blk.14.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 177/ 579] blk.14.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 178/ 579] blk.14.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 179/ 579] blk.14.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 180/ 579] blk.14.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 181/ 579] blk.14.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 182/ 579] blk.14.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 183/ 579] blk.14.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 184/ 579] blk.15.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 185/ 579] blk.15.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 186/ 579] blk.15.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 187/ 579] blk.15.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 188/ 579] blk.15.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 189/ 579] blk.15.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 190/ 579] blk.15.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 191/ 579] blk.15.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 192/ 579] blk.15.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 193/ 579] blk.15.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 194/ 579] blk.15.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 195/ 579] blk.15.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 196/ 579] blk.16.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 197/ 579] blk.16.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 198/ 579] blk.16.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 199/ 579] blk.16.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 200/ 579] blk.16.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 201/ 579] blk.16.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 202/ 579] blk.16.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 203/ 579] blk.16.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 204/ 579] blk.16.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 205/ 579] blk.16.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 206/ 579] blk.16.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 207/ 579] blk.16.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 208/ 579] blk.17.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 209/ 579] blk.17.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 210/ 579] blk.17.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 211/ 579] blk.17.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 212/ 579] blk.17.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 213/ 579] blk.17.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 214/ 579] blk.17.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 215/ 579] blk.17.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 216/ 579] blk.17.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 217/ 579] blk.17.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 218/ 579] blk.17.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 219/ 579] blk.17.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 220/ 579] blk.18.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 221/ 579] blk.18.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 222/ 579] blk.18.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 223/ 579] blk.18.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 224/ 579] blk.18.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 225/ 579] blk.18.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 226/ 579] blk.18.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 227/ 579] blk.18.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 228/ 579] blk.18.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 229/ 579] blk.18.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 230/ 579] blk.18.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 231/ 579] blk.18.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 232/ 579] blk.19.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 233/ 579] blk.19.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 234/ 579] blk.19.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 235/ 579] blk.19.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 236/ 579] blk.19.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 237/ 579] blk.19.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 238/ 579] blk.19.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 239/ 579] blk.19.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 240/ 579] blk.19.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 241/ 579] blk.19.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 242/ 579] blk.19.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 243/ 579] blk.19.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 244/ 579] blk.20.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 245/ 579] blk.20.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 246/ 579] blk.20.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 247/ 579] blk.20.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 248/ 579] blk.20.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 249/ 579] blk.20.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 250/ 579] blk.20.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 251/ 579] blk.20.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 252/ 579] blk.20.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 253/ 579] blk.20.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 254/ 579] blk.20.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 255/ 579] blk.20.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 256/ 579] blk.21.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 257/ 579] blk.21.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 258/ 579] blk.21.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 259/ 579] blk.21.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 260/ 579] blk.21.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 261/ 579] blk.21.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 262/ 579] blk.21.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 263/ 579] blk.21.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 264/ 579] blk.21.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 265/ 579] blk.21.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 266/ 579] blk.21.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 267/ 579] blk.21.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 268/ 579] blk.22.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 269/ 579] blk.22.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 270/ 579] blk.22.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 271/ 579] blk.22.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 272/ 579] blk.22.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 273/ 579] blk.22.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 274/ 579] blk.22.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 275/ 579] blk.22.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 276/ 579] blk.22.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 277/ 579] blk.22.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 278/ 579] blk.22.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 279/ 579] blk.22.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 280/ 579] blk.23.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 281/ 579] blk.23.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 282/ 579] blk.23.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 283/ 579] blk.23.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 284/ 579] blk.23.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 285/ 579] blk.23.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 286/ 579] blk.23.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 287/ 579] blk.23.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 288/ 579] blk.23.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 289/ 579] blk.23.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 290/ 579] blk.23.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 291/ 579] blk.23.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 292/ 579] blk.24.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 293/ 579] blk.24.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 294/ 579] blk.24.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 295/ 579] blk.24.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 296/ 579] blk.24.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 297/ 579] blk.24.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 298/ 579] blk.24.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 299/ 579] blk.24.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 300/ 579] blk.24.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 301/ 579] blk.24.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 302/ 579] blk.24.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 303/ 579] blk.24.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 304/ 579] blk.25.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 305/ 579] blk.25.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 306/ 579] blk.25.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 307/ 579] blk.25.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 308/ 579] blk.25.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 309/ 579] blk.25.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 310/ 579] blk.25.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 311/ 579] blk.25.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 312/ 579] blk.25.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 313/ 579] blk.25.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 314/ 579] blk.25.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 315/ 579] blk.25.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 316/ 579] blk.26.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 317/ 579] blk.26.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 318/ 579] blk.26.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 319/ 579] blk.26.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 320/ 579] blk.26.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 321/ 579] blk.26.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 322/ 579] blk.26.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 323/ 579] blk.26.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 324/ 579] blk.26.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 325/ 579] blk.26.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 326/ 579] blk.26.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 327/ 579] blk.26.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 328/ 579] blk.27.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 329/ 579] blk.27.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 330/ 579] blk.27.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 331/ 579] blk.27.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 332/ 579] blk.27.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 333/ 579] blk.27.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 334/ 579] blk.27.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 335/ 579] blk.27.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 336/ 579] blk.27.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 337/ 579] blk.27.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 338/ 579] blk.27.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 339/ 579] blk.27.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 340/ 579] blk.28.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 341/ 579] blk.28.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 342/ 579] blk.28.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 343/ 579] blk.28.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 344/ 579] blk.28.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 345/ 579] blk.28.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 346/ 579] blk.28.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 347/ 579] blk.28.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 348/ 579] blk.28.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 349/ 579] blk.28.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 350/ 579] blk.28.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 351/ 579] blk.28.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 352/ 579] blk.29.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 353/ 579] blk.29.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 354/ 579] blk.29.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 355/ 579] blk.29.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 356/ 579] blk.29.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 357/ 579] blk.29.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 358/ 579] blk.29.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 359/ 579] blk.29.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 360/ 579] blk.29.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 361/ 579] blk.29.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 362/ 579] blk.29.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 363/ 579] blk.29.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 364/ 579] blk.30.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 365/ 579] blk.30.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 366/ 579] blk.30.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 367/ 579] blk.30.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 368/ 579] blk.30.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 369/ 579] blk.30.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 370/ 579] blk.30.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 371/ 579] blk.30.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 372/ 579] blk.30.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 373/ 579] blk.30.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 374/ 579] blk.30.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 375/ 579] blk.30.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 376/ 579] blk.31.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 377/ 579] blk.31.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 378/ 579] blk.31.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 379/ 579] blk.31.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 380/ 579] blk.31.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 381/ 579] blk.31.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 382/ 579] blk.31.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 383/ 579] blk.31.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 384/ 579] blk.31.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 385/ 579] blk.31.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 386/ 579] blk.31.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 387/ 579] blk.31.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 388/ 579] blk.32.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 389/ 579] blk.32.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 390/ 579] blk.32.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 391/ 579] blk.32.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 392/ 579] blk.32.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 393/ 579] blk.32.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 394/ 579] blk.32.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 395/ 579] blk.32.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 396/ 579] blk.32.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 397/ 579] blk.32.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 398/ 579] blk.32.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 399/ 579] blk.32.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 400/ 579] blk.33.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 401/ 579] blk.33.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 402/ 579] blk.33.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 403/ 579] blk.33.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 404/ 579] blk.33.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 405/ 579] blk.33.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 406/ 579] blk.33.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 407/ 579] blk.33.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 408/ 579] blk.33.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 409/ 579] blk.33.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 410/ 579] blk.33.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 411/ 579] blk.33.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 412/ 579] blk.34.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 413/ 579] blk.34.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 414/ 579] blk.34.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 415/ 579] blk.34.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 416/ 579] blk.34.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 417/ 579] blk.34.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 418/ 579] blk.34.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 419/ 579] blk.34.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 420/ 579] blk.34.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 421/ 579] blk.34.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 422/ 579] blk.34.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 423/ 579] blk.34.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 424/ 579] blk.35.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 425/ 579] blk.35.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 426/ 579] blk.35.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 427/ 579] blk.35.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 428/ 579] blk.35.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 429/ 579] blk.35.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 430/ 579] blk.35.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 431/ 579] blk.35.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 432/ 579] blk.35.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 433/ 579] blk.35.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 434/ 579] blk.35.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 435/ 579] blk.35.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 436/ 579] blk.36.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 437/ 579] blk.36.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 438/ 579] blk.36.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 439/ 579] blk.36.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 440/ 579] blk.36.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 441/ 579] blk.36.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 442/ 579] blk.36.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 443/ 579] blk.36.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 444/ 579] blk.36.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 445/ 579] blk.36.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 446/ 579] blk.36.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 447/ 579] blk.36.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 448/ 579] blk.37.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 449/ 579] blk.37.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 450/ 579] blk.37.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 451/ 579] blk.37.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 452/ 579] blk.37.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 453/ 579] blk.37.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 454/ 579] blk.37.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 455/ 579] blk.37.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 456/ 579] blk.37.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 457/ 579] blk.37.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 458/ 579] blk.37.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 459/ 579] blk.37.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 460/ 579] blk.38.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 461/ 579] blk.38.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 462/ 579] blk.38.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 463/ 579] blk.38.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 464/ 579] blk.38.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 465/ 579] blk.38.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 466/ 579] blk.38.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 467/ 579] blk.38.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 468/ 579] blk.38.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 469/ 579] blk.38.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 470/ 579] blk.38.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 471/ 579] blk.38.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 472/ 579] blk.39.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 473/ 579] blk.39.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 474/ 579] blk.39.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 475/ 579] blk.39.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 476/ 579] blk.39.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 477/ 579] blk.39.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 478/ 579] blk.39.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 479/ 579] blk.39.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 480/ 579] blk.39.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 481/ 579] blk.39.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 482/ 579] blk.39.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 483/ 579] blk.39.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 484/ 579] blk.40.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 485/ 579] blk.40.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 486/ 579] blk.40.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 487/ 579] blk.40.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 488/ 579] blk.40.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 489/ 579] blk.40.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 490/ 579] blk.40.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 491/ 579] blk.40.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 492/ 579] blk.40.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 493/ 579] blk.40.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 494/ 579] blk.40.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 495/ 579] blk.40.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 496/ 579] blk.41.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 497/ 579] blk.41.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 498/ 579] blk.41.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 499/ 579] blk.41.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 500/ 579] blk.41.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 501/ 579] blk.41.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 502/ 579] blk.41.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 503/ 579] blk.41.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 504/ 579] blk.41.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 505/ 579] blk.41.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 506/ 579] blk.41.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 507/ 579] blk.41.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 508/ 579] blk.42.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 509/ 579] blk.42.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 510/ 579] blk.42.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 511/ 579] blk.42.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 512/ 579] blk.42.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 513/ 579] blk.42.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 514/ 579] blk.42.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 515/ 579] blk.42.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 516/ 579] blk.42.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 517/ 579] blk.42.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 518/ 579] blk.42.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 519/ 579] blk.42.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 520/ 579] blk.43.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 521/ 579] blk.43.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 522/ 579] blk.43.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 523/ 579] blk.43.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 524/ 579] blk.43.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 525/ 579] blk.43.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 526/ 579] blk.43.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 527/ 579] blk.43.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 528/ 579] blk.43.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 529/ 579] blk.43.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 530/ 579] blk.43.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 531/ 579] blk.43.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 532/ 579] blk.44.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 533/ 579] blk.44.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 534/ 579] blk.44.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 535/ 579] blk.44.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 536/ 579] blk.44.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 537/ 579] blk.44.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 538/ 579] blk.44.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 539/ 579] blk.44.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 540/ 579] blk.44.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 541/ 579] blk.44.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 542/ 579] blk.44.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 543/ 579] blk.44.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 544/ 579] blk.45.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 545/ 579] blk.45.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 546/ 579] blk.45.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 547/ 579] blk.45.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 548/ 579] blk.45.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 549/ 579] blk.45.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 550/ 579] blk.45.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 551/ 579] blk.45.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 552/ 579] blk.45.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 553/ 579] blk.45.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 554/ 579] blk.45.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 555/ 579] blk.45.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 556/ 579] blk.46.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 557/ 579] blk.46.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 558/ 579] blk.46.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 559/ 579] blk.46.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 560/ 579] blk.46.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 561/ 579] blk.46.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 562/ 579] blk.46.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 563/ 579] blk.46.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 564/ 579] blk.46.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 565/ 579] blk.46.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 566/ 579] blk.46.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 567/ 579] blk.46.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 568/ 579] blk.47.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 569/ 579] blk.47.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 570/ 579] blk.47.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 571/ 579] blk.47.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 572/ 579] blk.47.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 573/ 579] blk.47.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 574/ 579] blk.47.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 575/ 579] blk.47.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 576/ 579] blk.47.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 577/ 579] blk.47.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 578/ 579] blk.47.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 579/ 579] blk.47.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
llama_model_quantize_internal: model size = 1884.85 MB | |
llama_model_quantize_internal: quant size = 698.72 MB | |
llama_model_quantize_internal: WARNING: 289 of 337 tensor(s) required fallback quantization | |
main: quantize time = 1814.01 ms | |
main: total time = 1814.01 ms |
uv run src/imatrix_dataset.py \ | |
--output $HF/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/calibration-dataset.txt \ | |
--datasource-plugin $HF/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/sea_commoncrawl_high_quality_plugin.py --plugin-class SeaCommonCrawlHQDataSource \ | |
--model $HF/Sailor2-1B-Chat/Sailor2-1B-Chat_Q4_K_M.gguf \ | |
--config $HF/Sailor2-1B-Chat/README.md --url sailor2/sea-commoncrawl-high-quality --num-samples 500 --shuffle --chunk-size 32768 | |
2025-01-06 10:21:16,204 - INFO - Loaded languages: ['en', 'zh', 'id', 'th', 'vi', 'ms', 'lo', 'my', 'jv', 'km', 'su', 'tl'] | |
2025-01-06 10:21:16,444 - INFO - PyTorch version 2.5.1 available. | |
2025-01-06 10:21:16,519 - INFO - Successfully loaded plugin class SeaCommonCrawlHQDataSource from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/sea_commoncrawl_high_quality_plugin.py. | |
2025-01-06 10:21:16,519 - INFO - Downloading 500 samples for en, skipping the first 0 entries. | |
2025-01-06 10:21:16,519 - ERROR - Unsupported language 'en'. Available languages: ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war'] | |
2025-01-06 10:21:16,520 - INFO - Appended 0 entries to raw_transactions_en.json. | |
2025-01-06 10:21:16,520 - INFO - Downloading 500 samples for zh, skipping the first 0 entries. | |
2025-01-06 10:21:16,520 - ERROR - Unsupported language 'zh'. Available languages: ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war'] | |
2025-01-06 10:21:16,520 - INFO - Appended 0 entries to raw_transactions_zh.json. | |
2025-01-06 10:21:16,525 - INFO - raw_transactions_id.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,534 - INFO - raw_transactions_th.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,546 - INFO - raw_transactions_vi.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,553 - INFO - Downloading 500 samples for ms, skipping the first 0 entries. | |
2025-01-06 10:21:16,553 - ERROR - Unsupported language 'ms'. Available languages: ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war'] | |
2025-01-06 10:21:16,553 - INFO - Appended 0 entries to raw_transactions_ms.json. | |
2025-01-06 10:21:16,561 - INFO - raw_transactions_lo.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,577 - INFO - raw_transactions_my.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,592 - INFO - raw_transactions_jv.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,614 - INFO - raw_transactions_km.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,624 - INFO - raw_transactions_su.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,632 - INFO - raw_transactions_tl.json already contains enough samples (500), skipping API call. | |
llama_model_loader: loaded meta data with 33 key-value pairs and 579 tensors from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/Sailor2-1B-Chat_Q4_K_M.gguf (version GGUF V3 (latest)) | |
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. | |
llama_model_loader: - kv 0: general.architecture str = qwen2 | |
llama_model_loader: - kv 1: general.type str = model | |
llama_model_loader: - kv 2: general.name str = Sailor2 1B Chat | |
llama_model_loader: - kv 3: general.finetune str = Chat | |
llama_model_loader: - kv 4: general.basename str = Sailor2 | |
llama_model_loader: - kv 5: general.size_label str = 1B | |
llama_model_loader: - kv 6: general.license str = apache-2.0 | |
llama_model_loader: - kv 7: general.base_model.count u32 = 1 | |
llama_model_loader: - kv 8: general.base_model.0.name str = Sailor2 1B | |
llama_model_loader: - kv 9: general.base_model.0.organization str = Sail | |
llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/sail/Sailor2-1B | |
llama_model_loader: - kv 11: general.tags arr[str,6] = ["multilingual", "sea", "sailor", "sf... | |
llama_model_loader: - kv 12: general.languages arr[str,12] = ["en", "zh", "id", "th", "vi", "ms", ... | |
llama_model_loader: - kv 13: qwen2.block_count u32 = 48 | |
llama_model_loader: - kv 14: qwen2.context_length u32 = 32768 | |
llama_model_loader: - kv 15: qwen2.embedding_length u32 = 896 | |
llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 4864 | |
llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 14 | |
llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2 | |
llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000 | |
llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001 | |
llama_model_loader: - kv 21: general.file_type u32 = 15 | |
llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2 | |
llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2 | |
llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ... | |
llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... | |
llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... | |
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645 | |
llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643 | |
llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643 | |
llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false | |
llama_model_loader: - kv 31: tokenizer.chat_template str = {% for message in messages %}{% if lo... | |
llama_model_loader: - kv 32: general.quantization_version u32 = 2 | |
llama_model_loader: - type f32: 241 tensors | |
llama_model_loader: - type q5_0: 265 tensors | |
llama_model_loader: - type q8_0: 24 tensors | |
llama_model_loader: - type q4_K: 24 tensors | |
llama_model_loader: - type q6_K: 24 tensors | |
llama_model_loader: - type bf16: 1 tensors | |
llm_load_vocab: special tokens cache size = 22 | |
llm_load_vocab: token to piece cache size = 0.9310 MB | |
llm_load_print_meta: format = GGUF V3 (latest) | |
llm_load_print_meta: arch = qwen2 | |
llm_load_print_meta: vocab type = BPE | |
llm_load_print_meta: n_vocab = 151936 | |
llm_load_print_meta: n_merges = 151387 | |
llm_load_print_meta: vocab_only = 0 | |
llm_load_print_meta: n_ctx_train = 32768 | |
llm_load_print_meta: n_embd = 896 | |
llm_load_print_meta: n_layer = 48 | |
llm_load_print_meta: n_head = 14 | |
llm_load_print_meta: n_head_kv = 2 | |
llm_load_print_meta: n_rot = 64 | |
llm_load_print_meta: n_swa = 0 | |
llm_load_print_meta: n_embd_head_k = 64 | |
llm_load_print_meta: n_embd_head_v = 64 | |
llm_load_print_meta: n_gqa = 7 | |
llm_load_print_meta: n_embd_k_gqa = 128 | |
llm_load_print_meta: n_embd_v_gqa = 128 | |
llm_load_print_meta: f_norm_eps = 0.0e+00 | |
llm_load_print_meta: f_norm_rms_eps = 1.0e-06 | |
llm_load_print_meta: f_clamp_kqv = 0.0e+00 | |
llm_load_print_meta: f_max_alibi_bias = 0.0e+00 | |
llm_load_print_meta: f_logit_scale = 0.0e+00 | |
llm_load_print_meta: n_ff = 4864 | |
llm_load_print_meta: n_expert = 0 | |
llm_load_print_meta: n_expert_used = 0 | |
llm_load_print_meta: causal attn = 1 | |
llm_load_print_meta: pooling type = 0 | |
llm_load_print_meta: rope type = 2 | |
llm_load_print_meta: rope scaling = linear | |
llm_load_print_meta: freq_base_train = 1000000.0 | |
llm_load_print_meta: freq_scale_train = 1 | |
llm_load_print_meta: n_ctx_orig_yarn = 32768 | |
llm_load_print_meta: rope_finetuned = unknown | |
llm_load_print_meta: ssm_d_conv = 0 | |
llm_load_print_meta: ssm_d_inner = 0 | |
llm_load_print_meta: ssm_d_state = 0 | |
llm_load_print_meta: ssm_dt_rank = 0 | |
llm_load_print_meta: ssm_dt_b_c_rms = 0 | |
llm_load_print_meta: model type = ?B | |
llm_load_print_meta: model ftype = Q4_K - Medium | |
llm_load_print_meta: model params = 988.06 M | |
llm_load_print_meta: model size = 820.44 MiB (6.97 BPW) | |
llm_load_print_meta: general.name = Sailor2 1B Chat | |
llm_load_print_meta: BOS token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOS token = 151645 '<|im_end|>' | |
llm_load_print_meta: PAD token = 151643 '<|endoftext|>' | |
llm_load_print_meta: LF token = 148848 'ÄĬ' | |
llm_load_print_meta: EOT token = 151645 '<|im_end|>' | |
llm_load_print_meta: EOG token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOG token = 151645 '<|im_end|>' | |
llm_load_print_meta: max token length = 256 | |
llm_load_tensors: ggml ctx size = 0.25 MiB | |
llm_load_tensors: offloading 0 repeating layers to GPU | |
llm_load_tensors: offloaded 0/49 layers to GPU | |
llm_load_tensors: CPU buffer size = 820.44 MiB | |
............................................................ | |
llama_new_context_with_model: n_ctx = 512 | |
llama_new_context_with_model: n_batch = 512 | |
llama_new_context_with_model: n_ubatch = 512 | |
llama_new_context_with_model: flash_attn = 0 | |
llama_new_context_with_model: freq_base = 1000000.0 | |
llama_new_context_with_model: freq_scale = 1 | |
llama_kv_cache_init: CPU KV buffer size = 12.00 MiB | |
llama_new_context_with_model: KV self size = 12.00 MiB, K (f16): 6.00 MiB, V (f16): 6.00 MiB | |
llama_new_context_with_model: CPU output buffer size = 0.58 MiB | |
llama_new_context_with_model: CPU compute buffer size = 298.50 MiB | |
llama_new_context_with_model: graph nodes = 1686 | |
llama_new_context_with_model: graph splits = 770 | |
AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 0 | NEON = 1 | SVE = 0 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | RISCV_VECT = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 1 | LLAMAFILE = 1 | | |
Model metadata: {'tokenizer.chat_template': "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", 'tokenizer.ggml.add_bos_token': 'false', 'tokenizer.ggml.padding_token_id': '151643', 'tokenizer.ggml.eos_token_id': '151645', 'qwen2.attention.layer_norm_rms_epsilon': '0.000001', 'general.basename': 'Sailor2', 'qwen2.attention.head_count_kv': '2', 'general.size_label': '1B', 'general.base_model.0.name': 'Sailor2 1B', 'qwen2.embedding_length': '896', 'qwen2.context_length': '32768', 'qwen2.block_count': '48', 'general.base_model.0.organization': 'Sail', 'tokenizer.ggml.pre': 'qwen2', 'general.base_model.count': '1', 'qwen2.rope.freq_base': '1000000.000000', 'general.quantization_version': '2', 'general.license': 'apache-2.0', 'general.base_model.0.repo_url': 'https://huggingface.co/sail/Sailor2-1B', 'general.file_type': '15', 'general.finetune': 'Chat', 'general.name': 'Sailor2 1B Chat', 'qwen2.feed_forward_length': '4864', 'general.architecture': 'qwen2', 'qwen2.attention.head_count': '14', 'tokenizer.ggml.bos_token_id': '151643', 'general.type': 'model', 'tokenizer.ggml.model': 'gpt2'} | |
Available chat formats from metadata: chat_template.default | |
Using gguf chat template: {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system | |
You are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|> | |
' }}{% endif %}{{'<|im_start|>' + message['role'] + ' | |
' + message['content'] + '<|im_end|>' + ' | |
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant | |
' }}{% endif %} | |
Using chat eos_token: <|im_end|> | |
Using chat bos_token: <|endoftext|> | |
2025-01-06 10:21:31,952 - INFO - Combined dataset with balanced chunks written to /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/calibration-dataset.txt | |
2025-01-06 10:21:31,962 - WARNING - raw_transactions_en.json contains only 0 samples, fewer than requested 500. | |
2025-01-06 10:21:31,962 - WARNING - raw_transactions_zh.json contains only 0 samples, fewer than requested 500. | |
2025-01-06 10:21:31,967 - INFO - raw_transactions_id.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,971 - INFO - raw_transactions_th.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,979 - INFO - raw_transactions_vi.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,979 - WARNING - raw_transactions_ms.json contains only 0 samples, fewer than requested 500. | |
2025-01-06 10:21:31,987 - INFO - raw_transactions_lo.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,996 - INFO - raw_transactions_my.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,002 - INFO - raw_transactions_jv.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,006 - INFO - raw_transactions_km.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,011 - INFO - raw_transactions_su.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,016 - INFO - raw_transactions_tl.json contains 500 samples, meeting the requested count. |
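Note the failure mode in this run: the sea-commoncrawl-high-quality subset does not cover 'en', 'zh', or 'ms' (its available languages are my, ceb, ilo, id, jv, km, lo, min, su, tl, th, vi, war), so those three requests produce empty raw_transactions_*.json files and the end-of-run warnings, while the combined calibration dataset is built only from the supported languages. A hedged sketch of the pre-filtering one could do before sampling (both list literals come from the log above; the plugin's real API is not shown here, so this is illustration, not the gist's actual code):

```python
# Hedged sketch (not the actual plugin API): intersect the requested
# language list with what the datasource reports as available, so
# languages like 'en', 'zh', and 'ms' are skipped up front instead of
# erroring out and leaving empty sample files behind.
requested = ['en', 'zh', 'id', 'th', 'vi', 'ms', 'lo', 'my', 'jv', 'km', 'su', 'tl']
available = ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war']

supported = [lang for lang in requested if lang in available]
skipped = [lang for lang in requested if lang not in available]
print(f"sampling: {supported}")
print(f"skipped (not in this dataset): {skipped}")
```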
uv run ../src/kl_d_bench.py \ | |
--baseline-model $HF/Sailor2-1B-Chat/Sailor2-1B-Chat_bf16.gguf \ | |
--target-model $HF/Sailor2-1B-Chat/Sailor2-1B-Chat_Q4_K_M.gguf \ | |
--dataset $HF/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt \ | |
--batch-size 4096 --ubatch-size 4096 --context-size 4096 \ | |
--n-gpu-layers 99 --seed 42 --top-p 1 --top-k 1 --temp 0 \ | |
--early-stopping --compute-overall \ | |
--kld-precision 64 --model-precision 32 --parts 4 --verbosity DEBUG | tee baseline.log | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=770, beta=63), stopping probability=0.000883 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7742) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.240241, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2688938595122996, EMA_p_value_std_dev: 0.016285304607153703 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=771, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=771, beta=63), stopping probability=0.000909 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8165) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.302724, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.26851627416008733, EMA_p_value_std_dev: 0.016133462446776332 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=772, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=772, beta=63), stopping probability=0.000936 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0698) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.284739, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2680663488687824, EMA_p_value_std_dev: 0.015973730155451085 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=773, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=773, beta=63), stopping probability=0.000964 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8095) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.293976, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2666827292876756, EMA_p_value_std_dev: 0.01581446060608855 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=774, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=774, beta=63), stopping probability=0.000992 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2047) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.271693, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2648925872407296, EMA_p_value_std_dev: 0.0156567794572488 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=775, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=775, beta=63), stopping probability=0.001021 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7742) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.276494, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.26271040401176154, EMA_p_value_std_dev: 0.015500604329960628 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=776, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=776, beta=63), stopping probability=0.001051 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8092) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.241267, p-value=0.000014 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.26067552440929165, EMA_p_value_std_dev: 0.015346046001708951 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=777, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=777, beta=63), stopping probability=0.001082 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8765) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.231344, p-value=0.000042 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2586934636533882, EMA_p_value_std_dev: 0.01519315179176191 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=778, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=778, beta=63), stopping probability=0.001114 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0934) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.315993, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2572210098362066, EMA_p_value_std_dev: 0.015041782721332775 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=779, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=779, beta=63), stopping probability=0.001146 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8585) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.309270, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.255767728756761, EMA_p_value_std_dev: 0.014891924009030642 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=780, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=780, beta=63), stopping probability=0.001179 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1832) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.288047, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2541088720318699, EMA_p_value_std_dev: 0.014743560327191102 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=781, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=781, beta=63), stopping probability=0.001213 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0072) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.298400, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.25244715671843315, EMA_p_value_std_dev: 0.014596682070996304 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=782, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=782, beta=63), stopping probability=0.001248 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9237) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.260753, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.250503843583963, EMA_p_value_std_dev: 0.014451085723711444 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=783, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=783, beta=63), stopping probability=0.001283 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9022) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.283089, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24878428755671259, EMA_p_value_std_dev: 0.014306941634020093 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=784, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=784, beta=63), stopping probability=0.001320 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8552) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.237844, p-value=0.000020 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24733916910617576, EMA_p_value_std_dev: 0.014164318940434193 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=785, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=785, beta=63), stopping probability=0.001358 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9908) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.311142, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24643247075686348, EMA_p_value_std_dev: 0.014023118944732595 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=786, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=786, beta=63), stopping probability=0.001396 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2203) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.255076, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24573709720374193, EMA_p_value_std_dev: 0.013883325035745154 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=787, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=787, beta=63), stopping probability=0.001435 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7459) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.220365, p-value=0.000139 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2451113709091207, EMA_p_value_std_dev: 0.013745440227385608 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=788, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=788, beta=63), stopping probability=0.001476 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7105) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.226885, p-value=0.000069 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24427054037170579, EMA_p_value_std_dev: 0.013608917223079888 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=789, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=789, beta=63), stopping probability=0.001517 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9023) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.241110, p-value=0.000014 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24283904748523719, EMA_p_value_std_dev: 0.013473763705463285 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=790, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=790, beta=63), stopping probability=0.001560 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7047) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.179415, p-value=0.006465 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24151596773486267, EMA_p_value_std_dev: 0.013367956510830222 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=791, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=791, beta=63), stopping probability=0.001603 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8903) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.230674, p-value=0.000046 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24069167649245243, EMA_p_value_std_dev: 0.013263155355783304 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=792, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=792, beta=63), stopping probability=0.001648 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8513) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.224114, p-value=0.000093 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2398741778403336, EMA_p_value_std_dev: 0.013159448469866318 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=793, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=793, beta=63), stopping probability=0.001693 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7172) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.207256, p-value=0.000528 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.23911171899593944, EMA_p_value_std_dev: 0.013056339978072564 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=794, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=794, beta=63), stopping probability=0.001740 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9032) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.263216, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2384097402062265, EMA_p_value_std_dev: 0.012954276250620457 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=795, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=795, beta=63), stopping probability=0.001788 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6911) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.166299, p-value=0.017993 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2378173721703721, EMA_p_value_std_dev: 0.012904608371642319 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=796, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=796, beta=63), stopping probability=0.001837 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9540) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.225186, p-value=0.000083 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2383331629303817, EMA_p_value_std_dev: 0.012855392734947896 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=797, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=797, beta=63), stopping probability=0.001887 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8705) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.230093, p-value=0.000049 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.23877735473612513, EMA_p_value_std_dev: 0.012806719320656527 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=798, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=798, beta=63), stopping probability=0.001938 | |
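Each 128-token segment's KL values are compared against the accumulated distribution with a Kuiper test, a Kolmogorov-Smirnov variant that is equally sensitive in both tails. A self-contained sketch of the two-sample Kuiper statistic (the textbook definition, offered as an assumption about what these log lines measure, not the tool's exact implementation):

import numpy as np

def kuiper_two_sample(x: np.ndarray, y: np.ndarray) -> float:
    """Two-sample Kuiper statistic V = D+ + D-: the sum of the maximal
    deviations of the two empirical CDFs in each direction."""
    grid = np.sort(np.concatenate([x, y]))
    cdf_x = np.searchsorted(np.sort(x), grid, side="right") / len(x)
    cdf_y = np.searchsorted(np.sort(y), grid, side="right") / len(y)
    return float((cdf_x - cdf_y).max() + (cdf_y - cdf_x).max())

# e.g. kuiper_two_sample(segment_kl, reference_kl) -> statistic like the
# "Kuiper statistic=0.2..." values logged above.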
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Added chunk 25 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Added chunk 25 to freed chunks list in target_logits.h5. | |
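The early-stopping lines maintain a Beta(alpha, beta) posterior: segments whose statistics satisfy the update condition increment alpha, and the stopping probability is read from the posterior tail against the logged confidence_level of 0.95. One plausible reading of that rule, stated as an assumption rather than the tool's exact formula:

from scipy.stats import beta

def stopping_probability(alpha: int, beta_: int, confidence_level: float = 0.95) -> float:
    """Posterior probability that the underlying rate exceeds the
    confidence level -- a hypothetical reconstruction of the logged rule."""
    return float(beta.sf(confidence_level, alpha, beta_))

# stopping_probability(798, 63) yields a small tail probability that grows
# as alpha is incremented, matching the trend of the values logged above.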
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Processing chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Generating logits for model, chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Processing chunks from 27 to 27 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Inference time: 5508.97 ms | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Reusing freed chunk 1 for chunk 27. | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Written chunk 27 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[27] 5966.55 ms | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Generating logits for model, chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:03 - INFO - Processing chunks from 27 to 27 | |
[llama_gguf_optmize v0.6.0] 09:58:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Inference time: 2433.17 ms | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Reusing freed chunk 1 for chunk 27. | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Written chunk 27 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[27] 2864.40 ms | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
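Both logits files stay at a constant ~4.8 GB because the runner recycles storage: once a chunk has been compared, its physical slot goes onto a freed list, and the next chunk is written into that slot ("Reusing freed chunk 1 for chunk 27" above). A sketch of that bookkeeping with h5py (the dataset name and class are illustrative, not the files' actual schema):

import h5py
import numpy as np

class SlotRecycler:
    """Fixed-capacity chunk store: freed slots are reused before new ones."""

    def __init__(self, path: str, n_slots: int, chunk_shape: tuple[int, int]):
        self.f = h5py.File(path, "a")
        self.dset = self.f.require_dataset(
            "logits", shape=(n_slots, *chunk_shape), dtype="float32"
        )
        self.slot_of_chunk: dict[int, int] = {}
        self.freed: list[int] = []
        self.next_slot = 0

    def write(self, chunk_id: int, logits: np.ndarray) -> int:
        if self.freed:
            slot = self.freed.pop(0)   # "Reusing freed chunk N for chunk M"
        else:
            slot = self.next_slot
            self.next_slot += 1
        self.dset[slot] = logits       # "Written chunk M at physical slot N"
        self.slot_of_chunk[chunk_id] = slot
        return slot

    def free(self, chunk_id: int) -> None:
        # "Added chunk N to freed chunks list"
        self.freed.append(self.slot_of_chunk.pop(chunk_id))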
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Comparing logits for chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Processing chunks 27 to 27... | |
[llama_gguf_optmize v0.6.0] 09:58:07 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 09:58:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 09:58:16 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 09:58:21 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - | |
===== KL-divergence statistics for Chunk 27 ===== | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Average : 0.017473 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - StdDev : 0.031769 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Median : 0.009938 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Maximum : 1.188030 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_99 : 0.123103 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_95 : 0.059611 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_90 : 0.039959 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_10 : 0.000158 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_05 : 0.000040 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_01 : 0.000006 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Samples seen: 110464 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
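With --parts 4, each 4096-token chunk is scored in four 1024-value slices (the kl_values_list sizes above) and the parts are pooled for the summary statistics. A sketch of deriving those statistics from the pooled values, with stand-in data (names mirror the log output, not the tool's internals):

import numpy as np

kl_values_list = [np.random.rand(1024) * 0.05 for _ in range(4)]  # stand-in data
kl = np.concatenate(kl_values_list)

stats = {
    "Average": kl.mean(),
    "StdDev":  kl.std(),
    "Median":  np.median(kl),
    "Minimum": kl.min(),
    "Maximum": kl.max(),
    # KLD_99 is the 99th percentile (the worst 1% of tokens), KLD_01 the 1st.
    **{f"KLD_{q:02d}": np.percentile(kl, q) for q in (99, 95, 90, 10, 5, 1)},
}
for name, value in stats.items():
    print(f"{name:8s}: {value:.6f}")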
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8017) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.272066, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23888920872313707, EMA_p_value_std_dev: 0.01275909250729215 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=799, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=799, beta=63), stopping probability=0.001991 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9508) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.268840, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2377259615997975, EMA_p_value_std_dev: 0.0127119413071079 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=800, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=800, beta=63), stopping probability=0.002045 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6981) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.252049, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23591169572546333, EMA_p_value_std_dev: 0.01258551252350339 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=801, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=801, beta=63), stopping probability=0.002100 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7505) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.223127, p-value=0.000104 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23446352186955832, EMA_p_value_std_dev: 0.01246042462392792 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=802, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=802, beta=63), stopping probability=0.002156 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6757) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.182211, p-value=0.005130 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2330309860214457, EMA_p_value_std_dev: 0.012358898145795579 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=803, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=803, beta=63), stopping probability=0.002214 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0154) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.217157, p-value=0.000195 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23213882015305, EMA_p_value_std_dev: 0.012258177610971501 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=804, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=804, beta=63), stopping probability=0.002273 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8828) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.222165, p-value=0.000115 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2312065479553198, EMA_p_value_std_dev: 0.012158329936266579 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=805, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=805, beta=63), stopping probability=0.002334 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8292) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.213186, p-value=0.000292 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23000529998341054, EMA_p_value_std_dev: 0.012059159058411576 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=806, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=806, beta=63), stopping probability=0.002396 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9065) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.251223, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22879749687625603, EMA_p_value_std_dev: 0.011961100797814749 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=807, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=807, beta=63), stopping probability=0.002459 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8110) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.246411, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22699861546616754, EMA_p_value_std_dev: 0.01184302407408586 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=808, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=808, beta=63), stopping probability=0.002524 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7232) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.215461, p-value=0.000232 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2256015762923993, EMA_p_value_std_dev: 0.01172618852665655 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=809, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=809, beta=63), stopping probability=0.002590 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7150) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.213783, p-value=0.000275 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2241247424997084, EMA_p_value_std_dev: 0.011610660680713055 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=810, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=810, beta=63), stopping probability=0.002658 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7476) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.202093, p-value=0.000869 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22233142482444823, EMA_p_value_std_dev: 0.011498368123227137 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=811, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=811, beta=63), stopping probability=0.002727 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7112) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.198473, p-value=0.001221 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22055133791234857, EMA_p_value_std_dev: 0.0113887046849472 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=812, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=812, beta=63), stopping probability=0.002798 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7828) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.193870, p-value=0.001861 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2185829190246973, EMA_p_value_std_dev: 0.011281912535895328 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=813, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=813, beta=63), stopping probability=0.002870 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6570) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.240256, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2170139478479097, EMA_p_value_std_dev: 0.011176752113465757 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=814, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=814, beta=63), stopping probability=0.002944 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6815) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.249868, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2154057105397977, EMA_p_value_std_dev: 0.011073237280591591 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=815, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=815, beta=63), stopping probability=0.003020 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7612) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.221196, p-value=0.000127 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2141628531260812, EMA_p_value_std_dev: 0.0109712527302956 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=816, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=816, beta=63), stopping probability=0.003097 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9144) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.303059, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21386197728605422, EMA_p_value_std_dev: 0.010869969613415723 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=817, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=817, beta=63), stopping probability=0.003176 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6848) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.216198, p-value=0.000215 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21374136953118952, EMA_p_value_std_dev: 0.010762493050285725 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=818, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=818, beta=63), stopping probability=0.003257 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9191) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.205106, p-value=0.000651 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21363601376680427, EMA_p_value_std_dev: 0.010657807388672646 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=819, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=819, beta=63), stopping probability=0.003340 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8246) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.209633, p-value=0.000417 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21325782906615226, EMA_p_value_std_dev: 0.010554049380292438 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=820, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=820, beta=63), stopping probability=0.003424 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7985) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.205940, p-value=0.000600 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21178367703251127, EMA_p_value_std_dev: 0.01045148023900518 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=821, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=821, beta=63), stopping probability=0.003511 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7514) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.146186, p-value=0.070638 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2103286351849488, EMA_p_value_std_dev: 0.010660246241518052 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=822, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=822, beta=63), stopping probability=0.003599 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8360) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.260919, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21039756231697235, EMA_p_value_std_dev: 0.010867173745141422 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=823, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=823, beta=63), stopping probability=0.003689 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8516) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.229992, p-value=0.000049 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21082692068513115, EMA_p_value_std_dev: 0.011072707527303162 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=824, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=824, beta=63), stopping probability=0.003781 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8691) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.218698, p-value=0.000166 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21129115878588434, EMA_p_value_std_dev: 0.01127647064766372 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=825, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=825, beta=63), stopping probability=0.003875 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8716) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.280307, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2117191411337825, EMA_p_value_std_dev: 0.01147886222050015 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=826, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=826, beta=63), stopping probability=0.003971 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7481) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.206746, p-value=0.000555 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21144490837360805, EMA_p_value_std_dev: 0.011366693220233369 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=827, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=827, beta=63), stopping probability=0.004069 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9878) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.198536, p-value=0.001214 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.210901808443724, EMA_p_value_std_dev: 0.011258356569532802 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=828, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=828, beta=63), stopping probability=0.004169 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7435) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.226841, p-value=0.000070 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21071404001394126, EMA_p_value_std_dev: 0.011151066430182337 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=829, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=829, beta=63), stopping probability=0.004272 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8910) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.262072, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2101031538088608, EMA_p_value_std_dev: 0.011045086710279017 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=830, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=830, beta=63), stopping probability=0.004376 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Added chunk 26 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Added chunk 26 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Processing chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Generating logits for model, chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Processing chunks from 28 to 28 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Inference time: 5506.24 ms | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Reusing freed chunk 0 for chunk 28. | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Written chunk 28 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[28] 5961.03 ms | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
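===== Note: why the HDF5 file size stays flat =====

Each stored logits tensor is (4096 tokens x 151936 vocab) in float32. Once a chunk has been compared, its slot is released ("Added chunk 26 to freed chunks list ...") and the next chunk is written into the same physical slot ("Reusing freed chunk 0 for chunk 28", "Written chunk 28 at physical slot 0"), so each .h5 file only ever holds about two live chunks plus metadata. A quick check, assuming the reported "Final file size" is in MiB:

ctx_len, vocab = 4096, 151936
chunk_mib = ctx_len * vocab * 4 / 2**20  # one float32 logits chunk
print(chunk_mib)        # 2374.0 MiB per chunk
print(2 * chunk_mib)    # 4748.0 MiB -- close to the logged 4785.13 MB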
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Generating logits for model, chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:33 - INFO - Processing chunks from 28 to 28 | |
[llama_gguf_optmize v0.6.0] 09:58:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Inference time: 2387.65 ms | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Reusing freed chunk 0 for chunk 28. | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Written chunk 28 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:58:35 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[28] 2827.37 ms | |
[llama_gguf_optmize v0.6.0] 09:58:35 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:35 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
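===== Note: baseline vs. target inference time =====

The baseline pass needs 5506.24 ms for this 4096-token chunk while the target pass needs 2387.65 ms, about 2.3x faster. That is consistent with baseline_logits.h5 coming from the full-precision reference model and target_logits.h5 from a lighter quantized build, though the log itself does not name the two models (an inference from the file names and timings, not something the log states). Throughput, for reference:

for name, ms in [("baseline", 5506.24), ("target", 2387.65)]:
    print(f"{name}: {4096 / (ms / 1000):.0f} tok/s")  # ~744 vs ~1716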
[llama_gguf_optmize v0.6.0] 09:58:36 - INFO - Comparing logits for chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:58:36 - INFO - Processing chunks 28 to 28... | |
[llama_gguf_optmize v0.6.0] 09:58:36 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 09:58:41 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 09:58:46 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 09:58:51 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - | |
===== KL-divergence statistics for Chunk 28 ===== | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Average : 0.018790 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - StdDev : 0.031870 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Median : 0.010542 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Maximum : 1.052105 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_99 : 0.130811 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_95 : 0.064162 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_90 : 0.044110 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_10 : 0.000147 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_05 : 0.000041 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_01 : 0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Samples seen: 114560 | |
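===== Note: how the per-chunk KL statistics are produced (sketch) =====

For every token position the two models' logits are turned into probability distributions and compared with KL divergence, giving 4096 per-token values per chunk; the "Processing chunk 0, part 0..3" lines and the kl_values_list shapes [(1024,) x 4] show the chunk being handled in four 1024-row parts. A minimal self-contained sketch (assumptions: KL is taken as KL(baseline || target) in nats, and a small random vocab stands in for the real 151936):

import numpy as np

def log_softmax(x):
    x = x - x.max(axis=-1, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))

def kl_per_token(baseline_logits, target_logits):
    # KL(P_baseline || Q_target) at every token position
    lp = log_softmax(baseline_logits)
    lq = log_softmax(target_logits)
    return (np.exp(lp) * (lp - lq)).sum(axis=-1)

rng = np.random.default_rng(0)
kl = kl_per_token(rng.normal(size=(4096, 512)), rng.normal(size=(4096, 512)))
print(f"Average : {kl.mean():.6f}")
print(f"Median  : {np.median(kl):.6f}")
for q in (99, 95, 90, 10, 5, 1):
    print(f"KLD_{q:02d}  : {np.percentile(kl, q):.6f}")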
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
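===== Note: what "segmentation 0/32 (4096 + 128 - 1/ 128)" means =====

The expression is ceiling division: each 4096-token chunk is split into ceil(4096 / 128) = 32 segments of 128 positions, and every segment below drives one Kuiper test, one EMA update, and one Beta-parameter increment. That is why alpha advances by exactly 32 over a chunk (831 through 862 across chunk 28).

n_segments = (4096 + 128 - 1) // 128  # ceiling division
print(n_segments)                     # 32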
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6217) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.190697, p-value=0.002472 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20951070129353994, EMA_p_value_std_dev: 0.010945107437383847 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=831, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=831, beta=63), stopping probability=0.004483 | |
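===== Note: the Beta early-stopping posterior (hedged sketch) =====

Each segment that satisfies the update condition counts as a success and increments alpha, while beta (fixed at 63 throughout this excerpt) would count failures; together they define a Beta(alpha, beta) posterior over the rate of "converged" segments. One plausible reading, given the "confidence_level: 0.95" lines, is that the stopping probability is the posterior mass above the confidence level, but the printed value (0.004483) only roughly matches that definition, so treat this as a guess at the tool's rule rather than its actual formula:

from scipy.stats import beta

a, b = 831, 63        # posterior counts from the log line above
conf_level = 0.95     # "confidence_level: 0.95" in this log
# Posterior tail mass P(p > 0.95); same order of magnitude as the
# logged stopping probability, but not an exact match.
print(beta.sf(conf_level, a, b))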
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5423) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.169638, p-value=0.014000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20901211074394935, EMA_p_value_std_dev: 0.010895055688396448 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=832, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=832, beta=63), stopping probability=0.004592 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8205) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.225417, p-value=0.000081 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20907457906126226, EMA_p_value_std_dev: 0.01084681810443832 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=833, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=833, beta=63), stopping probability=0.004703 | |
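===== Note: the Kuiper test (checked against the logged p-values) =====

The Kuiper statistic V is a relative of Kolmogorov-Smirnov that stays sensitive in both tails: V = D+ + D-, the sum of the largest deviations of one empirical CDF above and below the other. The log does not say which two samples are compared per segment, but its p-values are reproduced to about two significant figures by the standard asymptotic series with Stephens' small-sample correction at N = 128 (the segment length); the tool may use a slightly different adjustment:

import numpy as np

def kuiper_pvalue(v, n):
    # Asymptotic Kuiper tail probability with Stephens' correction.
    lam = (np.sqrt(n) + 0.155 + 0.24 / np.sqrt(n)) * v
    j = np.arange(1, 101)
    q = 2.0 * np.sum((4.0 * j**2 * lam**2 - 1.0) * np.exp(-2.0 * j**2 * lam**2))
    return float(min(max(q, 0.0), 1.0))

for v, logged in [(0.218698, 0.000166), (0.169638, 0.014000)]:
    print(f"V={v}: p~{kuiper_pvalue(v, 128):.6f} (log: {logged})")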
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8936) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.257672, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20909066633951992, EMA_p_value_std_dev: 0.010799153042659758 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=834, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=834, beta=63), stopping probability=0.004816 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8596) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.302350, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20873611212024348, EMA_p_value_std_dev: 0.010751964876789349 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=835, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=835, beta=63), stopping probability=0.004932 | |
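===== Note: the EMA bookkeeping (sketch) =====

EMA_relative_change and EMA_p_value_std_dev are exponential moving averages smoothed with the logged decay rate of 0.5, tracking, judging by their names, how much the prior shifts per update and how spread out recent p-values are (the exact inputs are not visible in the log). With decay 0.5 the memory is short: each new observation carries half the weight. A minimal sketch, assuming the plain EMA recurrence:

def ema(prev, value, decay=0.5):
    # decay = 0.5 per the "Adjusted decay rate: 0.5" lines
    return decay * prev + (1.0 - decay) * value

state = 0.20
for x in (0.25, 0.15, 0.20):
    state = ema(state, x)
    print(state)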
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0615) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.207984, p-value=0.000491 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20905847756135215, EMA_p_value_std_dev: 0.010706560816431494 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=836, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=836, beta=63), stopping probability=0.005050 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9975) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.317972, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20962640714475958, EMA_p_value_std_dev: 0.01060189091144209 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=837, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=837, beta=63), stopping probability=0.005170 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9478) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.249355, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.21038511192913664, EMA_p_value_std_dev: 0.010498321272010263 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=838, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=838, beta=63), stopping probability=0.005293 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7482) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.280567, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.2110762777034972, EMA_p_value_std_dev: 0.010395786720460145 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=839, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=839, beta=63), stopping probability=0.005419 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9465) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.254116, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.21102228327932743, EMA_p_value_std_dev: 0.010294271719573868 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=840, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=840, beta=63), stopping probability=0.005547 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9963) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.248135, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.21003657408922335, EMA_p_value_std_dev: 0.010191614255310125 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=841, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=841, beta=63), stopping probability=0.005678 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8382) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.253306, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20856413372106217, EMA_p_value_std_dev: 0.010089975923599613 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=842, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=842, beta=63), stopping probability=0.005811 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7913) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.218225, p-value=0.000174 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20712294597503422, EMA_p_value_std_dev: 0.009990092008236688 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=843, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=843, beta=63), stopping probability=0.005947 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7314) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.225749, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20547225195411664, EMA_p_value_std_dev: 0.009891189247641074 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=844, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=844, beta=63), stopping probability=0.006085 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9878) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.211992, p-value=0.000330 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20397804206542478, EMA_p_value_std_dev: 0.009793893057693915 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=845, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=845, beta=63), stopping probability=0.006227 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8821) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.215932, p-value=0.000221 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20249593100570495, EMA_p_value_std_dev: 0.009697460365988828 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=846, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=846, beta=63), stopping probability=0.006371 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7559) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.233789, p-value=0.000032 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20095921213445803, EMA_p_value_std_dev: 0.00960190294376644 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=847, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=847, beta=63), stopping probability=0.006518 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6696) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.231183, p-value=0.000043 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19936908051653568, EMA_p_value_std_dev: 0.009507418345900697 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=848, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=848, beta=63), stopping probability=0.006667 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7902) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.150130, p-value=0.055099 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.1982094797079848, EMA_p_value_std_dev: 0.009657679550404722 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=849, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=849, beta=63), stopping probability=0.006820 | |
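===== Note: what "Condition met" seems to test (hypothesis) =====

Alpha is incremented even on segments where the Kuiper p-value exceeds 0.05 (p=0.055099 a few lines above), so the gate is evidently not a bare p < 0.05 check on the single test. A reconstruction consistent with every line in this excerpt is that the smoothed quantities are compared against the thetas, since EMA_relative_change (~0.20) always sits under theta_E (~0.3014) and EMA_p_value_std_dev (~0.01) under theta_P (0.05). This is a hypothesis inferred from the logged values, not the tool's documented rule:

def condition_met(ema_rel_change, ema_p_std, theta_e, theta_p):
    # hypothetical gate: both smoothed signals under their thresholds
    return ema_rel_change < theta_e and ema_p_std < theta_p

print(condition_met(0.1982094797079848, 0.009657679550404722,
                    0.30142758880617465, 0.050000000000000044))  # True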
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8530) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.238366, p-value=0.000019 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19859936433372627, EMA_p_value_std_dev: 0.009806786556793515 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=850, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=850, beta=63), stopping probability=0.006976 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8141) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.226231, p-value=0.000074 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19886634819964819, EMA_p_value_std_dev: 0.009954568337186538 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=851, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=851, beta=63), stopping probability=0.007134 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7906) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.246177, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.1993195911392359, EMA_p_value_std_dev: 0.01010090342341853 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=852, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=852, beta=63), stopping probability=0.007296 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8883) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.216912, p-value=0.000200 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19890080321034134, EMA_p_value_std_dev: 0.010245605316422634 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=853, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=853, beta=63), stopping probability=0.007461 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7475) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.230223, p-value=0.000048 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19768773877895596, EMA_p_value_std_dev: 0.010144174593233182 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=854, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=854, beta=63), stopping probability=0.007629 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6878) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.193731, p-value=0.001885 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19676437941729086, EMA_p_value_std_dev: 0.010051058498849796 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=855, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=855, beta=63), stopping probability=0.007800 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9430) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.284455, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19649887582291267, EMA_p_value_std_dev: 0.009958961847875711 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=856, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=856, beta=63), stopping probability=0.007974 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7985) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.266932, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19602159591796153, EMA_p_value_std_dev: 0.009867792953801312 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=857, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=857, beta=63), stopping probability=0.008152 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7209) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.234544, p-value=0.000030 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19577299792613292, EMA_p_value_std_dev: 0.009777684781637324 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=858, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=858, beta=63), stopping probability=0.008333 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8464) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.269643, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19549371974477028, EMA_p_value_std_dev: 0.009688526957402787 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=859, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=859, beta=63), stopping probability=0.008517 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6068) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.249788, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.194536324017785, EMA_p_value_std_dev: 0.009592010634012663 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=860, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=860, beta=63), stopping probability=0.008705 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8211) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.175310, p-value=0.009004 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19407273980890022, EMA_p_value_std_dev: 0.009536458185588514 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=861, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=861, beta=63), stopping probability=0.008896 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7137) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.180015, p-value=0.006154 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.1933769920912596, EMA_p_value_std_dev: 0.009483881298940889 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=862, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=862, beta=63), stopping probability=0.009091 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Added chunk 27 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Added chunk 27 to freed chunks list in target_logits.h5. | |
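===== Note: the learning rate never actually changes =====

Every "Learning rate updated from 0.009999999999999998 to 0.009999999999999998" line has the same value on both sides: the rate is pinned at 0.01 for the whole run, and the long decimal is ordinary floating-point drift in how a value near 0.01 is stored and printed. Illustrative only:

print(0.1 * 0.1)          # 0.010000000000000002 -- the same kind of drift
print(0.1 * 0.1 == 0.01)  # False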
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Processing chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Generating logits for model, chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:56 - INFO - Processing chunks from 29 to 29 | |
[llama_gguf_optmize v0.6.0] 09:58:56 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:56 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:56 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Inference time: 5540.58 ms | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Reusing freed chunk 1 for chunk 29. | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Written chunk 29 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[29] 5976.15 ms | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Generating logits for model, chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Processing chunks from 29 to 29 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Inference time: 2415.97 ms | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Reusing freed chunk 1 for chunk 29. | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Written chunk 29 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[29] 2866.26 ms | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Comparing logits for chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Processing chunks 29 to 29... | |
[llama_gguf_optmize v0.6.0] 09:59:06 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 09:59:11 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 09:59:16 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 09:59:21 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - | |
===== KL-divergence statistics for Chunk 29 ===== | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Average : 0.023875 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - StdDev : 0.056183 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Median : 0.015091 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Maximum : 2.612865 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_99 : 0.157537 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_95 : 0.073438 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_90 : 0.050380 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_10 : 0.000327 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_05 : 0.000100 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_01 : 0.000019 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Samples seen: 118656 | |
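===== Note: chunk 29 vs. chunk 28 =====

Chunk 29 diverges somewhat more than chunk 28 (Average 0.023875 vs 0.018790, Maximum 2.612865 vs 1.052105), and "Samples seen" advances by exactly one full 4096-token chunk between the two comparisons; the constant 128-position shortfall against a whole number of chunks presumably dates from the start of the run, which this excerpt does not show:

print(118656 - 114560)     # 4096: exactly one chunk of token positions
print(29 * 4096 - 118656)  # 128: constant offset vs. 29 whole chunks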
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6817) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.198107, p-value=0.001263 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19250419732823, EMA_p_value_std_dev: 0.009429974574036395 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=863, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=863, beta=63), stopping probability=0.009290 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9477) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.191354, p-value=0.002332 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19149207027982196, EMA_p_value_std_dev: 0.009373102447043759 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=864, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=864, beta=63), stopping probability=0.009492 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8893) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.233536, p-value=0.000033 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19039336748115465, EMA_p_value_std_dev: 0.009316727154076557 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=865, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=865, beta=63), stopping probability=0.009697 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8413) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.188607, p-value=0.002970 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18990725785470047, EMA_p_value_std_dev: 0.009246758815976313 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=866, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=866, beta=63), stopping probability=0.009907 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7550) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.157864, p-value=0.032910 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18965997066580317, EMA_p_value_std_dev: 0.009294417125171518 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=867, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=867, beta=63), stopping probability=0.010120 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7233) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.150879, p-value=0.052502 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18942778557938633, EMA_p_value_std_dev: 0.009435999776592315 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=868, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=868, beta=63), stopping probability=0.010337 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0673) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.270116, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18990666483957971, EMA_p_value_std_dev: 0.009580280484148083 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=869, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=869, beta=63), stopping probability=0.010557 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8256) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.178165, p-value=0.007159 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1909349556356431, EMA_p_value_std_dev: 0.009711852292599184 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=870, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=870, beta=63), stopping probability=0.010782 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4929) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.178731, p-value=0.006836 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19145733548841087, EMA_p_value_std_dev: 0.009835847145944624 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=871, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=871, beta=63), stopping probability=0.011011 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9225) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.165091, p-value=0.019670 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19209455192114294, EMA_p_value_std_dev: 0.009946756899788231 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=872, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=872, beta=63), stopping probability=0.011244 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7165) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.170546, p-value=0.013062 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19101514846852571, EMA_p_value_std_dev: 0.009921315002228545 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=873, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=873, beta=63), stopping probability=0.011481 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7260) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.187866, p-value=0.003168 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18964943434900638, EMA_p_value_std_dev: 0.009886925246226244 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=874, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=874, beta=63), stopping probability=0.011722 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8075) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.184364, p-value=0.004279 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18831657632244825, EMA_p_value_std_dev: 0.009857143633315777 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=875, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=875, beta=63), stopping probability=0.011967 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6313) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.174575, p-value=0.009545 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1869100917177987, EMA_p_value_std_dev: 0.009826191611951373 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=876, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=876, beta=63), stopping probability=0.012216 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8036) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.175161, p-value=0.009111 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18544472899929018, EMA_p_value_std_dev: 0.009768786678885111 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=877, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=877, beta=63), stopping probability=0.012470 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6812) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.213393, p-value=0.000286 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18413913642847052, EMA_p_value_std_dev: 0.009711013731376105 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=878, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=878, beta=63), stopping probability=0.012728 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8652) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.202419, p-value=0.000842 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18296929777592766, EMA_p_value_std_dev: 0.009658027802929606 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=879, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=879, beta=63), stopping probability=0.012990 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7395) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.150711, p-value=0.053075 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1824757230181702, EMA_p_value_std_dev: 0.009780807204849412 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=880, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=880, beta=63), stopping probability=0.013257 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7075) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.122184, p-value=0.257722 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18256295661506186, EMA_p_value_std_dev: 0.010783983547278421 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=881, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=881, beta=63), stopping probability=0.013529 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0504) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.208116, p-value=0.000485 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1833455463398918, EMA_p_value_std_dev: 0.011788507768123214 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=882, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=882, beta=63), stopping probability=0.013805 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8642) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.162970, p-value=0.022953 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18439528609658437, EMA_p_value_std_dev: 0.012755740724192246 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=883, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=883, beta=63), stopping probability=0.014085 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7127) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.207980, p-value=0.000492 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18551219764948002, EMA_p_value_std_dev: 0.013713858443126038 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=884, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=884, beta=63), stopping probability=0.014371 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7147) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.191884, p-value=0.002225 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18647880947638623, EMA_p_value_std_dev: 0.014701582380537523 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=885, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=885, beta=63), stopping probability=0.014661 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7985) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.204411, p-value=0.000696 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1861452700391996, EMA_p_value_std_dev: 0.014653248065166546 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=886, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=886, beta=63), stopping probability=0.014956 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6098) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.193218, p-value=0.001974 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18523425792913542, EMA_p_value_std_dev: 0.014603766556867593 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=887, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=887, beta=63), stopping probability=0.015256 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6096) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.147813, p-value=0.063833 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1842810976268743, EMA_p_value_std_dev: 0.014736947085957325 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=888, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=888, beta=63), stopping probability=0.015560 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7767) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.180126, p-value=0.006098 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18386611316415907, EMA_p_value_std_dev: 0.0148631865644551 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=889, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=889, beta=63), stopping probability=0.015870 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6113) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.197855, p-value=0.001293 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18357931828594098, EMA_p_value_std_dev: 0.01498927689676091 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=890, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=890, beta=63), stopping probability=0.016185 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9883) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.187283, p-value=0.003332 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1832937378104956, EMA_p_value_std_dev: 0.015110977106029613 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=891, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=891, beta=63), stopping probability=0.016504 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7478) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.169445, p-value=0.014208 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1825287936952436, EMA_p_value_std_dev: 0.015221823418513202 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=892, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=892, beta=63), stopping probability=0.016829 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7156) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.139692, p-value=0.104077 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18171797062134473, EMA_p_value_std_dev: 0.015509202193431746 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=893, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=893, beta=63), stopping probability=0.017159 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8553) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.196803, p-value=0.001425 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1814942148176164, EMA_p_value_std_dev: 0.01579938747071239 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=894, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=894, beta=63), stopping probability=0.017495 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Added chunk 28 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Added chunk 28 to freed chunks list in target_logits.h5. | |
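[Annotation] The long run of "Updated Beta parameters" lines above is the Bayesian early-stopping bookkeeping: each 128-token segment that meets the convergence condition increments alpha, and the stopping probability is then recomputed from the Beta(alpha, beta) posterior. The exact rule is not printed in the log, so the sketch below is one plausible reading, not the tool's actual code: it takes the stopping probability as the posterior tail mass above the logged confidence_level of 0.95. The function name and threshold interpretation are assumptions.

from scipy.stats import beta as beta_dist

def stopping_probability(alpha: int, beta: int, confidence_level: float = 0.95) -> float:
    # Posterior tail mass P(theta > confidence_level) for theta ~ Beta(alpha, beta):
    # the probability that the underlying "segment converged" rate exceeds the
    # confidence level, given alpha successes and beta failures so far.
    return float(beta_dist.sf(confidence_level, alpha, beta))

# With the last chunk-29 update above (alpha=894, beta=63) this lands in the
# same range as the logged stopping probability of 0.017495.
print(stopping_probability(894, 63))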
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Processing chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Generating logits for model, chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Processing chunks from 30 to 30 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Inference time: 5696.00 ms | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Reusing freed chunk 0 for chunk 30. | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Written chunk 30 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[30] 6152.06 ms | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Generating logits for model, chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Processing chunks from 30 to 30 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Inference time: 2489.70 ms | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Reusing freed chunk 0 for chunk 30. | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Written chunk 30 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[30] 2948.58 ms | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
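[Annotation] The "Reusing freed chunk 0 for chunk 30" / "Added chunk N to freed chunks list" lines show why the final file size stays pinned at 4785.13 MB: one chunk of logits is 4096 x 151936 float32 values (roughly 2.4 GB), so once a chunk has been compared its physical slot in baseline_logits.h5 and target_logits.h5 is marked free and overwritten by the next chunk, giving a two-slot rotating buffer rather than a file that grows per chunk. A minimal h5py sketch of that slot-reuse pattern; the dataset and attribute names are illustrative assumptions, not the tool's actual file layout.

import h5py
import numpy as np

N_SLOTS, SEQ_LEN, N_VOCAB = 2, 4096, 151936  # sizes taken from the log above

def write_chunk(path: str, chunk_idx: int, logits: np.ndarray) -> None:
    # "Reusing freed chunk M for chunk N" / "Written chunk N at physical slot M"
    with h5py.File(path, "a") as f:
        dset = f.require_dataset("logits", (N_SLOTS, SEQ_LEN, N_VOCAB), dtype="float32")
        freed = list(f.attrs.get("freed_slots", []))
        slot = int(freed.pop(0)) if freed else chunk_idx % N_SLOTS
        dset[slot] = logits
        f.attrs["freed_slots"] = np.asarray(freed, dtype=np.int64)
        f.attrs[f"slot_of_chunk_{chunk_idx}"] = slot

def free_chunk(path: str, chunk_idx: int) -> None:
    # "Added chunk N to freed chunks list in ...h5"
    with h5py.File(path, "a") as f:
        slot = int(f.attrs[f"slot_of_chunk_{chunk_idx}"])
        freed = list(f.attrs.get("freed_slots", []))
        f.attrs["freed_slots"] = np.asarray(freed + [slot], dtype=np.int64)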
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Comparing logits for chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Processing chunks 30 to 30... | |
[llama_gguf_optmize v0.6.0] 09:59:36 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 09:59:41 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 09:59:46 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 09:59:51 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - | |
===== KL-divergence statistics for Chunk 30 ===== | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Average : 0.023869 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - StdDev : 0.050627 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Median : 0.013036 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Maximum : 1.599863 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_99 : 0.216543 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_95 : 0.081015 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_90 : 0.052226 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_10 : 0.000090 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_05 : 0.000027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Samples seen: 122752 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
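[Annotation] The statistics table above is a plain summary of the 4096 per-token KL-divergence values for the chunk (computed in four parts of 1024 each, per the kl_values_list line). A numpy sketch that reproduces the table; token_kl assumes the conventional KL(baseline || target) over log-probabilities, which matches how llama.cpp-style comparisons are usually oriented, though the log does not state the direction explicitly.

import numpy as np

def token_kl(logp_baseline: np.ndarray, logp_target: np.ndarray) -> np.ndarray:
    # Per-token KL(baseline || target), summed over the 151936-entry vocab axis.
    p = np.exp(logp_baseline)
    return np.sum(p * (logp_baseline - logp_target), axis=-1)

def kl_summary(kl: np.ndarray) -> dict:
    # Mirrors the logged table for one 4096-token chunk.
    kld99, kld95, kld90, kld10, kld05, kld01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
    return {
        "Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
        "Minimum": kl.min(), "Maximum": kl.max(),
        "KLD_99": kld99, "KLD_95": kld95, "KLD_90": kld90,
        "KLD_10": kld10, "KLD_05": kld05, "KLD_01": kld01,
    }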
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6836) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.146891, p-value=0.067620 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18191758039599043, EMA_p_value_std_dev: 0.01609751092266995 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=895, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=895, beta=63), stopping probability=0.017836 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0542) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.178032, p-value=0.007236 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1825729772447379, EMA_p_value_std_dev: 0.016385522095396027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=896, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=896, beta=63), stopping probability=0.018182 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7627) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.211714, p-value=0.000339 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18323875641332094, EMA_p_value_std_dev: 0.01669335300483294 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=897, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=897, beta=63), stopping probability=0.018533 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8204) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.202158, p-value=0.000863 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18328449294404353, EMA_p_value_std_dev: 0.016818806078764494 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=898, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=898, beta=63), stopping probability=0.018890 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6627) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.235876, p-value=0.000025 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18310875543887137, EMA_p_value_std_dev: 0.01694474682384702 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=899, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=899, beta=63), stopping probability=0.019253 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0013) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.224845, p-value=0.000086 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18237319873303012, EMA_p_value_std_dev: 0.01680671345466865 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=900, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=900, beta=63), stopping probability=0.019621 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0104) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.245632, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18133880922474546, EMA_p_value_std_dev: 0.016642658557545428 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=901, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=901, beta=63), stopping probability=0.019995 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7857) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.156054, p-value=0.037251 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18101223181656273, EMA_p_value_std_dev: 0.016641765154245237 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=902, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=902, beta=63), stopping probability=0.020375 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8351) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.156545, p-value=0.036027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18018531859532455, EMA_p_value_std_dev: 0.016675770961532072 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=903, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=903, beta=63), stopping probability=0.020761 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6924) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.180531, p-value=0.005898 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17976517812919693, EMA_p_value_std_dev: 0.016700238700806735 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=904, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=904, beta=63), stopping probability=0.021152 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7732) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.204033, p-value=0.000722 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17949049107576262, EMA_p_value_std_dev: 0.01672317322229408 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=905, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=905, beta=63), stopping probability=0.021549 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8893) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.201695, p-value=0.000902 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17842482482329572, EMA_p_value_std_dev: 0.016744037859040488 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=906, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=906, beta=63), stopping probability=0.021953 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7264) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.156620, p-value=0.035845 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17799823107031182, EMA_p_value_std_dev: 0.016760828618271557 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=907, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=907, beta=63), stopping probability=0.022362 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7479) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.161100, p-value=0.026240 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17710937488216924, EMA_p_value_std_dev: 0.016754519298730387 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=908, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=908, beta=63), stopping probability=0.022777 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7171) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.184654, p-value=0.004175 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1762893677866171, EMA_p_value_std_dev: 0.016750576978363848 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=909, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=909, beta=63), stopping probability=0.023199 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5743) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.161655, p-value=0.025223 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17585805803058802, EMA_p_value_std_dev: 0.016734836458805854 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=910, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=910, beta=63), stopping probability=0.023627 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7675) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.215542, p-value=0.000230 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17556769456262067, EMA_p_value_std_dev: 0.016721211695255273 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=911, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=911, beta=63), stopping probability=0.024061 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0359) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.253651, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17559518984386258, EMA_p_value_std_dev: 0.016688050534628768 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=912, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=912, beta=63), stopping probability=0.024502 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7348) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.254734, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17539173679611128, EMA_p_value_std_dev: 0.016630636361397035 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=913, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=913, beta=63), stopping probability=0.024949 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8462) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.153288, p-value=0.044845 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17605271581834617, EMA_p_value_std_dev: 0.01666787728521364 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=914, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=914, beta=63), stopping probability=0.025402 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1379) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.211264, p-value=0.000355 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17687509057339498, EMA_p_value_std_dev: 0.016701011960392448 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=915, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=915, beta=63), stopping probability=0.025862 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1191) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.240381, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17754332413638754, EMA_p_value_std_dev: 0.016734055556191132 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=916, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=916, beta=63), stopping probability=0.026329 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7390) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.257402, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17828764857511964, EMA_p_value_std_dev: 0.016766770833788303 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=917, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=917, beta=63), stopping probability=0.026802 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1200) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.290751, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17806872651282796, EMA_p_value_std_dev: 0.01679916277273027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=918, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=918, beta=63), stopping probability=0.027282 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.8495) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.601933, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1787127247141417, EMA_p_value_std_dev: 0.01663315596681239 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=919, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=919, beta=63), stopping probability=0.027769 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3067) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.335889, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1804889955339877, EMA_p_value_std_dev: 0.016467305982880893 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=920, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=920, beta=63), stopping probability=0.028263 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8563) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.248336, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18281142111731657, EMA_p_value_std_dev: 0.016303070031760385 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=921, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=921, beta=63), stopping probability=0.028763 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0688) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.282558, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18512093100649699, EMA_p_value_std_dev: 0.01614047302107115 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=922, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=922, beta=63), stopping probability=0.029271 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9378) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.387011, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18635525129767042, EMA_p_value_std_dev: 0.015979497939496382 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=923, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=923, beta=63), stopping probability=0.029786 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1062) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.302890, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18691711187031534, EMA_p_value_std_dev: 0.01582012858033542 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=924, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=924, beta=63), stopping probability=0.030307 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8930) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.390215, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18757687752328953, EMA_p_value_std_dev: 0.015662348930552978 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=925, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=925, beta=63), stopping probability=0.030836 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0362) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.359070, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18815794118822118, EMA_p_value_std_dev: 0.015506117300052701 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=926, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=926, beta=63), stopping probability=0.031372 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Added chunk 29 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Added chunk 29 to freed chunks list in target_logits.h5. | |
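[Annotation] Each "Kuiper statistic=..., p-value=..." line above corresponds to one 128-token segment (the "segmentation 0/32" line: a 4096-token chunk in windows of 128), and the two EMA lines smooth the relative change and the p-value spread at the logged decay rate of 0.5. The Kuiper statistic is the two-sided cousin of Kolmogorov-Smirnov, V = D+ + D-, which weighs both tails of the ECDF difference equally. A minimal numpy sketch of the two-sample statistic and the EMA update; the log does not show which two samples the tool pairs per segment, so the inputs here are placeholders.

import numpy as np

def kuiper_two_sample(a: np.ndarray, b: np.ndarray) -> float:
    # V = D+ + D-: maximum ECDF excess in each direction between samples a and b.
    a, b = np.sort(a), np.sort(b)
    pooled = np.concatenate([a, b])
    cdf_a = np.searchsorted(a, pooled, side="right") / a.size
    cdf_b = np.searchsorted(b, pooled, side="right") / b.size
    return float(np.max(cdf_a - cdf_b) + np.max(cdf_b - cdf_a))

def ema_update(prev: float, new: float, decay: float = 0.5) -> float:
    # "Adjusted decay rate: 0.5" -- a plain exponential moving average,
    # as used for EMA_relative_change and EMA_p_value_std_dev above.
    return decay * prev + (1.0 - decay) * new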
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Processing chunk 31 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Generating logits for model, chunk 31 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:56 - INFO - Processing chunks from 31 to 31 | |
[llama_gguf_optmize v0.6.0] 09:59:56 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:56 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:56 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Inference time: 5557.11 ms | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Reusing freed chunk 1 for chunk 31. | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Written chunk 31 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[31] 6070.42 ms | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Generating logits for model, chunk 31 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Processing chunks from 31 to 31 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Inference time: 2426.35 ms | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Reusing freed chunk 1 for chunk 31. | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Written chunk 31 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[31] 2870.95 ms | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Comparing logits for chunk 31 | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Processing chunks 31 to 31... | |
[llama_gguf_optmize v0.6.0] 10:00:06 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:00:11 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:00:16 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:00:21 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - | |
===== KL-divergence statistics for Chunk 31 ===== | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Average : 0.019687 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - StdDev : 0.036965 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Median : 0.010690 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Maximum : 0.851223 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_99 : 0.159542 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_95 : 0.067962 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_90 : 0.044213 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_10 : 0.000060 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_05 : 0.000011 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_01 : 0.000001 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Samples seen: 126848 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6927) | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Kuiper statistic=0.238384, p-value=0.000019 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated EMA_relative_change: 0.18869146416694635, EMA_p_value_std_dev: 0.015351528723005109 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated Beta parameters: alpha=927, beta=63
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Chunk 31: Beta parameters updated (alpha=927, beta=63), stopping probability=0.031916
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7856)
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Kuiper statistic=0.250618, p-value=0.000004
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated EMA_relative_change: 0.188669287332136, EMA_p_value_std_dev: 0.015198479479940036
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated Beta parameters: alpha=928, beta=63
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Chunk 31: Beta parameters updated (alpha=928, beta=63), stopping probability=0.032467
[... 29 further identical update cycles for chunk 31 omitted: alpha incremented 929 -> 957, stopping probability rising 0.033025 -> 0.051915, KL div 0.53-1.27, Kuiper p-values 0.000000-0.520621 ...]
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0224)
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Kuiper statistic=0.349671, p-value=0.000000
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated EMA_relative_change: 0.21331913525365526, EMA_p_value_std_dev: 0.02500976000807308
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated Beta parameters: alpha=958, beta=63
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Chunk 31: Beta parameters updated (alpha=958, beta=63), stopping probability=0.052714
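What the cycle above appears to be doing: each 128-token segment of the chunk is tested, a passing segment increments alpha of a Beta(alpha, beta) posterior ("Condition met: Incremented alpha."), and the reported "stopping probability" tracks how much posterior mass already sits above the 0.95 confidence level logged below. A minimal sketch of that reading (the alpha/beta counts, the 0.5 decay rate, and the 0.95 level come from the log; the exact update rule and the function names here are assumptions, not the tool's source):

    from scipy.stats import beta

    def stopping_probability(a: int, b: int, confidence_level: float = 0.95) -> float:
        # Posterior mass above the confidence level under Beta(a, b).
        return float(beta.sf(confidence_level, a, b))

    def ema(prev: float, x: float, decay: float = 0.5) -> float:
        # Plain exponential moving average, consistent with the logged
        # "Adjusted decay rate: 0.5" for EMA_relative_change and
        # EMA_p_value_std_dev.
        return decay * prev + (1.0 - decay) * x

    print(stopping_probability(958, 63))  # ~0.05, in line with the logged 0.052714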
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - confidence_level: 0.95
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - TOTAL CHUNKS processed.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Added chunk 30 to freed chunks list in baseline_logits.h5.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Added chunk 30 to freed chunks list in target_logits.h5.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Processing chunk 32
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Generating logits for model, chunk 32
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Number of logits: 151936.
[llama_gguf_optmize v0.6.0] 10:00:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:26 - INFO - Processing chunks from 32 to 32
[llama_gguf_optmize v0.6.0] 10:00:26 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5
[llama_gguf_optmize v0.6.0] 10:00:26 - DEBUG - require_bos: add_bos_token=false (require_bos=False)
[llama_gguf_optmize v0.6.0] 10:00:26 - DEBUG - require_eos: add_eos_token=true (require_eos=True)
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Inference time: 5488.69 ms
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Logits shape (4096, 151936) dtype float32
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Reusing freed chunk 0 for chunk 32.
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Written chunk 32 at physical slot 0
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks
[32] 6178.54 ms
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Processed 1 chunks
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Final file size: 4785.13 MB
ggml_metal_free: deallocating
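Note the pair of "Loaded precomputed tokens" lines: the calibration text is tokenized once and cached as a .npy file, so both the baseline and the target pass can slice chunks straight out of the cached array instead of re-tokenizing. A sketch of that reuse, assuming a flat 1-D array of token ids (the path and the 4096-token chunk size are from the log; the slicing itself is an illustration):

    import numpy as np

    TOKENS = "/Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy"
    CHUNK = 4096  # matches the logged logits shape (4096, 151936)

    tokens = np.load(TOKENS)                    # cached token ids, no re-tokenization
    chunk_32 = tokens[32 * CHUNK : 33 * CHUNK]  # the chunk generated above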
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Generating logits for model, chunk 32
[llama_gguf_optmize v0.6.0] 10:00:32 - DEBUG - Number of logits: 151936.
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:33 - INFO - Processing chunks from 32 to 32
[llama_gguf_optmize v0.6.0] 10:00:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5
[llama_gguf_optmize v0.6.0] 10:00:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False)
[llama_gguf_optmize v0.6.0] 10:00:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True)
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Inference time: 2858.16 ms
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Logits shape (4096, 151936) dtype float32
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Reusing freed chunk 0 for chunk 32.
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Written chunk 32 at physical slot 0
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks
[32] 3543.51 ms
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Processed 1 chunks
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Final file size: 4785.13 MB
ggml_metal_free: deallocating
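The "freed chunks list" and "physical slot" messages explain why both HDF5 files stay at a constant 4785.13 MB: once a chunk has been compared, its slot is released and the next chunk's logits are written over it, so disk use is bounded by the comparison window rather than by the whole dataset. A rough sketch of that recycling, assuming a resizable "logits" dataset and a caller-maintained free list (the names are illustrative, not the tool's API):

    import h5py
    import numpy as np

    def write_chunk(f: h5py.File, logits: np.ndarray, freed_slots: list) -> int:
        if freed_slots:
            slot = freed_slots.pop(0)        # "Reusing freed chunk 0 for chunk 32."
        else:
            slot = f["logits"].shape[0]      # no free slot: grow the dataset
            f["logits"].resize(slot + 1, axis=0)
        f["logits"][slot] = logits           # "Written chunk 32 at physical slot 0"
        return slot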
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Comparing logits for chunk 32
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file.
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Processing chunks 32 to 32...
[llama_gguf_optmize v0.6.0] 10:00:37 - DEBUG - Processing chunk 0, part 0
[llama_gguf_optmize v0.6.0] 10:00:42 - DEBUG - Processing chunk 0, part 1
[llama_gguf_optmize v0.6.0] 10:00:47 - DEBUG - Processing chunk 0, part 2
[llama_gguf_optmize v0.6.0] 10:00:52 - DEBUG - Processing chunk 0, part 3
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)]
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO -
===== KL-divergence statistics for Chunk 32 =====
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Average : 0.020281
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - StdDev : 0.043231
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Median : 0.012936
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Minimum : 0.000000
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Maximum : 0.956846
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_99 : 0.140956
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_95 : 0.057223
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_90 : 0.040363
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_10 : 0.000410
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_05 : 0.000092
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_01 : 0.000010
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Samples seen: 130944
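The statistics block summarizes one KL value per token position: the 4096-token chunk is scored in four (1024,) parts, as the kl_values_list line shows, then reduced to mean, median, and tail percentiles (KLD_99 is the 99th percentile, KLD_01 the 1st). A sketch of that computation, assuming KL(baseline || target) over the 151936-entry softmax at each position (function names are illustrative):

    import numpy as np
    from scipy.special import log_softmax

    def chunk_kl(baseline: np.ndarray, target: np.ndarray) -> np.ndarray:
        # baseline/target: (4096, 151936) float32 logits -> one KL per token.
        logp = log_softmax(baseline.astype(np.float64), axis=-1)
        logq = log_softmax(target.astype(np.float64), axis=-1)
        return np.sum(np.exp(logp) * (logp - logq), axis=-1)

    def summarize(kl: np.ndarray) -> dict:
        p99, p95, p90, p10, p05, p01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
        return {"Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
                "Minimum": kl.min(), "Maximum": kl.max(),
                "KLD_99": p99, "KLD_95": p95, "KLD_90": p90,
                "KLD_10": p10, "KLD_05": p05, "KLD_01": p01}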
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128)
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9363)
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Kuiper statistic=0.144270, p-value=0.079416
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated EMA_relative_change: 0.2139865103982876, EMA_p_value_std_dev: 0.025114553002234953
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated Beta parameters: alpha=959, beta=63
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Chunk 32: Beta parameters updated (alpha=959, beta=63), stopping probability=0.053523
[... 30 further identical update cycles for chunk 32 omitted: alpha incremented 960 -> 989, stopping probability rising 0.054341 -> 0.082300, KL div 0.31-1.82, Kuiper p-values 0.000000-0.927508 ...]
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6584)
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Kuiper statistic=0.335251, p-value=0.000000
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated EMA_relative_change: 0.2309232090152675, EMA_p_value_std_dev: 0.0390505056132884
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated Beta parameters: alpha=990, beta=63
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Chunk 32: Beta parameters updated (alpha=990, beta=63), stopping probability=0.083418
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - TOTAL CHUNKS processed. | |
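Note: the stopping probability logged with each Beta update is consistent with the upper-tail mass of a Beta(alpha, beta) posterior above the logged confidence_level of 0.95. A minimal sketch of that reading (an assumption about the formula, not the tool's verified source):

    from scipy.stats import beta as beta_dist

    def stopping_probability(alpha: int, beta: int, confidence_level: float = 0.95) -> float:
        # Posterior probability that the per-segment agreement rate exceeds
        # the confidence level: P(theta > 0.95 | Beta(alpha, beta)).
        return beta_dist.sf(confidence_level, alpha, beta)

    print(stopping_probability(985, 63))   # ~0.078, cf. the logged 0.077937
    print(stopping_probability(1022, 63))  # ~0.125, cf. the logged 0.124912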
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Added chunk 31 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Added chunk 31 to freed chunks list in target_logits.h5. | |
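Note: the "freed chunks list" / "Reusing freed chunk" messages describe a fixed pool of physical slots inside each HDF5 file: once a chunk has been compared, its slot is released and recycled for the next chunk, which is why both files stay pinned at 4785.13 MB. A rough sketch of the idea (the dataset names and layout here are hypothetical):

    import h5py
    import numpy as np

    def write_chunk(path: str, chunk_id: int, logits: np.ndarray, freed: list) -> int:
        # Prefer a freed physical slot so the file never grows.
        with h5py.File(path, "a") as f:
            dset = f["logits"]                   # e.g. shape (n_slots, 4096, 151936)
            slot = freed.pop(0) if freed else chunk_id % dset.shape[0]
            dset[slot] = logits
            f["slot_to_chunk"][slot] = chunk_id  # record which chunk owns the slot
        return slot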
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Processing chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Generating logits for model, chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Processing chunks from 33 to 33 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Inference time: 5570.12 ms | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Reusing freed chunk 1 for chunk 33. | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Written chunk 33 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[33] 6050.90 ms | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Generating logits for model, chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Processing chunks from 33 to 33 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Inference time: 2728.24 ms | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Reusing freed chunk 1 for chunk 33. | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Written chunk 33 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:01:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[33] 3170.56 ms | |
[llama_gguf_optmize v0.6.0] 10:01:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
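Note: the (4096, 151936) float32 arrays written above are full-vocabulary logits for one 4096-token chunk, generated first by the bf16 baseline and then by the quantized target. With llama-cpp-python, capturing logits like these looks roughly as follows (a sketch; the model path comes from this gist, everything else is an assumption about how the tool drives the library):

    import numpy as np
    from llama_cpp import Llama

    tokens = np.load("calibration-dataset.txt.tokens.npy")   # precomputed tokens
    llm = Llama(model_path="Sailor2-1B-Chat_bf16.gguf", n_ctx=4096, logits_all=True)
    llm.eval(tokens[:4096].tolist())    # evaluate one 4096-token chunk
    logits = np.array(llm.scores)       # shape (4096, 151936), float32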
[llama_gguf_optmize v0.6.0] 10:01:08 - INFO - Comparing logits for chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:01:08 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:01:08 - INFO - Processing chunks 33 to 33... | |
[llama_gguf_optmize v0.6.0] 10:01:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:01:13 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:01:18 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:01:23 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - | |
===== KL-divergence statistics for Chunk 33 ===== | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Average : 0.017978 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - StdDev : 0.094371 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Median : 0.007920 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Maximum : 5.516145 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_99 : 0.141806 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_95 : 0.060467 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_90 : 0.037510 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_10 : 0.000070 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_05 : 0.000024 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Samples seen: 135040 | |
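Note: the "part 0..3" lines and the kl_values_list of four (1024,) arrays show the 4096-token chunk being scored in quarters, presumably to bound memory. The per-token KL divergence and the summary table above can be reproduced along these lines (a sketch, not the tool's verified implementation):

    import numpy as np

    def log_softmax(z: np.ndarray) -> np.ndarray:
        z = z - z.max(axis=-1, keepdims=True)
        return z - np.log(np.exp(z).sum(axis=-1, keepdims=True))

    def kl_per_token(baseline: np.ndarray, target: np.ndarray) -> np.ndarray:
        # KL(P_baseline || P_target) at each token position, from raw logits.
        logp = log_softmax(baseline.astype(np.float64))
        logq = log_softmax(target.astype(np.float64))
        return (np.exp(logp) * (logp - logq)).sum(axis=-1)

    # kl = np.concatenate([kl_per_token(b, t) for b, t in quarters])
    # Average/StdDev/Median/Min/Max: kl.mean(), kl.std(), np.median(kl), ...
    # KLD_99 ... KLD_01: np.percentile(kl, [99, 95, 90, 10, 5, 1])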
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
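Note: the segmentation line is a ceiling division: each 4096-token chunk is split into (4096 + 128 - 1) // 128 = 32 segments of 128 tokens, and each segment below gets its own Kuiper test and Beta update:

    chunk_tokens, segment_len = 4096, 128
    n_segments = (chunk_tokens + segment_len - 1) // segment_len   # = 32, as logged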
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6411) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.306680, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.229238792984905, EMA_p_value_std_dev: 0.038660976837533306 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=991, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=991, beta=63), stopping probability=0.084545 | |
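Note: each segment is scored with a Kuiper two-sample statistic, V = D+ + D-, the largest deviation of one empirical CDF above the other plus the largest deviation below. The log does not show which two samples are compared per segment; the statistic itself can be computed like this:

    import numpy as np

    def kuiper_statistic(x: np.ndarray, y: np.ndarray) -> float:
        # V = D+ + D- between the empirical CDFs of x and y.
        grid = np.sort(np.concatenate([x, y]))
        cdf_x = np.searchsorted(np.sort(x), grid, side="right") / len(x)
        cdf_y = np.searchsorted(np.sort(y), grid, side="right") / len(y)
        return float(np.max(cdf_x - cdf_y) + np.max(cdf_y - cdf_x))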
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9587) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.335894, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2275008583558687, EMA_p_value_std_dev: 0.03827533360247188 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=992, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=992, beta=63), stopping probability=0.085684 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8698) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.205334, p-value=0.000636 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22642311257617218, EMA_p_value_std_dev: 0.03789637622689043 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=993, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=993, beta=63), stopping probability=0.086833 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8143) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.287186, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22657812788005516, EMA_p_value_std_dev: 0.03752119891158167 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=994, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=994, beta=63), stopping probability=0.087993 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4877) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.460681, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22813538672665867, EMA_p_value_std_dev: 0.03714976399001603 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=995, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=995, beta=63), stopping probability=0.089163 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.0857) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.487324, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22963056576169025, EMA_p_value_std_dev: 0.036782034134029036 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=996, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=996, beta=63), stopping probability=0.090345 | |
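Note: the learning-rate lines that alternate between 0.01 and 0.009999999999999998 are a floating-point display artifact, not a real change: the two values are adjacent doubles, one ulp apart.

    import math
    print(math.nextafter(0.01, 0.0))   # 0.009999999999999998, one ulp below 0.01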
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 1.3339) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.405774, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23028215367520405, EMA_p_value_std_dev: 0.03641797238337607 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=997, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=997, beta=63), stopping probability=0.091537 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0539) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.175940, p-value=0.008563 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23155537997974882, EMA_p_value_std_dev: 0.036092902321990514 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=998, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=998, beta=63), stopping probability=0.092740 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1686) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.284873, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2328345919098627, EMA_p_value_std_dev: 0.03577107481915819 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=999, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=999, beta=63), stopping probability=0.093953 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5660) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.223374, p-value=0.000101 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23441896678675472, EMA_p_value_std_dev: 0.03545234727672423 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1000, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1000, beta=63), stopping probability=0.095178 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7314) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.293233, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23641054425020835, EMA_p_value_std_dev: 0.035136799024698226 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1001, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1001, beta=63), stopping probability=0.096414 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7444) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.324802, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23693537406965479, EMA_p_value_std_dev: 0.034824398366355556 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1002, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1002, beta=63), stopping probability=0.097660 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7465) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.322545, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23618081606876756, EMA_p_value_std_dev: 0.03447747601056208 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1003, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1003, beta=63), stopping probability=0.098917 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7737) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.268203, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2352563142294785, EMA_p_value_std_dev: 0.0341340137322417 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1004, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1004, beta=63), stopping probability=0.100186 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6303) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.312766, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23415290470370978, EMA_p_value_std_dev: 0.03379352905831523 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1005, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1005, beta=63), stopping probability=0.101465 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6184) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.315261, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23268732902795386, EMA_p_value_std_dev: 0.03345644073407296 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1006, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1006, beta=63), stopping probability=0.102756 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2487) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.280510, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23162046365459313, EMA_p_value_std_dev: 0.033122714802005154 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1007, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1007, beta=63), stopping probability=0.104057 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5641) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.231879, p-value=0.000040 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2305703162002142, EMA_p_value_std_dev: 0.03279249295057837 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1008, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1008, beta=63), stopping probability=0.105369 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6324) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.367976, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2300719994004717, EMA_p_value_std_dev: 0.03246556558413072 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1009, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1009, beta=63), stopping probability=0.106693 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9036) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.380311, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22961042264367604, EMA_p_value_std_dev: 0.03214189931904001 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1010, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1010, beta=63), stopping probability=0.108028 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9168) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.263567, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22988999780263603, EMA_p_value_std_dev: 0.03182146068789297 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1011, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1011, beta=63), stopping probability=0.109373 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9082) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.417340, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23170197663002684, EMA_p_value_std_dev: 0.031504218535340515 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1012, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1012, beta=63), stopping probability=0.110730 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8475) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.331480, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23207751260281062, EMA_p_value_std_dev: 0.031189967856235744 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1013, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1013, beta=63), stopping probability=0.112098 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3253) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.508208, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23388925841975144, EMA_p_value_std_dev: 0.030878851827655117 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1014, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1014, beta=63), stopping probability=0.113477 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9557) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.799918, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2366826140302733, EMA_p_value_std_dev: 0.03057083918145959 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1015, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1015, beta=63), stopping probability=0.114867 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.1675) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.768038, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23785136526320838, EMA_p_value_std_dev: 0.03026589506079652 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1016, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1016, beta=63), stopping probability=0.116269 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 0.7935) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.225815, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24055131318786382, EMA_p_value_std_dev: 0.02996433951892635 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1017, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1017, beta=63), stopping probability=0.117681 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7249) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.221892, p-value=0.000118 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24134959893890842, EMA_p_value_std_dev: 0.02966599996858246 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1018, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1018, beta=63), stopping probability=0.119105 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9298) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.253720, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24052801280363847, EMA_p_value_std_dev: 0.029370631147999007 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1019, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1019, beta=63), stopping probability=0.120540 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9556) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.178115, p-value=0.007188 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24068100260732636, EMA_p_value_std_dev: 0.029109505153098722 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1020, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1020, beta=63), stopping probability=0.121986 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8815) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.264432, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24055713869275214, EMA_p_value_std_dev: 0.028850982988051155 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1021, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1021, beta=63), stopping probability=0.123444 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7463) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.159542, p-value=0.029286 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24153930590698958, EMA_p_value_std_dev: 0.028689515013827844 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1022, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1022, beta=63), stopping probability=0.124912 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Added chunk 32 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Added chunk 32 to freed chunks list in target_logits.h5. | |
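Note: from here the log repeats the same per-chunk cycle seen above. As a readable summary (the helper names are hypothetical, and the early-stopping rule is an assumption):

    for chunk in range(start_chunk, num_chunks):
        generate_logits(baseline_model, chunk, "baseline_logits.h5")  # bf16 reference
        generate_logits(target_model, chunk, "target_logits.h5")     # quantized model
        stats = compare_logits(chunk)         # KL summary + 32 Kuiper-tested segments
        prior = update_bayesian_prior(stats)  # alpha/beta and stopping probability
        if prior.stopping_probability >= stop_threshold:
            break                             # assumed early exit once confident
        free_slots(chunk - 1)                 # recycle the previous chunk's HDF5 slots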
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Processing chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Generating logits for model, chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:28 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:28 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:28 - INFO - Processing chunks from 34 to 34 | |
[llama_gguf_optmize v0.6.0] 10:01:28 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:28 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:28 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Inference time: 5468.46 ms | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Reusing freed chunk 0 for chunk 34. | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Written chunk 34 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[34] 5956.76 ms | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Generating logits for model, chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Processing chunks from 34 to 34 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Inference time: 2377.65 ms | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Reusing freed chunk 0 for chunk 34. | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Written chunk 34 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[34] 2844.40 ms | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Comparing logits for chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Processing chunks 34 to 34... | |
[llama_gguf_optmize v0.6.0] 10:01:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:01:43 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:01:48 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:01:53 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - | |
===== KL-divergence statistics for Chunk 34 ===== | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Average : 0.021730 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - StdDev : 0.040784 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Median : 0.013903 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Maximum : 1.035068 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_99 : 0.152550 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_95 : 0.066096 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_90 : 0.045996 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_10 : 0.000371 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_05 : 0.000061 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_01 : 0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Samples seen: 139136 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5317) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.217718, p-value=0.000184 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24295342355634775, EMA_p_value_std_dev: 0.02852956555305134 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1023, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1023, beta=63), stopping probability=0.126392 | |
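Note: the EMA_relative_change and EMA_p_value_std_dev trackers drift only slightly from segment to segment. A standard exponential-moving-average update has the form below; exactly how the logged "Adjusted decay rate: 0.5" enters the tool's actual update is an assumption:

    def ema_update(prev: float, value: float, decay: float = 0.5) -> float:
        # Blend the previous estimate with the newest observation.
        return decay * prev + (1.0 - decay) * value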
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7409) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.197920, p-value=0.001285 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24409591418484713, EMA_p_value_std_dev: 0.028369477641266985 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1024, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1024, beta=63), stopping probability=0.127883 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9407) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.229463, p-value=0.000052 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.244229275590456, EMA_p_value_std_dev: 0.028215544108497925 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1025, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1025, beta=63), stopping probability=0.129385 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9690) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.240560, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24324385099553297, EMA_p_value_std_dev: 0.028063129436666716 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1026, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1026, beta=63), stopping probability=0.130899 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1387) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.198403, p-value=0.001229 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24203270127370588, EMA_p_value_std_dev: 0.027789643844840287 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1027, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1027, beta=63), stopping probability=0.132423 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8587) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.211017, p-value=0.000364 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24073121656866228, EMA_p_value_std_dev: 0.027518676267322687 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1028, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1028, beta=63), stopping probability=0.133959 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1580) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.266168, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23960924190686192, EMA_p_value_std_dev: 0.027249393712194994 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1029, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1029, beta=63), stopping probability=0.135506 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1038) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.270395, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23845806018206225, EMA_p_value_std_dev: 0.026982870963473096 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1030, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1030, beta=63), stopping probability=0.137064 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9303) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.264160, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23674860378672488, EMA_p_value_std_dev: 0.02671902728515536 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1031, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1031, beta=63), stopping probability=0.138634 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9536) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.250305, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23501454160028998, EMA_p_value_std_dev: 0.026454120214184845 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1032, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1032, beta=63), stopping probability=0.140214 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9703) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.201221, p-value=0.000943 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23321660244797246, EMA_p_value_std_dev: 0.026194441847944182 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1033, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1033, beta=63), stopping probability=0.141806 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7468) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.260977, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23221878914030983, EMA_p_value_std_dev: 0.02593735310586036 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1034, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1034, beta=63), stopping probability=0.143409 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6759) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.141370, p-value=0.094403 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23238355784222522, EMA_p_value_std_dev: 0.0260987171976906 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1035, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1035, beta=63), stopping probability=0.145023 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4519) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.270133, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2342698072267933, EMA_p_value_std_dev: 0.02625847217952801 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1036, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1036, beta=63), stopping probability=0.146648 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4915) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.201484, p-value=0.000920 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2362094235824962, EMA_p_value_std_dev: 0.02641561851452433 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1037, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1037, beta=63), stopping probability=0.148285 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4343) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.248530, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23794365942868442, EMA_p_value_std_dev: 0.026572236033795864 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1038, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1038, beta=63), stopping probability=0.149932 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5905) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.209651, p-value=0.000417 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2387231254852501, EMA_p_value_std_dev: 0.026726826100216632 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1039, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1039, beta=63), stopping probability=0.151591 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1072) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.259334, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23861385496025986, EMA_p_value_std_dev: 0.026464274736517402 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1040, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1040, beta=63), stopping probability=0.153261 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6880) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.247281, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2380460421764414, EMA_p_value_std_dev: 0.02620433200685003 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1041, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1041, beta=63), stopping probability=0.154942 | |
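
Note that the learning rate never actually moves: every segment logs "from 0.009999999999999998 to 0.009999999999999998". That value is simply 0.01 sitting one ulp low, a routine binary floating-point artifact rather than a meaningful constant, e.g.:

print(0.03 - 0.02)   # 0.009999999999999998 in IEEE-754 double precision
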
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4223) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.198061, p-value=0.001268 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23735491173104142, EMA_p_value_std_dev: 0.02594841866915034 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1042, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1042, beta=63), stopping probability=0.156634 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4305) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.172736, p-value=0.011027 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2365637114488755, EMA_p_value_std_dev: 0.025737165826422607 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1043, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1043, beta=63), stopping probability=0.158336 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6979) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.218038, p-value=0.000178 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23581184355079468, EMA_p_value_std_dev: 0.025528297310724462 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1044, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1044, beta=63), stopping probability=0.160050 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6218) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.246796, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.235183790721094, EMA_p_value_std_dev: 0.0253215050423255 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1045, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1045, beta=63), stopping probability=0.161775 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5037) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.272758, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2342673308457656, EMA_p_value_std_dev: 0.025116783837538587 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1046, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1046, beta=63), stopping probability=0.163511 | |
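
The alpha/beta counters behave like a Beta-Bernoulli posterior over the per-segment agreement rate: each segment whose condition is met increments alpha, and the rising "stopping probability" is consistent with the posterior mass above the confidence_level of 0.95 reported at every checkpoint. A sketch of that reading (inferred from the logged numbers, not confirmed against the tool's source):

from scipy.stats import beta

def stopping_probability(alpha_param, beta_param, confidence_level=0.95):
    # Posterior probability that the agreement rate exceeds the confidence
    # level under Beta(alpha, beta); at alpha=1037, beta=63 this lands near
    # the ~0.148 logged above.
    return beta.sf(confidence_level, alpha_param, beta_param)
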
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4553) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.175468, p-value=0.008892 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23398060522554723, EMA_p_value_std_dev: 0.024920847084818447 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1047, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1047, beta=63), stopping probability=0.165258 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0303) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.155984, p-value=0.037429 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23313061081800382, EMA_p_value_std_dev: 0.024833680379604928 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1048, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1048, beta=63), stopping probability=0.167016 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4972) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.240245, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23335889381464234, EMA_p_value_std_dev: 0.024747612729476558 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1049, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1049, beta=63), stopping probability=0.168785 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5462) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.214386, p-value=0.000259 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23358793139871248, EMA_p_value_std_dev: 0.02466204846058643 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1050, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1050, beta=63), stopping probability=0.170564 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3947) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.225743, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23281429887073993, EMA_p_value_std_dev: 0.024577225813936676 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1051, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1051, beta=63), stopping probability=0.172355 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3117) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.312723, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23307515631905748, EMA_p_value_std_dev: 0.024498647372868686 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1052, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1052, beta=63), stopping probability=0.174156 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4292) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.293042, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23193390027229516, EMA_p_value_std_dev: 0.024255371391164852 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1053, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1053, beta=63), stopping probability=0.175968 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3837) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.307479, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2305947835246991, EMA_p_value_std_dev: 0.024014543188691638 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1054, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1054, beta=63), stopping probability=0.177791 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Added chunk 33 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Added chunk 33 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Processing chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Generating logits for model, chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:58 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:58 - INFO - Processing chunks from 35 to 35 | |
[llama_gguf_optmize v0.6.0] 10:01:58 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:58 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:58 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Inference time: 5539.83 ms | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Reusing freed chunk 1 for chunk 35. | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Written chunk 35 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[35] 5996.12 ms | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
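
A logits chunk is 4096 x 151936 float32 values, roughly 2.3 GiB, so the steady ~4785 MB file size implies each HDF5 file keeps only two physical slots and recycles them across logical chunks ("Reusing freed chunk 1 for chunk 35"). A minimal sketch of that recycling with an illustrative schema (dataset and attribute names here are not the tool's):

import h5py
import numpy as np

def write_chunk(path, logical_chunk, logits, n_slots=2):
    # logits: (4096, 151936) float32, ~2.3 GiB per physical slot.
    with h5py.File(path, "a") as f:
        dset = f.require_dataset("logits", shape=(n_slots,) + logits.shape,
                                 dtype="float32")
        freed = list(f.attrs.get("freed_slots", np.array([], dtype=np.int64)))
        # Prefer a freed physical slot when one exists, as the log shows.
        slot = int(freed.pop(0)) if freed else logical_chunk % n_slots
        dset[slot] = logits
        f.attrs["freed_slots"] = np.array(freed, dtype=np.int64)
        f.attrs[f"slot_of_chunk_{logical_chunk}"] = slot

def free_chunk(path, logical_chunk):
    # "Added chunk N to freed chunks list": return its slot to the pool.
    with h5py.File(path, "a") as f:
        slot = int(f.attrs[f"slot_of_chunk_{logical_chunk}"])
        freed = list(f.attrs.get("freed_slots", np.array([], dtype=np.int64)))
        f.attrs["freed_slots"] = np.array(freed + [slot], dtype=np.int64)
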
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Generating logits for model, chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Processing chunks from 35 to 35 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Inference time: 2381.45 ms | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Reusing freed chunk 1 for chunk 35. | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Written chunk 35 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[35] 2829.10 ms | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
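
Both passes load the same precomputed token array and slice out the chunk, which avoids re-tokenizing the calibration dataset on every resume. A sketch of the slicing, assuming one flat array of token ids (the actual .npy layout is not shown in the log):

import numpy as np

n_ctx = 4096   # tokens per chunk, per the log
tokens = np.load("/Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/"
                 "combined/calibration-dataset.txt.tokens.npy")
chunk = 35
ids = tokens[chunk * n_ctx:(chunk + 1) * n_ctx]
# One forward pass over `ids` then yields the (4096, 151936) float32 logits
# reported in the "Logits shape" lines above.
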
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Comparing logits for chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Processing chunks 35 to 35... | |
[llama_gguf_optmize v0.6.0] 10:02:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:02:13 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:02:18 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:02:23 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - | |
===== KL-divergence statistics for Chunk 35 ===== | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Average : 0.017046 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - StdDev : 0.029465 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Median : 0.010546 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Maximum : 0.998195 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_99 : 0.122316 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_95 : 0.054708 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_90 : 0.037489 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_10 : 0.000099 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_05 : 0.000023 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Samples seen: 143232 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
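
The statistics block above is a per-token KL divergence between the baseline (bf16) and target logits, summarized by mean, median, and upper/lower percentiles; the kl_values_list line shows it is computed in four 1024-token parts. A numerically stable sketch of the same statistic over full (4096, 151936) arrays:

import numpy as np

def kl_per_token(baseline_logits, target_logits):
    # KL(P_baseline || P_target) at each token position, in log space.
    lb = baseline_logits - baseline_logits.max(axis=-1, keepdims=True)
    lt = target_logits - target_logits.max(axis=-1, keepdims=True)
    log_p = lb - np.log(np.exp(lb).sum(axis=-1, keepdims=True))
    log_q = lt - np.log(np.exp(lt).sum(axis=-1, keepdims=True))
    return (np.exp(log_p) * (log_p - log_q)).sum(axis=-1)

kl = kl_per_token(baseline, target)   # the two (4096, 151936) chunk arrays
print(f"Average: {kl.mean():.6f}  StdDev: {kl.std():.6f}  "
      f"Median: {np.median(kl):.6f}")
for q in (99, 95, 90, 10, 5, 1):
    print(f"KLD_{q:02d}: {np.percentile(kl, q):.6f}")
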
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4719) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.231841, p-value=0.000040 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.229991650177346, EMA_p_value_std_dev: 0.023775348569332302 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1055, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1055, beta=63), stopping probability=0.179624 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7159) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.245714, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22852434215210302, EMA_p_value_std_dev: 0.02353836261820639 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1056, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1056, beta=63), stopping probability=0.181468 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9527) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.295468, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22759412918717778, EMA_p_value_std_dev: 0.023303740588205446 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1057, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1057, beta=63), stopping probability=0.183323 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7218) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.238751, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2271809491170695, EMA_p_value_std_dev: 0.023071452738103498 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1058, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1058, beta=63), stopping probability=0.185189 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9788) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.246510, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22614972804114233, EMA_p_value_std_dev: 0.022841470272614836 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1059, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1059, beta=63), stopping probability=0.187065 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8300) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.275827, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22535000398533986, EMA_p_value_std_dev: 0.022613701085523224 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1060, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1060, beta=63), stopping probability=0.188951 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7924) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.274405, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22391417865488344, EMA_p_value_std_dev: 0.022388208585972503 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1061, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1061, beta=63), stopping probability=0.190848 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9235) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.247650, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22232164638148866, EMA_p_value_std_dev: 0.022164959913771216 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1062, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1062, beta=63), stopping probability=0.192756 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0368) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.296018, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22116357644109644, EMA_p_value_std_dev: 0.021943901319678945 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1063, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1063, beta=63), stopping probability=0.194674 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7341) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.146548, p-value=0.069074 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2206881167507447, EMA_p_value_std_dev: 0.02203313797774197 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1064, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1064, beta=63), stopping probability=0.196602 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8225) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.201430, p-value=0.000925 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22136926101865342, EMA_p_value_std_dev: 0.022120478993687822 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1065, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1065, beta=63), stopping probability=0.198541 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7450) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.229467, p-value=0.000052 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22209545599143662, EMA_p_value_std_dev: 0.022206889989912573 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1066, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1066, beta=63), stopping probability=0.200490 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8283) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.204843, p-value=0.000667 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2225646492046494, EMA_p_value_std_dev: 0.02229170235371785 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1067, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1067, beta=63), stopping probability=0.202449 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8649) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.198434, p-value=0.001225 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22177208599472592, EMA_p_value_std_dev: 0.022374307387846935 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1068, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1068, beta=63), stopping probability=0.204419 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6550) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.191980, p-value=0.002205 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22029544299797896, EMA_p_value_std_dev: 0.022159036984806965 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1069, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1069, beta=63), stopping probability=0.206398 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8260) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.181989, p-value=0.005225 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21858696659928734, EMA_p_value_std_dev: 0.021958282872788372 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1070, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1070, beta=63), stopping probability=0.208388 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7525) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.204898, p-value=0.000664 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21691862663638586, EMA_p_value_std_dev: 0.02175831038004752 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1071, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1071, beta=63), stopping probability=0.210387 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8666) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.277202, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21590028593013438, EMA_p_value_std_dev: 0.021561675857442127 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1072, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1072, beta=63), stopping probability=0.212397 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7438) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.264873, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2149069643571508, EMA_p_value_std_dev: 0.021368623037411588 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1073, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1073, beta=63), stopping probability=0.214417 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6912) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.245990, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2139956274612318, EMA_p_value_std_dev: 0.021178210864829204 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1074, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1074, beta=63), stopping probability=0.216446 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0023) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.274725, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21305945632570636, EMA_p_value_std_dev: 0.020969910082550464 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1075, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1075, beta=63), stopping probability=0.218485 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6252) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.194901, p-value=0.001695 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.212055115085474, EMA_p_value_std_dev: 0.020768287763984997 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1076, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1076, beta=63), stopping probability=0.220534 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7810) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.214637, p-value=0.000252 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2112063401100939, EMA_p_value_std_dev: 0.02056847481732563 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1077, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1077, beta=63), stopping probability=0.222593 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8730) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.299467, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21124217054441738, EMA_p_value_std_dev: 0.020370655973114167 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1078, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1078, beta=63), stopping probability=0.224662 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6109) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.222629, p-value=0.000109 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21180089087291926, EMA_p_value_std_dev: 0.020174691514941973 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1079, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1079, beta=63), stopping probability=0.226740 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7242) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.259169, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2118807181296927, EMA_p_value_std_dev: 0.01998067991533809 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1080, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1080, beta=63), stopping probability=0.228827 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6874) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.279829, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21193295010325178, EMA_p_value_std_dev: 0.01978247876288141 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1081, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1081, beta=63), stopping probability=0.230924 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8141) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.219605, p-value=0.000151 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2116573693628416, EMA_p_value_std_dev: 0.019585871375283514 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1082, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1082, beta=63), stopping probability=0.233031 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7047) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.220232, p-value=0.000141 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21073020723201022, EMA_p_value_std_dev: 0.019391244718988895 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1083, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1083, beta=63), stopping probability=0.235146 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8581) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.237512, p-value=0.000021 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.20949157342536542, EMA_p_value_std_dev: 0.01919857846428616 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1084, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1084, beta=63), stopping probability=0.237271 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7365) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.215366, p-value=0.000234 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.20831593664406034, EMA_p_value_std_dev: 0.01900804498945383 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1085, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1085, beta=63), stopping probability=0.239406 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8974) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.290972, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.20732097893749413, EMA_p_value_std_dev: 0.018819412306477757 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1086, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1086, beta=63), stopping probability=0.241549 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Added chunk 34 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Added chunk 34 to freed chunks list in target_logits.h5. | |
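
Every chunk follows the same trace: generate baseline logits, generate target logits, compare them (the KL summary plus 32 segment-level Kuiper/Beta updates), save the Bayesian state, then free the previous chunk's slots in both HDF5 files. The outer loop, reduced to runnable pseudocode with placeholder stubs (none of these names are the tool's API, and the 0.95 threshold is an assumption):

def generate_logits(model, chunk, path): ...   # one llama.cpp forward pass
def compare_chunk(chunk): return 0.0           # returns stopping probability
def save_state(): ...                          # "Saved prior distribution, ..."
def free_chunk(path, chunk): ...               # "Added chunk N to freed ..."

start_chunk, end_chunk, stop_threshold = 35, 36, 0.95
for chunk in range(start_chunk, end_chunk + 1):
    generate_logits("baseline", chunk, "baseline_logits.h5")
    generate_logits("target", chunk, "target_logits.h5")
    stop_prob = compare_chunk(chunk)
    save_state()
    free_chunk("baseline_logits.h5", chunk - 1)
    free_chunk("target_logits.h5", chunk - 1)
    if stop_prob >= stop_threshold:   # Bayesian early stopping
        break
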
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Processing chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Generating logits for model, chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Processing chunks from 36 to 36 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Inference time: 5489.10 ms | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Reusing freed chunk 0 for chunk 36. | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Written chunk 36 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[36] 5965.84 ms | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Generating logits for model, chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:34 - INFO - Processing chunks from 36 to 36 | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Inference time: 2370.28 ms | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Reusing freed chunk 0 for chunk 36. | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Written chunk 36 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[36] 2833.22 ms | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Comparing logits for chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Processing chunks 36 to 36... | |
[llama_gguf_optmize v0.6.0] 10:02:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:02:43 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:02:48 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:02:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:02:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - | |
===== KL-divergence statistics for Chunk 36 ===== | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Average : 0.023975 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - StdDev : 0.043240 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Median : 0.012804 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Maximum : 1.404639 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_99 : 0.179422 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_95 : 0.083223 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_90 : 0.055958 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_10 : 0.000306 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_05 : 0.000030 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Samples seen: 147328 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
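
The segmentation line is a ceil division: each 4096-token chunk is split into 128-token windows, and (4096 + 128 - 1) // 128 = 32 is the window count, which matches the 32 Kuiper/Beta updates that follow each chunk's statistics:

n_ctx, window = 4096, 128
n_segments = (n_ctx + window - 1) // window   # ceil(4096 / 128) = 32
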
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6906) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.285642, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20635517802483416, EMA_p_value_std_dev: 0.018632731654406723 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1087, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1087, beta=63), stopping probability=0.243702 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7761) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.275808, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2052341518728502, EMA_p_value_std_dev: 0.01844789448901718 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1088, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1088, beta=63), stopping probability=0.245863 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8835) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.269023, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20386734430389788, EMA_p_value_std_dev: 0.018264920064393092 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1089, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1089, beta=63), stopping probability=0.248034 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9470) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.293725, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20219174573839677, EMA_p_value_std_dev: 0.018082729239815416 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1090, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1090, beta=63), stopping probability=0.250213 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7899) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.311546, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2006757149943174, EMA_p_value_std_dev: 0.017902355802777114 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1091, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1091, beta=63), stopping probability=0.252401 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7656) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.314581, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19911954331153953, EMA_p_value_std_dev: 0.01772378165348401 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1092, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1092, beta=63), stopping probability=0.254598 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0394) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.296893, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19769728682264512, EMA_p_value_std_dev: 0.01754698881686911 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1093, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1093, beta=63), stopping probability=0.256803 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8176) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.255264, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19634940567156806, EMA_p_value_std_dev: 0.017371968771648334 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1094, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1094, beta=63), stopping probability=0.259017 | |
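Note: the Kuiper statistic reported for every window is a rotation-invariant relative of the Kolmogorov-Smirnov statistic, V = D+ + D-, the sum of the largest positive and largest negative gaps between two empirical CDFs; unlike KS it weights both tails equally. The log doesn't show which two samples are compared (plausibly per-window baseline vs. target distributions), so this sketch only computes the statistic itself on two generic samples; the printed p-values would come from Kuiper's asymptotic series, omitted here.

import numpy as np

def kuiper_statistic(x: np.ndarray, y: np.ndarray) -> float:
    # Evaluate both empirical CDFs on the pooled, sorted grid.
    grid = np.sort(np.concatenate([x, y]))
    F_x = np.searchsorted(np.sort(x), grid, side="right") / len(x)
    F_y = np.searchsorted(np.sort(y), grid, side="right") / len(y)
    d = F_x - F_y
    return d.max() + (-d).max()  # V = D+ + D-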
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8093) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.345960, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19558027136078926, EMA_p_value_std_dev: 0.01719869456673238 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1095, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1095, beta=63), stopping probability=0.261240 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7149) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.247624, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19573025806265457, EMA_p_value_std_dev: 0.017027165527192797 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1096, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1096, beta=63), stopping probability=0.263471 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6971) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.310503, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1964089278778327, EMA_p_value_std_dev: 0.01685734748899065 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1097, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1097, beta=63), stopping probability=0.265710 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6401) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.157052, p-value=0.034800 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1977745314644047, EMA_p_value_std_dev: 0.01684442550547747 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1098, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1098, beta=63), stopping probability=0.267958 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4940) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.187747, p-value=0.003201 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19867425157779264, EMA_p_value_std_dev: 0.016828693370982598 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1099, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1099, beta=63), stopping probability=0.270214 | |
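Note: the EMA_relative_change / EMA_p_value_std_dev pairs evolve like standard exponential moving averages with the logged decay rate of 0.5; the tracked quantities (relative change of the window KL, rolling std-dev of recent p-values) are inferred from the field names, so treat the details as an assumption:

def ema_update(prev: float, value: float, decay: float = 0.5) -> float:
    # One EMA step with the "Adjusted decay rate: 0.5" from the log.
    return decay * prev + (1.0 - decay) * value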
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4804) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.153410, p-value=0.044484 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1992169459875737, EMA_p_value_std_dev: 0.01687471552870729 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1100, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1100, beta=63), stopping probability=0.272478 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4834) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.221110, p-value=0.000129 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2004557246454641, EMA_p_value_std_dev: 0.016920044704221357 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1101, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1101, beta=63), stopping probability=0.274750 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7841) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.214462, p-value=0.000257 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2004588420372314, EMA_p_value_std_dev: 0.01696443038583044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1102, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1102, beta=63), stopping probability=0.277029 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7207) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.129072, p-value=0.184892 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20122084198663445, EMA_p_value_std_dev: 0.01758883197214421 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1103, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1103, beta=63), stopping probability=0.279317 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8159) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.169108, p-value=0.014576 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20245176059312933, EMA_p_value_std_dev: 0.01819303357312788 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1104, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1104, beta=63), stopping probability=0.281613 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6898) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.180597, p-value=0.005865 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20238049399201696, EMA_p_value_std_dev: 0.01881527143704294 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1105, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1105, beta=63), stopping probability=0.283916 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7723) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.159274, p-value=0.029840 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2025447121751636, EMA_p_value_std_dev: 0.01940405256512261 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1106, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1106, beta=63), stopping probability=0.286227 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8109) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.268553, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2030654259999787, EMA_p_value_std_dev: 0.019987345803635582 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1107, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1107, beta=63), stopping probability=0.288545 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9117) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.213492, p-value=0.000283 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20318729639642705, EMA_p_value_std_dev: 0.019912760995769226 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1108, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1108, beta=63), stopping probability=0.290871 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8840) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.201986, p-value=0.000877 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20329047595464889, EMA_p_value_std_dev: 0.01984166552132383 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1109, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1109, beta=63), stopping probability=0.293204 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6828) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.153182, p-value=0.045158 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20385516252564653, EMA_p_value_std_dev: 0.01985360471912983 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1110, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1110, beta=63), stopping probability=0.295544 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8052) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.161799, p-value=0.024965 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20316742333723936, EMA_p_value_std_dev: 0.019857977005861057 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1111, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1111, beta=63), stopping probability=0.297892 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7852) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.163061, p-value=0.022803 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20186340095271643, EMA_p_value_std_dev: 0.019847363549239102 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1112, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1112, beta=63), stopping probability=0.300246 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8331) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.194056, p-value=0.001830 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.201082257778073, EMA_p_value_std_dev: 0.019833142129520878 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1113, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1113, beta=63), stopping probability=0.302608 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9415) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.214789, p-value=0.000248 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19993315920251228, EMA_p_value_std_dev: 0.01982063163342824 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1114, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1114, beta=63), stopping probability=0.304977 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6252) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.155345, p-value=0.039083 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19940435204196336, EMA_p_value_std_dev: 0.019787765721201712 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1115, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1115, beta=63), stopping probability=0.307352 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6228) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.198625, p-value=0.001204 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1996424203704434, EMA_p_value_std_dev: 0.019763365170388878 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1116, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1116, beta=63), stopping probability=0.309734 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8260) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.181206, p-value=0.005577 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19954265232522048, EMA_p_value_std_dev: 0.019731922714223197 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1117, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1117, beta=63), stopping probability=0.312122 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8778) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.169374, p-value=0.014285 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1993443498241292, EMA_p_value_std_dev: 0.01969551882901338 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1118, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1118, beta=63), stopping probability=0.314518 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Added chunk 35 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Added chunk 35 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Processing chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Generating logits for model, chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Processing chunks from 37 to 37 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Inference time: 5504.06 ms | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Reusing freed chunk 1 for chunk 37. | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Written chunk 37 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[37] 5955.99 ms | |
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
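Note: the constant 4785.13 MB is consistent with a two-slot float32 logits store: 2 slots x 4096 tokens x 151936 vocab x 4 bytes = 4,978,638,848 bytes, about 4748 MiB, with the remaining ~37 MB presumably HDF5 metadata and bookkeeping. The slot count is an inference from the "physical slot 0/1" lines; the point is that freed-slot reuse keeps the file size fixed no matter how many chunks are processed.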
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Generating logits for model, chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:04 - INFO - Processing chunks from 37 to 37 | |
[llama_gguf_optmize v0.6.0] 10:03:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Inference time: 2460.95 ms | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Reusing freed chunk 1 for chunk 37. | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Written chunk 37 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[37] 2911.48 ms | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
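Note: "Reusing freed chunk 1 for chunk 37" / "Written chunk 37 at physical slot 1" show that both HDF5 files keep a small pool of physical slots plus a free list, recycling a slot as soon as a chunk has been compared. A minimal sketch of that scheme with h5py; the dataset name, bookkeeping attribute, and free-list handling are hypothetical, since the log shows the behaviour rather than the layout:

import h5py
import numpy as np

# Assumes the file was created with a resizable logits dataset, e.g.:
#   f.create_dataset("logits", shape=(0, 4096, 151936),
#                    maxshape=(None, 4096, 151936), dtype="f4")

def write_chunk(f: h5py.File, chunk_id: int, logits: np.ndarray,
                free_slots: list[int]) -> int:
    if free_slots:
        slot = free_slots.pop(0)               # "Reusing freed chunk S for chunk N"
    else:
        slot = f["logits"].shape[0]
        f["logits"].resize(slot + 1, axis=0)   # grow only while no slot is free
    f["logits"][slot] = logits                 # "Written chunk N at physical slot S"
    f.attrs[f"chunk_{chunk_id}_slot"] = slot   # hypothetical chunk->slot bookkeeping
    return slot

def free_chunk(free_slots: list[int], slot: int) -> None:
    free_slots.append(slot)                    # "Added chunk N to freed chunks list"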
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Comparing logits for chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Processing chunks 37 to 37... | |
[llama_gguf_optmize v0.6.0] 10:03:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:03:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:03:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:03:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - | |
===== KL-divergence statistics for Chunk 37 ===== | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Average : 0.020722 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - StdDev : 0.048673 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Median : 0.006612 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Maximum : 1.303131 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_99 : 0.205267 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_95 : 0.082247 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_90 : 0.050595 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_10 : 0.000154 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_05 : 0.000039 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_01 : 0.000006 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Samples seen: 151424 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
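Note: the statistics table above is a straightforward summary of one KL value per token position; the four (1024,) parts in kl_values_list cover the 4096-token chunk, and "segmentation 0/32 (4096 + 128 - 1/ 128)" is ceil-division, (4096 + 128 - 1) // 128 = 32 windows of 128 tokens, exactly the number of Beta updates that follow per chunk. A sketch of the per-token KL and the reported summary (whether the tool uses KL(baseline||target) or the reverse isn't shown, so that direction is an assumption):

import numpy as np

def log_softmax(z: np.ndarray) -> np.ndarray:
    z = z - z.max(axis=-1, keepdims=True)
    return z - np.log(np.exp(z).sum(axis=-1, keepdims=True))

def kl_per_token(base_logits: np.ndarray, tgt_logits: np.ndarray) -> np.ndarray:
    # One KL(baseline || target) value per token over the 151936-way vocab.
    lp, lq = log_softmax(base_logits), log_softmax(tgt_logits)
    return (np.exp(lp) * (lp - lq)).sum(axis=-1)

def summarize(kl: np.ndarray) -> dict:
    p99, p95, p90, p10, p05, p01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
    return {"Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
            "Minimum": kl.min(), "Maximum": kl.max(),
            "KLD_99": p99, "KLD_95": p95, "KLD_90": p90,
            "KLD_10": p10, "KLD_05": p05, "KLD_01": p01}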
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5804) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.149089, p-value=0.058888 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19856028508907136, EMA_p_value_std_dev: 0.019743286832800716 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1119, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1119, beta=63), stopping probability=0.316919 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7628) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.220193, p-value=0.000142 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19803070197380473, EMA_p_value_std_dev: 0.01979176897792631 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1120, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1120, beta=63), stopping probability=0.319327 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6144) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.155443, p-value=0.038827 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19827898388385787, EMA_p_value_std_dev: 0.019840657427701497 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1121, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1121, beta=63), stopping probability=0.321741 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7981) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.123277, p-value=0.245025 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19891885348651805, EMA_p_value_std_dev: 0.020636539485962415 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1122, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1122, beta=63), stopping probability=0.324162 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9022) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.192591, p-value=0.002088 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20077767671064403, EMA_p_value_std_dev: 0.021443241485750653 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1123, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1123, beta=63), stopping probability=0.326588 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8008) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.154529, p-value=0.041287 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20159560501380647, EMA_p_value_std_dev: 0.022249283969564878 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1124, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1124, beta=63), stopping probability=0.329020 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7342) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.205089, p-value=0.000652 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20252938785779467, EMA_p_value_std_dev: 0.023046475802404747 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1125, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1125, beta=63), stopping probability=0.331458 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9251) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.257543, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20363668078580763, EMA_p_value_std_dev: 0.023875023070521632 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1126, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1126, beta=63), stopping probability=0.333902 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7520) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.194976, p-value=0.001684 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2041251504379074, EMA_p_value_std_dev: 0.02381630093272934 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1127, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1127, beta=63), stopping probability=0.336352 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0671) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.246558, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2047460276130154, EMA_p_value_std_dev: 0.023760425510504137 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1128, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1128, beta=63), stopping probability=0.338807 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8078) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.123553, p-value=0.241895 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20557426598833872, EMA_p_value_std_dev: 0.02459990544462103 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1129, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1129, beta=63), stopping probability=0.341267 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8329) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.160592, p-value=0.027200 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20655906899759363, EMA_p_value_std_dev: 0.025407707297274706 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1130, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1130, beta=63), stopping probability=0.343733 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7066) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.121501, p-value=0.265874 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20753465066283125, EMA_p_value_std_dev: 0.02649574778073413 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1131, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1131, beta=63), stopping probability=0.346204 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1070) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.307975, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21066615326329394, EMA_p_value_std_dev: 0.027576250406409226 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1132, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1132, beta=63), stopping probability=0.348680 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2450) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.461805, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2137686561077186, EMA_p_value_std_dev: 0.028645989423102827 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1133, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1133, beta=63), stopping probability=0.351161 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4938) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.490583, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2164039391258559, EMA_p_value_std_dev: 0.02952192145228879 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1134, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1134, beta=63), stopping probability=0.353648 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1350) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.499754, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21828067100989199, EMA_p_value_std_dev: 0.030413494179814347 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1135, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1135, beta=63), stopping probability=0.356139 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9922) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.464526, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2172450124691678, EMA_p_value_std_dev: 0.030110119582706298 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1136, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1136, beta=63), stopping probability=0.358634 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0542) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.471835, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21548852322783463, EMA_p_value_std_dev: 0.029809771139868803 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1137, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1137, beta=63), stopping probability=0.361135 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8456) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.410652, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21385375813495958, EMA_p_value_std_dev: 0.029512418672748612 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1138, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1138, beta=63), stopping probability=0.363639 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.1096) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.549682, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21291056993221288, EMA_p_value_std_dev: 0.029218032296487943 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1139, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1139, beta=63), stopping probability=0.366149 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 1.0323) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.491973, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21210592889037871, EMA_p_value_std_dev: 0.028926582424330476 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1140, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1140, beta=63), stopping probability=0.368662 | |
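Note: two floating-point curiosities in these lines decode cleanly. The rate logged as 0.009999999999999998 is just 0.01 after round-off (it is the representable double one step below 0.01), so the endless "updated from X to X" lines mean the learning rate is effectively pinned at 0.01; the lone tick up to 0.01 at the KL spike of 2.1096 above, reverting in the very next window, is the only visible movement. Likewise the constant theta_P of 0.050000000000000044 is exactly 1 - 0.95 in double precision, tying it to the reported confidence_level of 0.95. A quick check:

import math

print(math.nextafter(0.01, 0.0))  # 0.009999999999999998 -- one step below 0.01
print(1 - 0.95)                   # 0.050000000000000044 -- the logged theta_P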
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0571) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.440484, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21141991555572165, EMA_p_value_std_dev: 0.02863803976464778 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1141, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1141, beta=63), stopping probability=0.371180 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0720) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.402919, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21062300924975513, EMA_p_value_std_dev: 0.028352375317995417 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1142, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1142, beta=63), stopping probability=0.373702 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0696) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.442134, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20951486778941683, EMA_p_value_std_dev: 0.028069560374198412 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1143, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1143, beta=63), stopping probability=0.376227 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2705) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.504888, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20846373478587435, EMA_p_value_std_dev: 0.027789566509465784 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1144, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1144, beta=63), stopping probability=0.378757 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8706) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.443892, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20748355208898017, EMA_p_value_std_dev: 0.02751236558353386 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1145, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1145, beta=63), stopping probability=0.381290 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8241) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.436702, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20642722825278853, EMA_p_value_std_dev: 0.02723792973683811 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1146, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1146, beta=63), stopping probability=0.383827 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7402) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.459261, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20521047941477913, EMA_p_value_std_dev: 0.026966231387713147 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1147, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1147, beta=63), stopping probability=0.386368 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8887) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.537932, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20404238765690583, EMA_p_value_std_dev: 0.02669724322962071 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1148, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1148, beta=63), stopping probability=0.388912 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8723) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.408331, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20316155496874544, EMA_p_value_std_dev: 0.026430938228405244 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1149, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1149, beta=63), stopping probability=0.391459 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0964) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.449726, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20251973199391252, EMA_p_value_std_dev: 0.0261672896195769 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1150, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1150, beta=63), stopping probability=0.394010 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Added chunk 36 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Added chunk 36 to freed chunks list in target_logits.h5. | |
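[editor's note] The repeating "Updated Beta parameters ... stopping probability" lines above are the Bayesian early-stopping bookkeeping: each 128-token window contributes one Bernoulli observation, alpha counts windows that met the stopping condition, beta counts those that did not, and the stopping probability is the Beta-posterior mass above the 0.95 confidence level. A minimal sketch of that update -- the exact per-window rule behind "Condition met: Incremented alpha." is not shown in the log, so treat it as an assumption:

    from scipy.stats import beta as beta_dist

    def update_prior(alpha, beta, window_met_condition, confidence_level=0.95):
        # One Bernoulli observation per 128-token window (assumed rule).
        if window_met_condition:
            alpha += 1          # "Condition met: Incremented alpha."
        else:
            beta += 1
        # Posterior mass above the confidence level; consistent with the
        # logged values, e.g. Beta(1150, 63) gives roughly 0.39 at 0.95.
        stopping_probability = beta_dist.sf(confidence_level, alpha, beta)
        return alpha, beta, stopping_probability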
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Processing chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Generating logits for model, chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Processing chunks from 38 to 38 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Inference time: 5434.89 ms | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Reusing freed chunk 0 for chunk 38. | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Written chunk 38 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[38] 5901.49 ms | |
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
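[editor's note] One chunk of logits is 4096 x 151936 float32, about 2.3 GiB, which is why the HDF5 files are recycled rather than grown: once a chunk has been compared, its physical slot goes on the freed list ("Added chunk 36 to freed chunks list") and is reused for the next chunk ("Reusing freed chunk 0 for chunk 38"), keeping the file size pinned at 4785.13 MB (two resident slots plus metadata). A hedged h5py sketch of that reuse; the dataset names are hypothetical:

    import h5py
    import numpy as np

    def write_logits(path, chunk_id, logits, freed_slots):
        # logits: (4096, 151936) float32, ~2.3 GiB per chunk.
        with h5py.File(path, "a") as f:
            slot = freed_slots.pop(0)                      # e.g. slot 0 for chunk 38
            f["logits"][slot] = logits.astype(np.float32)  # dataset name assumed
            f["slot_to_chunk"][slot] = chunk_id            # mapping dataset assumed
        return slot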
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Generating logits for model, chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:34 - INFO - Processing chunks from 38 to 38 | |
[llama_gguf_optmize v0.6.0] 10:03:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Inference time: 2431.00 ms | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Reusing freed chunk 0 for chunk 38. | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Written chunk 38 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[38] 2875.10 ms | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Comparing logits for chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Processing chunks 38 to 38... | |
[llama_gguf_optmize v0.6.0] 10:03:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:03:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:03:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:03:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:03:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - | |
===== KL-divergence statistics for Chunk 38 ===== | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Average : 0.016250 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - StdDev : 0.040581 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Median : 0.005430 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Maximum : 0.867015 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_99 : 0.154382 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_95 : 0.063324 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_90 : 0.038309 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_10 : 0.000051 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_05 : 0.000015 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Samples seen: 155520 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
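[editor's note] The table above is per-token KL divergence between the baseline (bf16) and target logits, taken over the 151936-entry vocabulary and accumulated in four 1024-token parts (hence the kl_values_list shapes). The "segmentation 0/32 (4096 + 128 - 1/ 128)" line is ceil division: (4096 + 128 - 1) // 128 = 32 windows of 128 tokens, which the per-window tests below iterate over. A sketch of the statistics, assuming natural-log KL(baseline || target):

    import numpy as np
    from scipy.special import log_softmax

    def kl_report(baseline, target):
        # baseline, target: (n_tokens, 151936) float32 logits for one chunk.
        p_log = log_softmax(baseline.astype(np.float64), axis=-1)
        q_log = log_softmax(target.astype(np.float64), axis=-1)
        kl = np.sum(np.exp(p_log) * (p_log - q_log), axis=-1)  # per-token KL
        q99, q95, q90, q10, q05, q01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
        return {"Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
                "Minimum": kl.min(), "Maximum": kl.max(),
                "KLD_99": q99, "KLD_95": q95, "KLD_90": q90,
                "KLD_10": q10, "KLD_05": q05, "KLD_01": q01}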
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9198) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.416543, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.201911829270496, EMA_p_value_std_dev: 0.02590627090562162 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1151, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1151, beta=63), stopping probability=0.396563 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9922) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.402917, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20089384285677128, EMA_p_value_std_dev: 0.025647855853338047 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1152, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1152, beta=63), stopping probability=0.399120 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9956) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.398977, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19942686925020994, EMA_p_value_std_dev: 0.025392018491201 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1153, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1153, beta=63), stopping probability=0.401680 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1362) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.248244, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19831833203644575, EMA_p_value_std_dev: 0.025138759613406537 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1154, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1154, beta=63), stopping probability=0.404242 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9809) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.348590, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19839187461324506, EMA_p_value_std_dev: 0.024888026992915757 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1155, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1155, beta=63), stopping probability=0.406807 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0207) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.353305, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1983928621131688, EMA_p_value_std_dev: 0.02463979543031336 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1156, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1156, beta=63), stopping probability=0.409374 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9592) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.338464, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1985051262527758, EMA_p_value_std_dev: 0.024394039977534605 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1157, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1157, beta=63), stopping probability=0.411944 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7115) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.269913, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19796012125115162, EMA_p_value_std_dev: 0.024150735562536132 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1158, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1158, beta=63), stopping probability=0.414517 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7876) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.294456, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19675187051035464, EMA_p_value_std_dev: 0.023909833660928095 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1159, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1159, beta=63), stopping probability=0.417091 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6346) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.146509, p-value=0.069243 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19656507584035846, EMA_p_value_std_dev: 0.02398022126792473 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1160, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1160, beta=63), stopping probability=0.419668 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0388) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.368993, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1992454162873803, EMA_p_value_std_dev: 0.02404990675854253 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1161, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1161, beta=63), stopping probability=0.422247 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.6961) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.422904, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20168775361460103, EMA_p_value_std_dev: 0.024118897136404725 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1162, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1162, beta=63), stopping probability=0.424827 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7939) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.370570, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20414655051564776, EMA_p_value_std_dev: 0.024187199759909933 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1163, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1163, beta=63), stopping probability=0.427409 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4051) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.535019, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.2063611756363553, EMA_p_value_std_dev: 0.024254821078623246 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1164, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1164, beta=63), stopping probability=0.429993 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9686) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.428347, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20630197293478086, EMA_p_value_std_dev: 0.024012879238364235 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1165, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1165, beta=63), stopping probability=0.432579 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2613) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.478363, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20615098862037462, EMA_p_value_std_dev: 0.023773350767961724 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1166, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1166, beta=63), stopping probability=0.435166 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0706) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.400558, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20614713804008342, EMA_p_value_std_dev: 0.02353621159405148 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1167, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1167, beta=63), stopping probability=0.437754 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1636) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.400914, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20538595035910184, EMA_p_value_std_dev: 0.023301437883400816 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1168, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1168, beta=63), stopping probability=0.440344 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0874) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.416375, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20412213261407697, EMA_p_value_std_dev: 0.02306900604051389 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1169, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1169, beta=63), stopping probability=0.442934 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0121) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.467666, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20289496032955318, EMA_p_value_std_dev: 0.022838892705259763 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1170, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1170, beta=63), stopping probability=0.445526 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0947) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.426623, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20149833407886586, EMA_p_value_std_dev: 0.022611074750524797 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1171, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1171, beta=63), stopping probability=0.448118 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0942) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.427826, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20011804243926112, EMA_p_value_std_dev: 0.02238552927988831 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1172, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1172, beta=63), stopping probability=0.450712 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9952) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.267659, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1993807359524497, EMA_p_value_std_dev: 0.022162235913274066 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1173, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1173, beta=63), stopping probability=0.453305 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9887) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.419614, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19983164306622628, EMA_p_value_std_dev: 0.0219411698979918 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1174, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1174, beta=63), stopping probability=0.455900 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0660) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.355905, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20051789000698755, EMA_p_value_std_dev: 0.021722309016211337 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1175, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1175, beta=63), stopping probability=0.458495 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9555) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.302470, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20138051982434463, EMA_p_value_std_dev: 0.021505631267554247 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1176, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1176, beta=63), stopping probability=0.461090 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1002) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.327944, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.2013130789248529, EMA_p_value_std_dev: 0.02129111487936225 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1177, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1177, beta=63), stopping probability=0.463686 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0780) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.396955, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20080194358725376, EMA_p_value_std_dev: 0.021078736025290225 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1178, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1178, beta=63), stopping probability=0.466281 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1719) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.293309, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20057834967624028, EMA_p_value_std_dev: 0.020868475696868255 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1179, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1179, beta=63), stopping probability=0.468877 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8539) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.259329, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20022860778140159, EMA_p_value_std_dev: 0.02066031934372062 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1180, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1180, beta=63), stopping probability=0.471472 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2477) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.283435, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1998932174986564, EMA_p_value_std_dev: 0.02045423929157665 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1181, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1181, beta=63), stopping probability=0.474068 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8436) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.310823, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19913509731864695, EMA_p_value_std_dev: 0.02025021488678012 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1182, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1182, beta=63), stopping probability=0.476663 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Added chunk 37 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Added chunk 37 to freed chunks list in target_logits.h5. | |
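[editor's note] Each 128-token window is scored with a Kuiper test, a rotation-invariant relative of Kolmogorov-Smirnov that sums the largest deviations in both directions between two CDFs; its p-values feed EMA_p_value_std_dev above. The log does not show which two samples are compared (plausibly the window's KL values against the running prior distribution), so the two-sample version below is illustrative only:

    import numpy as np

    def kuiper_two(x, y):
        # Kuiper statistic V = D+ + D- between two empirical CDFs.
        x, y = np.sort(x), np.sort(y)
        grid = np.concatenate([x, y])
        cdf_x = np.searchsorted(x, grid, side="right") / len(x)
        cdf_y = np.searchsorted(y, grid, side="right") / len(y)
        v = (cdf_x - cdf_y).max() + (cdf_y - cdf_x).max()
        # Asymptotic false-positive probability (Numerical Recipes form).
        ne = len(x) * len(y) / (len(x) + len(y))
        lam = (np.sqrt(ne) + 0.155 + 0.24 / np.sqrt(ne)) * v
        j = np.arange(1, 101)
        p = 2.0 * np.sum((4.0 * (j * lam) ** 2 - 1.0) * np.exp(-2.0 * (j * lam) ** 2))
        return v, float(np.clip(p, 0.0, 1.0))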
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Processing chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Generating logits for model, chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Processing chunks from 39 to 39 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Inference time: 5579.97 ms | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Reusing freed chunk 1 for chunk 39. | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Written chunk 39 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[39] 6032.68 ms | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Generating logits for model, chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:04 - INFO - Processing chunks from 39 to 39 | |
[llama_gguf_optmize v0.6.0] 10:04:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Inference time: 2551.68 ms | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Reusing freed chunk 1 for chunk 39. | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Written chunk 39 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[39] 2990.04 ms | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Comparing logits for chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Processing chunks 39 to 39... | |
[llama_gguf_optmize v0.6.0] 10:04:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:04:13 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:04:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:04:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - | |
===== KL-divergence statistics for Chunk 39 ===== | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Average : 0.022853 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - StdDev : 0.044143 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Median : 0.010927 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Maximum : 0.821665 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_99 : 0.205363 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_95 : 0.080140 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_90 : 0.052837 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_10 : 0.000035 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_05 : 0.000011 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Samples seen: 159616 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7015) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.244775, p-value=0.000009 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19832408036247104, EMA_p_value_std_dev: 0.02004825793589753 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1183, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1183, beta=63), stopping probability=0.479257 | |
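[editor's note] Two exponential moving averages track convergence of the comparison: EMA_relative_change (of the chunk-level KL summary) and EMA_p_value_std_dev (of the Kuiper p-values), smoothed with the logged "Adjusted decay rate: 0.5". Note also that "Learning rate updated from 0.009999999999999998 to 0.009999999999999998" is an unchanged 0.01 carrying accumulated floating-point error, not a real update. A sketch under the assumption of a standard EMA:

    def ema_update(prev, value, decay=0.5):
        # Standard EMA with the logged decay of 0.5; the small step-to-step
        # drift in EMA_relative_change is consistent with new window
        # statistics staying close to the running average.
        return decay * prev + (1.0 - decay) * value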
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0561) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.338448, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19809465609728455, EMA_p_value_std_dev: 0.019848315525388883 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1184, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1184, beta=63), stopping probability=0.481851 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1271) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.333474, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19777018849712724, EMA_p_value_std_dev: 0.01965036868749063 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1185, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1185, beta=63), stopping probability=0.484444 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0941) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.218133, p-value=0.000176 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1983820843124998, EMA_p_value_std_dev: 0.019455131630581646 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1186, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1186, beta=63), stopping probability=0.487037 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9890) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.297070, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19954712316666187, EMA_p_value_std_dev: 0.019261842054517395 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1187, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1187, beta=63), stopping probability=0.489628 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9828) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.389064, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.2004792051625368, EMA_p_value_std_dev: 0.019070489610358295 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1188, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1188, beta=63), stopping probability=0.492219 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9941) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.276730, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20174281711196088, EMA_p_value_std_dev: 0.01888104573741464 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1189, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1189, beta=63), stopping probability=0.494809 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0308) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.309965, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20257014841968282, EMA_p_value_std_dev: 0.018693491565782833 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1190, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1190, beta=63), stopping probability=0.497397 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0220) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.322690, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20251628304032693, EMA_p_value_std_dev: 0.0185070246561153 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1191, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1191, beta=63), stopping probability=0.499984 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1446) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.347640, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20164543541240018, EMA_p_value_std_dev: 0.0183224177624293 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1192, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1192, beta=63), stopping probability=0.502570 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9548) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.280277, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.2006651720990951, EMA_p_value_std_dev: 0.01813965234505754 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1193, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1193, beta=63), stopping probability=0.505154 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7744) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.237285, p-value=0.000022 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19981930857078498, EMA_p_value_std_dev: 0.017958805610156593 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1194, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1194, beta=63), stopping probability=0.507737 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0745) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.362847, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19988010533724337, EMA_p_value_std_dev: 0.017779762822793397 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1195, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1195, beta=63), stopping probability=0.510317 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9897) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.288847, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20022416843045196, EMA_p_value_std_dev: 0.01760250595610994 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1196, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1196, beta=63), stopping probability=0.512896 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0258) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.327183, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20038790582020463, EMA_p_value_std_dev: 0.017427017226586492 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1197, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1197, beta=63), stopping probability=0.515473 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9997) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.404930, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20086537465056234, EMA_p_value_std_dev: 0.017253279100696316 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1198, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1198, beta=63), stopping probability=0.518048 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9203) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.345161, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20063846377516412, EMA_p_value_std_dev: 0.01708117776642972 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1199, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1199, beta=63), stopping probability=0.520621 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2231) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.349792, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.199998793629323, EMA_p_value_std_dev: 0.016910793142970736 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1200, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1200, beta=63), stopping probability=0.523192 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0991) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.345998, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1989156795225175, EMA_p_value_std_dev: 0.01674210798170941 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1201, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1201, beta=63), stopping probability=0.525760 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9091) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.314237, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19752245990760275, EMA_p_value_std_dev: 0.016575105457360486 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1202, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1202, beta=63), stopping probability=0.528326 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7795) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.344233, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1960782783610691, EMA_p_value_std_dev: 0.01640976878318696 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1203, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1203, beta=63), stopping probability=0.530889 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2928) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.415585, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19509470771830098, EMA_p_value_std_dev: 0.01624608134234257 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1204, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1204, beta=63), stopping probability=0.533449 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9409) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.337595, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1945805369882103, EMA_p_value_std_dev: 0.016084026683707484 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1205, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1205, beta=63), stopping probability=0.536007 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9194) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.282607, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1943284822430689, EMA_p_value_std_dev: 0.015923588818431714 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1206, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1206, beta=63), stopping probability=0.538562 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8761) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.254666, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19408318606235514, EMA_p_value_std_dev: 0.015764763009220514 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1207, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1207, beta=63), stopping probability=0.541114 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8892) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.285813, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19357125932079697, EMA_p_value_std_dev: 0.01560752143935325 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1208, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1208, beta=63), stopping probability=0.543663 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8426) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.270977, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19273555385313904, EMA_p_value_std_dev: 0.015451848053829074 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1209, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1209, beta=63), stopping probability=0.546208 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8968) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.277298, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19155860722729548, EMA_p_value_std_dev: 0.015297727336748071 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1210, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1210, beta=63), stopping probability=0.548751 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7592) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.268375, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19014372768292254, EMA_p_value_std_dev: 0.015145143595364223 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1211, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1211, beta=63), stopping probability=0.551290 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9192) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.257452, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.18861345179322678, EMA_p_value_std_dev: 0.014994078383721174 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1212, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1212, beta=63), stopping probability=0.553826 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7065) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.249416, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1870489812801139, EMA_p_value_std_dev: 0.01484453346945921 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1213, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1213, beta=63), stopping probability=0.556358 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1269) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.240344, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.18552924805624807, EMA_p_value_std_dev: 0.014696521599503465 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1214, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1214, beta=63), stopping probability=0.558886 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Added chunk 38 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Added chunk 38 to freed chunks list in target_logits.h5. | |
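The "freed chunks list" lines above show the sliding-window storage scheme: once a chunk has been compared, its physical slot in baseline_logits.h5 and target_logits.h5 is returned to a free list, and the next chunk overwrites that slot instead of growing the file ("Reusing freed chunk 0 for chunk 40" below). A minimal sketch of that bookkeeping with h5py — names like SlotPool and slot_of are illustrative, not the tool's actual schema:

```python
import h5py

class SlotPool:
    """Minimal sketch: a fixed pool of physical slots in one HDF5 file,
    so a 2-slot file can hold a sliding window of logical chunks."""

    def __init__(self, path, slot_shape, n_slots=2):
        self.f = h5py.File(path, "a")
        if "logits" not in self.f:
            self.f.create_dataset("logits", (n_slots,) + slot_shape,
                                  dtype="float32")
        self.free = list(range(n_slots))   # physical slots available for reuse
        self.slot_of = {}                  # logical chunk id -> physical slot

    def write(self, chunk_id, logits):
        slot = self.free.pop(0)            # "Reusing freed chunk 0 for chunk 40"
        self.f["logits"][slot] = logits    # "Written chunk 40 at physical slot 0"
        self.slot_of[chunk_id] = slot

    def free_chunk(self, chunk_id):
        # "Added chunk 38 to freed chunks list"
        self.free.append(self.slot_of.pop(chunk_id))
```

Keeping only two live slots per file is what holds each file at a constant size even as the chunk numbers climb.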
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Processing chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Generating logits for model, chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:27 - INFO - Processing chunks from 40 to 40 | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Inference time: 5591.33 ms | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Reusing freed chunk 0 for chunk 40. | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Written chunk 40 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[40] 6069.64 ms | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
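The constant file size is easy to verify: one chunk of logits is 4096 × 151936 float32 values, i.e. 4096 · 151936 · 4 bytes = 2,489,319,424 bytes = exactly 2374 MiB, so two physical slots come to 4748 MiB. The reported 4785.13 MB is that plus roughly 37 MB of HDF5 metadata and whatever bookkeeping datasets the tool stores alongside.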
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Generating logits for model, chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:34 - INFO - Processing chunks from 40 to 40 | |
[llama_gguf_optmize v0.6.0] 10:04:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Inference time: 2557.50 ms | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Reusing freed chunk 0 for chunk 40. | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Written chunk 40 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[40] 3036.02 ms | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
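Note the asymmetry in inference times: roughly 5.6 s per 4096-token chunk for the first model versus 2.6 s for the second. That is consistent with the baseline being the full-precision reference and the target being the quantized GGUF under evaluation, though the log itself does not name the two model files.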
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Comparing logits for chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Processing chunks 40 to 40... | |
[llama_gguf_optmize v0.6.0] 10:04:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:04:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:04:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:04:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - | |
===== KL-divergence statistics for Chunk 40 ===== | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Average : 0.022817 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - StdDev : 0.039763 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Median : 0.014336 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Maximum : 1.233832 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_99 : 0.147886 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_95 : 0.071295 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_90 : 0.049504 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_10 : 0.000671 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_05 : 0.000154 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_01 : 0.000018 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Samples seen: 163712 | |
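These statistics are plain summary statistics over one KL value per token, taken across the vocabulary axis; the "kl_values_list size [(1024,), …]" line above shows the 4096-token chunk being processed in four 1024-token parts. A sketch of the computation — the tool's exact implementation is not shown in the log:

```python
import numpy as np
from scipy.special import log_softmax

def kl_stats(baseline_logits, target_logits):
    """Per-token KL(baseline || target) over the vocab axis, then the
    summary statistics printed above (percentile names are the tool's)."""
    logp = log_softmax(baseline_logits.astype(np.float64), axis=-1)
    logq = log_softmax(target_logits.astype(np.float64), axis=-1)
    kl = np.sum(np.exp(logp) * (logp - logq), axis=-1)  # one value per token
    pct = lambda q: float(np.percentile(kl, q))
    return {
        "Average": kl.mean(), "StdDev": kl.std(),
        "Median": float(np.median(kl)), "Minimum": kl.min(), "Maximum": kl.max(),
        "KLD_99": pct(99), "KLD_95": pct(95), "KLD_90": pct(90),
        "KLD_10": pct(10), "KLD_05": pct(5), "KLD_01": pct(1),
    }
```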
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9845) | |
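Two recurring lines deserve a gloss. The segmentation line is ceiling division: ⌈4096 / 128⌉ = (4096 + 128 − 1) // 128 = 32, so each 4096-token chunk is scored in 32 windows of 128 tokens, and each window drives one Bayesian update — which is why exactly 32 "Beta parameters updated" cycles follow per chunk (alpha runs 1215 through 1246 here). The learning-rate line, meanwhile, is a floating-point artifact rather than a change: 0.009999999999999998 is simply 0.01 after repeated binary arithmetic, printed before and after an update that left it in place.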
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.211900, p-value=0.000333 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18415315382708042, EMA_p_value_std_dev: 0.014551384307121492 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1215, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1215, beta=63), stopping probability=0.561411 | |
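The stopping probability is consistent with a simple reading of the rule: treat each 128-token window that meets the convergence condition as a Bernoulli success, keep a Beta(alpha, beta) posterior over the success rate, and stop once enough posterior mass sits above the confidence level (0.95, per the "confidence_level" line saved with the state). Under that assumption — which reproduces the logged values closely, though the tool's actual rule is not shown — the number is one scipy call:

```python
from scipy.stats import beta

def stopping_probability(a, b, threshold=0.95):
    """P(window convergence rate > threshold) under a Beta(a, b) posterior.
    Assumes the threshold equals the logged confidence_level of 0.95."""
    return beta.sf(threshold, a, b)  # survival function, 1 - CDF

print(stopping_probability(1215, 63))  # ~0.5614, as logged for alpha=1215
```

Each increment of alpha nudges the posterior mean up slightly, which is why the stopping probability creeps from ~0.56 toward 1 by ~0.0025 per converged window.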
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0958) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.234702, p-value=0.000029 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18304127753284116, EMA_p_value_std_dev: 0.014407665675477617 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1216, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1216, beta=63), stopping probability=0.563932 | |
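The Kuiper statistic driving these updates is the two-sample analogue of Kolmogorov–Smirnov that sums the maximum deviations in both directions, making it equally sensitive in the tails and at the median. A self-contained sketch of the statistic — the log does not reveal which two samples the tool feeds it, nor how the p-value is obtained, so both are left out here:

```python
import numpy as np

def kuiper_statistic(x, y):
    """Two-sample Kuiper statistic V = D+ + D-, computed from the
    empirical CDFs of samples x and y."""
    data = np.sort(np.concatenate([x, y]))
    cdf_x = np.searchsorted(np.sort(x), data, side="right") / len(x)
    cdf_y = np.searchsorted(np.sort(y), data, side="right") / len(y)
    d_plus = np.max(cdf_x - cdf_y)
    d_minus = np.max(cdf_y - cdf_x)
    return d_plus + d_minus
```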
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1113) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.294325, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18242256932024695, EMA_p_value_std_dev: 0.014265383126958768 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1217, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1217, beta=63), stopping probability=0.566448 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7279) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.223235, p-value=0.000103 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18240002144599868, EMA_p_value_std_dev: 0.014124464163656975 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1218, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1218, beta=63), stopping probability=0.568961 | |
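The two EMA_* quantities are exponential moving averages feeding the convergence condition: EMA_relative_change tracks how much the KL picture is still moving between windows, and EMA_p_value_std_dev tracks the spread of recent Kuiper p-values. A generic EMA step, with the decay taken from the "Adjusted decay rate: 0.5" lines — the exact weighting and inputs are assumptions:

```python
def ema_update(prev, x, decay=0.5):
    """One exponential-moving-average step; `decay` mirrors the
    'Adjusted decay rate: 0.5' lines. Which raw quantities feed the
    two EMAs is not visible in the log, so this shows only the shape
    of the update, not the tool's exact formula."""
    return decay * prev + (1.0 - decay) * x
```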
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4120) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.260310, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1825563967621823, EMA_p_value_std_dev: 0.01398497226309037 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1219, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1219, beta=63), stopping probability=0.571470 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5241) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.267744, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18261289135793043, EMA_p_value_std_dev: 0.013845913010905608 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1220, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1220, beta=63), stopping probability=0.573974 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4960) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.193183, p-value=0.001980 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18269921896361846, EMA_p_value_std_dev: 0.013716527647014264 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1221, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1221, beta=63), stopping probability=0.576475 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7425) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.171316, p-value=0.012312 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18230421580229247, EMA_p_value_std_dev: 0.013632970653980697 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1222, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1222, beta=63), stopping probability=0.578970 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8252) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.167203, p-value=0.016823 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18139264338799796, EMA_p_value_std_dev: 0.013575026691803702 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1223, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1223, beta=63), stopping probability=0.581462 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5788) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.115448, p-value=0.345287 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18146442001154856, EMA_p_value_std_dev: 0.0149468488742328 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1224, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1224, beta=63), stopping probability=0.583948 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5734) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.159187, p-value=0.030023 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18173795523995676, EMA_p_value_std_dev: 0.016273303944147895 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1225, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1225, beta=63), stopping probability=0.586430 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7347) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.150472, p-value=0.053899 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18178992325463023, EMA_p_value_std_dev: 0.01753437050641644 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1226, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1226, beta=63), stopping probability=0.588908 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6175) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.192455, p-value=0.002114 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18271919355032123, EMA_p_value_std_dev: 0.018797650919454775 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1227, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1227, beta=63), stopping probability=0.591380 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9252) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.152035, p-value=0.048697 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18326666887506923, EMA_p_value_std_dev: 0.020014830759121072 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1228, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1228, beta=63), stopping probability=0.593848 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7852) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.172339, p-value=0.011373 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18313993513670304, EMA_p_value_std_dev: 0.02004057654094759 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1229, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1229, beta=63), stopping probability=0.596310 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5187) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.180070, p-value=0.006126 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18300217149636783, EMA_p_value_std_dev: 0.02008808478448712 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1230, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1230, beta=63), stopping probability=0.598767 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8167) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.271500, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18315027690448446, EMA_p_value_std_dev: 0.020087796913603587 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1231, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1231, beta=63), stopping probability=0.601220 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4374) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.194070, p-value=0.001828 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18357833000093635, EMA_p_value_std_dev: 0.020087925828999494 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1232, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1232, beta=63), stopping probability=0.603666 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3852) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.275612, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18507515105495953, EMA_p_value_std_dev: 0.019936291019206145 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1233, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1233, beta=63), stopping probability=0.606108 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6261) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.137678, p-value=0.116724 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1876618720863669, EMA_p_value_std_dev: 0.020249866039475494 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1234, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1234, beta=63), stopping probability=0.608544 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6083) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.148421, p-value=0.061436 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18915015337895197, EMA_p_value_std_dev: 0.020569135977644124 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1235, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1235, beta=63), stopping probability=0.610975 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6170) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.119422, p-value=0.291710 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19028735394530996, EMA_p_value_std_dev: 0.02156507868262764 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1236, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1236, beta=63), stopping probability=0.613400 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5834) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.177607, p-value=0.007489 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19166425673558565, EMA_p_value_std_dev: 0.02254045858793605 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1237, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1237, beta=63), stopping probability=0.615819 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7252) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.229998, p-value=0.000049 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19229124569886136, EMA_p_value_std_dev: 0.0235060113992945 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1238, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1238, beta=63), stopping probability=0.618232 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6031) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.150675, p-value=0.053199 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1936176495159693, EMA_p_value_std_dev: 0.0244674525853469 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1239, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1239, beta=63), stopping probability=0.620640 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7286) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.225753, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19578815911422617, EMA_p_value_std_dev: 0.025476495081659733 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1240, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1240, beta=63), stopping probability=0.623042 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8159) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.188960, p-value=0.002880 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1970531228280722, EMA_p_value_std_dev: 0.025449999885628245 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1241, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1241, beta=63), stopping probability=0.625437 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6867) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.286192, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1990868101764685, EMA_p_value_std_dev: 0.025430420017874177 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1242, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1242, beta=63), stopping probability=0.627827 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0010) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.244432, p-value=0.000009 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.20039796973230772, EMA_p_value_std_dev: 0.025411082780855798 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1243, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1243, beta=63), stopping probability=0.630210 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7574) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.230555, p-value=0.000046 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.2002660254514454, EMA_p_value_std_dev: 0.0251703094173119 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1244, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1244, beta=63), stopping probability=0.632588 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6570) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.204280, p-value=0.000705 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19995189776482528, EMA_p_value_std_dev: 0.024931597125656634 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1245, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1245, beta=63), stopping probability=0.634958 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0295) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.298881, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1995768951118408, EMA_p_value_std_dev: 0.024685991750714505 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1246, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1246, beta=63), stopping probability=0.637323 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Added chunk 39 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Added chunk 39 to freed chunks list in target_logits.h5. | |
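Between chunks the tool checkpoints everything the early-stopping logic needs — the prior distribution, the stopping statistics, and the Beta(alpha, beta) state — which is why each comparison step can begin by loading it back ("Loaded prior distribution … from file" above) and why an interrupted run can resume mid-dataset. The confidence_level: 0.95 printed alongside appears to be the threshold against which the stopping probability is measured.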
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Processing chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Generating logits for model, chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:57 - INFO - Processing chunks from 41 to 41 | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Inference time: 5622.76 ms | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Reusing freed chunk 1 for chunk 41. | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Written chunk 41 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[41] 6086.10 ms | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Generating logits for model, chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:05:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:04 - INFO - Processing chunks from 41 to 41 | |
[llama_gguf_optmize v0.6.0] 10:05:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Inference time: 2533.56 ms | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Reusing freed chunk 1 for chunk 41. | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Written chunk 41 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[41] 2980.00 ms | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Comparing logits for chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Processing chunks 41 to 41... | |
[llama_gguf_optmize v0.6.0] 10:05:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:05:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:05:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:05:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - | |
===== KL-divergence statistics for Chunk 41 ===== | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Average : 0.088268 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - StdDev : 0.368675 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Median : 0.010919 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Maximum : 5.727451 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_99 : 2.007464 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_95 : 0.372391 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_90 : 0.073360 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_10 : 0.000081 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_05 : 0.000017 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_01 : 0.000003 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Samples seen: 167808 | |
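Chunk 41 illustrates why the report includes percentiles: the mean KL jumps to 0.088 (from 0.023 on chunk 40) while the median actually falls to 0.011. The divergence is concentrated in a thin tail of hard tokens — the top 1% sit at KL ≥ 2.0 against a 90th percentile of only 0.073, with a single maximum of 5.73 — a shape the mean alone would have hidden.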
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5695) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.300169, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19880665375542622, EMA_p_value_std_dev: 0.024442836332856585 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1247, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1247, beta=63), stopping probability=0.639681 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4829) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.242589, p-value=0.000012 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19854969229178585, EMA_p_value_std_dev: 0.02420210378142371 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1248, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1248, beta=63), stopping probability=0.642033 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6487) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.196865, p-value=0.001416 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19857427732482955, EMA_p_value_std_dev: 0.023966983190355347 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1249, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1249, beta=63), stopping probability=0.644378 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6738) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.295708, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19864710401796346, EMA_p_value_std_dev: 0.02373421835760726 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1250, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1250, beta=63), stopping probability=0.646716 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8521) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.175377, p-value=0.008956 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19976606224317628, EMA_p_value_std_dev: 0.023536313223037553 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1251, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1251, beta=63), stopping probability=0.649048 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6392) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.105363, p-value=0.502521 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20165028748995512, EMA_p_value_std_dev: 0.025531991457439562 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1252, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1252, beta=63), stopping probability=0.651372 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6232) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.107715, p-value=0.463637 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20330904586088588, EMA_p_value_std_dev: 0.027901543234661798 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1253, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1253, beta=63), stopping probability=0.653690 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6053) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.179737, p-value=0.006296 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20515840902804128, EMA_p_value_std_dev: 0.030238565197619464 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1254, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1254, beta=63), stopping probability=0.656001 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7804) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.240999, p-value=0.000014 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20674194097997511, EMA_p_value_std_dev: 0.0325522491466953 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1255, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1255, beta=63), stopping probability=0.658305 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7112) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.269520, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20725815352674146, EMA_p_value_std_dev: 0.03485906637964742 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1256, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1256, beta=63), stopping probability=0.660602 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0054) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.617073, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21072104696176958, EMA_p_value_std_dev: 0.03657275153787879 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1257, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1257, beta=63), stopping probability=0.662892 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1484) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.637021, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2119822503528795, EMA_p_value_std_dev: 0.036236010109627785 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1258, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1258, beta=63), stopping probability=0.665174 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0051) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.517013, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21266011400190615, EMA_p_value_std_dev: 0.03587461818571727 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1259, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1259, beta=63), stopping probability=0.667450 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7967) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.651907, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21386495143129233, EMA_p_value_std_dev: 0.03551677065895113 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1260, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1260, beta=63), stopping probability=0.669718 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9646) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.489648, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2137334590449094, EMA_p_value_std_dev: 0.035162490871628094 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1261, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1261, beta=63), stopping probability=0.671978 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8162) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.468335, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2136172104673364, EMA_p_value_std_dev: 0.0348117450251836 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1262, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1262, beta=63), stopping probability=0.674231 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9394) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.598200, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2137939939652445, EMA_p_value_std_dev: 0.034464497868557396 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1263, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1263, beta=63), stopping probability=0.676477 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8695) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.457366, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.213873622072587, EMA_p_value_std_dev: 0.03412071450231854 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1264, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1264, beta=63), stopping probability=0.678715 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6466) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.439076, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21317919959268702, EMA_p_value_std_dev: 0.03378036037515791 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1265, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1265, beta=63), stopping probability=0.680946 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8503) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.516228, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21298657643005214, EMA_p_value_std_dev: 0.03344340128041571 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1266, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1266, beta=63), stopping probability=0.683168 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.8186) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.482051, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2121102754965759, EMA_p_value_std_dev: 0.03310980335264356 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1267, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1267, beta=63), stopping probability=0.685384 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8648) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.442446, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2108181991576191, EMA_p_value_std_dev: 0.03277953306420094 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1268, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1268, beta=63), stopping probability=0.687591 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9698) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.544295, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21004935517728326, EMA_p_value_std_dev: 0.03245255722188554 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1269, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1269, beta=63), stopping probability=0.689790 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7258) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.450246, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20927723508267043, EMA_p_value_std_dev: 0.03212884296359723 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1270, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1270, beta=63), stopping probability=0.691982 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7858) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.472731, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20849258799749779, EMA_p_value_std_dev: 0.03180835775503535 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1271, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1271, beta=63), stopping probability=0.694166 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7971) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.517062, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20775925596607903, EMA_p_value_std_dev: 0.03149106938642887 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1272, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1272, beta=63), stopping probability=0.696341 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8802) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.439296, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20687801401038455, EMA_p_value_std_dev: 0.031176945969299243 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1273, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1273, beta=63), stopping probability=0.698509 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3924) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.436007, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20554881848863776, EMA_p_value_std_dev: 0.03086595593325548 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1274, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1274, beta=63), stopping probability=0.700668 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9368) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.409633, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20427237441918955, EMA_p_value_std_dev: 0.030558068022821256 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1275, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1275, beta=63), stopping probability=0.702820 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6745) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.408335, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2026802487294875, EMA_p_value_std_dev: 0.030253251294293612 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1276, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1276, beta=63), stopping probability=0.704963 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4462) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.195639, p-value=0.001585 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20156479547843942, EMA_p_value_std_dev: 0.02995854544209238 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1277, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1277, beta=63), stopping probability=0.707098 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1131) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.227577, p-value=0.000064 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20104358878461712, EMA_p_value_std_dev: 0.029666713206384598 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1278, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1278, beta=63), stopping probability=0.709225 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Added chunk 40 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Added chunk 40 to freed chunks list in target_logits.h5. | |
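
===== Annotation: what the Beta parameters mean =====
The long runs of "Condition met: Incremented alpha" / "Beta parameters updated" above are a Bayesian early-stopping tally: each 128-token segment that passes the similarity check adds one success to a Beta(alpha, beta) posterior over the per-segment pass rate, and the "stopping probability" tracks how much posterior mass sits above the configured confidence_level (logged above as 0.95). This reading is inferred from the numbers rather than taken from the tool's source, but it reproduces the logged values closely. A minimal sketch under that assumption:

# Inserted sketch, not tool output. Assumes the stopping probability is the
# Beta-posterior mass above confidence_level; the tool's exact rule may differ.
from scipy.stats import beta

confidence_level = 0.95    # logged above as "confidence_level: 0.95"
a_post, b_post = 1278, 63  # final chunk-41 update in the log above

p_stop = beta.sf(confidence_level, a_post, b_post)  # sf(x) = 1 - cdf(x)
print(f"stopping probability ~ {p_stop:.6f}")       # log shows 0.709225
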
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Processing chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Generating logits for model, chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:27 - INFO - Processing chunks from 42 to 42 | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Inference time: 5512.93 ms | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Reusing freed chunk 0 for chunk 42. | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Written chunk 42 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[42] 5960.89 ms | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Generating logits for model, chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Processing chunks from 42 to 42 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Inference time: 2362.95 ms | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Reusing freed chunk 0 for chunk 42. | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Written chunk 42 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[42] 2823.82 ms | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
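
===== Annotation: paired logits files and slot reuse =====
Each chunk is run through two models: the baseline (presumably the bf16 conversion; ~5.5 s per chunk here) and the quantized target (~2.4 s). Each pass stores a full-vocabulary float32 logits matrix of shape (4096, 151936), about 2.3 GiB. Rather than letting the HDF5 files grow by that much per chunk, the tool frees the slot of an already-compared chunk and reuses it ("Reusing freed chunk 0 for chunk 42"), which is why the file size stays pinned near 4785 MB. A rough h5py sketch of that pattern; the dataset names and free-list bookkeeping here are hypothetical, not the tool's actual schema:

# Inserted sketch, not tool output. Dataset layout is hypothetical.
import h5py
import numpy as np

N_CTX, N_VOCAB = 4096, 151936  # logits shape from the DEBUG lines above

with h5py.File("target_logits.h5", "a") as f:
    if "logits" not in f:
        # Two physical slots of float32 (4096, 151936) come to ~4.7 GiB,
        # consistent with the logged "Final file size: 4785.13 MB".
        f.create_dataset("logits", shape=(2, N_CTX, N_VOCAB), dtype="float32")
        f.create_dataset("slot_owner", shape=(2,), dtype="int64", fillvalue=-1)

    freed_slots = [0]                 # chunks already compared free their slot
    slot = freed_slots.pop(0)         # reuse instead of growing the file
    chunk_logits = np.zeros((N_CTX, N_VOCAB), dtype=np.float32)  # stand-in
    f["logits"][slot] = chunk_logits  # "Written chunk 42 at physical slot 0"
    f["slot_owner"][slot] = 42
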
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Comparing logits for chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Processing chunks 42 to 42... | |
[llama_gguf_optmize v0.6.0] 10:05:37 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:05:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:05:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:05:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - | |
===== KL-divergence statistics for Chunk 42 ===== | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Average : 0.029643 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - StdDev : 0.079060 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Median : 0.014136 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Maximum : 1.827285 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_99 : 0.274111 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_95 : 0.095251 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_90 : 0.059316 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_10 : 0.000440 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_05 : 0.000120 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_01 : 0.000009 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Samples seen: 171904 | |
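
===== Annotation: reproducing the KL-divergence table =====
The per-chunk table above summarizes one KL(baseline || target) value per token position, computed between the two models' softmaxed vocabulary distributions, with KLD_99/KLD_95/... as upper and lower percentiles, similar in spirit to llama.cpp's KL-divergence report. A numpy sketch of how such a table can be produced (an illustration, not the tool's code):

# Inserted sketch, not tool output. Inputs are the two stored logits
# matrices for one chunk, shape (4096, 151936) float32 as logged above.
import numpy as np
from scipy.special import log_softmax

def kld_stats(baseline_logits, target_logits):
    logp = log_softmax(baseline_logits, axis=-1)        # baseline model p
    logq = log_softmax(target_logits, axis=-1)          # quantized model q
    kl = np.sum(np.exp(logp) * (logp - logq), axis=-1)  # one value per token
    pct = lambda q: float(np.percentile(kl, q))
    return {
        "Average": float(kl.mean()), "StdDev": float(kl.std()),
        "Median": pct(50), "Minimum": float(kl.min()), "Maximum": float(kl.max()),
        "KLD_99": pct(99), "KLD_95": pct(95), "KLD_90": pct(90),
        "KLD_10": pct(10), "KLD_05": pct(5), "KLD_01": pct(1),
    }
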
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6324) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.106303, p-value=0.486853 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2022661162713936, EMA_p_value_std_dev: 0.031540786785459955 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1279, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1279, beta=63), stopping probability=0.711343 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8205) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.170212, p-value=0.013400 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20514435243066695, EMA_p_value_std_dev: 0.033381935292090305 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1280, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1280, beta=63), stopping probability=0.713453 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7277) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.204109, p-value=0.000716 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2067893596234715, EMA_p_value_std_dev: 0.035203890835373876 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1281, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1281, beta=63), stopping probability=0.715555 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8972) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.227733, p-value=0.000063 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2083588741481209, EMA_p_value_std_dev: 0.03700942044258173 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1282, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1282, beta=63), stopping probability=0.717648 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1634) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.157988, p-value=0.032630 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20906427744963219, EMA_p_value_std_dev: 0.03876396103908638 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1283, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1283, beta=63), stopping probability=0.719733 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7852) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.180839, p-value=0.005750 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20879074598619898, EMA_p_value_std_dev: 0.03851161673364751 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1284, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1284, beta=63), stopping probability=0.721809 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6603) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.144616, p-value=0.077771 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20852429329098093, EMA_p_value_std_dev: 0.038458749710268324 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1285, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1285, beta=63), stopping probability=0.723877 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8212) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.197077, p-value=0.001389 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20897178377098766, EMA_p_value_std_dev: 0.03840527646989945 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1286, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1286, beta=63), stopping probability=0.725935 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3966) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.256516, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20939739090564058, EMA_p_value_std_dev: 0.03835244440963511 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1287, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1287, beta=63), stopping probability=0.727986 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5015) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.283271, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20976858774447682, EMA_p_value_std_dev: 0.038309661490528724 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1288, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1288, beta=63), stopping probability=0.730027 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5258) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.176194, p-value=0.008391 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21076322976549547, EMA_p_value_std_dev: 0.03826533161349413 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1289, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1289, beta=63), stopping probability=0.732060 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0783) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.252884, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21187622060147568, EMA_p_value_std_dev: 0.03792000969260212 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1290, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1290, beta=63), stopping probability=0.734085 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5038) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.288531, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21237671237249917, EMA_p_value_std_dev: 0.037579184607895603 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1291, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1291, beta=63), stopping probability=0.736100 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7971) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.227059, p-value=0.000068 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21318768678023203, EMA_p_value_std_dev: 0.037241687037344186 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1292, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1292, beta=63), stopping probability=0.738107 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5681) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.155539, p-value=0.038576 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21375465781865935, EMA_p_value_std_dev: 0.03703681623604003 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1293, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1293, beta=63), stopping probability=0.740104 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8355) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.177381, p-value=0.007628 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21351422517532448, EMA_p_value_std_dev: 0.03683413965248452 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1294, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1294, beta=63), stopping probability=0.742093 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3387) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.207645, p-value=0.000508 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21338721230918015, EMA_p_value_std_dev: 0.03663280219966807 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1295, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1295, beta=63), stopping probability=0.744073 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9423) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.234205, p-value=0.000031 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21297401564527696, EMA_p_value_std_dev: 0.036433430165139065 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1296, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1296, beta=63), stopping probability=0.746044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7971) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.310959, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21258078074103862, EMA_p_value_std_dev: 0.03623614165383823 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1297, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1297, beta=63), stopping probability=0.748006 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6857) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.257589, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2123082754202434, EMA_p_value_std_dev: 0.03590817917831324 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1298, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1298, beta=63), stopping probability=0.749959 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7492) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.228256, p-value=0.000060 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21186113209991453, EMA_p_value_std_dev: 0.035552172031377685 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1299, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1299, beta=63), stopping probability=0.751903 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9638) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.221177, p-value=0.000128 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21126285849922438, EMA_p_value_std_dev: 0.03519806562519139 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1300, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1300, beta=63), stopping probability=0.753838 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6178) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.147061, p-value=0.066908 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21067972734522966, EMA_p_value_std_dev: 0.03514522595543294 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1301, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1301, beta=63), stopping probability=0.755764 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8364) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.198149, p-value=0.001258 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21072968910772774, EMA_p_value_std_dev: 0.03509155515307647 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1302, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1302, beta=63), stopping probability=0.757681 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0706) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.224865, p-value=0.000086 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2108575870277415, EMA_p_value_std_dev: 0.035038323427388565 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1303, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1303, beta=63), stopping probability=0.759588 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5945) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.258750, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21117272364032938, EMA_p_value_std_dev: 0.03498568897664276 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1304, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1304, beta=63), stopping probability=0.761487 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9182) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.285970, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2106980960383623, EMA_p_value_std_dev: 0.03493372504686675 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1305, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1305, beta=63), stopping probability=0.763376 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8334) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.176634, p-value=0.008100 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21027165753169746, EMA_p_value_std_dev: 0.03462030151143729 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1306, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1306, beta=63), stopping probability=0.765256 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7833) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.298575, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2111591860694384, EMA_p_value_std_dev: 0.03431100323076525 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1307, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1307, beta=63), stopping probability=0.767127 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6886) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.294515, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21175362041065265, EMA_p_value_std_dev: 0.0340048843668094 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1308, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1308, beta=63), stopping probability=0.768989 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8047) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.293896, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21198490043787058, EMA_p_value_std_dev: 0.033701820828814004 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1309, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1309, beta=63), stopping probability=0.770841 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5418) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.313232, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21102881737906223, EMA_p_value_std_dev: 0.033401780395795534 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1310, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1310, beta=63), stopping probability=0.772684 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Added chunk 41 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Added chunk 41 to freed chunks list in target_logits.h5. | |
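
Note on the stopping probability logged after each "Updated Beta parameters" line: the values track a Beta(alpha, beta) posterior over the per-window success events ("Condition met: Incremented alpha."), and the numbers are consistent with the posterior mass above the logged confidence_level of 0.95 — e.g. alpha=1310, beta=63 gives roughly 0.77, matching "stopping probability=0.772684" above. A minimal sketch under that interpretation (scipy, not the tool's own code):

from scipy.stats import beta

def stopping_probability(alpha: int, b: int, confidence_level: float = 0.95) -> float:
    # Posterior mass above the confidence level under Beta(alpha, b),
    # i.e. P(theta > confidence_level).
    return 1.0 - beta.cdf(confidence_level, alpha, b)

print(stopping_probability(1310, 63))  # ~0.77, cf. "stopping probability=0.772684"
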
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Processing chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Generating logits for model, chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:57 - INFO - Processing chunks from 43 to 43 | |
[llama_gguf_optmize v0.6.0] 10:05:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Inference time: 5552.30 ms | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Reusing freed chunk 1 for chunk 43. | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Written chunk 43 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[43] 5993.54 ms | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Generating logits for model, chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Processing chunks from 43 to 43 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Inference time: 2494.15 ms | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Reusing freed chunk 1 for chunk 43. | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Written chunk 43 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:06:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[43] 3258.35 ms | |
[llama_gguf_optmize v0.6.0] 10:06:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
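
The slot bookkeeping above ("Reusing freed chunk 1 for chunk 43", "Written chunk 43 at physical slot 1") suggests each HDF5 file keeps a small fixed pool of physical slots that are recycled as logical chunks are compared and freed; two slots of 4096 x 151936 float32 logits (~2.4 GB each) are consistent with the constant "Final file size: 4785.13 MB". An illustrative sketch of such a scheme (dataset and attribute names here are hypothetical, not the tool's actual layout):

import h5py
import numpy as np

N_SLOTS = 2  # two physical slots suffice when chunks are compared as they are produced

def write_chunk(path: str, logical_chunk: int, logits: np.ndarray, freed_slots: list) -> int:
    slot = freed_slots.pop(0)  # "Reusing freed chunk <slot> for chunk <logical_chunk>"
    with h5py.File(path, "a") as f:
        dset = f.require_dataset("logits", shape=(N_SLOTS,) + logits.shape,
                                 dtype="float32")
        dset[slot] = logits    # "Written chunk <logical_chunk> at physical slot <slot>"
        f.attrs[f"chunk_{logical_chunk}_slot"] = int(slot)
    return slot

def free_chunk(slot: int, freed_slots: list) -> None:
    # "Added chunk <n> to freed chunks list" — the slot becomes reusable,
    # so the file never grows beyond N_SLOTS * chunk size.
    freed_slots.append(slot)
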
[llama_gguf_optmize v0.6.0] 10:06:07 - INFO - Comparing logits for chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:06:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:06:07 - INFO - Processing chunks 43 to 43... | |
[llama_gguf_optmize v0.6.0] 10:06:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:06:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:06:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:06:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - | |
===== KL-divergence statistics for Chunk 43 ===== | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Average : 0.015078 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - StdDev : 0.027749 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Median : 0.009472 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Maximum : 1.034442 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_99 : 0.106170 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_95 : 0.050940 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_90 : 0.033877 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_10 : 0.000124 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_05 : 0.000032 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_01 : 0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Samples seen: 176000 | |
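
The per-chunk table above summarizes token-level KL divergence between the two models' next-token distributions across the 4096 positions of the chunk (computed in four 1024-token parts, hence the four (1024,) arrays in kl_values_list). A minimal numpy sketch of those values and the percentile rows, assuming KLD_99 etc. are plain percentiles of the per-token KL values:

import numpy as np
from scipy.special import log_softmax

def kl_per_token(base_logits: np.ndarray, tgt_logits: np.ndarray) -> np.ndarray:
    # Both arrays are (n_tokens, vocab) — (4096, 151936) per chunk in this run.
    logp = log_softmax(base_logits, axis=-1)   # baseline model
    logq = log_softmax(tgt_logits, axis=-1)    # target model
    return np.sum(np.exp(logp) * (logp - logq), axis=-1)  # D_KL(P || Q) per token

# Demo with toy shapes; in the real run the logits come from the .h5 files.
rng = np.random.default_rng(0)
base = rng.normal(size=(8, 32))
tgt = base + 0.1 * rng.normal(size=(8, 32))
kl = kl_per_token(base, tgt)
print(kl.mean(), kl.std(), np.median(kl))         # Average / StdDev / Median
print(np.percentile(kl, [99, 95, 90, 10, 5, 1]))  # KLD_99 ... KLD_01
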
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
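
The segmentation annotation reads as ceiling division: the 4096-token chunk is split into (4096 + 128 - 1) // 128 = 32 windows of 128 tokens, and each window drives one Kuiper test and one Beta update below — which is why alpha advances by exactly 32 per chunk (1311 through 1342 over chunk 43):

n_ctx, window = 4096, 128
n_segments = (n_ctx + window - 1) // window  # ceil(4096 / 128) = 32 updates per chunk
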
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7378) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.220932, p-value=0.000131 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20959013124716863, EMA_p_value_std_dev: 0.033069182474144095 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1311, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1311, beta=63), stopping probability=0.774518 | |
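
The Kuiper statistic reported for each window is the rotation-invariant cousin of Kolmogorov-Smirnov: V = D+ + D-, the sum of the largest positive and largest negative deviations between two empirical CDFs, rather than the single largest gap. A generic two-sample version (exactly which two samples the tool compares per window is internal to it; this only illustrates the statistic):

import numpy as np

def kuiper_statistic(x: np.ndarray, y: np.ndarray) -> float:
    # V = D+ + D-: max positive plus max negative ECDF deviation.
    grid = np.sort(np.concatenate([x, y]))
    cdf_x = np.searchsorted(np.sort(x), grid, side="right") / x.size
    cdf_y = np.searchsorted(np.sort(y), grid, side="right") / y.size
    d = cdf_x - cdf_y
    return float(d.max() - d.min())  # d.max() = D+, -d.min() = D-

Unlike the KS statistic, V weights tails and center more evenly, which suits heavy-tailed per-token KL values; the logged p-values would come from the usual asymptotic Kuiper series.
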
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7845) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.253908, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20867367428639014, EMA_p_value_std_dev: 0.03273989905231318 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1312, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1312, beta=63), stopping probability=0.776342 | |
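
The two EMA lines drift by well under 1% per window, which is consistent with exponential moving averages whose update weight is the logged learning rate (~0.01) rather than the 0.5 decay: for example 0.99 * 0.034311 ≈ 0.03397, close to the next EMA_p_value_std_dev of 0.0340049 when the new observation is near zero. A generic sketch under that assumption (the tool's exact inputs and weighting are internal):

def ema_update(prev: float, observation: float, weight: float = 0.01) -> float:
    # Exponential moving average with a small update weight, so each new
    # window nudges the tracked statistic by only ~1% of the gap.
    return (1.0 - weight) * prev + weight * observation
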
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7525) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.240595, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20782895815073554, EMA_p_value_std_dev: 0.032413886909146254 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1313, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1313, beta=63), stopping probability=0.778158 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7044) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.220552, p-value=0.000136 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20705824768842862, EMA_p_value_std_dev: 0.03209125953634867 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1314, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1314, beta=63), stopping probability=0.779964 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8737) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.305598, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20640832591656091, EMA_p_value_std_dev: 0.031771850368284796 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1315, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1315, beta=63), stopping probability=0.781760 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4061) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.305228, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20533867977009607, EMA_p_value_std_dev: 0.031455518327506204 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1316, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1316, beta=63), stopping probability=0.783547 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7415) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.283495, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.204349981705377, EMA_p_value_std_dev: 0.031142345237818485 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1317, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1317, beta=63), stopping probability=0.785325 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9904) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.287520, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20311241563141066, EMA_p_value_std_dev: 0.0308323090447727 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1318, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1318, beta=63), stopping probability=0.787094 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8866) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.214954, p-value=0.000244 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20172149950498752, EMA_p_value_std_dev: 0.030525845707633843 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1319, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1319, beta=63), stopping probability=0.788853 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9740) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.247427, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.200873010894941, EMA_p_value_std_dev: 0.030222432399040582 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1320, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1320, beta=63), stopping probability=0.790602 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9305) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.303661, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20061728908476187, EMA_p_value_std_dev: 0.02992204563742835 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1321, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1321, beta=63), stopping probability=0.792342 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9432) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.255466, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20056297901002276, EMA_p_value_std_dev: 0.029624652533143975 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1322, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1322, beta=63), stopping probability=0.794073 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9104) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.288039, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.2001633658715458, EMA_p_value_std_dev: 0.02933022592788985 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1323, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1323, beta=63), stopping probability=0.795795 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7925) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.233278, p-value=0.000034 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19991393617704076, EMA_p_value_std_dev: 0.029037801351536562 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1324, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1324, beta=63), stopping probability=0.797507 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9864) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.271577, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19952365671328334, EMA_p_value_std_dev: 0.028748298547526255 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1325, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1325, beta=63), stopping probability=0.799209 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8532) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.225459, p-value=0.000081 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19917882935527537, EMA_p_value_std_dev: 0.02846188432775515 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1326, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1326, beta=63), stopping probability=0.800902 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7116) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.217728, p-value=0.000183 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.1987215007511755, EMA_p_value_std_dev: 0.028178741461945326 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1327, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1327, beta=63), stopping probability=0.802586 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8519) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.284951, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19841627537068623, EMA_p_value_std_dev: 0.027898422912713182 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1328, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1328, beta=63), stopping probability=0.804260 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6267) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.238640, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19810790130006306, EMA_p_value_std_dev: 0.02762091654981684 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1329, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1329, beta=63), stopping probability=0.805925 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7317) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.194372, p-value=0.001779 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19786133715689705, EMA_p_value_std_dev: 0.027353050682146895 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1330, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1330, beta=63), stopping probability=0.807580 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7022) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.173259, p-value=0.010585 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19770967992945712, EMA_p_value_std_dev: 0.027125824803657956 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1331, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1331, beta=63), stopping probability=0.809226 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7193) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.239103, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19764819288116084, EMA_p_value_std_dev: 0.02690108171355477 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1332, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1332, beta=63), stopping probability=0.810862 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7571) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.244160, p-value=0.000010 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19727673397212636, EMA_p_value_std_dev: 0.02667856748112819 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1333, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1333, beta=63), stopping probability=0.812489 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7307) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.232133, p-value=0.000039 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19639891691990594, EMA_p_value_std_dev: 0.02645824580951608 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1334, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1334, beta=63), stopping probability=0.814106 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8236) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.304983, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19629811676192363, EMA_p_value_std_dev: 0.026241472652143286 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1335, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1335, beta=63), stopping probability=0.815714 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7241) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.194075, p-value=0.001827 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19617795997719187, EMA_p_value_std_dev: 0.025987792349102718 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1336, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1336, beta=63), stopping probability=0.817313 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1344) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.310488, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.1975647276023671, EMA_p_value_std_dev: 0.02573666234576847 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1337, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1337, beta=63), stopping probability=0.818902 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8733) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.328994, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19895052765996826, EMA_p_value_std_dev: 0.025488048346332105 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1338, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1338, beta=63), stopping probability=0.820482 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6425) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.171687, p-value=0.011964 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20093767057416795, EMA_p_value_std_dev: 0.025285740032353222 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1339, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1339, beta=63), stopping probability=0.822052 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6041) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.239548, p-value=0.000017 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20302332517792826, EMA_p_value_std_dev: 0.025085427808888765 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1340, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1340, beta=63), stopping probability=0.823612 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5793) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.306774, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20419739519553295, EMA_p_value_std_dev: 0.024888551683711138 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1341, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1341, beta=63), stopping probability=0.825164 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7529) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.238555, p-value=0.000019 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20556220319036517, EMA_p_value_std_dev: 0.02469361860075733 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1342, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1342, beta=63), stopping probability=0.826706 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Added chunk 42 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Added chunk 42 to freed chunks list in target_logits.h5. | |
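
At this point one full per-chunk cycle has repeated twice, and its fixed shape is visible in the log: generate baseline logits, generate target logits, compare, persist the Bayesian/early-stopping state, then free the previous chunk's slots. As a runnable high-level sketch (all names are hypothetical stand-ins mirroring the log lines, not the tool's API; that the baseline is the bf16 reference and the target a quantized variant is an assumption suggested by the ~5.5 s vs ~2.4 s inference times):

# Stubs stand in for the tool's internals.
def generate_logits(model, chunk, path): ...  # inference, write into a recycled slot
def compare_logits(chunk): ...                # KL stats + 32 windowed Kuiper/Beta updates
def save_state(): ...                         # "Saved prior distribution, early stopping stats, ..."
def free_slot(path, chunk): ...               # "Added chunk N to freed chunks list"

baseline_model, target_model = "bf16 gguf (assumed)", "quantized gguf (assumed)"
first_chunk, last_chunk = 43, 44

for chunk in range(first_chunk, last_chunk + 1):
    generate_logits(baseline_model, chunk, "baseline_logits.h5")
    generate_logits(target_model, chunk, "target_logits.h5")
    compare_logits(chunk)
    save_state()
    free_slot("baseline_logits.h5", chunk - 1)
    free_slot("target_logits.h5", chunk - 1)
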
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Processing chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Generating logits for model, chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:27 - INFO - Processing chunks from 44 to 44 | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Inference time: 5532.49 ms | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Reusing freed chunk 0 for chunk 44. | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Written chunk 44 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[44] 5998.05 ms | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Generating logits for model, chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Processing chunks from 44 to 44 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Inference time: 2377.85 ms | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Reusing freed chunk 0 for chunk 44. | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Written chunk 44 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[44] 2806.82 ms | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Comparing logits for chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:06:37 - INFO - Processing chunks 44 to 44... | |
[llama_gguf_optmize v0.6.0] 10:06:37 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:06:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:06:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:06:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - | |
===== KL-divergence statistics for Chunk 44 ===== | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Average : 0.013907 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - StdDev : 0.024257 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Median : 0.008231 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Maximum : 0.448273 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_99 : 0.102345 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_95 : 0.045147 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_90 : 0.030872 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_10 : 0.000125 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_05 : 0.000032 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_01 : 0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Samples seen: 180096 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7744) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.255707, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20597591222366296, EMA_p_value_std_dev: 0.024500627318318688 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1343, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1343, beta=63), stopping probability=0.828238 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0510) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.201941, p-value=0.000881 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20597946728969163, EMA_p_value_std_dev: 0.02426012362917181 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1344, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1344, beta=63), stopping probability=0.829761 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9825) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.213709, p-value=0.000277 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20533023197238517, EMA_p_value_std_dev: 0.024021912668185895 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1345, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1345, beta=63), stopping probability=0.831275 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6188) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.296154, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20489070560597794, EMA_p_value_std_dev: 0.023786077849553527 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1346, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1346, beta=63), stopping probability=0.832779 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6336) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.261948, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2046187925727715, EMA_p_value_std_dev: 0.023552621335059594 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1347, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1347, beta=63), stopping probability=0.834274 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7234) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.294161, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20402280542315226, EMA_p_value_std_dev: 0.023321497108942615 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1348, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1348, beta=63), stopping probability=0.835759 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7507) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.272002, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2034539725860579, EMA_p_value_std_dev: 0.023090100105393377 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1349, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1349, beta=63), stopping probability=0.837235 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8618) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.281568, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20233562383533124, EMA_p_value_std_dev: 0.022859780872372764 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1350, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1350, beta=63), stopping probability=0.838702 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8193) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.247235, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20125252274676206, EMA_p_value_std_dev: 0.022631783182355033 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1351, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1351, beta=63), stopping probability=0.840159 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6188) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.271131, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20008428072878942, EMA_p_value_std_dev: 0.022406060325265247 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1352, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1352, beta=63), stopping probability=0.841607 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9571) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.363115, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19937247478089482, EMA_p_value_std_dev: 0.02218258907000282 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1353, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1353, beta=63), stopping probability=0.843046 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7011) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.211785, p-value=0.000337 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19974889328429676, EMA_p_value_std_dev: 0.021962812056532978 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1354, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1354, beta=63), stopping probability=0.844476 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8277) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.260511, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20052639685675192, EMA_p_value_std_dev: 0.02174522593706971 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1355, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1355, beta=63), stopping probability=0.845896 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8212) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.327725, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20156667116525742, EMA_p_value_std_dev: 0.021529817486466964 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1356, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1356, beta=63), stopping probability=0.847307 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8072) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.225249, p-value=0.000083 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2024970827259509, EMA_p_value_std_dev: 0.021316510005908847 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1357, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1357, beta=63), stopping probability=0.848708 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9979) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.305461, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20334277830554, EMA_p_value_std_dev: 0.021105330263991987 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1358, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1358, beta=63), stopping probability=0.850101 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9985) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.262670, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20406811714214917, EMA_p_value_std_dev: 0.020895170779034074 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1359, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1359, beta=63), stopping probability=0.851484 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7810) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.304222, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20441252167095278, EMA_p_value_std_dev: 0.020687109054360952 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1360, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1360, beta=63), stopping probability=0.852858 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9271) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.306409, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20394202282806217, EMA_p_value_std_dev: 0.020481122743107892 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1361, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1361, beta=63), stopping probability=0.854222 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9112) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.288849, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20285238991486046, EMA_p_value_std_dev: 0.020276827884545257 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1362, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1362, beta=63), stopping probability=0.855578 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9720) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.344417, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2018839210232717, EMA_p_value_std_dev: 0.02007457086998419 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1363, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1363, beta=63), stopping probability=0.856924 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7924) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.353036, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20042657737185116, EMA_p_value_std_dev: 0.0198743271454752 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1364, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1364, beta=63), stopping probability=0.858262 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1501) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.262497, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19971218038667207, EMA_p_value_std_dev: 0.01967608517792828 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1365, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1365, beta=63), stopping probability=0.859590 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7023) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.221785, p-value=0.000120 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19937099124299124, EMA_p_value_std_dev: 0.019480349408137212 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1366, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1366, beta=63), stopping probability=0.860909 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7291) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.233151, p-value=0.000035 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19850484273286464, EMA_p_value_std_dev: 0.019286549209559824 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1367, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1367, beta=63), stopping probability=0.862219 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8681) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.239855, p-value=0.000016 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19765197360158893, EMA_p_value_std_dev: 0.01909466273115571 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1368, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1368, beta=63), stopping probability=0.863520 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7917) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.290688, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19675223343971926, EMA_p_value_std_dev: 0.018904690283498497 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1369, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1369, beta=63), stopping probability=0.864812 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7665) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.251732, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19578254806638493, EMA_p_value_std_dev: 0.018716608154110723 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1370, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1370, beta=63), stopping probability=0.866095 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0187) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.253775, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19471266800728596, EMA_p_value_std_dev: 0.01853005234882434 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1371, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1371, beta=63), stopping probability=0.867368 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9477) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.290850, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.1940927492279791, EMA_p_value_std_dev: 0.01834528129500981 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1372, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1372, beta=63), stopping probability=0.868633 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4982) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.454601, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.1939156600055606, EMA_p_value_std_dev: 0.018162306144762046 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1373, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1373, beta=63), stopping probability=0.869889 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9604) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.425724, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.1934764332014863, EMA_p_value_std_dev: 0.017981156210264738 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1374, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1374, beta=63), stopping probability=0.871136 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Added chunk 43 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Added chunk 43 to freed chunks list in target_logits.h5. | |
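Once a chunk has been compared, its physical slot in both HDF5 files is pushed onto a freed-chunks list, and the next chunk is written into a recycled slot ("Reusing freed chunk 1 for chunk 45" below), so the files stay at two resident slots instead of growing with every chunk. A sketch of that bookkeeping with hypothetical dataset and attribute names ("logits", "freed_slots"); the tool's real file layout may differ:

```python
import h5py
import numpy as np

N_SLOTS = 2
N_TOKENS, N_LOGITS = 8, 16   # demo sizes; the run above uses 4096 x 151936

with h5py.File("demo_logits.h5", "a") as f:
    if "logits" not in f:
        f.create_dataset("logits", shape=(N_SLOTS, N_TOKENS, N_LOGITS), dtype="float32")
        f.attrs["freed_slots"] = np.arange(N_SLOTS)       # all slots start free

    freed = list(f.attrs["freed_slots"])
    slot = freed.pop(0)                                   # "Reusing freed chunk ..."
    f["logits"][slot] = np.zeros((N_TOKENS, N_LOGITS))    # "Written chunk N at physical slot ..."
    f.attrs["freed_slots"] = np.array(freed, dtype=int)
```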
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Processing chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Generating logits for model, chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:57 - INFO - Processing chunks from 45 to 45 | |
[llama_gguf_optmize v0.6.0] 10:06:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Inference time: 5511.72 ms | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Reusing freed chunk 1 for chunk 45. | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Written chunk 45 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[45] 5955.09 ms | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
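This block generates baseline logits for chunk 45: the precomputed token ids are loaded from the .npy file, a 4096-token window is evaluated, and the (4096, 151936) float32 logits go into the recycled HDF5 slot. The slower pass (5.5 s here vs 2.6 s below) is presumably the bf16 baseline and the faster one the quantized target. A hedged sketch assuming the llama-cpp-python binding; the tool itself may drive llama.cpp differently, and it additionally pins the BOS/EOS handling shown in the require_bos/require_eos lines:

```python
# One chunk of logit generation, sketched with llama-cpp-python (an assumption;
# the model path and window arithmetic are illustrative).
import numpy as np
from llama_cpp import Llama

CHUNK, CTX = 45, 4096
tokens = np.load("/Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/"
                 "combined/calibration-dataset.txt.tokens.npy")
window = tokens[CHUNK * CTX:(CHUNK + 1) * CTX].tolist()

llm = Llama(model_path="Sailor2-1B-Chat_bf16.gguf", n_ctx=CTX, logits_all=True)
llm.eval(window)
logits = np.asarray(llm.scores[:len(window)], dtype=np.float32)
print(logits.shape)   # (4096, 151936), as in the "Logits shape" line above
```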
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Generating logits for model, chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Processing chunks from 45 to 45 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Inference time: 2553.92 ms | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Reusing freed chunk 1 for chunk 45. | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Written chunk 45 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[45] 3010.06 ms | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
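Both files report the same 4785.13 MB because each holds exactly two resident fp32 slots of shape (4096, 151936). A quick size check; HDF5 metadata and overhead account for the remainder:

```python
slot_bytes = 4096 * 151936 * 4     # tokens x vocab x sizeof(float32)
print(slot_bytes / 2**20)          # 2374.0 MiB per slot
print(2 * slot_bytes / 2**20)      # 4748.0 MiB for two slots, vs 4785.13 MB reported
```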
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Comparing logits for chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Processing chunks 45 to 45... | |
[llama_gguf_optmize v0.6.0] 10:07:07 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:07:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:07:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:07:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - | |
===== KL-divergence statistics for Chunk 45 ===== | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Average : 0.010650 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - StdDev : 0.035294 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Median : 0.002136 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Maximum : 1.557601 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_99 : 0.109571 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_95 : 0.044345 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_90 : 0.026093 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_10 : 0.000027 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_05 : 0.000010 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_01 : 0.000001 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Samples seen: 184192 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
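The statistics above are plain summaries of the per-token KL divergence between the baseline and target next-token distributions, computed over the chunk in four parts of 1024 tokens (the kl_values_list line). A sketch of both steps; the tool's exact numerics are an assumption:

```python
import numpy as np
from scipy.special import log_softmax

def kl_per_token(base_logits: np.ndarray, tgt_logits: np.ndarray) -> np.ndarray:
    # KL(baseline || target) per position, from raw logits.
    lp = log_softmax(base_logits.astype(np.float64), axis=-1)
    lq = log_softmax(tgt_logits.astype(np.float64), axis=-1)
    return np.sum(np.exp(lp) * (lp - lq), axis=-1)

def summarize(kl: np.ndarray) -> None:
    print(f"Average : {kl.mean():.6f}")
    print(f"StdDev  : {kl.std():.6f}")
    print(f"Median  : {np.median(kl):.6f}")
    for q in (99, 95, 90, 10, 5, 1):
        print(f"KLD_{q:02d}  : {np.percentile(kl, q):.6f}")
```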
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8674) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.327367, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19373761988344293, EMA_p_value_std_dev: 0.01780180762415367 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1375, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1375, beta=63), stopping probability=0.872375 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2809) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.488927, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19529684681703785, EMA_p_value_std_dev: 0.017624234686671084 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1376, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1376, beta=63), stopping probability=0.873604 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0857) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.515545, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19513301651137888, EMA_p_value_std_dev: 0.017448432946010378 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1377, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1377, beta=63), stopping probability=0.874824 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4604) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.467076, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1950849800113235, EMA_p_value_std_dev: 0.017274384827712763 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1378, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1378, beta=63), stopping probability=0.876036 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3222) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.489094, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1943672228614953, EMA_p_value_std_dev: 0.017102072839395167 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1379, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1379, beta=63), stopping probability=0.877239 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1491) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.453447, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19308063836262665, EMA_p_value_std_dev: 0.016931479662822203 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1380, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1380, beta=63), stopping probability=0.878433 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0880) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.578688, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19218163334778893, EMA_p_value_std_dev: 0.01676258815318555 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1381, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1381, beta=63), stopping probability=0.879618 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5636) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.386481, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1920432222529456, EMA_p_value_std_dev: 0.016595381336357528 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1382, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1382, beta=63), stopping probability=0.880795 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0558) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.443406, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1922167730605071, EMA_p_value_std_dev: 0.01642984240752737 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1383, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1383, beta=63), stopping probability=0.881963 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5466) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.450869, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19231887979298729, EMA_p_value_std_dev: 0.01626595472951229 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1384, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1384, beta=63), stopping probability=0.883122 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2267) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.504228, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19182901826773702, EMA_p_value_std_dev: 0.016103701831085414 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1385, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1385, beta=63), stopping probability=0.884273 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4188) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.539858, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19082725529634034, EMA_p_value_std_dev: 0.01594306740532034 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1386, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1386, beta=63), stopping probability=0.885415 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1190) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.395500, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18998487102455622, EMA_p_value_std_dev: 0.01578403530795227 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1387, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1387, beta=63), stopping probability=0.886548 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1553) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.441480, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18938853525132646, EMA_p_value_std_dev: 0.015626589555755445 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1388, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1388, beta=63), stopping probability=0.887673 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1460) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.452177, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18856187859899315, EMA_p_value_std_dev: 0.015470714324936784 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1389, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1389, beta=63), stopping probability=0.888790 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2774) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.446688, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18755459934014806, EMA_p_value_std_dev: 0.01531639394954554 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1390, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1390, beta=63), stopping probability=0.889898 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0531) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.402983, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1863004462387506, EMA_p_value_std_dev: 0.015163612919898824 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1391, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1391, beta=63), stopping probability=0.890997 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3102) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.494214, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1851957797405128, EMA_p_value_std_dev: 0.015012355881022833 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1392, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1392, beta=63), stopping probability=0.892088 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2002) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.476660, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18414439509437322, EMA_p_value_std_dev: 0.01486260763110963 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1393, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1393, beta=63), stopping probability=0.893171 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.0707) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.465702, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18311699470691442, EMA_p_value_std_dev: 0.014714353119989311 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1394, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1394, beta=63), stopping probability=0.894245 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 1.3616) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.470953, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18180504159279975, EMA_p_value_std_dev: 0.014567577447617419 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1395, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1395, beta=63), stopping probability=0.895311 | |
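The learning-rate line almost always prints 0.009999999999999998, but flips to 0.01 right after the KL spike of 2.0707 above and back again one step later. One plausible reading, and only a guess, is a multiplicative adjustment clamped at a 0.01 cap, with float rounding leaving the stored value a couple of ULPs under the cap most of the time:

```python
# Guess at the learning-rate rule; the real update is not shown in the log.
def update_lr(lr: float, kl: float, gain: float = 1e-3, cap: float = 0.01) -> float:
    return min(cap, lr * (1.0 + gain * (kl - 1.0)))

lr = 0.009999999999999998
print(update_lr(lr, 2.0707))   # hits the cap and prints as 0.01, as logged
```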
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4147) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.414894, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18037265036275865, EMA_p_value_std_dev: 0.014422265862577435 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1396, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1396, beta=63), stopping probability=0.896369 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7744) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.541431, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17949847589255696, EMA_p_value_std_dev: 0.014278403760598225 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1397, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1397, beta=63), stopping probability=0.897418 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3666) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.571251, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17875314823667607, EMA_p_value_std_dev: 0.01413597668308626 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1398, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1398, beta=63), stopping probability=0.898460 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3465) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.487830, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17841024947310938, EMA_p_value_std_dev: 0.013994970315672473 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1399, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1399, beta=63), stopping probability=0.899493 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4957) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.486265, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1777668274245904, EMA_p_value_std_dev: 0.013855370486773641 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1400, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1400, beta=63), stopping probability=0.900518 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5842) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.469226, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17651659534918693, EMA_p_value_std_dev: 0.013717163166168073 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1401, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1401, beta=63), stopping probability=0.901534 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5361) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.486709, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1752121287154263, EMA_p_value_std_dev: 0.013580334463585548 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1402, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1402, beta=63), stopping probability=0.902543 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9682) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.250655, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1743422633959022, EMA_p_value_std_dev: 0.013444890400501276 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1403, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1403, beta=63), stopping probability=0.903544 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9978) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.401300, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17534948176484363, EMA_p_value_std_dev: 0.01331079739194627 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1404, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1404, beta=63), stopping probability=0.904536 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9800) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.349745, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17669089032606594, EMA_p_value_std_dev: 0.013178041961149703 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1405, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1405, beta=63), stopping probability=0.905521 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0637) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.432890, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17854140851653372, EMA_p_value_std_dev: 0.013046610765775332 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1406, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1406, beta=63), stopping probability=0.906497 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Added chunk 44 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Added chunk 44 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Processing chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Generating logits for model, chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:27 - INFO - Processing chunks from 46 to 46 | |
[llama_gguf_optmize v0.6.0] 10:07:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Inference time: 5539.19 ms | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Reusing freed chunk 0 for chunk 46. | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Written chunk 46 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[46] 5981.72 ms | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Generating logits for model, chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Processing chunks from 46 to 46 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Inference time: 2526.32 ms | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Reusing freed chunk 0 for chunk 46. | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Written chunk 46 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[46] 2969.42 ms | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Comparing logits for chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Processing chunks 46 to 46... | |
[llama_gguf_optmize v0.6.0] 10:07:37 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:07:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:07:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:07:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - | |
===== KL-divergence statistics for Chunk 46 ===== | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Average : 0.013298 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - StdDev : 0.045913 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Median : 0.003842 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Maximum : 2.100630 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_99 : 0.126697 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_95 : 0.051914 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_90 : 0.030688 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_10 : 0.000059 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_05 : 0.000026 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_01 : 0.000006 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Samples seen: 188288 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
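"segmentation 0/32 (4096 + 128 - 1/ 128)" is a ceil-division readout: (4096 + 128 - 1) // 128 = 32 segments of 128 tokens per chunk, and each segment drives one Kuiper test and one Beta update, which is why exactly 32 "Beta parameters updated" lines follow each chunk's statistics. A skeleton of that loop with stand-in internals; the pass rule p < theta_P is inferred from the log, where every tiny p-value increments alpha:

```python
import numpy as np

def kuiper_test(x: np.ndarray):            # stub; see the two-sample sketch earlier
    return 0.5, 0.0

def update_beta(a: int, b: int, p: float, theta_p: float = 0.05):
    return (a + 1, b) if p < theta_p else (a, b + 1)   # guess at the pass rule

N_TOKENS, SEG = 4096, 128
n_segments = (N_TOKENS + SEG - 1) // SEG   # 32, the "(4096 + 128 - 1/ 128)" readout

kl_values = np.zeros(N_TOKENS)             # stand-in for the chunk's per-token KL
alpha, beta = 1406, 63                     # posterior state entering chunk 46
for s in range(n_segments):                # "segmentation 0/32", "1/32", ...
    stat, p_value = kuiper_test(kl_values[s * SEG:(s + 1) * SEG])
    alpha, beta = update_beta(alpha, beta, p_value)
```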
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6374) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.248124, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.18015566282993478, EMA_p_value_std_dev: 0.012916499950107135 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1407, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1407, beta=63), stopping probability=0.907466 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3237) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.443958, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1819879599216119, EMA_p_value_std_dev: 0.01278768475687118 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1408, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1408, beta=63), stopping probability=0.908427 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2777) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.430895, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1836787888473592, EMA_p_value_std_dev: 0.012660154495187754 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1409, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1409, beta=63), stopping probability=0.909380 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0592) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.461061, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.18472483726270453, EMA_p_value_std_dev: 0.012533896347866514 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1410, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1410, beta=63), stopping probability=0.910325 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5845) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.623516, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1854816462235328, EMA_p_value_std_dev: 0.012408897625564807 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1411, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1411, beta=63), stopping probability=0.911263 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1124) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.276803, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1859408645170756, EMA_p_value_std_dev: 0.012285119543779708 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1412, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1412, beta=63), stopping probability=0.912192 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2944) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.369194, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1874334710239158, EMA_p_value_std_dev: 0.012162576148360358 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1413, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1413, beta=63), stopping probability=0.913114 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0662) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.190255, p-value=0.002570 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.18980416857719462, EMA_p_value_std_dev: 0.012052720208882914 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1414, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1414, beta=63), stopping probability=0.914029 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.6823) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.315798, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1930429796757973, EMA_p_value_std_dev: 0.011943960081858213 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1415, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1415, beta=63), stopping probability=0.914935 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7128) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.285565, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.19512124753785398, EMA_p_value_std_dev: 0.01183628478733458 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1416, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1416, beta=63), stopping probability=0.915834 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2742) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.445845, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.19803045677756245, EMA_p_value_std_dev: 0.01172968372186652 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1417, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1417, beta=63), stopping probability=0.916726 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0358) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.352422, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.19988061648213415, EMA_p_value_std_dev: 0.011624146002025377 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1418, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1418, beta=63), stopping probability=0.917610 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9999) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.507044, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20113096520459944, EMA_p_value_std_dev: 0.011508195344177498 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1419, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1419, beta=63), stopping probability=0.918486 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2357) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.563951, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20238936274699312, EMA_p_value_std_dev: 0.01139340129467401 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1420, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1420, beta=63), stopping probability=0.919355 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9587) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.466152, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20248951086713773, EMA_p_value_std_dev: 0.011279752116764368 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1421, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1421, beta=63), stopping probability=0.920217 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2355) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.378620, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20250717075894314, EMA_p_value_std_dev: 0.011167236589404363 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1422, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1422, beta=63), stopping probability=0.921071 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8901) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.465080, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20226298536479984, EMA_p_value_std_dev: 0.011055843404425092 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1423, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1423, beta=63), stopping probability=0.921918 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8713) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.441437, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20194486993278105, EMA_p_value_std_dev: 0.010945561366465989 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1424, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1424, beta=63), stopping probability=0.922758 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9222) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.350723, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2017500257387452, EMA_p_value_std_dev: 0.01083637939184187 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1425, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1425, beta=63), stopping probability=0.923591 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0780) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.386131, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2012321042933323, EMA_p_value_std_dev: 0.010728286507414623 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1426, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1426, beta=63), stopping probability=0.924416 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9812) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.429754, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2003445419812342, EMA_p_value_std_dev: 0.010621271849509546 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1427, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1427, beta=63), stopping probability=0.925234 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8973) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.292766, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20006041436519076, EMA_p_value_std_dev: 0.010515324733798317 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1428, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1428, beta=63), stopping probability=0.926045 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9508) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.465415, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2005401406305359, EMA_p_value_std_dev: 0.010410434440566307 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1429, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1429, beta=63), stopping probability=0.926848 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3206) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.361451, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20144335012129755, EMA_p_value_std_dev: 0.010306590428010649 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1430, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1430, beta=63), stopping probability=0.927645 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9614) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.349292, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20223887414048455, EMA_p_value_std_dev: 0.010203782259478184 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1431, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1431, beta=63), stopping probability=0.928435 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2997) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.450469, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2029175274392632, EMA_p_value_std_dev: 0.010101999602426828 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1432, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1432, beta=63), stopping probability=0.929218 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1748) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.337447, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20307110405857418, EMA_p_value_std_dev: 0.010001232156454093 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1433, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1433, beta=63), stopping probability=0.929994 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1143) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.512215, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20375518225947398, EMA_p_value_std_dev: 0.009901469865754936 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1434, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1434, beta=63), stopping probability=0.930762 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0349) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.399761, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2046636582354639, EMA_p_value_std_dev: 0.009802702703905767 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1435, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1435, beta=63), stopping probability=0.931524 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8302) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.364375, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20513761083995613, EMA_p_value_std_dev: 0.009704920744497449 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1436, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1436, beta=63), stopping probability=0.932280 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0539) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.449156, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20553861540282709, EMA_p_value_std_dev: 0.009608114160134228 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1437, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1437, beta=63), stopping probability=0.933028 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2783) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.431671, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20497948643843966, EMA_p_value_std_dev: 0.009512273221387436 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1438, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1438, beta=63), stopping probability=0.933770 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Added chunk 45 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Added chunk 45 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Processing chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Generating logits for model, chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:57 - INFO - Processing chunks from 47 to 47 | |
[llama_gguf_optmize v0.6.0] 10:07:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Inference time: 5522.79 ms | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Reusing freed chunk 1 for chunk 47. | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Written chunk 47 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[47] 5973.30 ms | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Generating logits for model, chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Processing chunks from 47 to 47 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Inference time: 2441.65 ms | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Reusing freed chunk 1 for chunk 47. | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Written chunk 47 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[47] 2901.51 ms | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
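The "Reusing freed chunk 1 for chunk 47" lines above show the storage scheme that keeps baseline_logits.h5 and target_logits.h5 bounded: once a chunk has been compared, its physical slot in the HDF5 file goes onto a freed list and is recycled for a later chunk, so only a small window of (4096, 151936) float32 arrays is ever resident on disk. A minimal sketch of that pattern with h5py follows; the dataset name, tiny shapes, and bookkeeping are illustrative stand-ins, not the tool's actual schema.

import h5py
import numpy as np

CTX, VOCAB = 8, 16  # tiny stand-ins; the real arrays are (4096, 151936)

f = h5py.File("logits_demo.h5", "w")
dset = f.create_dataset("logits", shape=(2, CTX, VOCAB), dtype="float32")
slot_of = {}   # logical chunk id -> physical slot
freed = []     # physical slots available for reuse
next_new = 0   # next never-used slot

def write_chunk(chunk_id, logits):
    global next_new
    if freed:
        slot = freed.pop(0)                      # "Reusing freed chunk ..."
    else:
        slot, next_new = next_new, next_new + 1  # grow into a fresh slot
    dset[slot] = logits
    slot_of[chunk_id] = slot
    print(f"Written chunk {chunk_id} at physical slot {slot}")

write_chunk(45, np.zeros((CTX, VOCAB), np.float32))
write_chunk(46, np.zeros((CTX, VOCAB), np.float32))
freed.append(slot_of.pop(45))                       # chunk 45 compared, freed
write_chunk(47, np.ones((CTX, VOCAB), np.float32))  # lands back in slot 0
f.close()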
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Comparing logits for chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Processing chunks 47 to 47... | |
[llama_gguf_optmize v0.6.0] 10:08:07 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:08:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:08:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:08:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - | |
===== KL-divergence statistics for Chunk 47 ===== | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Average : 0.013446 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - StdDev : 0.038014 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Median : 0.004572 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Maximum : 1.367855 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_99 : 0.132956 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_95 : 0.051674 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_90 : 0.031284 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_10 : 0.000070 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_05 : 0.000027 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_01 : 0.000005 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Samples seen: 192384 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
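The per-chunk report above can be reproduced directly from the raw per-token KL values: the kl_values_list line shows chunk 47's 4096 tokens arriving in four (1024,) parts, and the segmentation line shows the same chunk re-split into 32 segments of 128 tokens for the early-stopping test that follows. A numpy sketch of the same statistics; computing KL as baseline || target from log-softmaxed logits is an assumption about the tool's convention, not something confirmed from its source.

import numpy as np

def token_kl(base_logits, tgt_logits):
    # Per-token KL(p || q) from raw logits, via log-softmax for stability.
    lp = base_logits - np.logaddexp.reduce(base_logits, axis=-1, keepdims=True)
    lq = tgt_logits - np.logaddexp.reduce(tgt_logits, axis=-1, keepdims=True)
    return np.sum(np.exp(lp) * (lp - lq), axis=-1)

def kl_stats(kl_values):
    # Same summary lines as the "KL-divergence statistics" block above.
    q = lambda p: float(np.percentile(kl_values, p))
    return {
        "Average": float(kl_values.mean()),
        "StdDev":  float(kl_values.std()),
        "Median":  q(50),
        "Minimum": float(kl_values.min()),
        "Maximum": float(kl_values.max()),
        "KLD_99": q(99), "KLD_95": q(95), "KLD_90": q(90),
        "KLD_10": q(10), "KLD_05": q(5),  "KLD_01": q(1),
    }

# Synthetic stand-in for the four (1024,) parts seen in kl_values_list:
kl_values = np.concatenate([np.random.gamma(0.2, 0.05, 1024) for _ in range(4)])
print(kl_stats(kl_values))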
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8388) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.399639, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20388181698368996, EMA_p_value_std_dev: 0.009417388296004643 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1439, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1439, beta=63), stopping probability=0.934505 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8734) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.324240, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20317003826948501, EMA_p_value_std_dev: 0.009323449848315927 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1440, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1440, beta=63), stopping probability=0.935233 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2912) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.491830, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20281882756056532, EMA_p_value_std_dev: 0.009230448436643043 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1441, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1441, beta=63), stopping probability=0.935955 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1543) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.440519, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20262776469082627, EMA_p_value_std_dev: 0.009138374714051597 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1442, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1442, beta=63), stopping probability=0.936670 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3082) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.492756, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20252943645898308, EMA_p_value_std_dev: 0.009047219426843 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1443, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1443, beta=63), stopping probability=0.937378 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2478) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.511295, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.2020333673031973, EMA_p_value_std_dev: 0.00895697341362431 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1444, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1444, beta=63), stopping probability=0.938080 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1389) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.396545, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20117852315556806, EMA_p_value_std_dev: 0.008867627603823407 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1445, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1445, beta=63), stopping probability=0.938776 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8975) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.401806, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20005219601702548, EMA_p_value_std_dev: 0.00877917301847527 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1446, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1446, beta=63), stopping probability=0.939465 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8883) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.448957, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19893248562726049, EMA_p_value_std_dev: 0.008691600767615978 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1447, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1447, beta=63), stopping probability=0.940148 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1712) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.447905, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1977191771819592, EMA_p_value_std_dev: 0.008604902049959008 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1448, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1448, beta=63), stopping probability=0.940824 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3553) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.528435, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19646245424095188, EMA_p_value_std_dev: 0.008519068152010667 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1449, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1449, beta=63), stopping probability=0.941495 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3431) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.373173, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19598351144640036, EMA_p_value_std_dev: 0.008434090447194469 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1450, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1450, beta=63), stopping probability=0.942158 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1415) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.361916, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1951829581381491, EMA_p_value_std_dev: 0.008349960394984542 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1451, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1451, beta=63), stopping probability=0.942816 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2414) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.418907, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19490159783843797, EMA_p_value_std_dev: 0.008266669540045408 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1452, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1452, beta=63), stopping probability=0.943467 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2933) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.487322, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1945613540723793, EMA_p_value_std_dev: 0.008184209511384292 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1453, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1453, beta=63), stopping probability=0.944113 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0357) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.402752, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19407491175664904, EMA_p_value_std_dev: 0.008102572021509071 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1454, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1454, beta=63), stopping probability=0.944752 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0752) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.454919, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1937169024396292, EMA_p_value_std_dev: 0.008021748865595377 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1455, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1455, beta=63), stopping probability=0.945385 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5048) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.433627, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19315495431469856, EMA_p_value_std_dev: 0.007941731920661063 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1456, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1456, beta=63), stopping probability=0.946012 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9912) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.340474, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19268823600263346, EMA_p_value_std_dev: 0.007862513144790289 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1457, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1457, beta=63), stopping probability=0.946633 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9953) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.305864, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1919570421793953, EMA_p_value_std_dev: 0.00778408458628876 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1458, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1458, beta=63), stopping probability=0.947248 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9142) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.235537, p-value=0.000026 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19157126028615618, EMA_p_value_std_dev: 0.007706556065898349 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1459, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1459, beta=63), stopping probability=0.947858 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0974) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.238718, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19114791501163086, EMA_p_value_std_dev: 0.007629808578691157 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1460, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1460, beta=63), stopping probability=0.948461 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0130) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.343926, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1910475156736969, EMA_p_value_std_dev: 0.007553826647660544 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1461, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1461, beta=63), stopping probability=0.949059 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1018) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.297309, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19107500060885518, EMA_p_value_std_dev: 0.007478602621892268 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1462, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1462, beta=63), stopping probability=0.949650 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0897) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.318075, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19050290709013015, EMA_p_value_std_dev: 0.007404128959205302 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1463, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1463, beta=63), stopping probability=0.950236 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Saved early stopping stats and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Early stopping at chunk 47, segment 24 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - TOTAL CHUNKS processed. | |
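The early-stopping machinery visible throughout the log follows a Bayesian pattern: each 128-token segment whose statistics satisfy the acceptance condition increments alpha in a Beta(alpha, beta) posterior over the per-segment success rate ("Condition met: Incremented alpha"), and the reported stopping probability behaves like the posterior mass above a target rate, with the run halting once that mass clears confidence_level (0.95). Here it reaches 0.950236 at chunk 47, segment 24, and the run stops. A scipy sketch of that rule; the 0.95 target rate is inferred from the logged numbers, not taken from the tool's source.

from scipy.stats import beta

CONFIDENCE = 0.95   # logged as confidence_level: 0.95
TARGET_RATE = 0.95  # assumed threshold on the per-segment success rate

def stopping_probability(a, b):
    # Posterior P(success rate > TARGET_RATE) under Beta(a, b)
    return 1.0 - beta.cdf(TARGET_RATE, a, b)

def should_stop(a, b):
    return stopping_probability(a, b) >= CONFIDENCE

print(stopping_probability(1439, 63))  # close to the logged 0.934505
print(should_stop(1463, 63))           # True once the mass clears 0.95,
                                       # matching the early stop above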
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - | |
===== Overall KL-divergence statistics ===== | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Average : 0.023298 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - StdDev : 0.096553 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Minimum : -0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Maximum : 6.588896 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_99 : 0.204897 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_95 : 0.068418 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_90 : 0.043672 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Median : 0.010238 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_10 : 0.000105 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_05 : 0.000026 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Completed processing 2325 chunks. | |
Cumulative statistics stored in kl_divergence.h5. | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - kl_d_bench completed successfully. |
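All of the cumulative results land in kl_divergence.h5. Before scripting against it, a quick h5py walk shows what the file actually contains; nothing about its internal layout is assumed here.

import h5py

with h5py.File("kl_divergence.h5", "r") as f:
    # Print every group/dataset with its shape and dtype, plus file attrs.
    f.visititems(lambda name, obj: print(
        name, getattr(obj, "shape", ""), getattr(obj, "dtype", "")))
    print(dict(f.attrs))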
python separate_unused_samples.py --calibration-file calibration-dataset.txt --output-dir-250 train250 --output-dir-rest train500 --model ../../Sailor2-1B-Chat_Q4_K_M.gguf | |
llama_load_model_from_file: using device Metal (Apple M3 Max) - 40959 MiB free | |
llama_model_loader: loaded meta data with 33 key-value pairs and 579 tensors from ../../Sailor2-1B-Chat_Q4_K_M.gguf (version GGUF V3 (latest)) | |
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. | |
llama_model_loader: - kv 0: general.architecture str = qwen2 | |
llama_model_loader: - kv 1: general.type str = model | |
llama_model_loader: - kv 2: general.name str = Sailor2 1B Chat | |
llama_model_loader: - kv 3: general.finetune str = Chat | |
llama_model_loader: - kv 4: general.basename str = Sailor2 | |
llama_model_loader: - kv 5: general.size_label str = 1B | |
llama_model_loader: - kv 6: general.license str = apache-2.0 | |
llama_model_loader: - kv 7: general.base_model.count u32 = 1 | |
llama_model_loader: - kv 8: general.base_model.0.name str = Sailor2 1B | |
llama_model_loader: - kv 9: general.base_model.0.organization str = Sail | |
llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/sail/Sailor2-1B | |
llama_model_loader: - kv 11: general.tags arr[str,6] = ["multilingual", "sea", "sailor", "sf... | |
llama_model_loader: - kv 12: general.languages arr[str,12] = ["en", "zh", "id", "th", "vi", "ms", ... | |
llama_model_loader: - kv 13: qwen2.block_count u32 = 48 | |
llama_model_loader: - kv 14: qwen2.context_length u32 = 32768 | |
llama_model_loader: - kv 15: qwen2.embedding_length u32 = 896 | |
llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 4864 | |
llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 14 | |
llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2 | |
llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000 | |
llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001 | |
llama_model_loader: - kv 21: general.file_type u32 = 15 | |
llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2 | |
llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2 | |
llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ... | |
llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... | |
llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... | |
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645 | |
llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643 | |
llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643 | |
llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false | |
llama_model_loader: - kv 31: tokenizer.chat_template str = {% for message in messages %}{% if lo... | |
llama_model_loader: - kv 32: general.quantization_version u32 = 2 | |
llama_model_loader: - type f32: 241 tensors | |
llama_model_loader: - type q5_0: 265 tensors | |
llama_model_loader: - type q8_0: 25 tensors | |
llama_model_loader: - type q4_K: 24 tensors | |
llama_model_loader: - type q6_K: 24 tensors | |
llm_load_vocab: control token: 151659 '<|fim_prefix|>' is not marked as EOG | |
llm_load_vocab: control token: 151656 '<|video_pad|>' is not marked as EOG | |
llm_load_vocab: control token: 151655 '<|image_pad|>' is not marked as EOG | |
llm_load_vocab: control token: 151653 '<|vision_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151652 '<|vision_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151651 '<|quad_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151649 '<|box_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151648 '<|box_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151646 '<|object_ref_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151644 '<|im_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151661 '<|fim_suffix|>' is not marked as EOG | |
llm_load_vocab: control token: 151647 '<|object_ref_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151660 '<|fim_middle|>' is not marked as EOG | |
llm_load_vocab: control token: 151654 '<|vision_pad|>' is not marked as EOG | |
llm_load_vocab: control token: 151650 '<|quad_start|>' is not marked as EOG | |
llm_load_vocab: special tokens cache size = 22 | |
llm_load_vocab: token to piece cache size = 0.9310 MB | |
llm_load_print_meta: format = GGUF V3 (latest) | |
llm_load_print_meta: arch = qwen2 | |
llm_load_print_meta: vocab type = BPE | |
llm_load_print_meta: n_vocab = 151936 | |
llm_load_print_meta: n_merges = 151387 | |
llm_load_print_meta: vocab_only = 0 | |
llm_load_print_meta: n_ctx_train = 32768 | |
llm_load_print_meta: n_embd = 896 | |
llm_load_print_meta: n_layer = 48 | |
llm_load_print_meta: n_head = 14 | |
llm_load_print_meta: n_head_kv = 2 | |
llm_load_print_meta: n_rot = 64 | |
llm_load_print_meta: n_swa = 0 | |
llm_load_print_meta: n_embd_head_k = 64 | |
llm_load_print_meta: n_embd_head_v = 64 | |
llm_load_print_meta: n_gqa = 7 | |
llm_load_print_meta: n_embd_k_gqa = 128 | |
llm_load_print_meta: n_embd_v_gqa = 128 | |
llm_load_print_meta: f_norm_eps = 0.0e+00 | |
llm_load_print_meta: f_norm_rms_eps = 1.0e-06 | |
llm_load_print_meta: f_clamp_kqv = 0.0e+00 | |
llm_load_print_meta: f_max_alibi_bias = 0.0e+00 | |
llm_load_print_meta: f_logit_scale = 0.0e+00 | |
llm_load_print_meta: n_ff = 4864 | |
llm_load_print_meta: n_expert = 0 | |
llm_load_print_meta: n_expert_used = 0 | |
llm_load_print_meta: causal attn = 1 | |
llm_load_print_meta: pooling type = 0 | |
llm_load_print_meta: rope type = 2 | |
llm_load_print_meta: rope scaling = linear | |
llm_load_print_meta: freq_base_train = 1000000.0 | |
llm_load_print_meta: freq_scale_train = 1 | |
llm_load_print_meta: n_ctx_orig_yarn = 32768 | |
llm_load_print_meta: rope_finetuned = unknown | |
llm_load_print_meta: ssm_d_conv = 0 | |
llm_load_print_meta: ssm_d_inner = 0 | |
llm_load_print_meta: ssm_d_state = 0 | |
llm_load_print_meta: ssm_dt_rank = 0 | |
llm_load_print_meta: ssm_dt_b_c_rms = 0 | |
llm_load_print_meta: model type = 14B | |
llm_load_print_meta: model ftype = Q4_K - Medium | |
llm_load_print_meta: model params = 988.06 M | |
llm_load_print_meta: model size = 698.72 MiB (5.93 BPW) | |
llm_load_print_meta: general.name = Sailor2 1B Chat | |
llm_load_print_meta: BOS token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOS token = 151645 '<|im_end|>' | |
llm_load_print_meta: EOT token = 151645 '<|im_end|>' | |
llm_load_print_meta: PAD token = 151643 '<|endoftext|>' | |
llm_load_print_meta: LF token = 148848 'ÄĬ' | |
llm_load_print_meta: FIM PRE token = 151659 '<|fim_prefix|>' | |
llm_load_print_meta: FIM SUF token = 151661 '<|fim_suffix|>' | |
llm_load_print_meta: FIM MID token = 151660 '<|fim_middle|>' | |
llm_load_print_meta: FIM PAD token = 151662 '<|fim_pad|>' | |
llm_load_print_meta: FIM REP token = 151663 '<|repo_name|>' | |
llm_load_print_meta: FIM SEP token = 151664 '<|file_sep|>' | |
llm_load_print_meta: EOG token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOG token = 151645 '<|im_end|>' | |
llm_load_print_meta: EOG token = 151662 '<|fim_pad|>' | |
llm_load_print_meta: EOG token = 151663 '<|repo_name|>' | |
llm_load_print_meta: EOG token = 151664 '<|file_sep|>' | |
llm_load_print_meta: max token length = 256 | |
llm_load_tensors: tensor 'token_embd.weight' (q5_0) (and 578 others) cannot be used with preferred buffer type CPU_AARCH64, using CPU instead | |
llm_load_tensors: offloading 0 repeating layers to GPU | |
llm_load_tensors: offloaded 0/49 layers to GPU | |
llm_load_tensors: CPU_Mapped model buffer size = 698.72 MiB | |
...................................................................... | |
llama_new_context_with_model: n_seq_max = 1 | |
llama_new_context_with_model: n_ctx = 512 | |
llama_new_context_with_model: n_ctx_per_seq = 512 | |
llama_new_context_with_model: n_batch = 512 | |
llama_new_context_with_model: n_ubatch = 512 | |
llama_new_context_with_model: flash_attn = 0 | |
llama_new_context_with_model: freq_base = 1000000.0 | |
llama_new_context_with_model: freq_scale = 1 | |
llama_new_context_with_model: n_ctx_per_seq (512) < n_ctx_train (32768) -- the full capacity of the model will not be utilized | |
ggml_metal_init: allocating | |
ggml_metal_init: found device: Apple M3 Max | |
ggml_metal_init: picking default device: Apple M3 Max | |
ggml_metal_init: using embedded metal library | |
ggml_metal_init: GPU name: Apple M3 Max | |
ggml_metal_init: GPU family: MTLGPUFamilyApple9 (1009) | |
ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003) | |
ggml_metal_init: GPU family: MTLGPUFamilyMetal3 (5001) | |
ggml_metal_init: simdgroup reduction = true | |
ggml_metal_init: simdgroup matrix mul. = true | |
ggml_metal_init: has bfloat = true | |
ggml_metal_init: use bfloat = false | |
ggml_metal_init: hasUnifiedMemory = true | |
ggml_metal_init: recommendedMaxWorkingSetSize = 42949.67 MB | |
ggml_metal_init: loaded kernel_add 0x146e2d2c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_add_row 0x146e2d9d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_sub 0x146e2df80 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_sub_row 0x146e2e530 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul 0x146e2eae0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_row 0x146e2f090 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_div 0x146e2f640 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_div_row 0x146e2fbf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_f32 0x146e301a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_f16 0x146e306a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_i32 0x146e30ba0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_i16 0x146e310a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_scale 0x146e31bc0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_scale_4 0x146e32370 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_clamp 0x146e32b80 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_tanh 0x146e332a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_relu 0x146e339c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_sigmoid 0x146e340e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu 0x146e34800 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu_4 0x146e34fd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu_quick 0x146e356f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu_quick_4 0x146e35e10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_silu 0x146e36530 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_silu_4 0x146e36dd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_elu 0x146e374f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f16 0x146e377b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f16_4 0x146e38420 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f32 0x146e38960 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f32_4 0x146e38ea0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_diag_mask_inf 0x146e39160 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_diag_mask_inf_8 0x146e39600 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_f32 0x146e398c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_f16 0x146e3a150 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: skipping kernel_get_rows_bf16 (not supported) | |
ggml_metal_init: loaded kernel_get_rows_q4_0 0x146e3a410 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q4_1 0x146e3a8b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q5_0 0x146e3ad50 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q5_1 0x146e3b1f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q8_0 0x146e3b690 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q2_K 0x146e3bb30 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q3_K 0x146e3bfd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q4_K 0x146e3c470 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q5_K 0x146e3c910 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q6_K 0x146e3cdb0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq2_xxs 0x146e3d070 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq2_xs 0x146e3d680 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq3_xxs 0x146e3dc90 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq3_s 0x146e3e2a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq2_s 0x146e3ebc0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq1_s 0x146e3f1d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq1_m 0x146e3f7e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq4_nl 0x146e3fdf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq4_xs 0x146e40400 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_i32 0x146e40a10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_rms_norm 0x146e41200 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_group_norm 0x146e416a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_norm 0x146e41b40 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_ssm_conv_f32 0x146e41e00 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_ssm_scan_f32 0x146e42410 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f32_f32 0x146e42c00 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: skipping kernel_mul_mv_bf16_f32 (not supported) | |
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row (not supported) | |
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4 (not supported) | |
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16 (not supported) | |
ggml_metal_init: loaded kernel_mul_mv_f16_f32 0x146e42ec0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f16_f32_1row 0x146e43360 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f16_f32_l4 0x146e43800 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f16_f16 0x146e43ca0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q4_0_f32 0x146e44140 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q4_1_f32 0x146e445e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q5_0_f32 0x146e44a80 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q5_1_f32 0x146e44f20 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q8_0_f32 0x146e453c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_2 0x146e45910 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_3 0x146e45e60 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_4 0x146e463b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_5 0x146e46900 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_2 0x146e46e50 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_3 0x146e473a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_4 0x146e478f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_5 0x146e47e40 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_2 0x146e48390 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_3 0x146e488e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_4 0x146e48e30 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_5 0x146e49380 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_2 0x146e498d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_3 0x146e49e20 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_4 0x146e4a370 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_5 0x146e4a8c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_2 0x146e4ae10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_3 0x146e4b360 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_4 0x146e4b8b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_5 0x146e4be00 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_2 0x146e4c350 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_3 0x146e4c8a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_4 0x146e4cdf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_5 0x146e4d340 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_2 0x146e4d890 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_3 0x146e4dde0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_4 0x146e4e330 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_5 0x146e4e880 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_2 0x146e4edd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_3 0x146e3e8b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_4 0x146e4f240 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_5 0x146e4f9f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_2 0x146e4ff40 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_3 0x146e50490 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_4 0x146e509e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_5 0x146e50f30 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_2 0x146e51480 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_3 0x146e519d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_4 0x146e51f20 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_5 0x146e52470 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q2_K_f32 0x146e52910 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q3_K_f32 0x146e52db0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q4_K_f32 0x146e53250 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q5_K_f32 0x146e536f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q6_K_f32 0x146e53b90 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq2_xxs_f32 0x146e54030 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq2_xs_f32 0x146e544d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq3_xxs_f32 0x146e54970 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq3_s_f32 0x146e54e10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq2_s_f32 0x146e552b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq1_s_f32 0x146e55750 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq1_m_f32 0x146e55bf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq4_nl_f32 0x146e56090 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq4_xs_f32 0x146e56530 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_f32_f32 0x146e569d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_f16_f32 0x146e56e70 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32 (not supported) | |
ggml_metal_init: loaded kernel_mul_mv_id_q4_0_f32 0x146e57310 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_q4_1_f32 0x146e577b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_q5_0_f32 0x146e57c50 | th_max = 1024 | th_width = 32 |
[log truncated: GitHub cannot render the remainder of this file]
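The load log above shows the finished Q4_K - Medium file (general.file_type = 15) with a mixed per-tensor layout (q5_0, q8_0, q4_K, q6_K). The gist does not show the quantization command itself, but a minimal sketch of that step, assuming the llama-quantize binary from the same llama.cpp build and hypothetical file names for the imatrix and output, would look like:

# sketch only: file names are assumptions, not taken from the log above
./llama-quantize --imatrix $HF/Sailor2-1B-Chat.imatrix $HF/Sailor2-1B-Chat_bf16.gguf $HF/Sailor2-1B-Chat_Q4_K_M.gguf Q4_K_M

Passing the imatrix lets the k-quant code weight each tensor's quantization error by observed activation importance, which is the point of computing the imatrix in the first place.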