Creating Sailor2 imatrix for llama.cpp quantization
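The transcript below covers three steps: cloning the model from Hugging Face, converting it to a bf16 GGUF with llama.cpp's convert_hf_to_gguf.py, and quantizing the result to Q4_K_M with llama-quantize.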
git clone https://huggingface.co/sail/Sailor2-1B-Chat
Cloning into 'Sailor2-1B-Chat'...
remote: Enumerating objects: 39, done.
remote: Counting objects: 100% (36/36), done.
remote: Compressing objects: 100% (36/36), done.
remote: Total 39 (delta 14), reused 0 (delta 0), pack-reused 3 (from 1)
Unpacking objects: 100% (39/39), 2.02 MiB | 2.26 MiB/s, done.
Filtering content: 100% (2/2), 1.85 GiB | 14.92 MiB/s, done.
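The "Filtering content" line above is Git LFS pulling the 1.85 GiB of weights. convert_hf_to_gguf.py ships with llama.cpp; a minimal setup sketch, assuming a llama.cpp checkout (the pip command and export are assumptions, not part of the original log; $HF is the directory holding the clone, matching the output paths later in the log):

# sketch: install the converter's Python dependencies from a llama.cpp checkout
pip install -r llama.cpp/requirements.txt
export HF=/Users/Shared/Public/huggingface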
./convert_hf_to_gguf.py --outfile $HF/Sailor2-1B-Chat_bf16.gguf --outtype bf16 $HF/Sailor2-1B-Chat
INFO:hf-to-gguf:Loading model: Sailor2-1B-Chat
INFO:gguf.gguf_writer:gguf: This GGUF file is for Little Endian only
INFO:hf-to-gguf:Exporting model...
INFO:hf-to-gguf:gguf: loading model part 'model.safetensors'
INFO:hf-to-gguf:output.weight, torch.bfloat16 --> BF16, shape = {896, 151936}
INFO:hf-to-gguf:token_embd.weight, torch.bfloat16 --> BF16, shape = {896, 151936}
INFO:hf-to-gguf:blk.0.attn_norm.weight, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:blk.0.ffn_down.weight, torch.bfloat16 --> BF16, shape = {4864, 896}
INFO:hf-to-gguf:blk.0.ffn_gate.weight, torch.bfloat16 --> BF16, shape = {896, 4864}
INFO:hf-to-gguf:blk.0.ffn_up.weight, torch.bfloat16 --> BF16, shape = {896, 4864}
INFO:hf-to-gguf:blk.0.ffn_norm.weight, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:blk.0.attn_k.bias, torch.bfloat16 --> F32, shape = {128}
INFO:hf-to-gguf:blk.0.attn_k.weight, torch.bfloat16 --> BF16, shape = {896, 128}
INFO:hf-to-gguf:blk.0.attn_output.weight, torch.bfloat16 --> BF16, shape = {896, 896}
INFO:hf-to-gguf:blk.0.attn_q.bias, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:blk.0.attn_q.weight, torch.bfloat16 --> BF16, shape = {896, 896}
INFO:hf-to-gguf:blk.0.attn_v.bias, torch.bfloat16 --> F32, shape = {128}
INFO:hf-to-gguf:blk.0.attn_v.weight, torch.bfloat16 --> BF16, shape = {896, 128}
[... the same 12 tensor lines repeat for blk.1 through blk.47 ...]
INFO:hf-to-gguf:output_norm.weight, torch.bfloat16 --> F32, shape = {896}
INFO:hf-to-gguf:Set meta model
INFO:hf-to-gguf:Set model parameters
INFO:hf-to-gguf:gguf: context length = 32768
INFO:hf-to-gguf:gguf: embedding length = 896
INFO:hf-to-gguf:gguf: feed forward length = 4864
INFO:hf-to-gguf:gguf: head count = 14
INFO:hf-to-gguf:gguf: key-value head count = 2
INFO:hf-to-gguf:gguf: rope theta = 1000000.0
INFO:hf-to-gguf:gguf: rms norm epsilon = 1e-06
INFO:hf-to-gguf:gguf: file type = 32
INFO:hf-to-gguf:Set model tokenizer
INFO:gguf.vocab:Adding 151387 merge(s).
INFO:gguf.vocab:Setting special token type eos to 151645
INFO:gguf.vocab:Setting special token type pad to 151643
INFO:gguf.vocab:Setting special token type bos to 151643
INFO:gguf.vocab:Setting add_bos_token to False
INFO:gguf.vocab:Setting chat_template to {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system
You are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>
' }}{% endif %}{{'<|im_start|>' + message['role'] + '
' + message['content'] + '<|im_end|>' + '
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
' }}{% endif %}
INFO:hf-to-gguf:Set model quantization version
INFO:gguf.gguf_writer:Writing the following files:
INFO:gguf.gguf_writer:/Users/Shared/Public/huggingface/Sailor2-1B-Chat_bf16.gguf: n_tensors = 579, total_size = 2.0G
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
To disable this warning, you can either:
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Writing: 100%|██████████████████████████████████████| 1.98G/1.98G [00:03<00:00, 515Mbyte/s]
INFO:hf-to-gguf:Model successfully exported to /Users/Shared/Public/huggingface/Sailor2-1B-Chat_bf16.gguf
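The gist's title refers to an importance matrix (imatrix), which llama.cpp computes from the full-precision GGUF with its llama-imatrix tool before quantizing. The captured log goes straight to llama-quantize without passing --imatrix, so the step below is only a sketch; calibration.txt stands in for whatever calibration corpus is used and is not from the original log:

# sketch: compute an importance matrix over a calibration corpus
llama-imatrix -m $HF/Sailor2-1B-Chat_bf16.gguf -f calibration.txt -o Sailor2-1B-Chat.imatrix
# the result would then be supplied to the quantizer:
# llama-quantize --imatrix Sailor2-1B-Chat.imatrix Sailor2-1B-Chat_bf16.gguf ./Sailor2-1B-Chat_Q4_K_M.gguf Q4_K_M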
llama-quantize Sailor2-1B-Chat_bf16.gguf ./Sailor2-1B-Chat_Q4_K_M.gguf Q4_K_M
main: build = 4419 (46e3556e)
main: built with Apple clang version 16.0.0 (clang-1600.0.26.4) for arm64-apple-darwin24.1.0
main: quantizing 'Sailor2-1B-Chat_bf16.gguf' to './Sailor2-1B-Chat_Q4_K_M.gguf' as Q4_K_M
llama_model_loader: loaded meta data with 33 key-value pairs and 579 tensors from Sailor2-1B-Chat_bf16.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv 0: general.architecture str = qwen2
llama_model_loader: - kv 1: general.type str = model
llama_model_loader: - kv 2: general.name str = Sailor2 1B Chat
llama_model_loader: - kv 3: general.finetune str = Chat
llama_model_loader: - kv 4: general.basename str = Sailor2
llama_model_loader: - kv 5: general.size_label str = 1B
llama_model_loader: - kv 6: general.license str = apache-2.0
llama_model_loader: - kv 7: general.base_model.count u32 = 1
llama_model_loader: - kv 8: general.base_model.0.name str = Sailor2 1B
llama_model_loader: - kv 9: general.base_model.0.organization str = Sail
llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/sail/Sailor2-1B
llama_model_loader: - kv 11: general.tags arr[str,6] = ["multilingual", "sea", "sailor", "sf...
llama_model_loader: - kv 12: general.languages arr[str,12] = ["en", "zh", "id", "th", "vi", "ms", ...
llama_model_loader: - kv 13: qwen2.block_count u32 = 48
llama_model_loader: - kv 14: qwen2.context_length u32 = 32768
llama_model_loader: - kv 15: qwen2.embedding_length u32 = 896
llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 4864
llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 14
llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2
llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000
llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001
llama_model_loader: - kv 21: general.file_type u32 = 32
llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2
llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2
llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ...
llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",...
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645
llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643
llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643
llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false
llama_model_loader: - kv 31: tokenizer.chat_template str = {% for message in messages %}{% if lo...
llama_model_loader: - kv 32: general.quantization_version u32 = 2
llama_model_loader: - type f32: 241 tensors
llama_model_loader: - type bf16: 338 tensors
[ 1/ 579] output.weight - [ 896, 151936, 1, 1], type = bf16, converting to q8_0 .. size = 259.66 MiB -> 137.94 MiB
[ 2/ 579] output_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB
[ 3/ 579] token_embd.weight - [ 896, 151936, 1, 1], type = bf16,
llama_tensor_get_type : tensor cols 896 x 151936 are not divisible by 256, required for q4_K - using fallback quantization q5_0
converting to q5_0 .. size = 259.66 MiB -> 89.26 MiB
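Why the fallback: k-quants such as q4_K and q6_K pack each tensor row into super-blocks of 256 weights, so the row length must be divisible by 256. This model's embedding width is 896 = 3 × 256 + 128, so llama-quantize substitutes the nearest formats without that constraint (q5_0, or q8_0 where q6_K was requested), as the messages below show.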
[ 4/ 579] blk.0.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 5/ 579] blk.0.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 6/ 579] blk.0.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 7/ 579] blk.0.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 8/ 579] blk.0.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 9/ 579] blk.0.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 10/ 579] blk.0.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 11/ 579] blk.0.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 12/ 579] blk.0.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 13/ 579] blk.0.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 14/ 579] blk.0.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 15/ 579] blk.0.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
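Note: the per-layer mix is consistent with a Q4_K_M recipe, where llama.cpp deliberately spends extra bits on attn_v and ffn_down in some layers. In the blocks below, those two tensors target q6_K (attn_v then falls back to q8_0 for the row-width reason above) in blocks 0-5 and every third block after that (8, 11, 14, ...), and target q4_K elsewhere. That pattern matches the use_more_bits() layer rule in llama.cpp's quantizer; a hedged reconstruction for the 48 layers here (verify against your llama.cpp checkout):

def use_more_bits(i_layer: int, n_layer: int) -> bool:
    # extra bits for the first and last n/8 layers, plus every 3rd in between
    return (i_layer < n_layer // 8
            or i_layer >= 7 * n_layer // 8
            or (i_layer - n_layer // 8) % 3 == 2)

print([i for i in range(48) if use_more_bits(i, 48)])
# -> [0, 1, 2, 3, 4, 5, 8, 11, 14, ..., 41, 42, 43, 44, 45, 46, 47]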
[ 16/ 579] blk.1.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 17/ 579] blk.1.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 18/ 579] blk.1.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 19/ 579] blk.1.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 20/ 579] blk.1.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 21/ 579] blk.1.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 22/ 579] blk.1.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 23/ 579] blk.1.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 24/ 579] blk.1.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 25/ 579] blk.1.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 26/ 579] blk.1.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 27/ 579] blk.1.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
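Note: the MiB figures can be sanity-checked from the standard ggml block layouts: bf16 is 2 bytes/weight, q5_0 stores 32 weights in 22 bytes, q8_0 stores 32 in 34 bytes, q4_K stores 256 in 144 bytes, and q6_K stores 256 in 210 bytes. A quick check that reproduces the sizes logged above:

BYTES_PER_WEIGHT = {"bf16": 2.0, "q5_0": 22/32, "q8_0": 34/32,
                    "q4_K": 144/256, "q6_K": 210/256}

def mib(n_weights, fmt):
    return n_weights * BYTES_PER_WEIGHT[fmt] / 2**20

n = 896 * 4864                                     # ffn tensors
print(f"{mib(n, 'bf16'):.2f} -> {mib(n, 'q6_K'):.2f} MiB")   # 8.31 -> 3.41
print(f"{mib(n, 'bf16'):.2f} -> {mib(n, 'q5_0'):.2f} MiB")   # 8.31 -> 2.86
n = 896 * 151936                                   # token_embd / output
print(f"{mib(n, 'bf16'):.2f} -> {mib(n, 'q8_0'):.2f} MiB")   # 259.66 -> 137.94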
[ 28/ 579] blk.2.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 29/ 579] blk.2.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 30/ 579] blk.2.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 31/ 579] blk.2.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 32/ 579] blk.2.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 33/ 579] blk.2.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 34/ 579] blk.2.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 35/ 579] blk.2.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 36/ 579] blk.2.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 37/ 579] blk.2.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 38/ 579] blk.2.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 39/ 579] blk.2.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 40/ 579] blk.3.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 41/ 579] blk.3.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 42/ 579] blk.3.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 43/ 579] blk.3.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 44/ 579] blk.3.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 45/ 579] blk.3.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 46/ 579] blk.3.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 47/ 579] blk.3.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 48/ 579] blk.3.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 49/ 579] blk.3.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 50/ 579] blk.3.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 51/ 579] blk.3.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 52/ 579] blk.4.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 53/ 579] blk.4.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 54/ 579] blk.4.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 55/ 579] blk.4.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 56/ 579] blk.4.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 57/ 579] blk.4.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 58/ 579] blk.4.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 59/ 579] blk.4.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 60/ 579] blk.4.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 61/ 579] blk.4.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 62/ 579] blk.4.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 63/ 579] blk.4.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 64/ 579] blk.5.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 65/ 579] blk.5.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 66/ 579] blk.5.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 67/ 579] blk.5.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 68/ 579] blk.5.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 69/ 579] blk.5.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 70/ 579] blk.5.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 71/ 579] blk.5.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 72/ 579] blk.5.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 73/ 579] blk.5.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 74/ 579] blk.5.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 75/ 579] blk.5.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 76/ 579] blk.6.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 77/ 579] blk.6.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 78/ 579] blk.6.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 79/ 579] blk.6.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 80/ 579] blk.6.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 81/ 579] blk.6.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 82/ 579] blk.6.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 83/ 579] blk.6.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 84/ 579] blk.6.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 85/ 579] blk.6.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 86/ 579] blk.6.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 87/ 579] blk.6.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 88/ 579] blk.7.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 89/ 579] blk.7.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 90/ 579] blk.7.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 91/ 579] blk.7.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 92/ 579] blk.7.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 93/ 579] blk.7.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 94/ 579] blk.7.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 95/ 579] blk.7.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 96/ 579] blk.7.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 97/ 579] blk.7.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 98/ 579] blk.7.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 99/ 579] blk.7.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 100/ 579] blk.8.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 101/ 579] blk.8.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 102/ 579] blk.8.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 103/ 579] blk.8.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 104/ 579] blk.8.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 105/ 579] blk.8.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 106/ 579] blk.8.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 107/ 579] blk.8.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 108/ 579] blk.8.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 109/ 579] blk.8.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 110/ 579] blk.8.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 111/ 579] blk.8.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 112/ 579] blk.9.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 113/ 579] blk.9.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 114/ 579] blk.9.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 115/ 579] blk.9.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 116/ 579] blk.9.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 117/ 579] blk.9.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 118/ 579] blk.9.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 119/ 579] blk.9.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 120/ 579] blk.9.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 121/ 579] blk.9.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 122/ 579] blk.9.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 123/ 579] blk.9.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 124/ 579] blk.10.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 125/ 579] blk.10.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 126/ 579] blk.10.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 127/ 579] blk.10.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 128/ 579] blk.10.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 129/ 579] blk.10.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 130/ 579] blk.10.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 131/ 579] blk.10.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 132/ 579] blk.10.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 133/ 579] blk.10.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 134/ 579] blk.10.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 135/ 579] blk.10.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 136/ 579] blk.11.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 137/ 579] blk.11.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 138/ 579] blk.11.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 139/ 579] blk.11.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 140/ 579] blk.11.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 141/ 579] blk.11.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 142/ 579] blk.11.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 143/ 579] blk.11.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 144/ 579] blk.11.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 145/ 579] blk.11.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 146/ 579] blk.11.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 147/ 579] blk.11.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 148/ 579] blk.12.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 149/ 579] blk.12.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 150/ 579] blk.12.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 151/ 579] blk.12.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 152/ 579] blk.12.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 153/ 579] blk.12.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 154/ 579] blk.12.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 155/ 579] blk.12.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 156/ 579] blk.12.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 157/ 579] blk.12.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 158/ 579] blk.12.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 159/ 579] blk.12.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 160/ 579] blk.13.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 161/ 579] blk.13.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 162/ 579] blk.13.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 163/ 579] blk.13.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 164/ 579] blk.13.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 165/ 579] blk.13.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 166/ 579] blk.13.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 167/ 579] blk.13.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 168/ 579] blk.13.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 169/ 579] blk.13.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 170/ 579] blk.13.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 171/ 579] blk.13.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 172/ 579] blk.14.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 173/ 579] blk.14.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 174/ 579] blk.14.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 175/ 579] blk.14.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 176/ 579] blk.14.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 177/ 579] blk.14.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 178/ 579] blk.14.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 179/ 579] blk.14.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 180/ 579] blk.14.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 181/ 579] blk.14.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 182/ 579] blk.14.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 183/ 579] blk.14.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 184/ 579] blk.15.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 185/ 579] blk.15.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 186/ 579] blk.15.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 187/ 579] blk.15.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 188/ 579] blk.15.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 189/ 579] blk.15.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 190/ 579] blk.15.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 191/ 579] blk.15.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 192/ 579] blk.15.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 193/ 579] blk.15.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 194/ 579] blk.15.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 195/ 579] blk.15.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 196/ 579] blk.16.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 197/ 579] blk.16.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 198/ 579] blk.16.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 199/ 579] blk.16.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 200/ 579] blk.16.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 201/ 579] blk.16.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 202/ 579] blk.16.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 203/ 579] blk.16.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 204/ 579] blk.16.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 205/ 579] blk.16.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 206/ 579] blk.16.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 207/ 579] blk.16.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 208/ 579] blk.17.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 209/ 579] blk.17.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 210/ 579] blk.17.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 211/ 579] blk.17.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 212/ 579] blk.17.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 213/ 579] blk.17.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 214/ 579] blk.17.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 215/ 579] blk.17.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 216/ 579] blk.17.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 217/ 579] blk.17.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 218/ 579] blk.17.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 219/ 579] blk.17.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 220/ 579] blk.18.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 221/ 579] blk.18.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 222/ 579] blk.18.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 223/ 579] blk.18.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 224/ 579] blk.18.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 225/ 579] blk.18.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 226/ 579] blk.18.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 227/ 579] blk.18.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 228/ 579] blk.18.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 229/ 579] blk.18.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 230/ 579] blk.18.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 231/ 579] blk.18.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 232/ 579] blk.19.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 233/ 579] blk.19.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 234/ 579] blk.19.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 235/ 579] blk.19.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 236/ 579] blk.19.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 237/ 579] blk.19.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 238/ 579] blk.19.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 239/ 579] blk.19.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 240/ 579] blk.19.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 241/ 579] blk.19.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 242/ 579] blk.19.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 243/ 579] blk.19.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 244/ 579] blk.20.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 245/ 579] blk.20.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 246/ 579] blk.20.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 247/ 579] blk.20.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 248/ 579] blk.20.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 249/ 579] blk.20.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 250/ 579] blk.20.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 251/ 579] blk.20.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 252/ 579] blk.20.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 253/ 579] blk.20.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 254/ 579] blk.20.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 255/ 579] blk.20.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 256/ 579] blk.21.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 257/ 579] blk.21.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 258/ 579] blk.21.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 259/ 579] blk.21.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 260/ 579] blk.21.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 261/ 579] blk.21.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 262/ 579] blk.21.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 263/ 579] blk.21.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 264/ 579] blk.21.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 265/ 579] blk.21.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 266/ 579] blk.21.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 267/ 579] blk.21.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 268/ 579] blk.22.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 269/ 579] blk.22.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 270/ 579] blk.22.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 271/ 579] blk.22.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 272/ 579] blk.22.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 273/ 579] blk.22.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 274/ 579] blk.22.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 275/ 579] blk.22.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 276/ 579] blk.22.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 277/ 579] blk.22.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 278/ 579] blk.22.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 279/ 579] blk.22.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 280/ 579] blk.23.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 281/ 579] blk.23.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 282/ 579] blk.23.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 283/ 579] blk.23.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 284/ 579] blk.23.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 285/ 579] blk.23.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 286/ 579] blk.23.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 287/ 579] blk.23.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 288/ 579] blk.23.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 289/ 579] blk.23.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 290/ 579] blk.23.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 291/ 579] blk.23.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 292/ 579] blk.24.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 293/ 579] blk.24.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 294/ 579] blk.24.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 295/ 579] blk.24.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 296/ 579] blk.24.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 297/ 579] blk.24.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 298/ 579] blk.24.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 299/ 579] blk.24.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 300/ 579] blk.24.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 301/ 579] blk.24.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 302/ 579] blk.24.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 303/ 579] blk.24.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 304/ 579] blk.25.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 305/ 579] blk.25.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 306/ 579] blk.25.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 307/ 579] blk.25.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 308/ 579] blk.25.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 309/ 579] blk.25.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 310/ 579] blk.25.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 311/ 579] blk.25.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 312/ 579] blk.25.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 313/ 579] blk.25.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 314/ 579] blk.25.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 315/ 579] blk.25.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 316/ 579] blk.26.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 317/ 579] blk.26.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 318/ 579] blk.26.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 319/ 579] blk.26.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 320/ 579] blk.26.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 321/ 579] blk.26.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 322/ 579] blk.26.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 323/ 579] blk.26.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 324/ 579] blk.26.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 325/ 579] blk.26.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 326/ 579] blk.26.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 327/ 579] blk.26.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 328/ 579] blk.27.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 329/ 579] blk.27.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 330/ 579] blk.27.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 331/ 579] blk.27.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 332/ 579] blk.27.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 333/ 579] blk.27.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 334/ 579] blk.27.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 335/ 579] blk.27.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 336/ 579] blk.27.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 337/ 579] blk.27.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 338/ 579] blk.27.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 339/ 579] blk.27.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 340/ 579] blk.28.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 341/ 579] blk.28.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 342/ 579] blk.28.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 343/ 579] blk.28.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 344/ 579] blk.28.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 345/ 579] blk.28.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 346/ 579] blk.28.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 347/ 579] blk.28.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 348/ 579] blk.28.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 349/ 579] blk.28.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 350/ 579] blk.28.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 351/ 579] blk.28.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 352/ 579] blk.29.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 353/ 579] blk.29.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 354/ 579] blk.29.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 355/ 579] blk.29.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 356/ 579] blk.29.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 357/ 579] blk.29.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 358/ 579] blk.29.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 359/ 579] blk.29.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 360/ 579] blk.29.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 361/ 579] blk.29.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 362/ 579] blk.29.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 363/ 579] blk.29.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 364/ 579] blk.30.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 365/ 579] blk.30.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 366/ 579] blk.30.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 367/ 579] blk.30.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 368/ 579] blk.30.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 369/ 579] blk.30.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 370/ 579] blk.30.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 371/ 579] blk.30.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 372/ 579] blk.30.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 373/ 579] blk.30.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 374/ 579] blk.30.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 375/ 579] blk.30.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 376/ 579] blk.31.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 377/ 579] blk.31.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 378/ 579] blk.31.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 379/ 579] blk.31.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 380/ 579] blk.31.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 381/ 579] blk.31.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 382/ 579] blk.31.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 383/ 579] blk.31.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 384/ 579] blk.31.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 385/ 579] blk.31.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 386/ 579] blk.31.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 387/ 579] blk.31.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 388/ 579] blk.32.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 389/ 579] blk.32.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 390/ 579] blk.32.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 391/ 579] blk.32.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 392/ 579] blk.32.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 393/ 579] blk.32.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 394/ 579] blk.32.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 395/ 579] blk.32.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 396/ 579] blk.32.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 397/ 579] blk.32.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 398/ 579] blk.32.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 399/ 579] blk.32.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 400/ 579] blk.33.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 401/ 579] blk.33.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 402/ 579] blk.33.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 403/ 579] blk.33.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 404/ 579] blk.33.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 405/ 579] blk.33.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 406/ 579] blk.33.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 407/ 579] blk.33.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 408/ 579] blk.33.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 409/ 579] blk.33.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 410/ 579] blk.33.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 411/ 579] blk.33.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 412/ 579] blk.34.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 413/ 579] blk.34.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 414/ 579] blk.34.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 415/ 579] blk.34.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 416/ 579] blk.34.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 417/ 579] blk.34.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 418/ 579] blk.34.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 419/ 579] blk.34.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 420/ 579] blk.34.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 421/ 579] blk.34.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 422/ 579] blk.34.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 423/ 579] blk.34.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 424/ 579] blk.35.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 425/ 579] blk.35.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 426/ 579] blk.35.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 427/ 579] blk.35.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 428/ 579] blk.35.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 429/ 579] blk.35.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 430/ 579] blk.35.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 431/ 579] blk.35.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 432/ 579] blk.35.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 433/ 579] blk.35.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 434/ 579] blk.35.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 435/ 579] blk.35.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 436/ 579] blk.36.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 437/ 579] blk.36.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 438/ 579] blk.36.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 439/ 579] blk.36.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 440/ 579] blk.36.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 441/ 579] blk.36.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 442/ 579] blk.36.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 443/ 579] blk.36.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 444/ 579] blk.36.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 445/ 579] blk.36.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 446/ 579] blk.36.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 447/ 579] blk.36.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 448/ 579] blk.37.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 449/ 579] blk.37.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 450/ 579] blk.37.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 451/ 579] blk.37.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 452/ 579] blk.37.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 453/ 579] blk.37.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 454/ 579] blk.37.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 455/ 579] blk.37.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 456/ 579] blk.37.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 457/ 579] blk.37.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 458/ 579] blk.37.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 459/ 579] blk.37.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 460/ 579] blk.38.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 461/ 579] blk.38.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 462/ 579] blk.38.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 463/ 579] blk.38.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 464/ 579] blk.38.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 465/ 579] blk.38.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 466/ 579] blk.38.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 467/ 579] blk.38.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 468/ 579] blk.38.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 469/ 579] blk.38.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 470/ 579] blk.38.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 471/ 579] blk.38.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 472/ 579] blk.39.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 473/ 579] blk.39.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 474/ 579] blk.39.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 475/ 579] blk.39.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 476/ 579] blk.39.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 477/ 579] blk.39.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 478/ 579] blk.39.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 479/ 579] blk.39.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 480/ 579] blk.39.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 481/ 579] blk.39.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 482/ 579] blk.39.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 483/ 579] blk.39.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 484/ 579] blk.40.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 485/ 579] blk.40.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 486/ 579] blk.40.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 487/ 579] blk.40.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 488/ 579] blk.40.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 489/ 579] blk.40.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 490/ 579] blk.40.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 491/ 579] blk.40.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 492/ 579] blk.40.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q4_K .. size = 8.31 MiB -> 2.34 MiB | |
[ 493/ 579] blk.40.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 494/ 579] blk.40.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 495/ 579] blk.40.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 496/ 579] blk.41.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 497/ 579] blk.41.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 498/ 579] blk.41.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 499/ 579] blk.41.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 500/ 579] blk.41.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 501/ 579] blk.41.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 502/ 579] blk.41.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 503/ 579] blk.41.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 504/ 579] blk.41.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 505/ 579] blk.41.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 506/ 579] blk.41.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 507/ 579] blk.41.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 508/ 579] blk.42.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 509/ 579] blk.42.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 510/ 579] blk.42.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 511/ 579] blk.42.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 512/ 579] blk.42.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 513/ 579] blk.42.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 514/ 579] blk.42.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 515/ 579] blk.42.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 516/ 579] blk.42.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 517/ 579] blk.42.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 518/ 579] blk.42.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 519/ 579] blk.42.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 520/ 579] blk.43.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 521/ 579] blk.43.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 522/ 579] blk.43.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 523/ 579] blk.43.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 524/ 579] blk.43.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 525/ 579] blk.43.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 526/ 579] blk.43.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 527/ 579] blk.43.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 528/ 579] blk.43.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 529/ 579] blk.43.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 530/ 579] blk.43.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 531/ 579] blk.43.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 532/ 579] blk.44.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 533/ 579] blk.44.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 534/ 579] blk.44.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 535/ 579] blk.44.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 536/ 579] blk.44.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 537/ 579] blk.44.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 538/ 579] blk.44.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 539/ 579] blk.44.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 540/ 579] blk.44.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 541/ 579] blk.44.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 542/ 579] blk.44.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 543/ 579] blk.44.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 544/ 579] blk.45.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 545/ 579] blk.45.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 546/ 579] blk.45.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 547/ 579] blk.45.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 548/ 579] blk.45.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 549/ 579] blk.45.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 550/ 579] blk.45.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 551/ 579] blk.45.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 552/ 579] blk.45.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 553/ 579] blk.45.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 554/ 579] blk.45.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 555/ 579] blk.45.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 556/ 579] blk.46.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 557/ 579] blk.46.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 558/ 579] blk.46.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 559/ 579] blk.46.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 560/ 579] blk.46.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 561/ 579] blk.46.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 562/ 579] blk.46.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 563/ 579] blk.46.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 564/ 579] blk.46.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 565/ 579] blk.46.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 566/ 579] blk.46.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 567/ 579] blk.46.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 568/ 579] blk.47.attn_k.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 569/ 579] blk.47.attn_k.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 0.22 MiB -> 0.08 MiB | |
[ 570/ 579] blk.47.attn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 571/ 579] blk.47.attn_output.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 572/ 579] blk.47.attn_q.bias - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 573/ 579] blk.47.attn_q.weight - [ 896, 896, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 896 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 1.53 MiB -> 0.53 MiB | |
[ 574/ 579] blk.47.attn_v.bias - [ 128, 1, 1, 1], type = f32, size = 0.000 MB | |
[ 575/ 579] blk.47.attn_v.weight - [ 896, 128, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 128 are not divisible by 256, required for q6_K - using fallback quantization q8_0 | |
converting to q8_0 .. size = 0.22 MiB -> 0.12 MiB | |
[ 576/ 579] blk.47.ffn_down.weight - [ 4864, 896, 1, 1], type = bf16, converting to q6_K .. size = 8.31 MiB -> 3.41 MiB | |
[ 577/ 579] blk.47.ffn_gate.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
[ 578/ 579] blk.47.ffn_norm.weight - [ 896, 1, 1, 1], type = f32, size = 0.003 MB | |
[ 579/ 579] blk.47.ffn_up.weight - [ 896, 4864, 1, 1], type = bf16, | |
llama_tensor_get_type : tensor cols 896 x 4864 are not divisible by 256, required for q4_K - using fallback quantization q5_0 | |
converting to q5_0 .. size = 8.31 MiB -> 2.86 MiB | |
llama_model_quantize_internal: model size = 1884.85 MB | |
llama_model_quantize_internal: quant size = 698.72 MB | |
llama_model_quantize_internal: WARNING: 289 of 337 tensor(s) required fallback quantization | |
main: quantize time = 1814.01 ms | |
main: total time = 1814.01 ms |
uv run src/imatrix_dataset.py \ | |
--output $HF/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/calibration-dataset.txt \ | |
--datasource-plugin $HF/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/sea_commoncrawl_high_quality_plugin.py --plugin-class SeaCommonCrawlHQDataSource \ | |
--model $HF/Sailor2-1B-Chat/Sailor2-1B-Chat_Q4_K_M.gguf \ | |
--config $HF/Sailor2-1B-Chat/README.md --url sailor2/sea-commoncrawl-high-quality --num-samples 500 --shuffle --chunk-size 32768 | |
2025-01-06 10:21:16,204 - INFO - Loaded languages: ['en', 'zh', 'id', 'th', 'vi', 'ms', 'lo', 'my', 'jv', 'km', 'su', 'tl'] | |
2025-01-06 10:21:16,444 - INFO - PyTorch version 2.5.1 available. | |
2025-01-06 10:21:16,519 - INFO - Successfully loaded plugin class SeaCommonCrawlHQDataSource from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/sea_commoncrawl_high_quality_plugin.py. | |
2025-01-06 10:21:16,519 - INFO - Downloading 500 samples for en, skipping the first 0 entries. | |
2025-01-06 10:21:16,519 - ERROR - Unsupported language 'en'. Available languages: ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war'] | |
2025-01-06 10:21:16,520 - INFO - Appended 0 entries to raw_transactions_en.json. | |
2025-01-06 10:21:16,520 - INFO - Downloading 500 samples for zh, skipping the first 0 entries. | |
2025-01-06 10:21:16,520 - ERROR - Unsupported language 'zh'. Available languages: ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war'] | |
2025-01-06 10:21:16,520 - INFO - Appended 0 entries to raw_transactions_zh.json. | |
2025-01-06 10:21:16,525 - INFO - raw_transactions_id.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,534 - INFO - raw_transactions_th.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,546 - INFO - raw_transactions_vi.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,553 - INFO - Downloading 500 samples for ms, skipping the first 0 entries. | |
2025-01-06 10:21:16,553 - ERROR - Unsupported language 'ms'. Available languages: ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war'] | |
2025-01-06 10:21:16,553 - INFO - Appended 0 entries to raw_transactions_ms.json. | |
2025-01-06 10:21:16,561 - INFO - raw_transactions_lo.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,577 - INFO - raw_transactions_my.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,592 - INFO - raw_transactions_jv.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,614 - INFO - raw_transactions_km.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,624 - INFO - raw_transactions_su.json already contains enough samples (500), skipping API call. | |
2025-01-06 10:21:16,632 - INFO - raw_transactions_tl.json already contains enough samples (500), skipping API call. | |
llama_model_loader: loaded meta data with 33 key-value pairs and 579 tensors from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/Sailor2-1B-Chat_Q4_K_M.gguf (version GGUF V3 (latest)) | |
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. | |
llama_model_loader: - kv 0: general.architecture str = qwen2 | |
llama_model_loader: - kv 1: general.type str = model | |
llama_model_loader: - kv 2: general.name str = Sailor2 1B Chat | |
llama_model_loader: - kv 3: general.finetune str = Chat | |
llama_model_loader: - kv 4: general.basename str = Sailor2 | |
llama_model_loader: - kv 5: general.size_label str = 1B | |
llama_model_loader: - kv 6: general.license str = apache-2.0 | |
llama_model_loader: - kv 7: general.base_model.count u32 = 1 | |
llama_model_loader: - kv 8: general.base_model.0.name str = Sailor2 1B | |
llama_model_loader: - kv 9: general.base_model.0.organization str = Sail | |
llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/sail/Sailor2-1B | |
llama_model_loader: - kv 11: general.tags arr[str,6] = ["multilingual", "sea", "sailor", "sf... | |
llama_model_loader: - kv 12: general.languages arr[str,12] = ["en", "zh", "id", "th", "vi", "ms", ... | |
llama_model_loader: - kv 13: qwen2.block_count u32 = 48 | |
llama_model_loader: - kv 14: qwen2.context_length u32 = 32768 | |
llama_model_loader: - kv 15: qwen2.embedding_length u32 = 896 | |
llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 4864 | |
llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 14 | |
llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2 | |
llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000 | |
llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001 | |
llama_model_loader: - kv 21: general.file_type u32 = 15 | |
llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2 | |
llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2 | |
llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ... | |
llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... | |
llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... | |
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645 | |
llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643 | |
llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643 | |
llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false | |
llama_model_loader: - kv 31: tokenizer.chat_template str = {% for message in messages %}{% if lo... | |
llama_model_loader: - kv 32: general.quantization_version u32 = 2 | |
llama_model_loader: - type f32: 241 tensors | |
llama_model_loader: - type q5_0: 265 tensors | |
llama_model_loader: - type q8_0: 24 tensors | |
llama_model_loader: - type q4_K: 24 tensors | |
llama_model_loader: - type q6_K: 24 tensors | |
llama_model_loader: - type bf16: 1 tensors | |
llm_load_vocab: special tokens cache size = 22 | |
llm_load_vocab: token to piece cache size = 0.9310 MB | |
llm_load_print_meta: format = GGUF V3 (latest) | |
llm_load_print_meta: arch = qwen2 | |
llm_load_print_meta: vocab type = BPE | |
llm_load_print_meta: n_vocab = 151936 | |
llm_load_print_meta: n_merges = 151387 | |
llm_load_print_meta: vocab_only = 0 | |
llm_load_print_meta: n_ctx_train = 32768 | |
llm_load_print_meta: n_embd = 896 | |
llm_load_print_meta: n_layer = 48 | |
llm_load_print_meta: n_head = 14 | |
llm_load_print_meta: n_head_kv = 2 | |
llm_load_print_meta: n_rot = 64 | |
llm_load_print_meta: n_swa = 0 | |
llm_load_print_meta: n_embd_head_k = 64 | |
llm_load_print_meta: n_embd_head_v = 64 | |
llm_load_print_meta: n_gqa = 7 | |
llm_load_print_meta: n_embd_k_gqa = 128 | |
llm_load_print_meta: n_embd_v_gqa = 128 | |
llm_load_print_meta: f_norm_eps = 0.0e+00 | |
llm_load_print_meta: f_norm_rms_eps = 1.0e-06 | |
llm_load_print_meta: f_clamp_kqv = 0.0e+00 | |
llm_load_print_meta: f_max_alibi_bias = 0.0e+00 | |
llm_load_print_meta: f_logit_scale = 0.0e+00 | |
llm_load_print_meta: n_ff = 4864 | |
llm_load_print_meta: n_expert = 0 | |
llm_load_print_meta: n_expert_used = 0 | |
llm_load_print_meta: causal attn = 1 | |
llm_load_print_meta: pooling type = 0 | |
llm_load_print_meta: rope type = 2 | |
llm_load_print_meta: rope scaling = linear | |
llm_load_print_meta: freq_base_train = 1000000.0 | |
llm_load_print_meta: freq_scale_train = 1 | |
llm_load_print_meta: n_ctx_orig_yarn = 32768 | |
llm_load_print_meta: rope_finetuned = unknown | |
llm_load_print_meta: ssm_d_conv = 0 | |
llm_load_print_meta: ssm_d_inner = 0 | |
llm_load_print_meta: ssm_d_state = 0 | |
llm_load_print_meta: ssm_dt_rank = 0 | |
llm_load_print_meta: ssm_dt_b_c_rms = 0 | |
llm_load_print_meta: model type = ?B | |
llm_load_print_meta: model ftype = Q4_K - Medium | |
llm_load_print_meta: model params = 988.06 M | |
llm_load_print_meta: model size = 820.44 MiB (6.97 BPW) | |
llm_load_print_meta: general.name = Sailor2 1B Chat | |
llm_load_print_meta: BOS token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOS token = 151645 '<|im_end|>' | |
llm_load_print_meta: PAD token = 151643 '<|endoftext|>' | |
llm_load_print_meta: LF token = 148848 'ÄĬ' | |
llm_load_print_meta: EOT token = 151645 '<|im_end|>' | |
llm_load_print_meta: EOG token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOG token = 151645 '<|im_end|>' | |
llm_load_print_meta: max token length = 256 | |
llm_load_tensors: ggml ctx size = 0.25 MiB | |
llm_load_tensors: offloading 0 repeating layers to GPU | |
llm_load_tensors: offloaded 0/49 layers to GPU | |
llm_load_tensors: CPU buffer size = 820.44 MiB | |
............................................................ | |
llama_new_context_with_model: n_ctx = 512 | |
llama_new_context_with_model: n_batch = 512 | |
llama_new_context_with_model: n_ubatch = 512 | |
llama_new_context_with_model: flash_attn = 0 | |
llama_new_context_with_model: freq_base = 1000000.0 | |
llama_new_context_with_model: freq_scale = 1 | |
llama_kv_cache_init: CPU KV buffer size = 12.00 MiB | |
llama_new_context_with_model: KV self size = 12.00 MiB, K (f16): 6.00 MiB, V (f16): 6.00 MiB | |
llama_new_context_with_model: CPU output buffer size = 0.58 MiB | |
llama_new_context_with_model: CPU compute buffer size = 298.50 MiB | |
llama_new_context_with_model: graph nodes = 1686 | |
llama_new_context_with_model: graph splits = 770 | |
AVX = 0 | AVX_VNNI = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | AVX512_BF16 = 0 | FMA = 0 | NEON = 1 | SVE = 0 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | RISCV_VECT = 0 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | SSSE3 = 0 | VSX = 0 | MATMUL_INT8 = 1 | LLAMAFILE = 1 | | |
Model metadata: {'tokenizer.chat_template': "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", 'tokenizer.ggml.add_bos_token': 'false', 'tokenizer.ggml.padding_token_id': '151643', 'tokenizer.ggml.eos_token_id': '151645', 'qwen2.attention.layer_norm_rms_epsilon': '0.000001', 'general.basename': 'Sailor2', 'qwen2.attention.head_count_kv': '2', 'general.size_label': '1B', 'general.base_model.0.name': 'Sailor2 1B', 'qwen2.embedding_length': '896', 'qwen2.context_length': '32768', 'qwen2.block_count': '48', 'general.base_model.0.organization': 'Sail', 'tokenizer.ggml.pre': 'qwen2', 'general.base_model.count': '1', 'qwen2.rope.freq_base': '1000000.000000', 'general.quantization_version': '2', 'general.license': 'apache-2.0', 'general.base_model.0.repo_url': 'https://huggingface.co/sail/Sailor2-1B', 'general.file_type': '15', 'general.finetune': 'Chat', 'general.name': 'Sailor2 1B Chat', 'qwen2.feed_forward_length': '4864', 'general.architecture': 'qwen2', 'qwen2.attention.head_count': '14', 'tokenizer.ggml.bos_token_id': '151643', 'general.type': 'model', 'tokenizer.ggml.model': 'gpt2'} | |
Available chat formats from metadata: chat_template.default | |
Using gguf chat template: {% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system | |
You are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|> | |
' }}{% endif %}{{'<|im_start|>' + message['role'] + ' | |
' + message['content'] + '<|im_end|>' + ' | |
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant | |
' }}{% endif %} | |
Using chat eos_token: <|im_end|> | |
Using chat bos_token: <|endoftext|> | |
2025-01-06 10:21:31,952 - INFO - Combined dataset with balanced chunks written to /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/sea-commoncrawl-high-quality/calibration-dataset.txt | |
2025-01-06 10:21:31,962 - WARNING - raw_transactions_en.json contains only 0 samples, fewer than requested 500. | |
2025-01-06 10:21:31,962 - WARNING - raw_transactions_zh.json contains only 0 samples, fewer than requested 500. | |
2025-01-06 10:21:31,967 - INFO - raw_transactions_id.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,971 - INFO - raw_transactions_th.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,979 - INFO - raw_transactions_vi.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,979 - WARNING - raw_transactions_ms.json contains only 0 samples, fewer than requested 500. | |
2025-01-06 10:21:31,987 - INFO - raw_transactions_lo.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:31,996 - INFO - raw_transactions_my.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,002 - INFO - raw_transactions_jv.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,006 - INFO - raw_transactions_km.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,011 - INFO - raw_transactions_su.json contains 500 samples, meeting the requested count. | |
2025-01-06 10:21:32,016 - INFO - raw_transactions_tl.json contains 500 samples, meeting the requested count. |
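Note the failure mode in this run: the sea-commoncrawl-high-quality subset does not cover 'en', 'zh', or 'ms' (its available languages are my, ceb, ilo, id, jv, km, lo, min, su, tl, th, vi, war), so those three requests produce empty raw_transactions_*.json files and the end-of-run warnings, while the combined calibration dataset is built only from the supported languages. A hedged sketch of the pre-filtering one could do before sampling (both list literals come from the log above; the plugin's real API is not shown here, so this is illustration, not the gist's actual code):

```python
# Hedged sketch (not the actual plugin API): intersect the requested
# language list with what the datasource reports as available, so
# languages like 'en', 'zh', and 'ms' are skipped up front instead of
# erroring out and leaving empty sample files behind.
requested = ['en', 'zh', 'id', 'th', 'vi', 'ms', 'lo', 'my', 'jv', 'km', 'su', 'tl']
available = ['my', 'ceb', 'ilo', 'id', 'jv', 'km', 'lo', 'min', 'su', 'tl', 'th', 'vi', 'war']

supported = [lang for lang in requested if lang in available]
skipped = [lang for lang in requested if lang not in available]
print(f"sampling: {supported}")
print(f"skipped (not in this dataset): {skipped}")
```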
uv run ../src/kl_d_bench.py \ | |
--baseline-model $HF/Sailor2-1B-Chat/Sailor2-1B-Chat_bf16.gguf \ | |
--target-model $HF/Sailor2-1B-Chat/Sailor2-1B-Chat_Q4_K_M.gguf \ | |
--dataset $HF/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt \ | |
--batch-size 4096 --ubatch-size 4096 --context-size 4096 \ | |
--n-gpu-layers 99 --seed 42 --top-p 1 --top-k 1 --temp 0 \ | |
--early-stopping --compute-overall \ | |
--kld-precision 64 --model-precision 32 --parts 4 --verbosity DEBUG | tee baseline.log | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=770, beta=63), stopping probability=0.000883 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7742) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.240241, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2688938595122996, EMA_p_value_std_dev: 0.016285304607153703 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=771, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=771, beta=63), stopping probability=0.000909 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8165) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.302724, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.26851627416008733, EMA_p_value_std_dev: 0.016133462446776332 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=772, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=772, beta=63), stopping probability=0.000936 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0698) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.284739, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2680663488687824, EMA_p_value_std_dev: 0.015973730155451085 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=773, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=773, beta=63), stopping probability=0.000964 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8095) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.293976, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2666827292876756, EMA_p_value_std_dev: 0.01581446060608855 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=774, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=774, beta=63), stopping probability=0.000992 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2047) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.271693, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2648925872407296, EMA_p_value_std_dev: 0.0156567794572488 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=775, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=775, beta=63), stopping probability=0.001021 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7742) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.276494, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.26271040401176154, EMA_p_value_std_dev: 0.015500604329960628 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=776, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=776, beta=63), stopping probability=0.001051 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8092) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.241267, p-value=0.000014 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.26067552440929165, EMA_p_value_std_dev: 0.015346046001708951 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=777, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=777, beta=63), stopping probability=0.001082 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8765) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.231344, p-value=0.000042 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2586934636533882, EMA_p_value_std_dev: 0.01519315179176191 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=778, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=778, beta=63), stopping probability=0.001114 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0934) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.315993, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2572210098362066, EMA_p_value_std_dev: 0.015041782721332775 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=779, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=779, beta=63), stopping probability=0.001146 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8585) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.309270, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.255767728756761, EMA_p_value_std_dev: 0.014891924009030642 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=780, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=780, beta=63), stopping probability=0.001179 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1832) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.288047, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2541088720318699, EMA_p_value_std_dev: 0.014743560327191102 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=781, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=781, beta=63), stopping probability=0.001213 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0072) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.298400, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.25244715671843315, EMA_p_value_std_dev: 0.014596682070996304 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=782, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=782, beta=63), stopping probability=0.001248 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9237) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.260753, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.250503843583963, EMA_p_value_std_dev: 0.014451085723711444 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=783, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=783, beta=63), stopping probability=0.001283 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9022) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.283089, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24878428755671259, EMA_p_value_std_dev: 0.014306941634020093 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=784, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=784, beta=63), stopping probability=0.001320 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8552) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.237844, p-value=0.000020 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24733916910617576, EMA_p_value_std_dev: 0.014164318940434193 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=785, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=785, beta=63), stopping probability=0.001358 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9908) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.311142, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24643247075686348, EMA_p_value_std_dev: 0.014023118944732595 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=786, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=786, beta=63), stopping probability=0.001396 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2203) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.255076, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24573709720374193, EMA_p_value_std_dev: 0.013883325035745154 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=787, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=787, beta=63), stopping probability=0.001435 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7459) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.220365, p-value=0.000139 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2451113709091207, EMA_p_value_std_dev: 0.013745440227385608 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=788, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=788, beta=63), stopping probability=0.001476 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7105) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.226885, p-value=0.000069 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24427054037170579, EMA_p_value_std_dev: 0.013608917223079888 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=789, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=789, beta=63), stopping probability=0.001517 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9023) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.241110, p-value=0.000014 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24283904748523719, EMA_p_value_std_dev: 0.013473763705463285 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=790, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=790, beta=63), stopping probability=0.001560 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7047) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.179415, p-value=0.006465 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24151596773486267, EMA_p_value_std_dev: 0.013367956510830222 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=791, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=791, beta=63), stopping probability=0.001603 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8903) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.230674, p-value=0.000046 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.24069167649245243, EMA_p_value_std_dev: 0.013263155355783304 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=792, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=792, beta=63), stopping probability=0.001648 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8513) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.224114, p-value=0.000093 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2398741778403336, EMA_p_value_std_dev: 0.013159448469866318 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=793, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=793, beta=63), stopping probability=0.001693 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7172) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.207256, p-value=0.000528 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.23911171899593944, EMA_p_value_std_dev: 0.013056339978072564 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=794, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=794, beta=63), stopping probability=0.001740 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9032) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.263216, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2384097402062265, EMA_p_value_std_dev: 0.012954276250620457 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=795, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=795, beta=63), stopping probability=0.001788 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6911) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.166299, p-value=0.017993 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2378173721703721, EMA_p_value_std_dev: 0.012904608371642319 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=796, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=796, beta=63), stopping probability=0.001837 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9540) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.225186, p-value=0.000083 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.2383331629303817, EMA_p_value_std_dev: 0.012855392734947896 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=797, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=797, beta=63), stopping probability=0.001887 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8705) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Kuiper statistic=0.230093, p-value=0.000049 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated EMA_relative_change: 0.23877735473612513, EMA_p_value_std_dev: 0.012806719320656527 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Updated Beta parameters: alpha=798, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Chunk 26: Beta parameters updated (alpha=798, beta=63), stopping probability=0.001938 | |
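Each 128-token segment's KL values are compared against the accumulated distribution with a Kuiper test, a Kolmogorov-Smirnov variant that is equally sensitive in both tails. A self-contained sketch of the two-sample Kuiper statistic (the textbook definition, offered as an assumption about what these log lines measure, not the tool's exact implementation):

import numpy as np

def kuiper_two_sample(x: np.ndarray, y: np.ndarray) -> float:
    """Two-sample Kuiper statistic V = D+ + D-: the sum of the maximal
    deviations of the two empirical CDFs in each direction."""
    grid = np.sort(np.concatenate([x, y]))
    cdf_x = np.searchsorted(np.sort(x), grid, side="right") / len(x)
    cdf_y = np.searchsorted(np.sort(y), grid, side="right") / len(y)
    return float((cdf_x - cdf_y).max() + (cdf_y - cdf_x).max())

# e.g. kuiper_two_sample(segment_kl, reference_kl) -> statistic like the
# "Kuiper statistic=0.2..." values logged above.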
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Added chunk 25 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Added chunk 25 to freed chunks list in target_logits.h5. | |
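The early-stopping lines maintain a Beta(alpha, beta) posterior: segments whose statistics satisfy the update condition increment alpha, and the stopping probability is read from the posterior tail against the logged confidence_level of 0.95. One plausible reading of that rule, stated as an assumption rather than the tool's exact formula:

from scipy.stats import beta

def stopping_probability(alpha: int, beta_: int, confidence_level: float = 0.95) -> float:
    """Posterior probability that the underlying rate exceeds the
    confidence level -- a hypothetical reconstruction of the logged rule."""
    return float(beta.sf(confidence_level, alpha, beta_))

# stopping_probability(798, 63) yields a small tail probability that grows
# as alpha is incremented, matching the trend of the values logged above.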
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Processing chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Generating logits for model, chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:57:56 - INFO - Processing chunks from 27 to 27 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:57:56 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Inference time: 5508.97 ms | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Reusing freed chunk 1 for chunk 27. | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Written chunk 27 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[27] 5966.55 ms | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:58:02 - INFO - Generating logits for model, chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:58:02 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:03 - INFO - Processing chunks from 27 to 27 | |
[llama_gguf_optmize v0.6.0] 09:58:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Inference time: 2433.17 ms | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Reusing freed chunk 1 for chunk 27. | |
[llama_gguf_optmize v0.6.0] 09:58:05 - DEBUG - Written chunk 27 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[27] 2864.40 ms | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
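Both logits files stay at a constant ~4.8 GB because the runner recycles storage: once a chunk has been compared, its physical slot goes onto a freed list, and the next chunk is written into that slot ("Reusing freed chunk 1 for chunk 27" above). A sketch of that bookkeeping with h5py (the dataset name and class are illustrative, not the files' actual schema):

import h5py
import numpy as np

class SlotRecycler:
    """Fixed-capacity chunk store: freed slots are reused before new ones."""

    def __init__(self, path: str, n_slots: int, chunk_shape: tuple[int, int]):
        self.f = h5py.File(path, "a")
        self.dset = self.f.require_dataset(
            "logits", shape=(n_slots, *chunk_shape), dtype="float32"
        )
        self.slot_of_chunk: dict[int, int] = {}
        self.freed: list[int] = []
        self.next_slot = 0

    def write(self, chunk_id: int, logits: np.ndarray) -> int:
        if self.freed:
            slot = self.freed.pop(0)   # "Reusing freed chunk N for chunk M"
        else:
            slot = self.next_slot
            self.next_slot += 1
        self.dset[slot] = logits       # "Written chunk M at physical slot N"
        self.slot_of_chunk[chunk_id] = slot
        return slot

    def free(self, chunk_id: int) -> None:
        # "Added chunk N to freed chunks list"
        self.freed.append(self.slot_of_chunk.pop(chunk_id))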
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Comparing logits for chunk 27 | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:58:06 - INFO - Processing chunks 27 to 27... | |
[llama_gguf_optmize v0.6.0] 09:58:07 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 09:58:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 09:58:16 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 09:58:21 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - | |
===== KL-divergence statistics for Chunk 27 ===== | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Average : 0.017473 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - StdDev : 0.031769 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Median : 0.009938 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Maximum : 1.188030 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_99 : 0.123103 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_95 : 0.059611 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_90 : 0.039959 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_10 : 0.000158 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_05 : 0.000040 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - KLD_01 : 0.000006 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Samples seen: 110464 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
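With --parts 4, each 4096-token chunk is scored in four 1024-value slices (the kl_values_list sizes above) and the parts are pooled for the summary statistics. A sketch of deriving those statistics from the pooled values, with stand-in data (names mirror the log output, not the tool's internals):

import numpy as np

kl_values_list = [np.random.rand(1024) * 0.05 for _ in range(4)]  # stand-in data
kl = np.concatenate(kl_values_list)

stats = {
    "Average": kl.mean(),
    "StdDev":  kl.std(),
    "Median":  np.median(kl),
    "Minimum": kl.min(),
    "Maximum": kl.max(),
    # KLD_99 is the 99th percentile (the worst 1% of tokens), KLD_01 the 1st.
    **{f"KLD_{q:02d}": np.percentile(kl, q) for q in (99, 95, 90, 10, 5, 1)},
}
for name, value in stats.items():
    print(f"{name:8s}: {value:.6f}")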
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8017) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.272066, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23888920872313707, EMA_p_value_std_dev: 0.01275909250729215 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=799, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=799, beta=63), stopping probability=0.001991 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9508) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.268840, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2377259615997975, EMA_p_value_std_dev: 0.0127119413071079 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=800, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=800, beta=63), stopping probability=0.002045 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6981) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.252049, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23591169572546333, EMA_p_value_std_dev: 0.01258551252350339 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=801, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=801, beta=63), stopping probability=0.002100 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7505) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.223127, p-value=0.000104 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23446352186955832, EMA_p_value_std_dev: 0.01246042462392792 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=802, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=802, beta=63), stopping probability=0.002156 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6757) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.182211, p-value=0.005130 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2330309860214457, EMA_p_value_std_dev: 0.012358898145795579 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=803, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=803, beta=63), stopping probability=0.002214 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0154) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.217157, p-value=0.000195 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23213882015305, EMA_p_value_std_dev: 0.012258177610971501 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=804, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=804, beta=63), stopping probability=0.002273 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8828) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.222165, p-value=0.000115 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2312065479553198, EMA_p_value_std_dev: 0.012158329936266579 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=805, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=805, beta=63), stopping probability=0.002334 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8292) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.213186, p-value=0.000292 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.23000529998341054, EMA_p_value_std_dev: 0.012059159058411576 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=806, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=806, beta=63), stopping probability=0.002396 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9065) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.251223, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22879749687625603, EMA_p_value_std_dev: 0.011961100797814749 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=807, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=807, beta=63), stopping probability=0.002459 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8110) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.246411, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22699861546616754, EMA_p_value_std_dev: 0.01184302407408586 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=808, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=808, beta=63), stopping probability=0.002524 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7232) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.215461, p-value=0.000232 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2256015762923993, EMA_p_value_std_dev: 0.01172618852665655 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=809, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=809, beta=63), stopping probability=0.002590 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7150) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.213783, p-value=0.000275 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2241247424997084, EMA_p_value_std_dev: 0.011610660680713055 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=810, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=810, beta=63), stopping probability=0.002658 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7476) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.202093, p-value=0.000869 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22233142482444823, EMA_p_value_std_dev: 0.011498368123227137 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=811, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=811, beta=63), stopping probability=0.002727 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7112) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.198473, p-value=0.001221 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.22055133791234857, EMA_p_value_std_dev: 0.0113887046849472 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=812, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=812, beta=63), stopping probability=0.002798 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7828) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.193870, p-value=0.001861 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2185829190246973, EMA_p_value_std_dev: 0.011281912535895328 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=813, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=813, beta=63), stopping probability=0.002870 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6570) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.240256, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2170139478479097, EMA_p_value_std_dev: 0.011176752113465757 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=814, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=814, beta=63), stopping probability=0.002944 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6815) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.249868, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2154057105397977, EMA_p_value_std_dev: 0.011073237280591591 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=815, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=815, beta=63), stopping probability=0.003020 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7612) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.221196, p-value=0.000127 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2141628531260812, EMA_p_value_std_dev: 0.0109712527302956 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=816, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=816, beta=63), stopping probability=0.003097 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9144) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.303059, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21386197728605422, EMA_p_value_std_dev: 0.010869969613415723 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=817, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=817, beta=63), stopping probability=0.003176 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6848) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.216198, p-value=0.000215 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21374136953118952, EMA_p_value_std_dev: 0.010762493050285725 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=818, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=818, beta=63), stopping probability=0.003257 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9191) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.205106, p-value=0.000651 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21363601376680427, EMA_p_value_std_dev: 0.010657807388672646 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=819, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=819, beta=63), stopping probability=0.003340 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8246) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.209633, p-value=0.000417 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21325782906615226, EMA_p_value_std_dev: 0.010554049380292438 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=820, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=820, beta=63), stopping probability=0.003424 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7985) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.205940, p-value=0.000600 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21178367703251127, EMA_p_value_std_dev: 0.01045148023900518 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=821, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=821, beta=63), stopping probability=0.003511 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7514) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.146186, p-value=0.070638 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2103286351849488, EMA_p_value_std_dev: 0.010660246241518052 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=822, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=822, beta=63), stopping probability=0.003599 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8360) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.260919, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21039756231697235, EMA_p_value_std_dev: 0.010867173745141422 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=823, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=823, beta=63), stopping probability=0.003689 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8516) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.229992, p-value=0.000049 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21082692068513115, EMA_p_value_std_dev: 0.011072707527303162 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=824, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=824, beta=63), stopping probability=0.003781 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8691) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.218698, p-value=0.000166 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21129115878588434, EMA_p_value_std_dev: 0.01127647064766372 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=825, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=825, beta=63), stopping probability=0.003875 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8716) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.280307, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2117191411337825, EMA_p_value_std_dev: 0.01147886222050015 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=826, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=826, beta=63), stopping probability=0.003971 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7481) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.206746, p-value=0.000555 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21144490837360805, EMA_p_value_std_dev: 0.011366693220233369 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=827, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=827, beta=63), stopping probability=0.004069 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9878) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.198536, p-value=0.001214 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.210901808443724, EMA_p_value_std_dev: 0.011258356569532802 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=828, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=828, beta=63), stopping probability=0.004169 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7435) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.226841, p-value=0.000070 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.21071404001394126, EMA_p_value_std_dev: 0.011151066430182337 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=829, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=829, beta=63), stopping probability=0.004272 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8910) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Kuiper statistic=0.262072, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated EMA_relative_change: 0.2101031538088608, EMA_p_value_std_dev: 0.011045086710279017 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Updated Beta parameters: alpha=830, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Chunk 27: Beta parameters updated (alpha=830, beta=63), stopping probability=0.004376 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Added chunk 26 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Added chunk 26 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Processing chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Generating logits for model, chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:26 - INFO - Processing chunks from 28 to 28 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:26 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Inference time: 5506.24 ms | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Reusing freed chunk 0 for chunk 28. | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Written chunk 28 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[28] 5961.03 ms | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
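===== Note: why the HDF5 file size stays flat =====

Each stored logits tensor is (4096 tokens x 151936 vocab) in float32. Once a chunk has been compared, its slot is released ("Added chunk 26 to freed chunks list ...") and the next chunk is written into the same physical slot ("Reusing freed chunk 0 for chunk 28", "Written chunk 28 at physical slot 0"), so each .h5 file only ever holds about two live chunks plus metadata. A quick check, assuming the reported "Final file size" is in MiB:

ctx_len, vocab = 4096, 151936
chunk_mib = ctx_len * vocab * 4 / 2**20  # one float32 logits chunk
print(chunk_mib)        # 2374.0 MiB per chunk
print(2 * chunk_mib)    # 4748.0 MiB -- close to the logged 4785.13 MB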
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Generating logits for model, chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:32 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:33 - INFO - Processing chunks from 28 to 28 | |
[llama_gguf_optmize v0.6.0] 09:58:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Inference time: 2387.65 ms | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Reusing freed chunk 0 for chunk 28. | |
[llama_gguf_optmize v0.6.0] 09:58:35 - DEBUG - Written chunk 28 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:58:35 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[28] 2827.37 ms | |
[llama_gguf_optmize v0.6.0] 09:58:35 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:58:35 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
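===== Note: baseline vs. target inference time =====

The baseline pass needs 5506.24 ms for this 4096-token chunk while the target pass needs 2387.65 ms, about 2.3x faster. That is consistent with baseline_logits.h5 coming from the full-precision reference model and target_logits.h5 from a lighter quantized build, though the log itself does not name the two models (an inference from the file names and timings, not something the log states). Throughput, for reference:

for name, ms in [("baseline", 5506.24), ("target", 2387.65)]:
    print(f"{name}: {4096 / (ms / 1000):.0f} tok/s")  # ~744 vs ~1716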
[llama_gguf_optmize v0.6.0] 09:58:36 - INFO - Comparing logits for chunk 28 | |
[llama_gguf_optmize v0.6.0] 09:58:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:58:36 - INFO - Processing chunks 28 to 28... | |
[llama_gguf_optmize v0.6.0] 09:58:36 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 09:58:41 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 09:58:46 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 09:58:51 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - | |
===== KL-divergence statistics for Chunk 28 ===== | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Average : 0.018790 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - StdDev : 0.031870 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Median : 0.010542 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Maximum : 1.052105 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_99 : 0.130811 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_95 : 0.064162 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_90 : 0.044110 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_10 : 0.000147 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_05 : 0.000041 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - KLD_01 : 0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Samples seen: 114560 | |
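===== Note: how the per-chunk KL statistics are produced (sketch) =====

For every token position the two models' logits are turned into probability distributions and compared with KL divergence, giving 4096 per-token values per chunk; the "Processing chunk 0, part 0..3" lines and the kl_values_list shapes [(1024,) x 4] show the chunk being handled in four 1024-row parts. A minimal self-contained sketch (assumptions: KL is taken as KL(baseline || target) in nats, and a small random vocab stands in for the real 151936):

import numpy as np

def log_softmax(x):
    x = x - x.max(axis=-1, keepdims=True)
    return x - np.log(np.exp(x).sum(axis=-1, keepdims=True))

def kl_per_token(baseline_logits, target_logits):
    # KL(P_baseline || Q_target) at every token position
    lp = log_softmax(baseline_logits)
    lq = log_softmax(target_logits)
    return (np.exp(lp) * (lp - lq)).sum(axis=-1)

rng = np.random.default_rng(0)
kl = kl_per_token(rng.normal(size=(4096, 512)), rng.normal(size=(4096, 512)))
print(f"Average : {kl.mean():.6f}")
print(f"Median  : {np.median(kl):.6f}")
for q in (99, 95, 90, 10, 5, 1):
    print(f"KLD_{q:02d}  : {np.percentile(kl, q):.6f}")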
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
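===== Note: what "segmentation 0/32 (4096 + 128 - 1/ 128)" means =====

The expression is ceiling division: each 4096-token chunk is split into ceil(4096 / 128) = 32 segments of 128 positions, and every segment below drives one Kuiper test, one EMA update, and one Beta-parameter increment. That is why alpha advances by exactly 32 over a chunk (831 through 862 across chunk 28).

n_segments = (4096 + 128 - 1) // 128  # ceiling division
print(n_segments)                     # 32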
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6217) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.190697, p-value=0.002472 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20951070129353994, EMA_p_value_std_dev: 0.010945107437383847 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=831, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=831, beta=63), stopping probability=0.004483 | |
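===== Note: the Beta early-stopping posterior (hedged sketch) =====

Each segment that satisfies the update condition counts as a success and increments alpha, while beta (fixed at 63 throughout this excerpt) would count failures; together they define a Beta(alpha, beta) posterior over the rate of "converged" segments. One plausible reading, given the "confidence_level: 0.95" lines, is that the stopping probability is the posterior mass above the confidence level, but the printed value (0.004483) only roughly matches that definition, so treat this as a guess at the tool's rule rather than its actual formula:

from scipy.stats import beta

a, b = 831, 63        # posterior counts from the log line above
conf_level = 0.95     # "confidence_level: 0.95" in this log
# Posterior tail mass P(p > 0.95); same order of magnitude as the
# logged stopping probability, but not an exact match.
print(beta.sf(conf_level, a, b))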
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5423) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.169638, p-value=0.014000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20901211074394935, EMA_p_value_std_dev: 0.010895055688396448 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=832, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=832, beta=63), stopping probability=0.004592 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8205) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.225417, p-value=0.000081 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20907457906126226, EMA_p_value_std_dev: 0.01084681810443832 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=833, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=833, beta=63), stopping probability=0.004703 | |
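===== Note: the Kuiper test (checked against the logged p-values) =====

The Kuiper statistic V is a relative of Kolmogorov-Smirnov that stays sensitive in both tails: V = D+ + D-, the sum of the largest deviations of one empirical CDF above and below the other. The log does not say which two samples are compared per segment, but its p-values are reproduced to about two significant figures by the standard asymptotic series with Stephens' small-sample correction at N = 128 (the segment length); the tool may use a slightly different adjustment:

import numpy as np

def kuiper_pvalue(v, n):
    # Asymptotic Kuiper tail probability with Stephens' correction.
    lam = (np.sqrt(n) + 0.155 + 0.24 / np.sqrt(n)) * v
    j = np.arange(1, 101)
    q = 2.0 * np.sum((4.0 * j**2 * lam**2 - 1.0) * np.exp(-2.0 * j**2 * lam**2))
    return float(min(max(q, 0.0), 1.0))

for v, logged in [(0.218698, 0.000166), (0.169638, 0.014000)]:
    print(f"V={v}: p~{kuiper_pvalue(v, 128):.6f} (log: {logged})")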
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8936) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.257672, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20909066633951992, EMA_p_value_std_dev: 0.010799153042659758 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=834, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=834, beta=63), stopping probability=0.004816 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8596) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.302350, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20873611212024348, EMA_p_value_std_dev: 0.010751964876789349 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=835, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=835, beta=63), stopping probability=0.004932 | |
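===== Note: the EMA bookkeeping (sketch) =====

EMA_relative_change and EMA_p_value_std_dev are exponential moving averages smoothed with the logged decay rate of 0.5, tracking, judging by their names, how much the prior shifts per update and how spread out recent p-values are (the exact inputs are not visible in the log). With decay 0.5 the memory is short: each new observation carries half the weight. A minimal sketch, assuming the plain EMA recurrence:

def ema(prev, value, decay=0.5):
    # decay = 0.5 per the "Adjusted decay rate: 0.5" lines
    return decay * prev + (1.0 - decay) * value

state = 0.20
for x in (0.25, 0.15, 0.20):
    state = ema(state, x)
    print(state)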
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0615) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.207984, p-value=0.000491 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20905847756135215, EMA_p_value_std_dev: 0.010706560816431494 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=836, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=836, beta=63), stopping probability=0.005050 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9975) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.317972, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20962640714475958, EMA_p_value_std_dev: 0.01060189091144209 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=837, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=837, beta=63), stopping probability=0.005170 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9478) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.249355, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.21038511192913664, EMA_p_value_std_dev: 0.010498321272010263 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=838, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=838, beta=63), stopping probability=0.005293 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7482) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.280567, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.2110762777034972, EMA_p_value_std_dev: 0.010395786720460145 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=839, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=839, beta=63), stopping probability=0.005419 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9465) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.254116, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.21102228327932743, EMA_p_value_std_dev: 0.010294271719573868 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=840, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=840, beta=63), stopping probability=0.005547 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9963) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.248135, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.21003657408922335, EMA_p_value_std_dev: 0.010191614255310125 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=841, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=841, beta=63), stopping probability=0.005678 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8382) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.253306, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20856413372106217, EMA_p_value_std_dev: 0.010089975923599613 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=842, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=842, beta=63), stopping probability=0.005811 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7913) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.218225, p-value=0.000174 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20712294597503422, EMA_p_value_std_dev: 0.009990092008236688 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=843, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=843, beta=63), stopping probability=0.005947 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7314) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.225749, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20547225195411664, EMA_p_value_std_dev: 0.009891189247641074 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=844, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=844, beta=63), stopping probability=0.006085 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9878) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.211992, p-value=0.000330 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20397804206542478, EMA_p_value_std_dev: 0.009793893057693915 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=845, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=845, beta=63), stopping probability=0.006227 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8821) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.215932, p-value=0.000221 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20249593100570495, EMA_p_value_std_dev: 0.009697460365988828 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=846, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=846, beta=63), stopping probability=0.006371 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7559) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.233789, p-value=0.000032 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.20095921213445803, EMA_p_value_std_dev: 0.00960190294376644 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=847, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=847, beta=63), stopping probability=0.006518 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6696) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.231183, p-value=0.000043 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19936908051653568, EMA_p_value_std_dev: 0.009507418345900697 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=848, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=848, beta=63), stopping probability=0.006667 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7902) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.150130, p-value=0.055099 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.1982094797079848, EMA_p_value_std_dev: 0.009657679550404722 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=849, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=849, beta=63), stopping probability=0.006820 | |
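===== Note: what "Condition met" seems to test (hypothesis) =====

Alpha is incremented even on segments where the Kuiper p-value exceeds 0.05 (p=0.055099 a few lines above), so the gate is evidently not a bare p < 0.05 check on the single test. A reconstruction consistent with every line in this excerpt is that the smoothed quantities are compared against the thetas, since EMA_relative_change (~0.20) always sits under theta_E (~0.3014) and EMA_p_value_std_dev (~0.01) under theta_P (0.05). This is a hypothesis inferred from the logged values, not the tool's documented rule:

def condition_met(ema_rel_change, ema_p_std, theta_e, theta_p):
    # hypothetical gate: both smoothed signals under their thresholds
    return ema_rel_change < theta_e and ema_p_std < theta_p

print(condition_met(0.1982094797079848, 0.009657679550404722,
                    0.30142758880617465, 0.050000000000000044))  # True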
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8530) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.238366, p-value=0.000019 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19859936433372627, EMA_p_value_std_dev: 0.009806786556793515 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=850, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=850, beta=63), stopping probability=0.006976 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8141) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.226231, p-value=0.000074 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19886634819964819, EMA_p_value_std_dev: 0.009954568337186538 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=851, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=851, beta=63), stopping probability=0.007134 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7906) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.246177, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.1993195911392359, EMA_p_value_std_dev: 0.01010090342341853 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=852, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=852, beta=63), stopping probability=0.007296 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8883) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.216912, p-value=0.000200 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19890080321034134, EMA_p_value_std_dev: 0.010245605316422634 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=853, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=853, beta=63), stopping probability=0.007461 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7475) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.230223, p-value=0.000048 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19768773877895596, EMA_p_value_std_dev: 0.010144174593233182 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=854, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=854, beta=63), stopping probability=0.007629 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6878) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.193731, p-value=0.001885 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19676437941729086, EMA_p_value_std_dev: 0.010051058498849796 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=855, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=855, beta=63), stopping probability=0.007800 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9430) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.284455, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19649887582291267, EMA_p_value_std_dev: 0.009958961847875711 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=856, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=856, beta=63), stopping probability=0.007974 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7985) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.266932, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19602159591796153, EMA_p_value_std_dev: 0.009867792953801312 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=857, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=857, beta=63), stopping probability=0.008152 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7209) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.234544, p-value=0.000030 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19577299792613292, EMA_p_value_std_dev: 0.009777684781637324 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=858, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=858, beta=63), stopping probability=0.008333 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8464) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.269643, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19549371974477028, EMA_p_value_std_dev: 0.009688526957402787 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=859, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=859, beta=63), stopping probability=0.008517 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6068) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.249788, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.194536324017785, EMA_p_value_std_dev: 0.009592010634012663 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=860, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=860, beta=63), stopping probability=0.008705 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8211) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.175310, p-value=0.009004 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.19407273980890022, EMA_p_value_std_dev: 0.009536458185588514 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=861, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=861, beta=63), stopping probability=0.008896 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7137) | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Kuiper statistic=0.180015, p-value=0.006154 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated EMA_relative_change: 0.1933769920912596, EMA_p_value_std_dev: 0.009483881298940889 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Updated Beta parameters: alpha=862, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Chunk 28: Beta parameters updated (alpha=862, beta=63), stopping probability=0.009091 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Added chunk 27 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Added chunk 27 to freed chunks list in target_logits.h5. | |
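===== Note: the learning rate never actually changes =====

Every "Learning rate updated from 0.009999999999999998 to 0.009999999999999998" line has the same value on both sides: the rate is pinned at 0.01 for the whole run, and the long decimal is ordinary floating-point drift in how a value near 0.01 is stored and printed. Illustrative only:

print(0.1 * 0.1)          # 0.010000000000000002 -- the same kind of drift
print(0.1 * 0.1 == 0.01)  # False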
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Processing chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - INFO - Generating logits for model, chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:58:55 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:58:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:58:56 - INFO - Processing chunks from 29 to 29 | |
[llama_gguf_optmize v0.6.0] 09:58:56 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:58:56 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:58:56 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Inference time: 5540.58 ms | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Reusing freed chunk 1 for chunk 29. | |
[llama_gguf_optmize v0.6.0] 09:59:01 - DEBUG - Written chunk 29 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[29] 5976.15 ms | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Generating logits for model, chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:02 - INFO - Processing chunks from 29 to 29 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:02 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Inference time: 2415.97 ms | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Reusing freed chunk 1 for chunk 29. | |
[llama_gguf_optmize v0.6.0] 09:59:05 - DEBUG - Written chunk 29 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[29] 2866.26 ms | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Comparing logits for chunk 29 | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:59:05 - INFO - Processing chunks 29 to 29... | |
[llama_gguf_optmize v0.6.0] 09:59:06 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 09:59:11 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 09:59:16 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 09:59:21 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - | |
===== KL-divergence statistics for Chunk 29 ===== | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Average : 0.023875 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - StdDev : 0.056183 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Median : 0.015091 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Maximum : 2.612865 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_99 : 0.157537 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_95 : 0.073438 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_90 : 0.050380 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_10 : 0.000327 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_05 : 0.000100 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - KLD_01 : 0.000019 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Samples seen: 118656 | |
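===== Note: chunk 29 vs. chunk 28 =====

Chunk 29 diverges somewhat more than chunk 28 (Average 0.023875 vs 0.018790, Maximum 2.612865 vs 1.052105), and "Samples seen" advances by exactly one full 4096-token chunk between the two comparisons; the constant 128-position shortfall against a whole number of chunks presumably dates from the start of the run, which this excerpt does not show:

print(118656 - 114560)     # 4096: exactly one chunk of token positions
print(29 * 4096 - 118656)  # 128: constant offset vs. 29 whole chunks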
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6817) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.198107, p-value=0.001263 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19250419732823, EMA_p_value_std_dev: 0.009429974574036395 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=863, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=863, beta=63), stopping probability=0.009290 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9477) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.191354, p-value=0.002332 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19149207027982196, EMA_p_value_std_dev: 0.009373102447043759 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=864, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=864, beta=63), stopping probability=0.009492 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8893) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.233536, p-value=0.000033 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19039336748115465, EMA_p_value_std_dev: 0.009316727154076557 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=865, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=865, beta=63), stopping probability=0.009697 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8413) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.188607, p-value=0.002970 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18990725785470047, EMA_p_value_std_dev: 0.009246758815976313 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=866, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=866, beta=63), stopping probability=0.009907 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7550) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.157864, p-value=0.032910 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18965997066580317, EMA_p_value_std_dev: 0.009294417125171518 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=867, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=867, beta=63), stopping probability=0.010120 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7233) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.150879, p-value=0.052502 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18942778557938633, EMA_p_value_std_dev: 0.009435999776592315 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=868, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=868, beta=63), stopping probability=0.010337 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0673) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.270116, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18990666483957971, EMA_p_value_std_dev: 0.009580280484148083 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=869, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=869, beta=63), stopping probability=0.010557 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8256) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.178165, p-value=0.007159 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1909349556356431, EMA_p_value_std_dev: 0.009711852292599184 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=870, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=870, beta=63), stopping probability=0.010782 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4929) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.178731, p-value=0.006836 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19145733548841087, EMA_p_value_std_dev: 0.009835847145944624 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=871, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=871, beta=63), stopping probability=0.011011 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9225) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.165091, p-value=0.019670 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19209455192114294, EMA_p_value_std_dev: 0.009946756899788231 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=872, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=872, beta=63), stopping probability=0.011244 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7165) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.170546, p-value=0.013062 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.19101514846852571, EMA_p_value_std_dev: 0.009921315002228545 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=873, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=873, beta=63), stopping probability=0.011481 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7260) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.187866, p-value=0.003168 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18964943434900638, EMA_p_value_std_dev: 0.009886925246226244 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=874, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=874, beta=63), stopping probability=0.011722 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8075) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.184364, p-value=0.004279 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18831657632244825, EMA_p_value_std_dev: 0.009857143633315777 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=875, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=875, beta=63), stopping probability=0.011967 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6313) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.174575, p-value=0.009545 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1869100917177987, EMA_p_value_std_dev: 0.009826191611951373 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=876, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=876, beta=63), stopping probability=0.012216 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8036) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.175161, p-value=0.009111 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18544472899929018, EMA_p_value_std_dev: 0.009768786678885111 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=877, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=877, beta=63), stopping probability=0.012470 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6812) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.213393, p-value=0.000286 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18413913642847052, EMA_p_value_std_dev: 0.009711013731376105 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=878, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=878, beta=63), stopping probability=0.012728 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8652) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.202419, p-value=0.000842 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18296929777592766, EMA_p_value_std_dev: 0.009658027802929606 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=879, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=879, beta=63), stopping probability=0.012990 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7395) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.150711, p-value=0.053075 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1824757230181702, EMA_p_value_std_dev: 0.009780807204849412 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=880, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=880, beta=63), stopping probability=0.013257 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7075) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.122184, p-value=0.257722 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18256295661506186, EMA_p_value_std_dev: 0.010783983547278421 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=881, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=881, beta=63), stopping probability=0.013529 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0504) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.208116, p-value=0.000485 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1833455463398918, EMA_p_value_std_dev: 0.011788507768123214 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=882, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=882, beta=63), stopping probability=0.013805 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8642) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.162970, p-value=0.022953 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18439528609658437, EMA_p_value_std_dev: 0.012755740724192246 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=883, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=883, beta=63), stopping probability=0.014085 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7127) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.207980, p-value=0.000492 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18551219764948002, EMA_p_value_std_dev: 0.013713858443126038 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=884, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=884, beta=63), stopping probability=0.014371 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7147) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.191884, p-value=0.002225 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18647880947638623, EMA_p_value_std_dev: 0.014701582380537523 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=885, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=885, beta=63), stopping probability=0.014661 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7985) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.204411, p-value=0.000696 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1861452700391996, EMA_p_value_std_dev: 0.014653248065166546 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=886, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=886, beta=63), stopping probability=0.014956 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6098) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.193218, p-value=0.001974 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18523425792913542, EMA_p_value_std_dev: 0.014603766556867593 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=887, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=887, beta=63), stopping probability=0.015256 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6096) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.147813, p-value=0.063833 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1842810976268743, EMA_p_value_std_dev: 0.014736947085957325 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=888, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=888, beta=63), stopping probability=0.015560 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7767) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.180126, p-value=0.006098 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18386611316415907, EMA_p_value_std_dev: 0.0148631865644551 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=889, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=889, beta=63), stopping probability=0.015870 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6113) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.197855, p-value=0.001293 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18357931828594098, EMA_p_value_std_dev: 0.01498927689676091 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=890, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=890, beta=63), stopping probability=0.016185 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9883) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.187283, p-value=0.003332 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1832937378104956, EMA_p_value_std_dev: 0.015110977106029613 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=891, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=891, beta=63), stopping probability=0.016504 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7478) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.169445, p-value=0.014208 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1825287936952436, EMA_p_value_std_dev: 0.015221823418513202 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=892, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=892, beta=63), stopping probability=0.016829 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7156) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.139692, p-value=0.104077 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.18171797062134473, EMA_p_value_std_dev: 0.015509202193431746 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=893, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=893, beta=63), stopping probability=0.017159 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8553) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Kuiper statistic=0.196803, p-value=0.001425 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated EMA_relative_change: 0.1814942148176164, EMA_p_value_std_dev: 0.01579938747071239 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Updated Beta parameters: alpha=894, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Chunk 29: Beta parameters updated (alpha=894, beta=63), stopping probability=0.017495 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Added chunk 28 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Added chunk 28 to freed chunks list in target_logits.h5. | |
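[Annotation] The long run of "Updated Beta parameters" lines above is the Bayesian early-stopping bookkeeping: each 128-token segment that meets the convergence condition increments alpha, and the stopping probability is then recomputed from the Beta(alpha, beta) posterior. The exact rule is not printed in the log, so the sketch below is one plausible reading, not the tool's actual code: it takes the stopping probability as the posterior tail mass above the logged confidence_level of 0.95. The function name and threshold interpretation are assumptions.

from scipy.stats import beta as beta_dist

def stopping_probability(alpha: int, beta: int, confidence_level: float = 0.95) -> float:
    # Posterior tail mass P(theta > confidence_level) for theta ~ Beta(alpha, beta):
    # the probability that the underlying "segment converged" rate exceeds the
    # confidence level, given alpha successes and beta failures so far.
    return float(beta_dist.sf(confidence_level, alpha, beta))

# With the last chunk-29 update above (alpha=894, beta=63) this lands in the
# same range as the logged stopping probability of 0.017495.
print(stopping_probability(894, 63))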
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Processing chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Generating logits for model, chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:25 - INFO - Processing chunks from 30 to 30 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:25 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Inference time: 5696.00 ms | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Reusing freed chunk 0 for chunk 30. | |
[llama_gguf_optmize v0.6.0] 09:59:31 - DEBUG - Written chunk 30 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[30] 6152.06 ms | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Generating logits for model, chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:32 - INFO - Processing chunks from 30 to 30 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:32 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Inference time: 2489.70 ms | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Reusing freed chunk 0 for chunk 30. | |
[llama_gguf_optmize v0.6.0] 09:59:35 - DEBUG - Written chunk 30 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[30] 2948.58 ms | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
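[Annotation] The "Reusing freed chunk 0 for chunk 30" / "Added chunk N to freed chunks list" lines show why the final file size stays pinned at 4785.13 MB: one chunk of logits is 4096 x 151936 float32 values (roughly 2.4 GB), so once a chunk has been compared its physical slot in baseline_logits.h5 and target_logits.h5 is marked free and overwritten by the next chunk, giving a two-slot rotating buffer rather than a file that grows per chunk. A minimal h5py sketch of that slot-reuse pattern; the dataset and attribute names are illustrative assumptions, not the tool's actual file layout.

import h5py
import numpy as np

N_SLOTS, SEQ_LEN, N_VOCAB = 2, 4096, 151936  # sizes taken from the log above

def write_chunk(path: str, chunk_idx: int, logits: np.ndarray) -> None:
    # "Reusing freed chunk M for chunk N" / "Written chunk N at physical slot M"
    with h5py.File(path, "a") as f:
        dset = f.require_dataset("logits", (N_SLOTS, SEQ_LEN, N_VOCAB), dtype="float32")
        freed = list(f.attrs.get("freed_slots", []))
        slot = int(freed.pop(0)) if freed else chunk_idx % N_SLOTS
        dset[slot] = logits
        f.attrs["freed_slots"] = np.asarray(freed, dtype=np.int64)
        f.attrs[f"slot_of_chunk_{chunk_idx}"] = slot

def free_chunk(path: str, chunk_idx: int) -> None:
    # "Added chunk N to freed chunks list in ...h5"
    with h5py.File(path, "a") as f:
        slot = int(f.attrs[f"slot_of_chunk_{chunk_idx}"])
        freed = list(f.attrs.get("freed_slots", []))
        f.attrs["freed_slots"] = np.asarray(freed + [slot], dtype=np.int64)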
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Comparing logits for chunk 30 | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 09:59:35 - INFO - Processing chunks 30 to 30... | |
[llama_gguf_optmize v0.6.0] 09:59:36 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 09:59:41 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 09:59:46 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 09:59:51 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - | |
===== KL-divergence statistics for Chunk 30 ===== | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Average : 0.023869 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - StdDev : 0.050627 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Median : 0.013036 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Maximum : 1.599863 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_99 : 0.216543 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_95 : 0.081015 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_90 : 0.052226 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_10 : 0.000090 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_05 : 0.000027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Samples seen: 122752 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
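[Annotation] The statistics table above is a plain summary of the 4096 per-token KL-divergence values for the chunk (computed in four parts of 1024 each, per the kl_values_list line). A numpy sketch that reproduces the table; token_kl assumes the conventional KL(baseline || target) over log-probabilities, which matches how llama.cpp-style comparisons are usually oriented, though the log does not state the direction explicitly.

import numpy as np

def token_kl(logp_baseline: np.ndarray, logp_target: np.ndarray) -> np.ndarray:
    # Per-token KL(baseline || target), summed over the 151936-entry vocab axis.
    p = np.exp(logp_baseline)
    return np.sum(p * (logp_baseline - logp_target), axis=-1)

def kl_summary(kl: np.ndarray) -> dict:
    # Mirrors the logged table for one 4096-token chunk.
    kld99, kld95, kld90, kld10, kld05, kld01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
    return {
        "Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
        "Minimum": kl.min(), "Maximum": kl.max(),
        "KLD_99": kld99, "KLD_95": kld95, "KLD_90": kld90,
        "KLD_10": kld10, "KLD_05": kld05, "KLD_01": kld01,
    }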
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6836) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.146891, p-value=0.067620 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18191758039599043, EMA_p_value_std_dev: 0.01609751092266995 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=895, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=895, beta=63), stopping probability=0.017836 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0542) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.178032, p-value=0.007236 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1825729772447379, EMA_p_value_std_dev: 0.016385522095396027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=896, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=896, beta=63), stopping probability=0.018182 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7627) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.211714, p-value=0.000339 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18323875641332094, EMA_p_value_std_dev: 0.01669335300483294 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=897, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=897, beta=63), stopping probability=0.018533 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8204) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.202158, p-value=0.000863 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18328449294404353, EMA_p_value_std_dev: 0.016818806078764494 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=898, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=898, beta=63), stopping probability=0.018890 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6627) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.235876, p-value=0.000025 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18310875543887137, EMA_p_value_std_dev: 0.01694474682384702 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=899, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=899, beta=63), stopping probability=0.019253 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0013) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.224845, p-value=0.000086 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18237319873303012, EMA_p_value_std_dev: 0.01680671345466865 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=900, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=900, beta=63), stopping probability=0.019621 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0104) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.245632, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18133880922474546, EMA_p_value_std_dev: 0.016642658557545428 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=901, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=901, beta=63), stopping probability=0.019995 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7857) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.156054, p-value=0.037251 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18101223181656273, EMA_p_value_std_dev: 0.016641765154245237 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=902, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=902, beta=63), stopping probability=0.020375 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8351) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.156545, p-value=0.036027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18018531859532455, EMA_p_value_std_dev: 0.016675770961532072 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=903, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=903, beta=63), stopping probability=0.020761 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6924) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.180531, p-value=0.005898 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17976517812919693, EMA_p_value_std_dev: 0.016700238700806735 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=904, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=904, beta=63), stopping probability=0.021152 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7732) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.204033, p-value=0.000722 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17949049107576262, EMA_p_value_std_dev: 0.01672317322229408 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=905, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=905, beta=63), stopping probability=0.021549 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8893) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.201695, p-value=0.000902 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17842482482329572, EMA_p_value_std_dev: 0.016744037859040488 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=906, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=906, beta=63), stopping probability=0.021953 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7264) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.156620, p-value=0.035845 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17799823107031182, EMA_p_value_std_dev: 0.016760828618271557 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=907, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=907, beta=63), stopping probability=0.022362 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7479) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.161100, p-value=0.026240 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17710937488216924, EMA_p_value_std_dev: 0.016754519298730387 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=908, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=908, beta=63), stopping probability=0.022777 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7171) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.184654, p-value=0.004175 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1762893677866171, EMA_p_value_std_dev: 0.016750576978363848 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=909, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=909, beta=63), stopping probability=0.023199 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5743) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.161655, p-value=0.025223 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17585805803058802, EMA_p_value_std_dev: 0.016734836458805854 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=910, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=910, beta=63), stopping probability=0.023627 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7675) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.215542, p-value=0.000230 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17556769456262067, EMA_p_value_std_dev: 0.016721211695255273 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=911, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=911, beta=63), stopping probability=0.024061 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0359) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.253651, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17559518984386258, EMA_p_value_std_dev: 0.016688050534628768 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=912, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=912, beta=63), stopping probability=0.024502 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7348) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.254734, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17539173679611128, EMA_p_value_std_dev: 0.016630636361397035 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=913, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=913, beta=63), stopping probability=0.024949 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8462) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.153288, p-value=0.044845 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17605271581834617, EMA_p_value_std_dev: 0.01666787728521364 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=914, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=914, beta=63), stopping probability=0.025402 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1379) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.211264, p-value=0.000355 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17687509057339498, EMA_p_value_std_dev: 0.016701011960392448 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=915, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=915, beta=63), stopping probability=0.025862 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1191) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.240381, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17754332413638754, EMA_p_value_std_dev: 0.016734055556191132 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=916, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=916, beta=63), stopping probability=0.026329 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7390) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.257402, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17828764857511964, EMA_p_value_std_dev: 0.016766770833788303 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=917, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=917, beta=63), stopping probability=0.026802 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1200) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.290751, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.17806872651282796, EMA_p_value_std_dev: 0.01679916277273027 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=918, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=918, beta=63), stopping probability=0.027282 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.8495) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.601933, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1787127247141417, EMA_p_value_std_dev: 0.01663315596681239 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=919, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=919, beta=63), stopping probability=0.027769 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3067) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.335889, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.1804889955339877, EMA_p_value_std_dev: 0.016467305982880893 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=920, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=920, beta=63), stopping probability=0.028263 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8563) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.248336, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18281142111731657, EMA_p_value_std_dev: 0.016303070031760385 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=921, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=921, beta=63), stopping probability=0.028763 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0688) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.282558, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18512093100649699, EMA_p_value_std_dev: 0.01614047302107115 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=922, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=922, beta=63), stopping probability=0.029271 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9378) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.387011, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18635525129767042, EMA_p_value_std_dev: 0.015979497939496382 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=923, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=923, beta=63), stopping probability=0.029786 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1062) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.302890, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18691711187031534, EMA_p_value_std_dev: 0.01582012858033542 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=924, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=924, beta=63), stopping probability=0.030307 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8930) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.390215, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18757687752328953, EMA_p_value_std_dev: 0.015662348930552978 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=925, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=925, beta=63), stopping probability=0.030836 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0362) | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Kuiper statistic=0.359070, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated EMA_relative_change: 0.18815794118822118, EMA_p_value_std_dev: 0.015506117300052701 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Updated Beta parameters: alpha=926, beta=63 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Chunk 30: Beta parameters updated (alpha=926, beta=63), stopping probability=0.031372 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Added chunk 29 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Added chunk 29 to freed chunks list in target_logits.h5. | |
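[Annotation] Each "Kuiper statistic=..., p-value=..." line above corresponds to one 128-token segment (the "segmentation 0/32" line: a 4096-token chunk in windows of 128), and the two EMA lines smooth the relative change and the p-value spread at the logged decay rate of 0.5. The Kuiper statistic is the two-sided cousin of Kolmogorov-Smirnov, V = D+ + D-, which weighs both tails of the ECDF difference equally. A minimal numpy sketch of the two-sample statistic and the EMA update; the log does not show which two samples the tool pairs per segment, so the inputs here are placeholders.

import numpy as np

def kuiper_two_sample(a: np.ndarray, b: np.ndarray) -> float:
    # V = D+ + D-: maximum ECDF excess in each direction between samples a and b.
    a, b = np.sort(a), np.sort(b)
    pooled = np.concatenate([a, b])
    cdf_a = np.searchsorted(a, pooled, side="right") / a.size
    cdf_b = np.searchsorted(b, pooled, side="right") / b.size
    return float(np.max(cdf_a - cdf_b) + np.max(cdf_b - cdf_a))

def ema_update(prev: float, new: float, decay: float = 0.5) -> float:
    # "Adjusted decay rate: 0.5" -- a plain exponential moving average,
    # as used for EMA_relative_change and EMA_p_value_std_dev above.
    return decay * prev + (1.0 - decay) * new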
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Processing chunk 31 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Generating logits for model, chunk 31 | |
[llama_gguf_optmize v0.6.0] 09:59:55 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 09:59:55 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 09:59:56 - INFO - Processing chunks from 31 to 31 | |
[llama_gguf_optmize v0.6.0] 09:59:56 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 09:59:56 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 09:59:56 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Inference time: 5557.11 ms | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Reusing freed chunk 1 for chunk 31. | |
[llama_gguf_optmize v0.6.0] 10:00:01 - DEBUG - Written chunk 31 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[31] 6070.42 ms | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Generating logits for model, chunk 31 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:02 - INFO - Processing chunks from 31 to 31 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:00:02 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Inference time: 2426.35 ms | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Reusing freed chunk 1 for chunk 31. | |
[llama_gguf_optmize v0.6.0] 10:00:05 - DEBUG - Written chunk 31 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[31] 2870.95 ms | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Comparing logits for chunk 31 | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:00:05 - INFO - Processing chunks 31 to 31... | |
[llama_gguf_optmize v0.6.0] 10:00:06 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:00:11 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:00:16 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:00:21 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - | |
===== KL-divergence statistics for Chunk 31 ===== | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Average : 0.019687 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - StdDev : 0.036965 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Median : 0.010690 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Maximum : 0.851223 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_99 : 0.159542 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_95 : 0.067962 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_90 : 0.044213 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_10 : 0.000060 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_05 : 0.000011 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - KLD_01 : 0.000001 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Samples seen: 126848 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6927) | |
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Kuiper statistic=0.238384, p-value=0.000019 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated EMA_relative_change: 0.18869146416694635, EMA_p_value_std_dev: 0.015351528723005109 | |
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated Beta parameters: alpha=927, beta=63
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Chunk 31: Beta parameters updated (alpha=927, beta=63), stopping probability=0.031916
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7856)
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Kuiper statistic=0.250618, p-value=0.000004
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated EMA_relative_change: 0.188669287332136, EMA_p_value_std_dev: 0.015198479479940036
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated Beta parameters: alpha=928, beta=63
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Chunk 31: Beta parameters updated (alpha=928, beta=63), stopping probability=0.032467
[... 29 further identical update cycles for chunk 31 omitted: alpha incremented 929 -> 957, stopping probability rising 0.033025 -> 0.051915, KL div 0.53-1.27, Kuiper p-values 0.000000-0.520621 ...]
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0224)
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Kuiper statistic=0.349671, p-value=0.000000
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated EMA_relative_change: 0.21331913525365526, EMA_p_value_std_dev: 0.02500976000807308
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Updated Beta parameters: alpha=958, beta=63
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Chunk 31: Beta parameters updated (alpha=958, beta=63), stopping probability=0.052714
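What the cycle above appears to be doing: each 128-token segment of the chunk is tested, a passing segment increments alpha of a Beta(alpha, beta) posterior ("Condition met: Incremented alpha."), and the reported "stopping probability" tracks how much posterior mass already sits above the 0.95 confidence level logged below. A minimal sketch of that reading (the alpha/beta counts, the 0.5 decay rate, and the 0.95 level come from the log; the exact update rule and the function names here are assumptions, not the tool's source):

    from scipy.stats import beta

    def stopping_probability(a: int, b: int, confidence_level: float = 0.95) -> float:
        # Posterior mass above the confidence level under Beta(a, b).
        return float(beta.sf(confidence_level, a, b))

    def ema(prev: float, x: float, decay: float = 0.5) -> float:
        # Plain exponential moving average, consistent with the logged
        # "Adjusted decay rate: 0.5" for EMA_relative_change and
        # EMA_p_value_std_dev.
        return decay * prev + (1.0 - decay) * x

    print(stopping_probability(958, 63))  # ~0.05, in line with the logged 0.052714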
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - confidence_level: 0.95
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - TOTAL CHUNKS processed.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Added chunk 30 to freed chunks list in baseline_logits.h5.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Added chunk 30 to freed chunks list in target_logits.h5.
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Processing chunk 32
[llama_gguf_optmize v0.6.0] 10:00:25 - INFO - Generating logits for model, chunk 32
[llama_gguf_optmize v0.6.0] 10:00:25 - DEBUG - Number of logits: 151936.
[llama_gguf_optmize v0.6.0] 10:00:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:26 - INFO - Processing chunks from 32 to 32
[llama_gguf_optmize v0.6.0] 10:00:26 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5
[llama_gguf_optmize v0.6.0] 10:00:26 - DEBUG - require_bos: add_bos_token=false (require_bos=False)
[llama_gguf_optmize v0.6.0] 10:00:26 - DEBUG - require_eos: add_eos_token=true (require_eos=True)
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Inference time: 5488.69 ms
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Logits shape (4096, 151936) dtype float32
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Reusing freed chunk 0 for chunk 32.
[llama_gguf_optmize v0.6.0] 10:00:31 - DEBUG - Written chunk 32 at physical slot 0
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks
[32] 6178.54 ms
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Processed 1 chunks
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Final file size: 4785.13 MB
ggml_metal_free: deallocating
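Note the pair of "Loaded precomputed tokens" lines: the calibration text is tokenized once and cached as a .npy file, so both the baseline and the target pass can slice chunks straight out of the cached array instead of re-tokenizing. A sketch of that reuse, assuming a flat 1-D array of token ids (the path and the 4096-token chunk size are from the log; the slicing itself is an illustration):

    import numpy as np

    TOKENS = "/Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy"
    CHUNK = 4096  # matches the logged logits shape (4096, 151936)

    tokens = np.load(TOKENS)                    # cached token ids, no re-tokenization
    chunk_32 = tokens[32 * CHUNK : 33 * CHUNK]  # the chunk generated above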
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Generating logits for model, chunk 32
[llama_gguf_optmize v0.6.0] 10:00:32 - DEBUG - Number of logits: 151936.
[llama_gguf_optmize v0.6.0] 10:00:32 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy
[llama_gguf_optmize v0.6.0] 10:00:33 - INFO - Processing chunks from 32 to 32
[llama_gguf_optmize v0.6.0] 10:00:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5
[llama_gguf_optmize v0.6.0] 10:00:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False)
[llama_gguf_optmize v0.6.0] 10:00:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True)
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Inference time: 2858.16 ms
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Logits shape (4096, 151936) dtype float32
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Reusing freed chunk 0 for chunk 32.
[llama_gguf_optmize v0.6.0] 10:00:35 - DEBUG - Written chunk 32 at physical slot 0
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks
[32] 3543.51 ms
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Processed 1 chunks
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Final file size: 4785.13 MB
ggml_metal_free: deallocating
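The "freed chunks list" and "physical slot" messages explain why both HDF5 files stay at a constant 4785.13 MB: once a chunk has been compared, its slot is released and the next chunk's logits are written over it, so disk use is bounded by the comparison window rather than by the whole dataset. A rough sketch of that recycling, assuming a resizable "logits" dataset and a caller-maintained free list (the names are illustrative, not the tool's API):

    import h5py
    import numpy as np

    def write_chunk(f: h5py.File, logits: np.ndarray, freed_slots: list) -> int:
        if freed_slots:
            slot = freed_slots.pop(0)        # "Reusing freed chunk 0 for chunk 32."
        else:
            slot = f["logits"].shape[0]      # no free slot: grow the dataset
            f["logits"].resize(slot + 1, axis=0)
        f["logits"][slot] = logits           # "Written chunk 32 at physical slot 0"
        return slot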
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Comparing logits for chunk 32
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file.
[llama_gguf_optmize v0.6.0] 10:00:36 - INFO - Processing chunks 32 to 32...
[llama_gguf_optmize v0.6.0] 10:00:37 - DEBUG - Processing chunk 0, part 0
[llama_gguf_optmize v0.6.0] 10:00:42 - DEBUG - Processing chunk 0, part 1
[llama_gguf_optmize v0.6.0] 10:00:47 - DEBUG - Processing chunk 0, part 2
[llama_gguf_optmize v0.6.0] 10:00:52 - DEBUG - Processing chunk 0, part 3
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)]
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO -
===== KL-divergence statistics for Chunk 32 =====
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Average : 0.020281
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - StdDev : 0.043231
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Median : 0.012936
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Minimum : 0.000000
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Maximum : 0.956846
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_99 : 0.140956
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_95 : 0.057223
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_90 : 0.040363
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_10 : 0.000410
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_05 : 0.000092
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - KLD_01 : 0.000010
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Samples seen: 130944
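The statistics block summarizes one KL value per token position: the 4096-token chunk is scored in four (1024,) parts, as the kl_values_list line shows, then reduced to mean, median, and tail percentiles (KLD_99 is the 99th percentile, KLD_01 the 1st). A sketch of that computation, assuming KL(baseline || target) over the 151936-entry softmax at each position (function names are illustrative):

    import numpy as np
    from scipy.special import log_softmax

    def chunk_kl(baseline: np.ndarray, target: np.ndarray) -> np.ndarray:
        # baseline/target: (4096, 151936) float32 logits -> one KL per token.
        logp = log_softmax(baseline.astype(np.float64), axis=-1)
        logq = log_softmax(target.astype(np.float64), axis=-1)
        return np.sum(np.exp(logp) * (logp - logq), axis=-1)

    def summarize(kl: np.ndarray) -> dict:
        p99, p95, p90, p10, p05, p01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
        return {"Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
                "Minimum": kl.min(), "Maximum": kl.max(),
                "KLD_99": p99, "KLD_95": p95, "KLD_90": p90,
                "KLD_10": p10, "KLD_05": p05, "KLD_01": p01}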
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128)
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9363)
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Kuiper statistic=0.144270, p-value=0.079416
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated EMA_relative_change: 0.2139865103982876, EMA_p_value_std_dev: 0.025114553002234953
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated Beta parameters: alpha=959, beta=63
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Chunk 32: Beta parameters updated (alpha=959, beta=63), stopping probability=0.053523
[... 30 further identical update cycles for chunk 32 omitted: alpha incremented 960 -> 989, stopping probability rising 0.054341 -> 0.082300, KL div 0.31-1.82, Kuiper p-values 0.000000-0.927508 ...]
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6584)
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Kuiper statistic=0.335251, p-value=0.000000
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Adjusted decay rate: 0.5
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated EMA_relative_change: 0.2309232090152675, EMA_p_value_std_dev: 0.0390505056132884
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Condition met: Incremented alpha.
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Updated Beta parameters: alpha=990, beta=63
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Chunk 32: Beta parameters updated (alpha=990, beta=63), stopping probability=0.083418
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - TOTAL CHUNKS processed. | |
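Note: the stopping probability logged with each Beta update is consistent with the upper-tail mass of a Beta(alpha, beta) posterior above the logged confidence_level of 0.95. A minimal sketch of that reading (an assumption about the formula, not the tool's verified source):

    from scipy.stats import beta as beta_dist

    def stopping_probability(alpha: int, beta: int, confidence_level: float = 0.95) -> float:
        # Posterior probability that the per-segment agreement rate exceeds
        # the confidence level: P(theta > 0.95 | Beta(alpha, beta)).
        return beta_dist.sf(confidence_level, alpha, beta)

    print(stopping_probability(985, 63))   # ~0.078, cf. the logged 0.077937
    print(stopping_probability(1022, 63))  # ~0.125, cf. the logged 0.124912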
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Added chunk 31 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Added chunk 31 to freed chunks list in target_logits.h5. | |
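Note: the "freed chunks list" / "Reusing freed chunk" messages describe a fixed pool of physical slots inside each HDF5 file: once a chunk has been compared, its slot is released and recycled for the next chunk, which is why both files stay pinned at 4785.13 MB. A rough sketch of the idea (the dataset names and layout here are hypothetical):

    import h5py
    import numpy as np

    def write_chunk(path: str, chunk_id: int, logits: np.ndarray, freed: list) -> int:
        # Prefer a freed physical slot so the file never grows.
        with h5py.File(path, "a") as f:
            dset = f["logits"]                   # e.g. shape (n_slots, 4096, 151936)
            slot = freed.pop(0) if freed else chunk_id % dset.shape[0]
            dset[slot] = logits
            f["slot_to_chunk"][slot] = chunk_id  # record which chunk owns the slot
        return slot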
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Processing chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Generating logits for model, chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:00:57 - INFO - Processing chunks from 33 to 33 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:00:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Inference time: 5570.12 ms | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Reusing freed chunk 1 for chunk 33. | |
[llama_gguf_optmize v0.6.0] 10:01:03 - DEBUG - Written chunk 33 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[33] 6050.90 ms | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Generating logits for model, chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:04 - INFO - Processing chunks from 33 to 33 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Inference time: 2728.24 ms | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Reusing freed chunk 1 for chunk 33. | |
[llama_gguf_optmize v0.6.0] 10:01:07 - DEBUG - Written chunk 33 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:01:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[33] 3170.56 ms | |
[llama_gguf_optmize v0.6.0] 10:01:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
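Note: the (4096, 151936) float32 arrays written above are full-vocabulary logits for one 4096-token chunk, generated first by the bf16 baseline and then by the quantized target. With llama-cpp-python, capturing logits like these looks roughly as follows (a sketch; the model path comes from this gist, everything else is an assumption about how the tool drives the library):

    import numpy as np
    from llama_cpp import Llama

    tokens = np.load("calibration-dataset.txt.tokens.npy")   # precomputed tokens
    llm = Llama(model_path="Sailor2-1B-Chat_bf16.gguf", n_ctx=4096, logits_all=True)
    llm.eval(tokens[:4096].tolist())    # evaluate one 4096-token chunk
    logits = np.array(llm.scores)       # shape (4096, 151936), float32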
[llama_gguf_optmize v0.6.0] 10:01:08 - INFO - Comparing logits for chunk 33 | |
[llama_gguf_optmize v0.6.0] 10:01:08 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:01:08 - INFO - Processing chunks 33 to 33... | |
[llama_gguf_optmize v0.6.0] 10:01:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:01:13 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:01:18 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:01:23 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - | |
===== KL-divergence statistics for Chunk 33 ===== | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Average : 0.017978 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - StdDev : 0.094371 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Median : 0.007920 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Maximum : 5.516145 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_99 : 0.141806 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_95 : 0.060467 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_90 : 0.037510 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_10 : 0.000070 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_05 : 0.000024 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Samples seen: 135040 | |
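Note: the "part 0..3" lines and the kl_values_list of four (1024,) arrays show the 4096-token chunk being scored in quarters, presumably to bound memory. The per-token KL divergence and the summary table above can be reproduced along these lines (a sketch, not the tool's verified implementation):

    import numpy as np

    def log_softmax(z: np.ndarray) -> np.ndarray:
        z = z - z.max(axis=-1, keepdims=True)
        return z - np.log(np.exp(z).sum(axis=-1, keepdims=True))

    def kl_per_token(baseline: np.ndarray, target: np.ndarray) -> np.ndarray:
        # KL(P_baseline || P_target) at each token position, from raw logits.
        logp = log_softmax(baseline.astype(np.float64))
        logq = log_softmax(target.astype(np.float64))
        return (np.exp(logp) * (logp - logq)).sum(axis=-1)

    # kl = np.concatenate([kl_per_token(b, t) for b, t in quarters])
    # Average/StdDev/Median/Min/Max: kl.mean(), kl.std(), np.median(kl), ...
    # KLD_99 ... KLD_01: np.percentile(kl, [99, 95, 90, 10, 5, 1])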
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
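Note: the segmentation line is a ceiling division: each 4096-token chunk is split into (4096 + 128 - 1) // 128 = 32 segments of 128 tokens, and each segment below gets its own Kuiper test and Beta update:

    chunk_tokens, segment_len = 4096, 128
    n_segments = (chunk_tokens + segment_len - 1) // segment_len   # = 32, as logged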
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6411) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.306680, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.229238792984905, EMA_p_value_std_dev: 0.038660976837533306 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=991, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=991, beta=63), stopping probability=0.084545 | |
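Note: each segment is scored with a Kuiper two-sample statistic, V = D+ + D-, the largest deviation of one empirical CDF above the other plus the largest deviation below. The log does not show which two samples are compared per segment; the statistic itself can be computed like this:

    import numpy as np

    def kuiper_statistic(x: np.ndarray, y: np.ndarray) -> float:
        # V = D+ + D- between the empirical CDFs of x and y.
        grid = np.sort(np.concatenate([x, y]))
        cdf_x = np.searchsorted(np.sort(x), grid, side="right") / len(x)
        cdf_y = np.searchsorted(np.sort(y), grid, side="right") / len(y)
        return float(np.max(cdf_x - cdf_y) + np.max(cdf_y - cdf_x))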
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9587) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.335894, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2275008583558687, EMA_p_value_std_dev: 0.03827533360247188 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=992, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=992, beta=63), stopping probability=0.085684 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8698) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.205334, p-value=0.000636 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22642311257617218, EMA_p_value_std_dev: 0.03789637622689043 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=993, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=993, beta=63), stopping probability=0.086833 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8143) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.287186, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22657812788005516, EMA_p_value_std_dev: 0.03752119891158167 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=994, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=994, beta=63), stopping probability=0.087993 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4877) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.460681, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22813538672665867, EMA_p_value_std_dev: 0.03714976399001603 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=995, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=995, beta=63), stopping probability=0.089163 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.0857) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.487324, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22963056576169025, EMA_p_value_std_dev: 0.036782034134029036 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=996, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=996, beta=63), stopping probability=0.090345 | |
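Note: the learning-rate lines that alternate between 0.01 and 0.009999999999999998 are a floating-point display artifact, not a real change: the two values are adjacent doubles, one ulp apart.

    import math
    print(math.nextafter(0.01, 0.0))   # 0.009999999999999998, one ulp below 0.01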
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 1.3339) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.405774, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23028215367520405, EMA_p_value_std_dev: 0.03641797238337607 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=997, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=997, beta=63), stopping probability=0.091537 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0539) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.175940, p-value=0.008563 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23155537997974882, EMA_p_value_std_dev: 0.036092902321990514 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=998, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=998, beta=63), stopping probability=0.092740 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1686) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.284873, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2328345919098627, EMA_p_value_std_dev: 0.03577107481915819 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=999, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=999, beta=63), stopping probability=0.093953 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5660) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.223374, p-value=0.000101 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23441896678675472, EMA_p_value_std_dev: 0.03545234727672423 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1000, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1000, beta=63), stopping probability=0.095178 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7314) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.293233, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23641054425020835, EMA_p_value_std_dev: 0.035136799024698226 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1001, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1001, beta=63), stopping probability=0.096414 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7444) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.324802, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23693537406965479, EMA_p_value_std_dev: 0.034824398366355556 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1002, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1002, beta=63), stopping probability=0.097660 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7465) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.322545, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23618081606876756, EMA_p_value_std_dev: 0.03447747601056208 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1003, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1003, beta=63), stopping probability=0.098917 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7737) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.268203, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2352563142294785, EMA_p_value_std_dev: 0.0341340137322417 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1004, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1004, beta=63), stopping probability=0.100186 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6303) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.312766, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23415290470370978, EMA_p_value_std_dev: 0.03379352905831523 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1005, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1005, beta=63), stopping probability=0.101465 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6184) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.315261, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23268732902795386, EMA_p_value_std_dev: 0.03345644073407296 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1006, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1006, beta=63), stopping probability=0.102756 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2487) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.280510, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23162046365459313, EMA_p_value_std_dev: 0.033122714802005154 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1007, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1007, beta=63), stopping probability=0.104057 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5641) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.231879, p-value=0.000040 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2305703162002142, EMA_p_value_std_dev: 0.03279249295057837 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1008, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1008, beta=63), stopping probability=0.105369 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6324) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.367976, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2300719994004717, EMA_p_value_std_dev: 0.03246556558413072 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1009, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1009, beta=63), stopping probability=0.106693 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9036) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.380311, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22961042264367604, EMA_p_value_std_dev: 0.03214189931904001 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1010, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1010, beta=63), stopping probability=0.108028 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9168) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.263567, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.22988999780263603, EMA_p_value_std_dev: 0.03182146068789297 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1011, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1011, beta=63), stopping probability=0.109373 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9082) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.417340, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23170197663002684, EMA_p_value_std_dev: 0.031504218535340515 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1012, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1012, beta=63), stopping probability=0.110730 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8475) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.331480, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23207751260281062, EMA_p_value_std_dev: 0.031189967856235744 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1013, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1013, beta=63), stopping probability=0.112098 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3253) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.508208, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23388925841975144, EMA_p_value_std_dev: 0.030878851827655117 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1014, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1014, beta=63), stopping probability=0.113477 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9557) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.799918, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.2366826140302733, EMA_p_value_std_dev: 0.03057083918145959 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1015, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1015, beta=63), stopping probability=0.114867 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.1675) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.768038, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.23785136526320838, EMA_p_value_std_dev: 0.03026589506079652 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1016, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1016, beta=63), stopping probability=0.116269 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 0.7935) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.225815, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24055131318786382, EMA_p_value_std_dev: 0.02996433951892635 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1017, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1017, beta=63), stopping probability=0.117681 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7249) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.221892, p-value=0.000118 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24134959893890842, EMA_p_value_std_dev: 0.02966599996858246 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1018, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1018, beta=63), stopping probability=0.119105 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9298) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.253720, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24052801280363847, EMA_p_value_std_dev: 0.029370631147999007 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1019, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1019, beta=63), stopping probability=0.120540 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9556) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.178115, p-value=0.007188 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24068100260732636, EMA_p_value_std_dev: 0.029109505153098722 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1020, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1020, beta=63), stopping probability=0.121986 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8815) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.264432, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24055713869275214, EMA_p_value_std_dev: 0.028850982988051155 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1021, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1021, beta=63), stopping probability=0.123444 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7463) | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Kuiper statistic=0.159542, p-value=0.029286 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated EMA_relative_change: 0.24153930590698958, EMA_p_value_std_dev: 0.028689515013827844 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Updated Beta parameters: alpha=1022, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Chunk 33: Beta parameters updated (alpha=1022, beta=63), stopping probability=0.124912 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Added chunk 32 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Added chunk 32 to freed chunks list in target_logits.h5. | |
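Note: from here the log repeats the same per-chunk cycle seen above. As a readable summary (the helper names are hypothetical, and the early-stopping rule is an assumption):

    for chunk in range(start_chunk, num_chunks):
        generate_logits(baseline_model, chunk, "baseline_logits.h5")  # bf16 reference
        generate_logits(target_model, chunk, "target_logits.h5")     # quantized model
        stats = compare_logits(chunk)         # KL summary + 32 Kuiper-tested segments
        prior = update_bayesian_prior(stats)  # alpha/beta and stopping probability
        if prior.stopping_probability >= stop_threshold:
            break                             # assumed early exit once confident
        free_slots(chunk - 1)                 # recycle the previous chunk's HDF5 slots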
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Processing chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - INFO - Generating logits for model, chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:28 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:28 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:28 - INFO - Processing chunks from 34 to 34 | |
[llama_gguf_optmize v0.6.0] 10:01:28 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:28 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:28 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Inference time: 5468.46 ms | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Reusing freed chunk 0 for chunk 34. | |
[llama_gguf_optmize v0.6.0] 10:01:33 - DEBUG - Written chunk 34 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[34] 5956.76 ms | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Generating logits for model, chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:34 - INFO - Processing chunks from 34 to 34 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Inference time: 2377.65 ms | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Reusing freed chunk 0 for chunk 34. | |
[llama_gguf_optmize v0.6.0] 10:01:37 - DEBUG - Written chunk 34 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[34] 2844.40 ms | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Comparing logits for chunk 34 | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:01:37 - INFO - Processing chunks 34 to 34... | |
[llama_gguf_optmize v0.6.0] 10:01:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:01:43 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:01:48 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:01:53 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - | |
===== KL-divergence statistics for Chunk 34 ===== | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Average : 0.021730 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - StdDev : 0.040784 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Median : 0.013903 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Maximum : 1.035068 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_99 : 0.152550 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_95 : 0.066096 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_90 : 0.045996 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_10 : 0.000371 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_05 : 0.000061 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - KLD_01 : 0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Samples seen: 139136 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5317) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.217718, p-value=0.000184 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24295342355634775, EMA_p_value_std_dev: 0.02852956555305134 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1023, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1023, beta=63), stopping probability=0.126392 | |
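Note: the EMA_relative_change and EMA_p_value_std_dev trackers drift only slightly from segment to segment. A standard exponential-moving-average update has the form below; exactly how the logged "Adjusted decay rate: 0.5" enters the tool's actual update is an assumption:

    def ema_update(prev: float, value: float, decay: float = 0.5) -> float:
        # Blend the previous estimate with the newest observation.
        return decay * prev + (1.0 - decay) * value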
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7409) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.197920, p-value=0.001285 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24409591418484713, EMA_p_value_std_dev: 0.028369477641266985 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1024, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1024, beta=63), stopping probability=0.127883 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9407) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.229463, p-value=0.000052 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.244229275590456, EMA_p_value_std_dev: 0.028215544108497925 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1025, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1025, beta=63), stopping probability=0.129385 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9690) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.240560, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24324385099553297, EMA_p_value_std_dev: 0.028063129436666716 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1026, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1026, beta=63), stopping probability=0.130899 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1387) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.198403, p-value=0.001229 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24203270127370588, EMA_p_value_std_dev: 0.027789643844840287 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1027, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1027, beta=63), stopping probability=0.132423 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8587) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.211017, p-value=0.000364 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.24073121656866228, EMA_p_value_std_dev: 0.027518676267322687 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1028, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1028, beta=63), stopping probability=0.133959 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1580) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.266168, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23960924190686192, EMA_p_value_std_dev: 0.027249393712194994 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1029, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1029, beta=63), stopping probability=0.135506 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1038) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.270395, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23845806018206225, EMA_p_value_std_dev: 0.026982870963473096 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1030, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1030, beta=63), stopping probability=0.137064 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9303) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.264160, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23674860378672488, EMA_p_value_std_dev: 0.02671902728515536 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1031, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1031, beta=63), stopping probability=0.138634 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9536) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.250305, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23501454160028998, EMA_p_value_std_dev: 0.026454120214184845 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1032, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1032, beta=63), stopping probability=0.140214 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9703) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.201221, p-value=0.000943 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23321660244797246, EMA_p_value_std_dev: 0.026194441847944182 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1033, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1033, beta=63), stopping probability=0.141806 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7468) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.260977, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23221878914030983, EMA_p_value_std_dev: 0.02593735310586036 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1034, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1034, beta=63), stopping probability=0.143409 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6759) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.141370, p-value=0.094403 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23238355784222522, EMA_p_value_std_dev: 0.0260987171976906 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1035, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1035, beta=63), stopping probability=0.145023 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4519) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.270133, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2342698072267933, EMA_p_value_std_dev: 0.02625847217952801 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1036, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1036, beta=63), stopping probability=0.146648 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4915) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.201484, p-value=0.000920 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2362094235824962, EMA_p_value_std_dev: 0.02641561851452433 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1037, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1037, beta=63), stopping probability=0.148285 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4343) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.248530, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23794365942868442, EMA_p_value_std_dev: 0.026572236033795864 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1038, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1038, beta=63), stopping probability=0.149932 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5905) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.209651, p-value=0.000417 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2387231254852501, EMA_p_value_std_dev: 0.026726826100216632 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1039, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1039, beta=63), stopping probability=0.151591 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1072) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.259334, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23861385496025986, EMA_p_value_std_dev: 0.026464274736517402 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1040, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1040, beta=63), stopping probability=0.153261 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6880) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.247281, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2380460421764414, EMA_p_value_std_dev: 0.02620433200685003 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1041, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1041, beta=63), stopping probability=0.154942 | |
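
Note that the learning rate never actually moves: every segment logs "from 0.009999999999999998 to 0.009999999999999998". That value is simply 0.01 sitting one ulp low, a routine binary floating-point artifact rather than a meaningful constant, e.g.:

print(0.03 - 0.02)   # 0.009999999999999998 in IEEE-754 double precision
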
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4223) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.198061, p-value=0.001268 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23735491173104142, EMA_p_value_std_dev: 0.02594841866915034 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1042, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1042, beta=63), stopping probability=0.156634 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4305) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.172736, p-value=0.011027 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2365637114488755, EMA_p_value_std_dev: 0.025737165826422607 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1043, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1043, beta=63), stopping probability=0.158336 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6979) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.218038, p-value=0.000178 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23581184355079468, EMA_p_value_std_dev: 0.025528297310724462 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1044, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1044, beta=63), stopping probability=0.160050 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6218) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.246796, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.235183790721094, EMA_p_value_std_dev: 0.0253215050423255 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1045, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1045, beta=63), stopping probability=0.161775 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5037) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.272758, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2342673308457656, EMA_p_value_std_dev: 0.025116783837538587 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1046, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1046, beta=63), stopping probability=0.163511 | |
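
The alpha/beta counters behave like a Beta-Bernoulli posterior over the per-segment agreement rate: each segment whose condition is met increments alpha, and the rising "stopping probability" is consistent with the posterior mass above the confidence_level of 0.95 reported at every checkpoint. A sketch of that reading (inferred from the logged numbers, not confirmed against the tool's source):

from scipy.stats import beta

def stopping_probability(alpha_param, beta_param, confidence_level=0.95):
    # Posterior probability that the agreement rate exceeds the confidence
    # level under Beta(alpha, beta); at alpha=1037, beta=63 this lands near
    # the ~0.148 logged above.
    return beta.sf(confidence_level, alpha_param, beta_param)
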
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4553) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.175468, p-value=0.008892 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23398060522554723, EMA_p_value_std_dev: 0.024920847084818447 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1047, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1047, beta=63), stopping probability=0.165258 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0303) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.155984, p-value=0.037429 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23313061081800382, EMA_p_value_std_dev: 0.024833680379604928 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1048, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1048, beta=63), stopping probability=0.167016 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4972) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.240245, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23335889381464234, EMA_p_value_std_dev: 0.024747612729476558 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1049, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1049, beta=63), stopping probability=0.168785 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5462) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.214386, p-value=0.000259 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23358793139871248, EMA_p_value_std_dev: 0.02466204846058643 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1050, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1050, beta=63), stopping probability=0.170564 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3947) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.225743, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23281429887073993, EMA_p_value_std_dev: 0.024577225813936676 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1051, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1051, beta=63), stopping probability=0.172355 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3117) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.312723, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23307515631905748, EMA_p_value_std_dev: 0.024498647372868686 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1052, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1052, beta=63), stopping probability=0.174156 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4292) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.293042, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.23193390027229516, EMA_p_value_std_dev: 0.024255371391164852 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1053, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1053, beta=63), stopping probability=0.175968 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3837) | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Kuiper statistic=0.307479, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated EMA_relative_change: 0.2305947835246991, EMA_p_value_std_dev: 0.024014543188691638 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Updated Beta parameters: alpha=1054, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Chunk 34: Beta parameters updated (alpha=1054, beta=63), stopping probability=0.177791 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Added chunk 33 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Added chunk 33 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Processing chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Generating logits for model, chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:01:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:01:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:58 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:01:58 - INFO - Processing chunks from 35 to 35 | |
[llama_gguf_optmize v0.6.0] 10:01:58 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:01:58 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:01:58 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Inference time: 5539.83 ms | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Reusing freed chunk 1 for chunk 35. | |
[llama_gguf_optmize v0.6.0] 10:02:03 - DEBUG - Written chunk 35 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[35] 5996.12 ms | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
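
A logits chunk is 4096 x 151936 float32 values, roughly 2.3 GiB, so the steady ~4785 MB file size implies each HDF5 file keeps only two physical slots and recycles them across logical chunks ("Reusing freed chunk 1 for chunk 35"). A minimal sketch of that recycling with an illustrative schema (dataset and attribute names here are not the tool's):

import h5py
import numpy as np

def write_chunk(path, logical_chunk, logits, n_slots=2):
    # logits: (4096, 151936) float32, ~2.3 GiB per physical slot.
    with h5py.File(path, "a") as f:
        dset = f.require_dataset("logits", shape=(n_slots,) + logits.shape,
                                 dtype="float32")
        freed = list(f.attrs.get("freed_slots", np.array([], dtype=np.int64)))
        # Prefer a freed physical slot when one exists, as the log shows.
        slot = int(freed.pop(0)) if freed else logical_chunk % n_slots
        dset[slot] = logits
        f.attrs["freed_slots"] = np.array(freed, dtype=np.int64)
        f.attrs[f"slot_of_chunk_{logical_chunk}"] = slot

def free_chunk(path, logical_chunk):
    # "Added chunk N to freed chunks list": return its slot to the pool.
    with h5py.File(path, "a") as f:
        slot = int(f.attrs[f"slot_of_chunk_{logical_chunk}"])
        freed = list(f.attrs.get("freed_slots", np.array([], dtype=np.int64)))
        f.attrs["freed_slots"] = np.array(freed + [slot], dtype=np.int64)
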
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Generating logits for model, chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:04 - INFO - Processing chunks from 35 to 35 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Inference time: 2381.45 ms | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Reusing freed chunk 1 for chunk 35. | |
[llama_gguf_optmize v0.6.0] 10:02:07 - DEBUG - Written chunk 35 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[35] 2829.10 ms | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
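
Both passes load the same precomputed token array and slice out the chunk, which avoids re-tokenizing the calibration dataset on every resume. A sketch of the slicing, assuming one flat array of token ids (the actual .npy layout is not shown in the log):

import numpy as np

n_ctx = 4096   # tokens per chunk, per the log
tokens = np.load("/Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/"
                 "combined/calibration-dataset.txt.tokens.npy")
chunk = 35
ids = tokens[chunk * n_ctx:(chunk + 1) * n_ctx]
# One forward pass over `ids` then yields the (4096, 151936) float32 logits
# reported in the "Logits shape" lines above.
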
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Comparing logits for chunk 35 | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:02:07 - INFO - Processing chunks 35 to 35... | |
[llama_gguf_optmize v0.6.0] 10:02:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:02:13 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:02:18 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:02:23 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - | |
===== KL-divergence statistics for Chunk 35 ===== | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Average : 0.017046 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - StdDev : 0.029465 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Median : 0.010546 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Maximum : 0.998195 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_99 : 0.122316 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_95 : 0.054708 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_90 : 0.037489 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_10 : 0.000099 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_05 : 0.000023 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Samples seen: 143232 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
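
The statistics block above is a per-token KL divergence between the baseline (bf16) and target logits, summarized by mean, median, and upper/lower percentiles; the kl_values_list line shows it is computed in four 1024-token parts. A numerically stable sketch of the same statistic over full (4096, 151936) arrays:

import numpy as np

def kl_per_token(baseline_logits, target_logits):
    # KL(P_baseline || P_target) at each token position, in log space.
    lb = baseline_logits - baseline_logits.max(axis=-1, keepdims=True)
    lt = target_logits - target_logits.max(axis=-1, keepdims=True)
    log_p = lb - np.log(np.exp(lb).sum(axis=-1, keepdims=True))
    log_q = lt - np.log(np.exp(lt).sum(axis=-1, keepdims=True))
    return (np.exp(log_p) * (log_p - log_q)).sum(axis=-1)

kl = kl_per_token(baseline, target)   # the two (4096, 151936) chunk arrays
print(f"Average: {kl.mean():.6f}  StdDev: {kl.std():.6f}  "
      f"Median: {np.median(kl):.6f}")
for q in (99, 95, 90, 10, 5, 1):
    print(f"KLD_{q:02d}: {np.percentile(kl, q):.6f}")
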
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4719) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.231841, p-value=0.000040 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.229991650177346, EMA_p_value_std_dev: 0.023775348569332302 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1055, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1055, beta=63), stopping probability=0.179624 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7159) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.245714, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22852434215210302, EMA_p_value_std_dev: 0.02353836261820639 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1056, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1056, beta=63), stopping probability=0.181468 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9527) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.295468, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22759412918717778, EMA_p_value_std_dev: 0.023303740588205446 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1057, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1057, beta=63), stopping probability=0.183323 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7218) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.238751, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2271809491170695, EMA_p_value_std_dev: 0.023071452738103498 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1058, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1058, beta=63), stopping probability=0.185189 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9788) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.246510, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22614972804114233, EMA_p_value_std_dev: 0.022841470272614836 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1059, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1059, beta=63), stopping probability=0.187065 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8300) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.275827, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22535000398533986, EMA_p_value_std_dev: 0.022613701085523224 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1060, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1060, beta=63), stopping probability=0.188951 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7924) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.274405, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22391417865488344, EMA_p_value_std_dev: 0.022388208585972503 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1061, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1061, beta=63), stopping probability=0.190848 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9235) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.247650, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22232164638148866, EMA_p_value_std_dev: 0.022164959913771216 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1062, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1062, beta=63), stopping probability=0.192756 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0368) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.296018, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22116357644109644, EMA_p_value_std_dev: 0.021943901319678945 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1063, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1063, beta=63), stopping probability=0.194674 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7341) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.146548, p-value=0.069074 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2206881167507447, EMA_p_value_std_dev: 0.02203313797774197 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1064, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1064, beta=63), stopping probability=0.196602 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8225) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.201430, p-value=0.000925 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22136926101865342, EMA_p_value_std_dev: 0.022120478993687822 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1065, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1065, beta=63), stopping probability=0.198541 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7450) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.229467, p-value=0.000052 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22209545599143662, EMA_p_value_std_dev: 0.022206889989912573 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1066, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1066, beta=63), stopping probability=0.200490 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8283) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.204843, p-value=0.000667 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2225646492046494, EMA_p_value_std_dev: 0.02229170235371785 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1067, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1067, beta=63), stopping probability=0.202449 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8649) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.198434, p-value=0.001225 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22177208599472592, EMA_p_value_std_dev: 0.022374307387846935 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1068, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1068, beta=63), stopping probability=0.204419 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6550) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.191980, p-value=0.002205 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.22029544299797896, EMA_p_value_std_dev: 0.022159036984806965 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1069, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1069, beta=63), stopping probability=0.206398 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8260) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.181989, p-value=0.005225 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21858696659928734, EMA_p_value_std_dev: 0.021958282872788372 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1070, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1070, beta=63), stopping probability=0.208388 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7525) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.204898, p-value=0.000664 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21691862663638586, EMA_p_value_std_dev: 0.02175831038004752 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1071, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1071, beta=63), stopping probability=0.210387 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8666) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.277202, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21590028593013438, EMA_p_value_std_dev: 0.021561675857442127 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1072, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1072, beta=63), stopping probability=0.212397 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7438) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.264873, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2149069643571508, EMA_p_value_std_dev: 0.021368623037411588 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1073, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1073, beta=63), stopping probability=0.214417 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6912) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.245990, p-value=0.000008 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2139956274612318, EMA_p_value_std_dev: 0.021178210864829204 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1074, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1074, beta=63), stopping probability=0.216446 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0023) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.274725, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21305945632570636, EMA_p_value_std_dev: 0.020969910082550464 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1075, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1075, beta=63), stopping probability=0.218485 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6252) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.194901, p-value=0.001695 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.212055115085474, EMA_p_value_std_dev: 0.020768287763984997 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1076, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1076, beta=63), stopping probability=0.220534 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7810) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.214637, p-value=0.000252 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2112063401100939, EMA_p_value_std_dev: 0.02056847481732563 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1077, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1077, beta=63), stopping probability=0.222593 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8730) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.299467, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21124217054441738, EMA_p_value_std_dev: 0.020370655973114167 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1078, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1078, beta=63), stopping probability=0.224662 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6109) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.222629, p-value=0.000109 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21180089087291926, EMA_p_value_std_dev: 0.020174691514941973 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1079, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1079, beta=63), stopping probability=0.226740 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7242) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.259169, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2118807181296927, EMA_p_value_std_dev: 0.01998067991533809 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1080, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1080, beta=63), stopping probability=0.228827 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6874) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.279829, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21193295010325178, EMA_p_value_std_dev: 0.01978247876288141 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1081, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1081, beta=63), stopping probability=0.230924 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8141) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.219605, p-value=0.000151 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.2116573693628416, EMA_p_value_std_dev: 0.019585871375283514 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1082, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1082, beta=63), stopping probability=0.233031 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7047) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.220232, p-value=0.000141 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.21073020723201022, EMA_p_value_std_dev: 0.019391244718988895 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1083, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1083, beta=63), stopping probability=0.235146 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8581) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.237512, p-value=0.000021 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.20949157342536542, EMA_p_value_std_dev: 0.01919857846428616 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1084, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1084, beta=63), stopping probability=0.237271 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7365) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.215366, p-value=0.000234 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.20831593664406034, EMA_p_value_std_dev: 0.01900804498945383 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1085, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1085, beta=63), stopping probability=0.239406 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8974) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Kuiper statistic=0.290972, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated EMA_relative_change: 0.20732097893749413, EMA_p_value_std_dev: 0.018819412306477757 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Updated Beta parameters: alpha=1086, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Chunk 35: Beta parameters updated (alpha=1086, beta=63), stopping probability=0.241549 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Added chunk 34 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Added chunk 34 to freed chunks list in target_logits.h5. | |
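
Every chunk follows the same trace: generate baseline logits, generate target logits, compare them (the KL summary plus 32 segment-level Kuiper/Beta updates), save the Bayesian state, then free the previous chunk's slots in both HDF5 files. The outer loop, reduced to runnable pseudocode with placeholder stubs (none of these names are the tool's API, and the 0.95 threshold is an assumption):

def generate_logits(model, chunk, path): ...   # one llama.cpp forward pass
def compare_chunk(chunk): return 0.0           # returns stopping probability
def save_state(): ...                          # "Saved prior distribution, ..."
def free_chunk(path, chunk): ...               # "Added chunk N to freed ..."

start_chunk, end_chunk, stop_threshold = 35, 36, 0.95
for chunk in range(start_chunk, end_chunk + 1):
    generate_logits("baseline", chunk, "baseline_logits.h5")
    generate_logits("target", chunk, "target_logits.h5")
    stop_prob = compare_chunk(chunk)
    save_state()
    free_chunk("baseline_logits.h5", chunk - 1)
    free_chunk("target_logits.h5", chunk - 1)
    if stop_prob >= stop_threshold:   # Bayesian early stopping
        break
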
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Processing chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Generating logits for model, chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:27 - INFO - Processing chunks from 36 to 36 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Inference time: 5489.10 ms | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Reusing freed chunk 0 for chunk 36. | |
[llama_gguf_optmize v0.6.0] 10:02:33 - DEBUG - Written chunk 36 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[36] 5965.84 ms | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:02:33 - INFO - Generating logits for model, chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:34 - INFO - Processing chunks from 36 to 36 | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Inference time: 2370.28 ms | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Reusing freed chunk 0 for chunk 36. | |
[llama_gguf_optmize v0.6.0] 10:02:36 - DEBUG - Written chunk 36 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[36] 2833.22 ms | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Comparing logits for chunk 36 | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:02:37 - INFO - Processing chunks 36 to 36... | |
[llama_gguf_optmize v0.6.0] 10:02:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:02:43 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:02:48 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:02:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:02:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - | |
===== KL-divergence statistics for Chunk 36 ===== | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Average : 0.023975 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - StdDev : 0.043240 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Median : 0.012804 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Maximum : 1.404639 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_99 : 0.179422 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_95 : 0.083223 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_90 : 0.055958 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_10 : 0.000306 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_05 : 0.000030 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Samples seen: 147328 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
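
The segmentation line is a ceil division: each 4096-token chunk is split into 128-token windows, and (4096 + 128 - 1) // 128 = 32 is the window count, which matches the 32 Kuiper/Beta updates that follow each chunk's statistics:

n_ctx, window = 4096, 128
n_segments = (n_ctx + window - 1) // window   # ceil(4096 / 128) = 32
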
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6906) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.285642, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20635517802483416, EMA_p_value_std_dev: 0.018632731654406723 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1087, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1087, beta=63), stopping probability=0.243702 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7761) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.275808, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2052341518728502, EMA_p_value_std_dev: 0.01844789448901718 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1088, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1088, beta=63), stopping probability=0.245863 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8835) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.269023, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20386734430389788, EMA_p_value_std_dev: 0.018264920064393092 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1089, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1089, beta=63), stopping probability=0.248034 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9470) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.293725, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20219174573839677, EMA_p_value_std_dev: 0.018082729239815416 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1090, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1090, beta=63), stopping probability=0.250213 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7899) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.311546, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2006757149943174, EMA_p_value_std_dev: 0.017902355802777114 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1091, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1091, beta=63), stopping probability=0.252401 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7656) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.314581, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19911954331153953, EMA_p_value_std_dev: 0.01772378165348401 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1092, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1092, beta=63), stopping probability=0.254598 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0394) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.296893, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19769728682264512, EMA_p_value_std_dev: 0.01754698881686911 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1093, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1093, beta=63), stopping probability=0.256803 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8176) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.255264, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19634940567156806, EMA_p_value_std_dev: 0.017371968771648334 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1094, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1094, beta=63), stopping probability=0.259017 | |
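Note: the Kuiper statistic reported for every window is a rotation-invariant relative of the Kolmogorov-Smirnov statistic, V = D+ + D-, the sum of the largest positive and largest negative gaps between two empirical CDFs; unlike KS it weights both tails equally. The log doesn't show which two samples are compared (plausibly per-window baseline vs. target distributions), so this sketch only computes the statistic itself on two generic samples; the printed p-values would come from Kuiper's asymptotic series, omitted here.

import numpy as np

def kuiper_statistic(x: np.ndarray, y: np.ndarray) -> float:
    # Evaluate both empirical CDFs on the pooled, sorted grid.
    grid = np.sort(np.concatenate([x, y]))
    F_x = np.searchsorted(np.sort(x), grid, side="right") / len(x)
    F_y = np.searchsorted(np.sort(y), grid, side="right") / len(y)
    d = F_x - F_y
    return d.max() + (-d).max()  # V = D+ + D-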
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8093) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.345960, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19558027136078926, EMA_p_value_std_dev: 0.01719869456673238 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1095, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1095, beta=63), stopping probability=0.261240 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7149) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.247624, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19573025806265457, EMA_p_value_std_dev: 0.017027165527192797 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1096, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1096, beta=63), stopping probability=0.263471 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6971) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.310503, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1964089278778327, EMA_p_value_std_dev: 0.01685734748899065 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1097, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1097, beta=63), stopping probability=0.265710 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6401) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.157052, p-value=0.034800 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1977745314644047, EMA_p_value_std_dev: 0.01684442550547747 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1098, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1098, beta=63), stopping probability=0.267958 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4940) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.187747, p-value=0.003201 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19867425157779264, EMA_p_value_std_dev: 0.016828693370982598 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1099, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1099, beta=63), stopping probability=0.270214 | |
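Note: the EMA_relative_change / EMA_p_value_std_dev pairs evolve like standard exponential moving averages with the logged decay rate of 0.5; the tracked quantities (relative change of the window KL, rolling std-dev of recent p-values) are inferred from the field names, so treat the details as an assumption:

def ema_update(prev: float, value: float, decay: float = 0.5) -> float:
    # One EMA step with the "Adjusted decay rate: 0.5" from the log.
    return decay * prev + (1.0 - decay) * value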
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4804) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.153410, p-value=0.044484 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1992169459875737, EMA_p_value_std_dev: 0.01687471552870729 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1100, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1100, beta=63), stopping probability=0.272478 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4834) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.221110, p-value=0.000129 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2004557246454641, EMA_p_value_std_dev: 0.016920044704221357 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1101, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1101, beta=63), stopping probability=0.274750 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7841) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.214462, p-value=0.000257 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2004588420372314, EMA_p_value_std_dev: 0.01696443038583044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1102, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1102, beta=63), stopping probability=0.277029 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7207) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.129072, p-value=0.184892 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20122084198663445, EMA_p_value_std_dev: 0.01758883197214421 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1103, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1103, beta=63), stopping probability=0.279317 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8159) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.169108, p-value=0.014576 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20245176059312933, EMA_p_value_std_dev: 0.01819303357312788 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1104, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1104, beta=63), stopping probability=0.281613 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6898) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.180597, p-value=0.005865 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20238049399201696, EMA_p_value_std_dev: 0.01881527143704294 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1105, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1105, beta=63), stopping probability=0.283916 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7723) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.159274, p-value=0.029840 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2025447121751636, EMA_p_value_std_dev: 0.01940405256512261 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1106, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1106, beta=63), stopping probability=0.286227 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8109) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.268553, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.2030654259999787, EMA_p_value_std_dev: 0.019987345803635582 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1107, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1107, beta=63), stopping probability=0.288545 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9117) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.213492, p-value=0.000283 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20318729639642705, EMA_p_value_std_dev: 0.019912760995769226 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1108, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1108, beta=63), stopping probability=0.290871 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8840) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.201986, p-value=0.000877 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20329047595464889, EMA_p_value_std_dev: 0.01984166552132383 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1109, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1109, beta=63), stopping probability=0.293204 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6828) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.153182, p-value=0.045158 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20385516252564653, EMA_p_value_std_dev: 0.01985360471912983 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1110, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1110, beta=63), stopping probability=0.295544 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8052) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.161799, p-value=0.024965 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20316742333723936, EMA_p_value_std_dev: 0.019857977005861057 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1111, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1111, beta=63), stopping probability=0.297892 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7852) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.163061, p-value=0.022803 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.20186340095271643, EMA_p_value_std_dev: 0.019847363549239102 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1112, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1112, beta=63), stopping probability=0.300246 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8331) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.194056, p-value=0.001830 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.201082257778073, EMA_p_value_std_dev: 0.019833142129520878 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1113, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1113, beta=63), stopping probability=0.302608 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9415) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.214789, p-value=0.000248 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19993315920251228, EMA_p_value_std_dev: 0.01982063163342824 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1114, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1114, beta=63), stopping probability=0.304977 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6252) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.155345, p-value=0.039083 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19940435204196336, EMA_p_value_std_dev: 0.019787765721201712 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1115, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1115, beta=63), stopping probability=0.307352 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6228) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.198625, p-value=0.001204 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1996424203704434, EMA_p_value_std_dev: 0.019763365170388878 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1116, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1116, beta=63), stopping probability=0.309734 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8260) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.181206, p-value=0.005577 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.19954265232522048, EMA_p_value_std_dev: 0.019731922714223197 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1117, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1117, beta=63), stopping probability=0.312122 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8778) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Kuiper statistic=0.169374, p-value=0.014285 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated EMA_relative_change: 0.1993443498241292, EMA_p_value_std_dev: 0.01969551882901338 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Updated Beta parameters: alpha=1118, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Chunk 36: Beta parameters updated (alpha=1118, beta=63), stopping probability=0.314518 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Added chunk 35 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Added chunk 35 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Processing chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Generating logits for model, chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:02:57 - INFO - Processing chunks from 37 to 37 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:02:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Inference time: 5504.06 ms | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Reusing freed chunk 1 for chunk 37. | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Written chunk 37 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[37] 5955.99 ms | |
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
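Note: the constant 4785.13 MB is consistent with a two-slot float32 logits store: 2 slots x 4096 tokens x 151936 vocab x 4 bytes = 4,978,638,848 bytes, about 4748 MiB, with the remaining ~37 MB presumably HDF5 metadata and bookkeeping. The slot count is an inference from the "physical slot 0/1" lines; the point is that freed-slot reuse keeps the file size fixed no matter how many chunks are processed.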
[llama_gguf_optmize v0.6.0] 10:03:03 - INFO - Generating logits for model, chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:03:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:04 - INFO - Processing chunks from 37 to 37 | |
[llama_gguf_optmize v0.6.0] 10:03:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Inference time: 2460.95 ms | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Reusing freed chunk 1 for chunk 37. | |
[llama_gguf_optmize v0.6.0] 10:03:06 - DEBUG - Written chunk 37 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[37] 2911.48 ms | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
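Note: "Reusing freed chunk 1 for chunk 37" / "Written chunk 37 at physical slot 1" show that both HDF5 files keep a small pool of physical slots plus a free list, recycling a slot as soon as a chunk has been compared. A minimal sketch of that scheme with h5py; the dataset name, bookkeeping attribute, and free-list handling are hypothetical, since the log shows the behaviour rather than the layout:

import h5py
import numpy as np

# Assumes the file was created with a resizable logits dataset, e.g.:
#   f.create_dataset("logits", shape=(0, 4096, 151936),
#                    maxshape=(None, 4096, 151936), dtype="f4")

def write_chunk(f: h5py.File, chunk_id: int, logits: np.ndarray,
                free_slots: list[int]) -> int:
    if free_slots:
        slot = free_slots.pop(0)               # "Reusing freed chunk S for chunk N"
    else:
        slot = f["logits"].shape[0]
        f["logits"].resize(slot + 1, axis=0)   # grow only while no slot is free
    f["logits"][slot] = logits                 # "Written chunk N at physical slot S"
    f.attrs[f"chunk_{chunk_id}_slot"] = slot   # hypothetical chunk->slot bookkeeping
    return slot

def free_chunk(free_slots: list[int], slot: int) -> None:
    free_slots.append(slot)                    # "Added chunk N to freed chunks list"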
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Comparing logits for chunk 37 | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:03:07 - INFO - Processing chunks 37 to 37... | |
[llama_gguf_optmize v0.6.0] 10:03:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:03:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:03:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:03:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - | |
===== KL-divergence statistics for Chunk 37 ===== | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Average : 0.020722 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - StdDev : 0.048673 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Median : 0.006612 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Maximum : 1.303131 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_99 : 0.205267 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_95 : 0.082247 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_90 : 0.050595 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_10 : 0.000154 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_05 : 0.000039 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - KLD_01 : 0.000006 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Samples seen: 151424 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
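Note: the statistics table above is a straightforward summary of one KL value per token position; the four (1024,) parts in kl_values_list cover the 4096-token chunk, and "segmentation 0/32 (4096 + 128 - 1/ 128)" is ceil-division, (4096 + 128 - 1) // 128 = 32 windows of 128 tokens, exactly the number of Beta updates that follow per chunk. A sketch of the per-token KL and the reported summary (whether the tool uses KL(baseline||target) or the reverse isn't shown, so that direction is an assumption):

import numpy as np

def log_softmax(z: np.ndarray) -> np.ndarray:
    z = z - z.max(axis=-1, keepdims=True)
    return z - np.log(np.exp(z).sum(axis=-1, keepdims=True))

def kl_per_token(base_logits: np.ndarray, tgt_logits: np.ndarray) -> np.ndarray:
    # One KL(baseline || target) value per token over the 151936-way vocab.
    lp, lq = log_softmax(base_logits), log_softmax(tgt_logits)
    return (np.exp(lp) * (lp - lq)).sum(axis=-1)

def summarize(kl: np.ndarray) -> dict:
    p99, p95, p90, p10, p05, p01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
    return {"Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
            "Minimum": kl.min(), "Maximum": kl.max(),
            "KLD_99": p99, "KLD_95": p95, "KLD_90": p90,
            "KLD_10": p10, "KLD_05": p05, "KLD_01": p01}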
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5804) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.149089, p-value=0.058888 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19856028508907136, EMA_p_value_std_dev: 0.019743286832800716 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1119, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1119, beta=63), stopping probability=0.316919 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7628) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.220193, p-value=0.000142 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19803070197380473, EMA_p_value_std_dev: 0.01979176897792631 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1120, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1120, beta=63), stopping probability=0.319327 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6144) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.155443, p-value=0.038827 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19827898388385787, EMA_p_value_std_dev: 0.019840657427701497 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1121, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1121, beta=63), stopping probability=0.321741 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7981) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.123277, p-value=0.245025 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.19891885348651805, EMA_p_value_std_dev: 0.020636539485962415 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1122, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1122, beta=63), stopping probability=0.324162 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9022) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.192591, p-value=0.002088 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20077767671064403, EMA_p_value_std_dev: 0.021443241485750653 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1123, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1123, beta=63), stopping probability=0.326588 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8008) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.154529, p-value=0.041287 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20159560501380647, EMA_p_value_std_dev: 0.022249283969564878 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1124, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1124, beta=63), stopping probability=0.329020 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7342) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.205089, p-value=0.000652 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20252938785779467, EMA_p_value_std_dev: 0.023046475802404747 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1125, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1125, beta=63), stopping probability=0.331458 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9251) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.257543, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20363668078580763, EMA_p_value_std_dev: 0.023875023070521632 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1126, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1126, beta=63), stopping probability=0.333902 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7520) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.194976, p-value=0.001684 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2041251504379074, EMA_p_value_std_dev: 0.02381630093272934 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1127, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1127, beta=63), stopping probability=0.336352 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0671) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.246558, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2047460276130154, EMA_p_value_std_dev: 0.023760425510504137 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1128, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1128, beta=63), stopping probability=0.338807 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8078) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.123553, p-value=0.241895 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20557426598833872, EMA_p_value_std_dev: 0.02459990544462103 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1129, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1129, beta=63), stopping probability=0.341267 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8329) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.160592, p-value=0.027200 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20655906899759363, EMA_p_value_std_dev: 0.025407707297274706 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1130, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1130, beta=63), stopping probability=0.343733 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7066) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.121501, p-value=0.265874 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20753465066283125, EMA_p_value_std_dev: 0.02649574778073413 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1131, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1131, beta=63), stopping probability=0.346204 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1070) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.307975, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21066615326329394, EMA_p_value_std_dev: 0.027576250406409226 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1132, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1132, beta=63), stopping probability=0.348680 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2450) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.461805, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2137686561077186, EMA_p_value_std_dev: 0.028645989423102827 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1133, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1133, beta=63), stopping probability=0.351161 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4938) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.490583, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2164039391258559, EMA_p_value_std_dev: 0.02952192145228879 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1134, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1134, beta=63), stopping probability=0.353648 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1350) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.499754, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21828067100989199, EMA_p_value_std_dev: 0.030413494179814347 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1135, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1135, beta=63), stopping probability=0.356139 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9922) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.464526, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.2172450124691678, EMA_p_value_std_dev: 0.030110119582706298 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1136, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1136, beta=63), stopping probability=0.358634 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0542) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.471835, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21548852322783463, EMA_p_value_std_dev: 0.029809771139868803 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1137, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1137, beta=63), stopping probability=0.361135 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8456) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.410652, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21385375813495958, EMA_p_value_std_dev: 0.029512418672748612 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1138, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1138, beta=63), stopping probability=0.363639 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.1096) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.549682, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21291056993221288, EMA_p_value_std_dev: 0.029218032296487943 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1139, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1139, beta=63), stopping probability=0.366149 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 1.0323) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.491973, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21210592889037871, EMA_p_value_std_dev: 0.028926582424330476 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1140, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1140, beta=63), stopping probability=0.368662 | |
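Note: two floating-point curiosities in these lines decode cleanly. The rate logged as 0.009999999999999998 is just 0.01 after round-off (it is the representable double one step below 0.01), so the endless "updated from X to X" lines mean the learning rate is effectively pinned at 0.01; the lone tick up to 0.01 at the KL spike of 2.1096 above, reverting in the very next window, is the only visible movement. Likewise the constant theta_P of 0.050000000000000044 is exactly 1 - 0.95 in double precision, tying it to the reported confidence_level of 0.95. A quick check:

import math

print(math.nextafter(0.01, 0.0))  # 0.009999999999999998 -- one step below 0.01
print(1 - 0.95)                   # 0.050000000000000044 -- the logged theta_P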
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0571) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.440484, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21141991555572165, EMA_p_value_std_dev: 0.02863803976464778 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1141, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1141, beta=63), stopping probability=0.371180 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0720) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.402919, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.21062300924975513, EMA_p_value_std_dev: 0.028352375317995417 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1142, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1142, beta=63), stopping probability=0.373702 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0696) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.442134, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20951486778941683, EMA_p_value_std_dev: 0.028069560374198412 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1143, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1143, beta=63), stopping probability=0.376227 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2705) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.504888, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20846373478587435, EMA_p_value_std_dev: 0.027789566509465784 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1144, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1144, beta=63), stopping probability=0.378757 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8706) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.443892, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20748355208898017, EMA_p_value_std_dev: 0.02751236558353386 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1145, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1145, beta=63), stopping probability=0.381290 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8241) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.436702, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20642722825278853, EMA_p_value_std_dev: 0.02723792973683811 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1146, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1146, beta=63), stopping probability=0.383827 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7402) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.459261, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20521047941477913, EMA_p_value_std_dev: 0.026966231387713147 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1147, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1147, beta=63), stopping probability=0.386368 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8887) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.537932, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20404238765690583, EMA_p_value_std_dev: 0.02669724322962071 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1148, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1148, beta=63), stopping probability=0.388912 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8723) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.408331, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20316155496874544, EMA_p_value_std_dev: 0.026430938228405244 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1149, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1149, beta=63), stopping probability=0.391459 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0964) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Kuiper statistic=0.449726, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated EMA_relative_change: 0.20251973199391252, EMA_p_value_std_dev: 0.0261672896195769 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Updated Beta parameters: alpha=1150, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Chunk 37: Beta parameters updated (alpha=1150, beta=63), stopping probability=0.394010 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Added chunk 36 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Added chunk 36 to freed chunks list in target_logits.h5. | |
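[editor's note] The repeating "Updated Beta parameters ... stopping probability" lines above are the Bayesian early-stopping bookkeeping: each 128-token window contributes one Bernoulli observation, alpha counts windows that met the stopping condition, beta counts those that did not, and the stopping probability is the Beta-posterior mass above the 0.95 confidence level. A minimal sketch of that update -- the exact per-window rule behind "Condition met: Incremented alpha." is not shown in the log, so treat it as an assumption:

    from scipy.stats import beta as beta_dist

    def update_prior(alpha, beta, window_met_condition, confidence_level=0.95):
        # One Bernoulli observation per 128-token window (assumed rule).
        if window_met_condition:
            alpha += 1          # "Condition met: Incremented alpha."
        else:
            beta += 1
        # Posterior mass above the confidence level; consistent with the
        # logged values, e.g. Beta(1150, 63) gives roughly 0.39 at 0.95.
        stopping_probability = beta_dist.sf(confidence_level, alpha, beta)
        return alpha, beta, stopping_probability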
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Processing chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Generating logits for model, chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:27 - INFO - Processing chunks from 38 to 38 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Inference time: 5434.89 ms | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Reusing freed chunk 0 for chunk 38. | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Written chunk 38 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[38] 5901.49 ms | |
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
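[editor's note] One chunk of logits is 4096 x 151936 float32, about 2.3 GiB, which is why the HDF5 files are recycled rather than grown: once a chunk has been compared, its physical slot goes on the freed list ("Added chunk 36 to freed chunks list") and is reused for the next chunk ("Reusing freed chunk 0 for chunk 38"), keeping the file size pinned at 4785.13 MB (two resident slots plus metadata). A hedged h5py sketch of that reuse; the dataset names are hypothetical:

    import h5py
    import numpy as np

    def write_logits(path, chunk_id, logits, freed_slots):
        # logits: (4096, 151936) float32, ~2.3 GiB per chunk.
        with h5py.File(path, "a") as f:
            slot = freed_slots.pop(0)                      # e.g. slot 0 for chunk 38
            f["logits"][slot] = logits.astype(np.float32)  # dataset name assumed
            f["slot_to_chunk"][slot] = chunk_id            # mapping dataset assumed
        return slot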
[llama_gguf_optmize v0.6.0] 10:03:33 - INFO - Generating logits for model, chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:34 - INFO - Processing chunks from 38 to 38 | |
[llama_gguf_optmize v0.6.0] 10:03:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Inference time: 2431.00 ms | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Reusing freed chunk 0 for chunk 38. | |
[llama_gguf_optmize v0.6.0] 10:03:36 - DEBUG - Written chunk 38 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[38] 2875.10 ms | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Comparing logits for chunk 38 | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:03:37 - INFO - Processing chunks 38 to 38... | |
[llama_gguf_optmize v0.6.0] 10:03:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:03:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:03:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:03:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:03:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - | |
===== KL-divergence statistics for Chunk 38 ===== | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Average : 0.016250 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - StdDev : 0.040581 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Median : 0.005430 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Maximum : 0.867015 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_99 : 0.154382 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_95 : 0.063324 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_90 : 0.038309 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_10 : 0.000051 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_05 : 0.000015 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Samples seen: 155520 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
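[editor's note] The table above is per-token KL divergence between the baseline (bf16) and target logits, taken over the 151936-entry vocabulary and accumulated in four 1024-token parts (hence the kl_values_list shapes). The "segmentation 0/32 (4096 + 128 - 1/ 128)" line is ceil division: (4096 + 128 - 1) // 128 = 32 windows of 128 tokens, which the per-window tests below iterate over. A sketch of the statistics, assuming natural-log KL(baseline || target):

    import numpy as np
    from scipy.special import log_softmax

    def kl_report(baseline, target):
        # baseline, target: (n_tokens, 151936) float32 logits for one chunk.
        p_log = log_softmax(baseline.astype(np.float64), axis=-1)
        q_log = log_softmax(target.astype(np.float64), axis=-1)
        kl = np.sum(np.exp(p_log) * (p_log - q_log), axis=-1)  # per-token KL
        q99, q95, q90, q10, q05, q01 = np.percentile(kl, [99, 95, 90, 10, 5, 1])
        return {"Average": kl.mean(), "StdDev": kl.std(), "Median": np.median(kl),
                "Minimum": kl.min(), "Maximum": kl.max(),
                "KLD_99": q99, "KLD_95": q95, "KLD_90": q90,
                "KLD_10": q10, "KLD_05": q05, "KLD_01": q01}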
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9198) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.416543, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.201911829270496, EMA_p_value_std_dev: 0.02590627090562162 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1151, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1151, beta=63), stopping probability=0.396563 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9922) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.402917, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20089384285677128, EMA_p_value_std_dev: 0.025647855853338047 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1152, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1152, beta=63), stopping probability=0.399120 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9956) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.398977, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19942686925020994, EMA_p_value_std_dev: 0.025392018491201 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1153, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1153, beta=63), stopping probability=0.401680 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1362) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.248244, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19831833203644575, EMA_p_value_std_dev: 0.025138759613406537 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1154, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1154, beta=63), stopping probability=0.404242 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9809) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.348590, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19839187461324506, EMA_p_value_std_dev: 0.024888026992915757 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1155, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1155, beta=63), stopping probability=0.406807 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0207) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.353305, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1983928621131688, EMA_p_value_std_dev: 0.02463979543031336 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1156, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1156, beta=63), stopping probability=0.409374 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9592) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.338464, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1985051262527758, EMA_p_value_std_dev: 0.024394039977534605 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1157, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1157, beta=63), stopping probability=0.411944 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7115) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.269913, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19796012125115162, EMA_p_value_std_dev: 0.024150735562536132 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1158, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1158, beta=63), stopping probability=0.414517 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7876) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.294456, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19675187051035464, EMA_p_value_std_dev: 0.023909833660928095 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1159, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1159, beta=63), stopping probability=0.417091 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6346) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.146509, p-value=0.069243 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19656507584035846, EMA_p_value_std_dev: 0.02398022126792473 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1160, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1160, beta=63), stopping probability=0.419668 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0388) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.368993, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1992454162873803, EMA_p_value_std_dev: 0.02404990675854253 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1161, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1161, beta=63), stopping probability=0.422247 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.6961) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.422904, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20168775361460103, EMA_p_value_std_dev: 0.024118897136404725 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1162, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1162, beta=63), stopping probability=0.424827 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7939) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.370570, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20414655051564776, EMA_p_value_std_dev: 0.024187199759909933 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1163, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1163, beta=63), stopping probability=0.427409 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4051) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.535019, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.2063611756363553, EMA_p_value_std_dev: 0.024254821078623246 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1164, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1164, beta=63), stopping probability=0.429993 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9686) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.428347, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20630197293478086, EMA_p_value_std_dev: 0.024012879238364235 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1165, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1165, beta=63), stopping probability=0.432579 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2613) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.478363, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20615098862037462, EMA_p_value_std_dev: 0.023773350767961724 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1166, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1166, beta=63), stopping probability=0.435166 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0706) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.400558, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20614713804008342, EMA_p_value_std_dev: 0.02353621159405148 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1167, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1167, beta=63), stopping probability=0.437754 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1636) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.400914, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20538595035910184, EMA_p_value_std_dev: 0.023301437883400816 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1168, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1168, beta=63), stopping probability=0.440344 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0874) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.416375, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20412213261407697, EMA_p_value_std_dev: 0.02306900604051389 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1169, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1169, beta=63), stopping probability=0.442934 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0121) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.467666, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20289496032955318, EMA_p_value_std_dev: 0.022838892705259763 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1170, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1170, beta=63), stopping probability=0.445526 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0947) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.426623, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20149833407886586, EMA_p_value_std_dev: 0.022611074750524797 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1171, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1171, beta=63), stopping probability=0.448118 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0942) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.427826, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20011804243926112, EMA_p_value_std_dev: 0.02238552927988831 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1172, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1172, beta=63), stopping probability=0.450712 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9952) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.267659, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1993807359524497, EMA_p_value_std_dev: 0.022162235913274066 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1173, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1173, beta=63), stopping probability=0.453305 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9887) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.419614, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19983164306622628, EMA_p_value_std_dev: 0.0219411698979918 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1174, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1174, beta=63), stopping probability=0.455900 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0660) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.355905, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20051789000698755, EMA_p_value_std_dev: 0.021722309016211337 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1175, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1175, beta=63), stopping probability=0.458495 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9555) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.302470, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20138051982434463, EMA_p_value_std_dev: 0.021505631267554247 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1176, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1176, beta=63), stopping probability=0.461090 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1002) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.327944, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.2013130789248529, EMA_p_value_std_dev: 0.02129111487936225 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1177, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1177, beta=63), stopping probability=0.463686 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0780) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.396955, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20080194358725376, EMA_p_value_std_dev: 0.021078736025290225 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1178, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1178, beta=63), stopping probability=0.466281 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1719) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.293309, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20057834967624028, EMA_p_value_std_dev: 0.020868475696868255 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1179, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1179, beta=63), stopping probability=0.468877 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8539) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.259329, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.20022860778140159, EMA_p_value_std_dev: 0.02066031934372062 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1180, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1180, beta=63), stopping probability=0.471472 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2477) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.283435, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.1998932174986564, EMA_p_value_std_dev: 0.02045423929157665 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1181, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1181, beta=63), stopping probability=0.474068 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8436) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Kuiper statistic=0.310823, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated EMA_relative_change: 0.19913509731864695, EMA_p_value_std_dev: 0.02025021488678012 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Updated Beta parameters: alpha=1182, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Chunk 38: Beta parameters updated (alpha=1182, beta=63), stopping probability=0.476663 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Added chunk 37 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Added chunk 37 to freed chunks list in target_logits.h5. | |
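[editor's note] Each 128-token window is scored with a Kuiper test, a rotation-invariant relative of Kolmogorov-Smirnov that sums the largest deviations in both directions between two CDFs; its p-values feed EMA_p_value_std_dev above. The log does not show which two samples are compared (plausibly the window's KL values against the running prior distribution), so the two-sample version below is illustrative only:

    import numpy as np

    def kuiper_two(x, y):
        # Kuiper statistic V = D+ + D- between two empirical CDFs.
        x, y = np.sort(x), np.sort(y)
        grid = np.concatenate([x, y])
        cdf_x = np.searchsorted(x, grid, side="right") / len(x)
        cdf_y = np.searchsorted(y, grid, side="right") / len(y)
        v = (cdf_x - cdf_y).max() + (cdf_y - cdf_x).max()
        # Asymptotic false-positive probability (Numerical Recipes form).
        ne = len(x) * len(y) / (len(x) + len(y))
        lam = (np.sqrt(ne) + 0.155 + 0.24 / np.sqrt(ne)) * v
        j = np.arange(1, 101)
        p = 2.0 * np.sum((4.0 * (j * lam) ** 2 - 1.0) * np.exp(-2.0 * (j * lam) ** 2))
        return v, float(np.clip(p, 0.0, 1.0))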
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Processing chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Generating logits for model, chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:03:57 - INFO - Processing chunks from 39 to 39 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:03:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Inference time: 5579.97 ms | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Reusing freed chunk 1 for chunk 39. | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Written chunk 39 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[39] 6032.68 ms | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:04:03 - INFO - Generating logits for model, chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:04:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:04 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:04 - INFO - Processing chunks from 39 to 39 | |
[llama_gguf_optmize v0.6.0] 10:04:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Inference time: 2551.68 ms | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Reusing freed chunk 1 for chunk 39. | |
[llama_gguf_optmize v0.6.0] 10:04:06 - DEBUG - Written chunk 39 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[39] 2990.04 ms | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Comparing logits for chunk 39 | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:04:07 - INFO - Processing chunks 39 to 39... | |
[llama_gguf_optmize v0.6.0] 10:04:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:04:13 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:04:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:04:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - | |
===== KL-divergence statistics for Chunk 39 ===== | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Average : 0.022853 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - StdDev : 0.044143 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Median : 0.010927 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Maximum : 0.821665 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_99 : 0.205363 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_95 : 0.080140 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_90 : 0.052837 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_10 : 0.000035 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_05 : 0.000011 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Samples seen: 159616 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7015) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.244775, p-value=0.000009 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19832408036247104, EMA_p_value_std_dev: 0.02004825793589753 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1183, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1183, beta=63), stopping probability=0.479257 | |
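[editor's note] Two exponential moving averages track convergence of the comparison: EMA_relative_change (of the chunk-level KL summary) and EMA_p_value_std_dev (of the Kuiper p-values), smoothed with the logged "Adjusted decay rate: 0.5". Note also that "Learning rate updated from 0.009999999999999998 to 0.009999999999999998" is an unchanged 0.01 carrying accumulated floating-point error, not a real update. A sketch under the assumption of a standard EMA:

    def ema_update(prev, value, decay=0.5):
        # Standard EMA with the logged decay of 0.5; the small step-to-step
        # drift in EMA_relative_change is consistent with new window
        # statistics staying close to the running average.
        return decay * prev + (1.0 - decay) * value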
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0561) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.338448, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19809465609728455, EMA_p_value_std_dev: 0.019848315525388883 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1184, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1184, beta=63), stopping probability=0.481851 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1271) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.333474, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19777018849712724, EMA_p_value_std_dev: 0.01965036868749063 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1185, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1185, beta=63), stopping probability=0.484444 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0941) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.218133, p-value=0.000176 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1983820843124998, EMA_p_value_std_dev: 0.019455131630581646 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1186, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1186, beta=63), stopping probability=0.487037 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9890) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.297070, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19954712316666187, EMA_p_value_std_dev: 0.019261842054517395 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1187, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1187, beta=63), stopping probability=0.489628 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9828) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.389064, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.2004792051625368, EMA_p_value_std_dev: 0.019070489610358295 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1188, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1188, beta=63), stopping probability=0.492219 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9941) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.276730, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20174281711196088, EMA_p_value_std_dev: 0.01888104573741464 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1189, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1189, beta=63), stopping probability=0.494809 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0308) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.309965, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20257014841968282, EMA_p_value_std_dev: 0.018693491565782833 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1190, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1190, beta=63), stopping probability=0.497397 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0220) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.322690, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20251628304032693, EMA_p_value_std_dev: 0.0185070246561153 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1191, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1191, beta=63), stopping probability=0.499984 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1446) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.347640, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20164543541240018, EMA_p_value_std_dev: 0.0183224177624293 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1192, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1192, beta=63), stopping probability=0.502570 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9548) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.280277, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.2006651720990951, EMA_p_value_std_dev: 0.01813965234505754 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1193, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1193, beta=63), stopping probability=0.505154 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7744) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.237285, p-value=0.000022 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19981930857078498, EMA_p_value_std_dev: 0.017958805610156593 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1194, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1194, beta=63), stopping probability=0.507737 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0745) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.362847, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19988010533724337, EMA_p_value_std_dev: 0.017779762822793397 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1195, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1195, beta=63), stopping probability=0.510317 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9897) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.288847, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20022416843045196, EMA_p_value_std_dev: 0.01760250595610994 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1196, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1196, beta=63), stopping probability=0.512896 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0258) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.327183, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20038790582020463, EMA_p_value_std_dev: 0.017427017226586492 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1197, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1197, beta=63), stopping probability=0.515473 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9997) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.404930, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20086537465056234, EMA_p_value_std_dev: 0.017253279100696316 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1198, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1198, beta=63), stopping probability=0.518048 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9203) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.345161, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.20063846377516412, EMA_p_value_std_dev: 0.01708117776642972 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1199, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1199, beta=63), stopping probability=0.520621 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2231) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.349792, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.199998793629323, EMA_p_value_std_dev: 0.016910793142970736 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1200, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1200, beta=63), stopping probability=0.523192 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0991) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.345998, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1989156795225175, EMA_p_value_std_dev: 0.01674210798170941 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1201, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1201, beta=63), stopping probability=0.525760 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9091) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.314237, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19752245990760275, EMA_p_value_std_dev: 0.016575105457360486 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1202, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1202, beta=63), stopping probability=0.528326 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7795) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.344233, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1960782783610691, EMA_p_value_std_dev: 0.01640976878318696 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1203, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1203, beta=63), stopping probability=0.530889 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2928) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.415585, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19509470771830098, EMA_p_value_std_dev: 0.01624608134234257 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1204, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1204, beta=63), stopping probability=0.533449 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9409) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.337595, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1945805369882103, EMA_p_value_std_dev: 0.016084026683707484 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1205, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1205, beta=63), stopping probability=0.536007 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9194) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.282607, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1943284822430689, EMA_p_value_std_dev: 0.015923588818431714 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1206, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1206, beta=63), stopping probability=0.538562 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8761) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.254666, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19408318606235514, EMA_p_value_std_dev: 0.015764763009220514 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1207, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1207, beta=63), stopping probability=0.541114 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8892) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.285813, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19357125932079697, EMA_p_value_std_dev: 0.01560752143935325 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1208, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1208, beta=63), stopping probability=0.543663 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8426) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.270977, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19273555385313904, EMA_p_value_std_dev: 0.015451848053829074 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1209, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1209, beta=63), stopping probability=0.546208 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8968) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.277298, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19155860722729548, EMA_p_value_std_dev: 0.015297727336748071 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1210, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1210, beta=63), stopping probability=0.548751 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7592) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.268375, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.19014372768292254, EMA_p_value_std_dev: 0.015145143595364223 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1211, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1211, beta=63), stopping probability=0.551290 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9192) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.257452, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.18861345179322678, EMA_p_value_std_dev: 0.014994078383721174 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1212, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1212, beta=63), stopping probability=0.553826 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7065) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.249416, p-value=0.000005 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.1870489812801139, EMA_p_value_std_dev: 0.01484453346945921 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1213, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1213, beta=63), stopping probability=0.556358 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1269) | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Kuiper statistic=0.240344, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated EMA_relative_change: 0.18552924805624807, EMA_p_value_std_dev: 0.014696521599503465 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - DEBUG - Updated Beta parameters: alpha=1214, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Chunk 39: Beta parameters updated (alpha=1214, beta=63), stopping probability=0.558886 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Added chunk 38 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Added chunk 38 to freed chunks list in target_logits.h5. | |
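The "freed chunks list" lines above show the sliding-window storage scheme: once a chunk has been compared, its physical slot in baseline_logits.h5 and target_logits.h5 is returned to a free list, and the next chunk overwrites that slot instead of growing the file ("Reusing freed chunk 0 for chunk 40" below). A minimal sketch of that bookkeeping with h5py — names like SlotPool and slot_of are illustrative, not the tool's actual schema:

```python
import h5py

class SlotPool:
    """Minimal sketch: a fixed pool of physical slots in one HDF5 file,
    so a 2-slot file can hold a sliding window of logical chunks."""

    def __init__(self, path, slot_shape, n_slots=2):
        self.f = h5py.File(path, "a")
        if "logits" not in self.f:
            self.f.create_dataset("logits", (n_slots,) + slot_shape,
                                  dtype="float32")
        self.free = list(range(n_slots))   # physical slots available for reuse
        self.slot_of = {}                  # logical chunk id -> physical slot

    def write(self, chunk_id, logits):
        slot = self.free.pop(0)            # "Reusing freed chunk 0 for chunk 40"
        self.f["logits"][slot] = logits    # "Written chunk 40 at physical slot 0"
        self.slot_of[chunk_id] = slot

    def free_chunk(self, chunk_id):
        # "Added chunk 38 to freed chunks list"
        self.free.append(self.slot_of.pop(chunk_id))
```

Keeping only two live slots per file is what holds each file at a constant size even as the chunk numbers climb.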
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Processing chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:26 - INFO - Generating logits for model, chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:27 - INFO - Processing chunks from 40 to 40 | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Inference time: 5591.33 ms | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Reusing freed chunk 0 for chunk 40. | |
[llama_gguf_optmize v0.6.0] 10:04:32 - DEBUG - Written chunk 40 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[40] 6069.64 ms | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
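The constant file size is easy to verify: one chunk of logits is 4096 × 151936 float32 values, i.e. 4096 · 151936 · 4 bytes = 2,489,319,424 bytes = exactly 2374 MiB, so two physical slots come to 4748 MiB. The reported 4785.13 MB is that plus roughly 37 MB of HDF5 metadata and whatever bookkeeping datasets the tool stores alongside.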
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Generating logits for model, chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:34 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:34 - INFO - Processing chunks from 40 to 40 | |
[llama_gguf_optmize v0.6.0] 10:04:34 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:34 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:34 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Inference time: 2557.50 ms | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Reusing freed chunk 0 for chunk 40. | |
[llama_gguf_optmize v0.6.0] 10:04:36 - DEBUG - Written chunk 40 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[40] 3036.02 ms | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
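Note the asymmetry in inference times: roughly 5.6 s per 4096-token chunk for the first model versus 2.6 s for the second. That is consistent with the baseline being the full-precision reference and the target being the quantized GGUF under evaluation, though the log itself does not name the two model files.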
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Comparing logits for chunk 40 | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:04:37 - INFO - Processing chunks 40 to 40... | |
[llama_gguf_optmize v0.6.0] 10:04:38 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:04:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:04:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:04:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - | |
===== KL-divergence statistics for Chunk 40 ===== | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Average : 0.022817 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - StdDev : 0.039763 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Median : 0.014336 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Maximum : 1.233832 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_99 : 0.147886 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_95 : 0.071295 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_90 : 0.049504 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_10 : 0.000671 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_05 : 0.000154 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - KLD_01 : 0.000018 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Samples seen: 163712 | |
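These statistics are plain summary statistics over one KL value per token, taken across the vocabulary axis; the "kl_values_list size [(1024,), …]" line above shows the 4096-token chunk being processed in four 1024-token parts. A sketch of the computation — the tool's exact implementation is not shown in the log:

```python
import numpy as np
from scipy.special import log_softmax

def kl_stats(baseline_logits, target_logits):
    """Per-token KL(baseline || target) over the vocab axis, then the
    summary statistics printed above (percentile names are the tool's)."""
    logp = log_softmax(baseline_logits.astype(np.float64), axis=-1)
    logq = log_softmax(target_logits.astype(np.float64), axis=-1)
    kl = np.sum(np.exp(logp) * (logp - logq), axis=-1)  # one value per token
    pct = lambda q: float(np.percentile(kl, q))
    return {
        "Average": kl.mean(), "StdDev": kl.std(),
        "Median": float(np.median(kl)), "Minimum": kl.min(), "Maximum": kl.max(),
        "KLD_99": pct(99), "KLD_95": pct(95), "KLD_90": pct(90),
        "KLD_10": pct(10), "KLD_05": pct(5), "KLD_01": pct(1),
    }
```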
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9845) | |
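Two recurring lines deserve a gloss. The segmentation line is ceiling division: ⌈4096 / 128⌉ = (4096 + 128 − 1) // 128 = 32, so each 4096-token chunk is scored in 32 windows of 128 tokens, and each window drives one Bayesian update — which is why exactly 32 "Beta parameters updated" cycles follow per chunk (alpha runs 1215 through 1246 here). The learning-rate line, meanwhile, is a floating-point artifact rather than a change: 0.009999999999999998 is simply 0.01 after repeated binary arithmetic, printed before and after an update that left it in place.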
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.211900, p-value=0.000333 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18415315382708042, EMA_p_value_std_dev: 0.014551384307121492 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1215, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1215, beta=63), stopping probability=0.561411 | |
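The stopping probability is consistent with a simple reading of the rule: treat each 128-token window that meets the convergence condition as a Bernoulli success, keep a Beta(alpha, beta) posterior over the success rate, and stop once enough posterior mass sits above the confidence level (0.95, per the "confidence_level" line saved with the state). Under that assumption — which reproduces the logged values closely, though the tool's actual rule is not shown — the number is one scipy call:

```python
from scipy.stats import beta

def stopping_probability(a, b, threshold=0.95):
    """P(window convergence rate > threshold) under a Beta(a, b) posterior.
    Assumes the threshold equals the logged confidence_level of 0.95."""
    return beta.sf(threshold, a, b)  # survival function, 1 - CDF

print(stopping_probability(1215, 63))  # ~0.5614, as logged for alpha=1215
```

Each increment of alpha nudges the posterior mean up slightly, which is why the stopping probability creeps from ~0.56 toward 1 by ~0.0025 per converged window.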
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0958) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.234702, p-value=0.000029 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18304127753284116, EMA_p_value_std_dev: 0.014407665675477617 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1216, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1216, beta=63), stopping probability=0.563932 | |
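The Kuiper statistic driving these updates is the two-sample analogue of Kolmogorov–Smirnov that sums the maximum deviations in both directions, making it equally sensitive in the tails and at the median. A self-contained sketch of the statistic — the log does not reveal which two samples the tool feeds it, nor how the p-value is obtained, so both are left out here:

```python
import numpy as np

def kuiper_statistic(x, y):
    """Two-sample Kuiper statistic V = D+ + D-, computed from the
    empirical CDFs of samples x and y."""
    data = np.sort(np.concatenate([x, y]))
    cdf_x = np.searchsorted(np.sort(x), data, side="right") / len(x)
    cdf_y = np.searchsorted(np.sort(y), data, side="right") / len(y)
    d_plus = np.max(cdf_x - cdf_y)
    d_minus = np.max(cdf_y - cdf_x)
    return d_plus + d_minus
```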
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1113) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.294325, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18242256932024695, EMA_p_value_std_dev: 0.014265383126958768 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1217, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1217, beta=63), stopping probability=0.566448 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7279) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.223235, p-value=0.000103 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18240002144599868, EMA_p_value_std_dev: 0.014124464163656975 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1218, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1218, beta=63), stopping probability=0.568961 | |
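The two EMA_* quantities are exponential moving averages feeding the convergence condition: EMA_relative_change tracks how much the KL picture is still moving between windows, and EMA_p_value_std_dev tracks the spread of recent Kuiper p-values. A generic EMA step, with the decay taken from the "Adjusted decay rate: 0.5" lines — the exact weighting and inputs are assumptions:

```python
def ema_update(prev, x, decay=0.5):
    """One exponential-moving-average step; `decay` mirrors the
    'Adjusted decay rate: 0.5' lines. Which raw quantities feed the
    two EMAs is not visible in the log, so this shows only the shape
    of the update, not the tool's exact formula."""
    return decay * prev + (1.0 - decay) * x
```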
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4120) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.260310, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1825563967621823, EMA_p_value_std_dev: 0.01398497226309037 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1219, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1219, beta=63), stopping probability=0.571470 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5241) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.267744, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18261289135793043, EMA_p_value_std_dev: 0.013845913010905608 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1220, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1220, beta=63), stopping probability=0.573974 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4960) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.193183, p-value=0.001980 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18269921896361846, EMA_p_value_std_dev: 0.013716527647014264 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1221, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1221, beta=63), stopping probability=0.576475 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7425) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.171316, p-value=0.012312 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18230421580229247, EMA_p_value_std_dev: 0.013632970653980697 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1222, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1222, beta=63), stopping probability=0.578970 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8252) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.167203, p-value=0.016823 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18139264338799796, EMA_p_value_std_dev: 0.013575026691803702 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1223, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1223, beta=63), stopping probability=0.581462 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5788) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.115448, p-value=0.345287 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18146442001154856, EMA_p_value_std_dev: 0.0149468488742328 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1224, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1224, beta=63), stopping probability=0.583948 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5734) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.159187, p-value=0.030023 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18173795523995676, EMA_p_value_std_dev: 0.016273303944147895 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1225, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1225, beta=63), stopping probability=0.586430 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7347) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.150472, p-value=0.053899 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18178992325463023, EMA_p_value_std_dev: 0.01753437050641644 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1226, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1226, beta=63), stopping probability=0.588908 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6175) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.192455, p-value=0.002114 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18271919355032123, EMA_p_value_std_dev: 0.018797650919454775 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1227, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1227, beta=63), stopping probability=0.591380 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9252) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.152035, p-value=0.048697 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18326666887506923, EMA_p_value_std_dev: 0.020014830759121072 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1228, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1228, beta=63), stopping probability=0.593848 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7852) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.172339, p-value=0.011373 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18313993513670304, EMA_p_value_std_dev: 0.02004057654094759 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1229, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1229, beta=63), stopping probability=0.596310 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5187) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.180070, p-value=0.006126 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18300217149636783, EMA_p_value_std_dev: 0.02008808478448712 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1230, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1230, beta=63), stopping probability=0.598767 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8167) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.271500, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18315027690448446, EMA_p_value_std_dev: 0.020087796913603587 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1231, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1231, beta=63), stopping probability=0.601220 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4374) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.194070, p-value=0.001828 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18357833000093635, EMA_p_value_std_dev: 0.020087925828999494 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1232, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1232, beta=63), stopping probability=0.603666 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3852) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.275612, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18507515105495953, EMA_p_value_std_dev: 0.019936291019206145 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1233, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1233, beta=63), stopping probability=0.606108 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6261) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.137678, p-value=0.116724 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1876618720863669, EMA_p_value_std_dev: 0.020249866039475494 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1234, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1234, beta=63), stopping probability=0.608544 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6083) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.148421, p-value=0.061436 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.18915015337895197, EMA_p_value_std_dev: 0.020569135977644124 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1235, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1235, beta=63), stopping probability=0.610975 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6170) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.119422, p-value=0.291710 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19028735394530996, EMA_p_value_std_dev: 0.02156507868262764 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1236, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1236, beta=63), stopping probability=0.613400 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5834) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.177607, p-value=0.007489 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19166425673558565, EMA_p_value_std_dev: 0.02254045858793605 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1237, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1237, beta=63), stopping probability=0.615819 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7252) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.229998, p-value=0.000049 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19229124569886136, EMA_p_value_std_dev: 0.0235060113992945 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1238, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1238, beta=63), stopping probability=0.618232 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6031) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.150675, p-value=0.053199 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1936176495159693, EMA_p_value_std_dev: 0.0244674525853469 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1239, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1239, beta=63), stopping probability=0.620640 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7286) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.225753, p-value=0.000078 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19578815911422617, EMA_p_value_std_dev: 0.025476495081659733 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1240, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1240, beta=63), stopping probability=0.623042 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8159) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.188960, p-value=0.002880 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1970531228280722, EMA_p_value_std_dev: 0.025449999885628245 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1241, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1241, beta=63), stopping probability=0.625437 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6867) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.286192, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1990868101764685, EMA_p_value_std_dev: 0.025430420017874177 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1242, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1242, beta=63), stopping probability=0.627827 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0010) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.244432, p-value=0.000009 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.20039796973230772, EMA_p_value_std_dev: 0.025411082780855798 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1243, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1243, beta=63), stopping probability=0.630210 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7574) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.230555, p-value=0.000046 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.2002660254514454, EMA_p_value_std_dev: 0.0251703094173119 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1244, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1244, beta=63), stopping probability=0.632588 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6570) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.204280, p-value=0.000705 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.19995189776482528, EMA_p_value_std_dev: 0.024931597125656634 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1245, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1245, beta=63), stopping probability=0.634958 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0295) | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Kuiper statistic=0.298881, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated EMA_relative_change: 0.1995768951118408, EMA_p_value_std_dev: 0.024685991750714505 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - DEBUG - Updated Beta parameters: alpha=1246, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Chunk 40: Beta parameters updated (alpha=1246, beta=63), stopping probability=0.637323 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Added chunk 39 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Added chunk 39 to freed chunks list in target_logits.h5. | |
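Between chunks the tool checkpoints everything the early-stopping logic needs — the prior distribution, the stopping statistics, and the Beta(alpha, beta) state — which is why each comparison step can begin by loading it back ("Loaded prior distribution … from file" above) and why an interrupted run can resume mid-dataset. The confidence_level: 0.95 printed alongside appears to be the threshold against which the stopping probability is measured.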
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Processing chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:04:56 - INFO - Generating logits for model, chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:04:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:04:57 - INFO - Processing chunks from 41 to 41 | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:04:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Inference time: 5622.76 ms | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Reusing freed chunk 1 for chunk 41. | |
[llama_gguf_optmize v0.6.0] 10:05:02 - DEBUG - Written chunk 41 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[41] 6086.10 ms | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Generating logits for model, chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:05:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:04 - INFO - Processing chunks from 41 to 41 | |
[llama_gguf_optmize v0.6.0] 10:05:04 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:04 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:04 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Inference time: 2533.56 ms | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Reusing freed chunk 1 for chunk 41. | |
[llama_gguf_optmize v0.6.0] 10:05:06 - DEBUG - Written chunk 41 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[41] 2980.00 ms | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Comparing logits for chunk 41 | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:05:07 - INFO - Processing chunks 41 to 41... | |
[llama_gguf_optmize v0.6.0] 10:05:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:05:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:05:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:05:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - | |
===== KL-divergence statistics for Chunk 41 ===== | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Average : 0.088268 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - StdDev : 0.368675 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Median : 0.010919 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Maximum : 5.727451 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_99 : 2.007464 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_95 : 0.372391 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_90 : 0.073360 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_10 : 0.000081 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_05 : 0.000017 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - KLD_01 : 0.000003 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Samples seen: 167808 | |
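Chunk 41 illustrates why the report includes percentiles: the mean KL jumps to 0.088 (from 0.023 on chunk 40) while the median actually falls to 0.011. The divergence is concentrated in a thin tail of hard tokens — the top 1% sit at KL ≥ 2.0 against a 90th percentile of only 0.073, with a single maximum of 5.73 — a shape the mean alone would have hidden.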
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5695) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.300169, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19880665375542622, EMA_p_value_std_dev: 0.024442836332856585 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1247, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1247, beta=63), stopping probability=0.639681 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4829) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.242589, p-value=0.000012 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19854969229178585, EMA_p_value_std_dev: 0.02420210378142371 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1248, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1248, beta=63), stopping probability=0.642033 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6487) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.196865, p-value=0.001416 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19857427732482955, EMA_p_value_std_dev: 0.023966983190355347 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1249, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1249, beta=63), stopping probability=0.644378 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6738) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.295708, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19864710401796346, EMA_p_value_std_dev: 0.02373421835760726 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1250, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1250, beta=63), stopping probability=0.646716 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8521) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.175377, p-value=0.008956 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.19976606224317628, EMA_p_value_std_dev: 0.023536313223037553 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1251, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1251, beta=63), stopping probability=0.649048 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6392) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.105363, p-value=0.502521 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20165028748995512, EMA_p_value_std_dev: 0.025531991457439562 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1252, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1252, beta=63), stopping probability=0.651372 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6232) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.107715, p-value=0.463637 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20330904586088588, EMA_p_value_std_dev: 0.027901543234661798 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1253, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1253, beta=63), stopping probability=0.653690 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6053) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.179737, p-value=0.006296 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20515840902804128, EMA_p_value_std_dev: 0.030238565197619464 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1254, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1254, beta=63), stopping probability=0.656001 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7804) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.240999, p-value=0.000014 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20674194097997511, EMA_p_value_std_dev: 0.0325522491466953 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1255, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1255, beta=63), stopping probability=0.658305 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7112) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.269520, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20725815352674146, EMA_p_value_std_dev: 0.03485906637964742 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1256, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1256, beta=63), stopping probability=0.660602 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0054) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.617073, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21072104696176958, EMA_p_value_std_dev: 0.03657275153787879 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1257, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1257, beta=63), stopping probability=0.662892 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1484) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.637021, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2119822503528795, EMA_p_value_std_dev: 0.036236010109627785 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1258, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1258, beta=63), stopping probability=0.665174 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0051) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.517013, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21266011400190615, EMA_p_value_std_dev: 0.03587461818571727 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1259, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1259, beta=63), stopping probability=0.667450 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7967) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.651907, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21386495143129233, EMA_p_value_std_dev: 0.03551677065895113 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1260, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1260, beta=63), stopping probability=0.669718 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9646) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.489648, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2137334590449094, EMA_p_value_std_dev: 0.035162490871628094 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1261, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1261, beta=63), stopping probability=0.671978 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8162) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.468335, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2136172104673364, EMA_p_value_std_dev: 0.0348117450251836 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1262, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1262, beta=63), stopping probability=0.674231 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9394) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.598200, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2137939939652445, EMA_p_value_std_dev: 0.034464497868557396 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1263, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1263, beta=63), stopping probability=0.676477 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8695) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.457366, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.213873622072587, EMA_p_value_std_dev: 0.03412071450231854 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1264, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1264, beta=63), stopping probability=0.678715 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6466) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.439076, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21317919959268702, EMA_p_value_std_dev: 0.03378036037515791 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1265, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1265, beta=63), stopping probability=0.680946 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8503) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.516228, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21298657643005214, EMA_p_value_std_dev: 0.03344340128041571 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1266, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1266, beta=63), stopping probability=0.683168 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.8186) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.482051, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2121102754965759, EMA_p_value_std_dev: 0.03310980335264356 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1267, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1267, beta=63), stopping probability=0.685384 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8648) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.442446, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2108181991576191, EMA_p_value_std_dev: 0.03277953306420094 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1268, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1268, beta=63), stopping probability=0.687591 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9698) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.544295, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.21004935517728326, EMA_p_value_std_dev: 0.03245255722188554 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1269, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1269, beta=63), stopping probability=0.689790 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7258) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.450246, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20927723508267043, EMA_p_value_std_dev: 0.03212884296359723 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1270, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1270, beta=63), stopping probability=0.691982 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7858) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.472731, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20849258799749779, EMA_p_value_std_dev: 0.03180835775503535 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1271, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1271, beta=63), stopping probability=0.694166 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7971) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.517062, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20775925596607903, EMA_p_value_std_dev: 0.03149106938642887 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1272, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1272, beta=63), stopping probability=0.696341 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8802) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.439296, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20687801401038455, EMA_p_value_std_dev: 0.031176945969299243 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1273, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1273, beta=63), stopping probability=0.698509 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3924) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.436007, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20554881848863776, EMA_p_value_std_dev: 0.03086595593325548 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1274, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1274, beta=63), stopping probability=0.700668 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9368) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.409633, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20427237441918955, EMA_p_value_std_dev: 0.030558068022821256 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1275, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1275, beta=63), stopping probability=0.702820 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6745) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.408335, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.2026802487294875, EMA_p_value_std_dev: 0.030253251294293612 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1276, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1276, beta=63), stopping probability=0.704963 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.4462) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.195639, p-value=0.001585 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20156479547843942, EMA_p_value_std_dev: 0.02995854544209238 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1277, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1277, beta=63), stopping probability=0.707098 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1131) | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Kuiper statistic=0.227577, p-value=0.000064 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated EMA_relative_change: 0.20104358878461712, EMA_p_value_std_dev: 0.029666713206384598 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - DEBUG - Updated Beta parameters: alpha=1278, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Chunk 41: Beta parameters updated (alpha=1278, beta=63), stopping probability=0.709225 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Added chunk 40 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Added chunk 40 to freed chunks list in target_logits.h5. | |
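
===== Annotation: what the Beta parameters mean =====
The long runs of "Condition met: Incremented alpha" / "Beta parameters updated" above are a Bayesian early-stopping tally: each 128-token segment that passes the similarity check adds one success to a Beta(alpha, beta) posterior over the per-segment pass rate, and the "stopping probability" tracks how much posterior mass sits above the configured confidence_level (logged above as 0.95). This reading is inferred from the numbers rather than taken from the tool's source, but it reproduces the logged values closely. A minimal sketch under that assumption:

# Inserted sketch, not tool output. Assumes the stopping probability is the
# Beta-posterior mass above confidence_level; the tool's exact rule may differ.
from scipy.stats import beta

confidence_level = 0.95    # logged above as "confidence_level: 0.95"
a_post, b_post = 1278, 63  # final chunk-41 update in the log above

p_stop = beta.sf(confidence_level, a_post, b_post)  # sf(x) = 1 - cdf(x)
print(f"stopping probability ~ {p_stop:.6f}")       # log shows 0.709225
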
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Processing chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:26 - INFO - Generating logits for model, chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:27 - INFO - Processing chunks from 42 to 42 | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Inference time: 5512.93 ms | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Reusing freed chunk 0 for chunk 42. | |
[llama_gguf_optmize v0.6.0] 10:05:32 - DEBUG - Written chunk 42 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[42] 5960.89 ms | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Generating logits for model, chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:33 - INFO - Processing chunks from 42 to 42 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Inference time: 2362.95 ms | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Reusing freed chunk 0 for chunk 42. | |
[llama_gguf_optmize v0.6.0] 10:05:36 - DEBUG - Written chunk 42 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[42] 2823.82 ms | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
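
===== Annotation: paired logits files and slot reuse =====
Each chunk is run through two models: the baseline (presumably the bf16 conversion; ~5.5 s per chunk here) and the quantized target (~2.4 s). Each pass stores a full-vocabulary float32 logits matrix of shape (4096, 151936), about 2.3 GiB. Rather than letting the HDF5 files grow by that much per chunk, the tool frees the slot of an already-compared chunk and reuses it ("Reusing freed chunk 0 for chunk 42"), which is why the file size stays pinned near 4785 MB. A rough h5py sketch of that pattern; the dataset names and free-list bookkeeping here are hypothetical, not the tool's actual schema:

# Inserted sketch, not tool output. Dataset layout is hypothetical.
import h5py
import numpy as np

N_CTX, N_VOCAB = 4096, 151936  # logits shape from the DEBUG lines above

with h5py.File("target_logits.h5", "a") as f:
    if "logits" not in f:
        # Two physical slots of float32 (4096, 151936) come to ~4.7 GiB,
        # consistent with the logged "Final file size: 4785.13 MB".
        f.create_dataset("logits", shape=(2, N_CTX, N_VOCAB), dtype="float32")
        f.create_dataset("slot_owner", shape=(2,), dtype="int64", fillvalue=-1)

    freed_slots = [0]                 # chunks already compared free their slot
    slot = freed_slots.pop(0)         # reuse instead of growing the file
    chunk_logits = np.zeros((N_CTX, N_VOCAB), dtype=np.float32)  # stand-in
    f["logits"][slot] = chunk_logits  # "Written chunk 42 at physical slot 0"
    f["slot_owner"][slot] = 42
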
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Comparing logits for chunk 42 | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:05:36 - INFO - Processing chunks 42 to 42... | |
[llama_gguf_optmize v0.6.0] 10:05:37 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:05:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:05:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:05:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - | |
===== KL-divergence statistics for Chunk 42 ===== | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Average : 0.029643 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - StdDev : 0.079060 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Median : 0.014136 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Maximum : 1.827285 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_99 : 0.274111 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_95 : 0.095251 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_90 : 0.059316 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_10 : 0.000440 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_05 : 0.000120 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - KLD_01 : 0.000009 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Samples seen: 171904 | |
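
===== Annotation: reproducing the KL-divergence table =====
The per-chunk table above summarizes one KL(baseline || target) value per token position, computed between the two models' softmaxed vocabulary distributions, with KLD_99/KLD_95/... as upper and lower percentiles, similar in spirit to llama.cpp's KL-divergence report. A numpy sketch of how such a table can be produced (an illustration, not the tool's code):

# Inserted sketch, not tool output. Inputs are the two stored logits
# matrices for one chunk, shape (4096, 151936) float32 as logged above.
import numpy as np
from scipy.special import log_softmax

def kld_stats(baseline_logits, target_logits):
    logp = log_softmax(baseline_logits, axis=-1)        # baseline model p
    logq = log_softmax(target_logits, axis=-1)          # quantized model q
    kl = np.sum(np.exp(logp) * (logp - logq), axis=-1)  # one value per token
    pct = lambda q: float(np.percentile(kl, q))
    return {
        "Average": float(kl.mean()), "StdDev": float(kl.std()),
        "Median": pct(50), "Minimum": float(kl.min()), "Maximum": float(kl.max()),
        "KLD_99": pct(99), "KLD_95": pct(95), "KLD_90": pct(90),
        "KLD_10": pct(10), "KLD_05": pct(5), "KLD_01": pct(1),
    }
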
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6324) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.106303, p-value=0.486853 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2022661162713936, EMA_p_value_std_dev: 0.031540786785459955 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1279, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1279, beta=63), stopping probability=0.711343 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8205) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.170212, p-value=0.013400 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20514435243066695, EMA_p_value_std_dev: 0.033381935292090305 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1280, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1280, beta=63), stopping probability=0.713453 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7277) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.204109, p-value=0.000716 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2067893596234715, EMA_p_value_std_dev: 0.035203890835373876 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1281, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1281, beta=63), stopping probability=0.715555 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8972) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.227733, p-value=0.000063 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2083588741481209, EMA_p_value_std_dev: 0.03700942044258173 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1282, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1282, beta=63), stopping probability=0.717648 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1634) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.157988, p-value=0.032630 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20906427744963219, EMA_p_value_std_dev: 0.03876396103908638 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1283, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1283, beta=63), stopping probability=0.719733 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7852) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.180839, p-value=0.005750 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20879074598619898, EMA_p_value_std_dev: 0.03851161673364751 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1284, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1284, beta=63), stopping probability=0.721809 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6603) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.144616, p-value=0.077771 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20852429329098093, EMA_p_value_std_dev: 0.038458749710268324 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1285, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1285, beta=63), stopping probability=0.723877 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8212) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.197077, p-value=0.001389 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20897178377098766, EMA_p_value_std_dev: 0.03840527646989945 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1286, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1286, beta=63), stopping probability=0.725935 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.3966) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.256516, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20939739090564058, EMA_p_value_std_dev: 0.03835244440963511 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1287, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1287, beta=63), stopping probability=0.727986 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5015) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.283271, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.20976858774447682, EMA_p_value_std_dev: 0.038309661490528724 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1288, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1288, beta=63), stopping probability=0.730027 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5258) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.176194, p-value=0.008391 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21076322976549547, EMA_p_value_std_dev: 0.03826533161349413 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1289, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1289, beta=63), stopping probability=0.732060 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0783) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.252884, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21187622060147568, EMA_p_value_std_dev: 0.03792000969260212 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1290, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1290, beta=63), stopping probability=0.734085 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5038) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.288531, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21237671237249917, EMA_p_value_std_dev: 0.037579184607895603 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1291, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1291, beta=63), stopping probability=0.736100 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7971) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.227059, p-value=0.000068 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21318768678023203, EMA_p_value_std_dev: 0.037241687037344186 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1292, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1292, beta=63), stopping probability=0.738107 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5681) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.155539, p-value=0.038576 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21375465781865935, EMA_p_value_std_dev: 0.03703681623604003 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1293, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1293, beta=63), stopping probability=0.740104 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8355) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.177381, p-value=0.007628 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21351422517532448, EMA_p_value_std_dev: 0.03683413965248452 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1294, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1294, beta=63), stopping probability=0.742093 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3387) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.207645, p-value=0.000508 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21338721230918015, EMA_p_value_std_dev: 0.03663280219966807 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1295, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1295, beta=63), stopping probability=0.744073 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9423) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.234205, p-value=0.000031 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21297401564527696, EMA_p_value_std_dev: 0.036433430165139065 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1296, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1296, beta=63), stopping probability=0.746044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7971) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.310959, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21258078074103862, EMA_p_value_std_dev: 0.03623614165383823 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1297, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1297, beta=63), stopping probability=0.748006 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6857) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.257589, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2123082754202434, EMA_p_value_std_dev: 0.03590817917831324 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1298, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1298, beta=63), stopping probability=0.749959 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7492) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.228256, p-value=0.000060 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21186113209991453, EMA_p_value_std_dev: 0.035552172031377685 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1299, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1299, beta=63), stopping probability=0.751903 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9638) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.221177, p-value=0.000128 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21126285849922438, EMA_p_value_std_dev: 0.03519806562519139 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1300, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1300, beta=63), stopping probability=0.753838 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6178) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.147061, p-value=0.066908 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21067972734522966, EMA_p_value_std_dev: 0.03514522595543294 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1301, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1301, beta=63), stopping probability=0.755764 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8364) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.198149, p-value=0.001258 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21072968910772774, EMA_p_value_std_dev: 0.03509155515307647 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1302, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1302, beta=63), stopping probability=0.757681 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0706) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.224865, p-value=0.000086 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2108575870277415, EMA_p_value_std_dev: 0.035038323427388565 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1303, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1303, beta=63), stopping probability=0.759588 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5945) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.258750, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21117272364032938, EMA_p_value_std_dev: 0.03498568897664276 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1304, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1304, beta=63), stopping probability=0.761487 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9182) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.285970, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2106980960383623, EMA_p_value_std_dev: 0.03493372504686675 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1305, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1305, beta=63), stopping probability=0.763376 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8334) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.176634, p-value=0.008100 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21027165753169746, EMA_p_value_std_dev: 0.03462030151143729 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1306, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1306, beta=63), stopping probability=0.765256 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7833) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.298575, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.2111591860694384, EMA_p_value_std_dev: 0.03431100323076525 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1307, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1307, beta=63), stopping probability=0.767127 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6886) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.294515, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21175362041065265, EMA_p_value_std_dev: 0.0340048843668094 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1308, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1308, beta=63), stopping probability=0.768989 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8047) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.293896, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21198490043787058, EMA_p_value_std_dev: 0.033701820828814004 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1309, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1309, beta=63), stopping probability=0.770841 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5418) | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Kuiper statistic=0.313232, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated EMA_relative_change: 0.21102881737906223, EMA_p_value_std_dev: 0.033401780395795534 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Updated Beta parameters: alpha=1310, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Chunk 42: Beta parameters updated (alpha=1310, beta=63), stopping probability=0.772684 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Added chunk 41 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Added chunk 41 to freed chunks list in target_logits.h5. | |
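
Note on the stopping probability logged after each "Updated Beta parameters" line: the values track a Beta(alpha, beta) posterior over the per-window success events ("Condition met: Incremented alpha."), and the numbers are consistent with the posterior mass above the logged confidence_level of 0.95 — e.g. alpha=1310, beta=63 gives roughly 0.77, matching "stopping probability=0.772684" above. A minimal sketch under that interpretation (scipy, not the tool's own code):

from scipy.stats import beta

def stopping_probability(alpha: int, b: int, confidence_level: float = 0.95) -> float:
    # Posterior mass above the confidence level under Beta(alpha, b),
    # i.e. P(theta > confidence_level).
    return 1.0 - beta.cdf(confidence_level, alpha, b)

print(stopping_probability(1310, 63))  # ~0.77, cf. "stopping probability=0.772684"
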
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Processing chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Generating logits for model, chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:05:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:05:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:05:57 - INFO - Processing chunks from 43 to 43 | |
[llama_gguf_optmize v0.6.0] 10:05:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:05:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:05:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Inference time: 5552.30 ms | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Reusing freed chunk 1 for chunk 43. | |
[llama_gguf_optmize v0.6.0] 10:06:02 - DEBUG - Written chunk 43 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[43] 5993.54 ms | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Generating logits for model, chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:03 - INFO - Processing chunks from 43 to 43 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Inference time: 2494.15 ms | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Reusing freed chunk 1 for chunk 43. | |
[llama_gguf_optmize v0.6.0] 10:06:06 - DEBUG - Written chunk 43 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:06:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[43] 3258.35 ms | |
[llama_gguf_optmize v0.6.0] 10:06:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
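
The slot bookkeeping above ("Reusing freed chunk 1 for chunk 43", "Written chunk 43 at physical slot 1") suggests each HDF5 file keeps a small fixed pool of physical slots that are recycled as logical chunks are compared and freed; two slots of 4096 x 151936 float32 logits (~2.4 GB each) are consistent with the constant "Final file size: 4785.13 MB". An illustrative sketch of such a scheme (dataset and attribute names here are hypothetical, not the tool's actual layout):

import h5py
import numpy as np

N_SLOTS = 2  # two physical slots suffice when chunks are compared as they are produced

def write_chunk(path: str, logical_chunk: int, logits: np.ndarray, freed_slots: list) -> int:
    slot = freed_slots.pop(0)  # "Reusing freed chunk <slot> for chunk <logical_chunk>"
    with h5py.File(path, "a") as f:
        dset = f.require_dataset("logits", shape=(N_SLOTS,) + logits.shape,
                                 dtype="float32")
        dset[slot] = logits    # "Written chunk <logical_chunk> at physical slot <slot>"
        f.attrs[f"chunk_{logical_chunk}_slot"] = int(slot)
    return slot

def free_chunk(slot: int, freed_slots: list) -> None:
    # "Added chunk <n> to freed chunks list" — the slot becomes reusable,
    # so the file never grows beyond N_SLOTS * chunk size.
    freed_slots.append(slot)
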
[llama_gguf_optmize v0.6.0] 10:06:07 - INFO - Comparing logits for chunk 43 | |
[llama_gguf_optmize v0.6.0] 10:06:07 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:06:07 - INFO - Processing chunks 43 to 43... | |
[llama_gguf_optmize v0.6.0] 10:06:08 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:06:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:06:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:06:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - | |
===== KL-divergence statistics for Chunk 43 ===== | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Average : 0.015078 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - StdDev : 0.027749 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Median : 0.009472 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Maximum : 1.034442 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_99 : 0.106170 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_95 : 0.050940 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_90 : 0.033877 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_10 : 0.000124 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_05 : 0.000032 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - KLD_01 : 0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Samples seen: 176000 | |
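
The per-chunk table above summarizes token-level KL divergence between the two models' next-token distributions across the 4096 positions of the chunk (computed in four 1024-token parts, hence the four (1024,) arrays in kl_values_list). A minimal numpy sketch of those values and the percentile rows, assuming KLD_99 etc. are plain percentiles of the per-token KL values:

import numpy as np
from scipy.special import log_softmax

def kl_per_token(base_logits: np.ndarray, tgt_logits: np.ndarray) -> np.ndarray:
    # Both arrays are (n_tokens, vocab) — (4096, 151936) per chunk in this run.
    logp = log_softmax(base_logits, axis=-1)   # baseline model
    logq = log_softmax(tgt_logits, axis=-1)    # target model
    return np.sum(np.exp(logp) * (logp - logq), axis=-1)  # D_KL(P || Q) per token

# Demo with toy shapes; in the real run the logits come from the .h5 files.
rng = np.random.default_rng(0)
base = rng.normal(size=(8, 32))
tgt = base + 0.1 * rng.normal(size=(8, 32))
kl = kl_per_token(base, tgt)
print(kl.mean(), kl.std(), np.median(kl))         # Average / StdDev / Median
print(np.percentile(kl, [99, 95, 90, 10, 5, 1]))  # KLD_99 ... KLD_01
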
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
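
The segmentation annotation reads as ceiling division: the 4096-token chunk is split into (4096 + 128 - 1) // 128 = 32 windows of 128 tokens, and each window drives one Kuiper test and one Beta update below — which is why alpha advances by exactly 32 per chunk (1311 through 1342 over chunk 43):

n_ctx, window = 4096, 128
n_segments = (n_ctx + window - 1) // window  # ceil(4096 / 128) = 32 updates per chunk
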
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7378) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.220932, p-value=0.000131 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20959013124716863, EMA_p_value_std_dev: 0.033069182474144095 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1311, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1311, beta=63), stopping probability=0.774518 | |
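
The Kuiper statistic reported for each window is the rotation-invariant cousin of Kolmogorov-Smirnov: V = D+ + D-, the sum of the largest positive and largest negative deviations between two empirical CDFs, rather than the single largest gap. A generic two-sample version (exactly which two samples the tool compares per window is internal to it; this only illustrates the statistic):

import numpy as np

def kuiper_statistic(x: np.ndarray, y: np.ndarray) -> float:
    # V = D+ + D-: max positive plus max negative ECDF deviation.
    grid = np.sort(np.concatenate([x, y]))
    cdf_x = np.searchsorted(np.sort(x), grid, side="right") / x.size
    cdf_y = np.searchsorted(np.sort(y), grid, side="right") / y.size
    d = cdf_x - cdf_y
    return float(d.max() - d.min())  # d.max() = D+, -d.min() = D-

Unlike the KS statistic, V weights tails and center more evenly, which suits heavy-tailed per-token KL values; the logged p-values would come from the usual asymptotic Kuiper series.
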
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7845) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.253908, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20867367428639014, EMA_p_value_std_dev: 0.03273989905231318 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1312, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1312, beta=63), stopping probability=0.776342 | |
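
The two EMA lines drift by well under 1% per window, which is consistent with exponential moving averages whose update weight is the logged learning rate (~0.01) rather than the 0.5 decay: for example 0.99 * 0.034311 ≈ 0.03397, close to the next EMA_p_value_std_dev of 0.0340049 when the new observation is near zero. A generic sketch under that assumption (the tool's exact inputs and weighting are internal):

def ema_update(prev: float, observation: float, weight: float = 0.01) -> float:
    # Exponential moving average with a small update weight, so each new
    # window nudges the tracked statistic by only ~1% of the gap.
    return (1.0 - weight) * prev + weight * observation
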
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7525) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.240595, p-value=0.000015 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20782895815073554, EMA_p_value_std_dev: 0.032413886909146254 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1313, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1313, beta=63), stopping probability=0.778158 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7044) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.220552, p-value=0.000136 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20705824768842862, EMA_p_value_std_dev: 0.03209125953634867 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1314, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1314, beta=63), stopping probability=0.779964 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8737) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.305598, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20640832591656091, EMA_p_value_std_dev: 0.031771850368284796 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1315, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1315, beta=63), stopping probability=0.781760 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4061) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.305228, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20533867977009607, EMA_p_value_std_dev: 0.031455518327506204 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1316, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1316, beta=63), stopping probability=0.783547 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7415) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.283495, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.204349981705377, EMA_p_value_std_dev: 0.031142345237818485 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1317, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1317, beta=63), stopping probability=0.785325 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9904) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.287520, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20311241563141066, EMA_p_value_std_dev: 0.0308323090447727 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1318, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1318, beta=63), stopping probability=0.787094 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8866) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.214954, p-value=0.000244 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20172149950498752, EMA_p_value_std_dev: 0.030525845707633843 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1319, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1319, beta=63), stopping probability=0.788853 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9740) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.247427, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.200873010894941, EMA_p_value_std_dev: 0.030222432399040582 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1320, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1320, beta=63), stopping probability=0.790602 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9305) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.303661, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20061728908476187, EMA_p_value_std_dev: 0.02992204563742835 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1321, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1321, beta=63), stopping probability=0.792342 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9432) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.255466, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20056297901002276, EMA_p_value_std_dev: 0.029624652533143975 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1322, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1322, beta=63), stopping probability=0.794073 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9104) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.288039, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.2001633658715458, EMA_p_value_std_dev: 0.02933022592788985 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1323, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1323, beta=63), stopping probability=0.795795 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7925) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.233278, p-value=0.000034 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19991393617704076, EMA_p_value_std_dev: 0.029037801351536562 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1324, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1324, beta=63), stopping probability=0.797507 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9864) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.271577, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19952365671328334, EMA_p_value_std_dev: 0.028748298547526255 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1325, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1325, beta=63), stopping probability=0.799209 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8532) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.225459, p-value=0.000081 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19917882935527537, EMA_p_value_std_dev: 0.02846188432775515 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1326, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1326, beta=63), stopping probability=0.800902 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7116) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.217728, p-value=0.000183 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.1987215007511755, EMA_p_value_std_dev: 0.028178741461945326 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1327, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1327, beta=63), stopping probability=0.802586 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8519) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.284951, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19841627537068623, EMA_p_value_std_dev: 0.027898422912713182 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1328, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1328, beta=63), stopping probability=0.804260 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6267) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.238640, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19810790130006306, EMA_p_value_std_dev: 0.02762091654981684 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1329, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1329, beta=63), stopping probability=0.805925 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7317) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.194372, p-value=0.001779 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19786133715689705, EMA_p_value_std_dev: 0.027353050682146895 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1330, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1330, beta=63), stopping probability=0.807580 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7022) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.173259, p-value=0.010585 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19770967992945712, EMA_p_value_std_dev: 0.027125824803657956 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1331, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1331, beta=63), stopping probability=0.809226 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7193) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.239103, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19764819288116084, EMA_p_value_std_dev: 0.02690108171355477 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1332, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1332, beta=63), stopping probability=0.810862 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7571) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.244160, p-value=0.000010 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19727673397212636, EMA_p_value_std_dev: 0.02667856748112819 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1333, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1333, beta=63), stopping probability=0.812489 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7307) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.232133, p-value=0.000039 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19639891691990594, EMA_p_value_std_dev: 0.02645824580951608 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1334, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1334, beta=63), stopping probability=0.814106 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8236) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.304983, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19629811676192363, EMA_p_value_std_dev: 0.026241472652143286 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1335, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1335, beta=63), stopping probability=0.815714 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7241) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.194075, p-value=0.001827 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19617795997719187, EMA_p_value_std_dev: 0.025987792349102718 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1336, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1336, beta=63), stopping probability=0.817313 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1344) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.310488, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.1975647276023671, EMA_p_value_std_dev: 0.02573666234576847 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1337, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1337, beta=63), stopping probability=0.818902 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8733) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.328994, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.19895052765996826, EMA_p_value_std_dev: 0.025488048346332105 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1338, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1338, beta=63), stopping probability=0.820482 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6425) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.171687, p-value=0.011964 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20093767057416795, EMA_p_value_std_dev: 0.025285740032353222 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1339, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1339, beta=63), stopping probability=0.822052 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6041) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.239548, p-value=0.000017 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20302332517792826, EMA_p_value_std_dev: 0.025085427808888765 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1340, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1340, beta=63), stopping probability=0.823612 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.5793) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.306774, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20419739519553295, EMA_p_value_std_dev: 0.024888551683711138 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1341, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1341, beta=63), stopping probability=0.825164 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7529) | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Kuiper statistic=0.238555, p-value=0.000019 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated EMA_relative_change: 0.20556220319036517, EMA_p_value_std_dev: 0.02469361860075733 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - DEBUG - Updated Beta parameters: alpha=1342, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Chunk 43: Beta parameters updated (alpha=1342, beta=63), stopping probability=0.826706 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Added chunk 42 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Added chunk 42 to freed chunks list in target_logits.h5. | |
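
At this point one full per-chunk cycle has repeated twice, and its fixed shape is visible in the log: generate baseline logits, generate target logits, compare, persist the Bayesian/early-stopping state, then free the previous chunk's slots. As a runnable high-level sketch (all names are hypothetical stand-ins mirroring the log lines, not the tool's API; that the baseline is the bf16 reference and the target a quantized variant is an assumption suggested by the ~5.5 s vs ~2.4 s inference times):

# Stubs stand in for the tool's internals.
def generate_logits(model, chunk, path): ...  # inference, write into a recycled slot
def compare_logits(chunk): ...                # KL stats + 32 windowed Kuiper/Beta updates
def save_state(): ...                         # "Saved prior distribution, early stopping stats, ..."
def free_slot(path, chunk): ...               # "Added chunk N to freed chunks list"

baseline_model, target_model = "bf16 gguf (assumed)", "quantized gguf (assumed)"
first_chunk, last_chunk = 43, 44

for chunk in range(first_chunk, last_chunk + 1):
    generate_logits(baseline_model, chunk, "baseline_logits.h5")
    generate_logits(target_model, chunk, "target_logits.h5")
    compare_logits(chunk)
    save_state()
    free_slot("baseline_logits.h5", chunk - 1)
    free_slot("target_logits.h5", chunk - 1)
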
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Processing chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:26 - INFO - Generating logits for model, chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:27 - INFO - Processing chunks from 44 to 44 | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Inference time: 5532.49 ms | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Reusing freed chunk 0 for chunk 44. | |
[llama_gguf_optmize v0.6.0] 10:06:32 - DEBUG - Written chunk 44 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[44] 5998.05 ms | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Generating logits for model, chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:33 - INFO - Processing chunks from 44 to 44 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Inference time: 2377.85 ms | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Reusing freed chunk 0 for chunk 44. | |
[llama_gguf_optmize v0.6.0] 10:06:36 - DEBUG - Written chunk 44 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[44] 2806.82 ms | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:06:36 - INFO - Comparing logits for chunk 44 | |
[llama_gguf_optmize v0.6.0] 10:06:37 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:06:37 - INFO - Processing chunks 44 to 44... | |
[llama_gguf_optmize v0.6.0] 10:06:37 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:06:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:06:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:06:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - | |
===== KL-divergence statistics for Chunk 44 ===== | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Average : 0.013907 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - StdDev : 0.024257 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Median : 0.008231 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Maximum : 0.448273 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_99 : 0.102345 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_95 : 0.045147 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_90 : 0.030872 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_10 : 0.000125 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_05 : 0.000032 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - KLD_01 : 0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Samples seen: 180096 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7744) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.255707, p-value=0.000002 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20597591222366296, EMA_p_value_std_dev: 0.024500627318318688 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1343, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1343, beta=63), stopping probability=0.828238 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0510) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.201941, p-value=0.000881 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20597946728969163, EMA_p_value_std_dev: 0.02426012362917181 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1344, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1344, beta=63), stopping probability=0.829761 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9825) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.213709, p-value=0.000277 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20533023197238517, EMA_p_value_std_dev: 0.024021912668185895 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1345, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1345, beta=63), stopping probability=0.831275 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6188) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.296154, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20489070560597794, EMA_p_value_std_dev: 0.023786077849553527 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1346, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1346, beta=63), stopping probability=0.832779 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6336) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.261948, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2046187925727715, EMA_p_value_std_dev: 0.023552621335059594 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1347, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1347, beta=63), stopping probability=0.834274 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7234) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.294161, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20402280542315226, EMA_p_value_std_dev: 0.023321497108942615 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1348, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1348, beta=63), stopping probability=0.835759 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7507) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.272002, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2034539725860579, EMA_p_value_std_dev: 0.023090100105393377 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1349, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1349, beta=63), stopping probability=0.837235 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8618) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.281568, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20233562383533124, EMA_p_value_std_dev: 0.022859780872372764 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1350, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1350, beta=63), stopping probability=0.838702 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8193) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.247235, p-value=0.000007 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20125252274676206, EMA_p_value_std_dev: 0.022631783182355033 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1351, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1351, beta=63), stopping probability=0.840159 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6188) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.271131, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20008428072878942, EMA_p_value_std_dev: 0.022406060325265247 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1352, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1352, beta=63), stopping probability=0.841607 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9571) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.363115, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19937247478089482, EMA_p_value_std_dev: 0.02218258907000282 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1353, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1353, beta=63), stopping probability=0.843046 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7011) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.211785, p-value=0.000337 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19974889328429676, EMA_p_value_std_dev: 0.021962812056532978 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1354, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1354, beta=63), stopping probability=0.844476 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8277) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.260511, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20052639685675192, EMA_p_value_std_dev: 0.02174522593706971 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1355, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1355, beta=63), stopping probability=0.845896 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8212) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.327725, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20156667116525742, EMA_p_value_std_dev: 0.021529817486466964 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1356, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1356, beta=63), stopping probability=0.847307 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8072) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.225249, p-value=0.000083 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2024970827259509, EMA_p_value_std_dev: 0.021316510005908847 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1357, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1357, beta=63), stopping probability=0.848708 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9979) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.305461, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20334277830554, EMA_p_value_std_dev: 0.021105330263991987 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1358, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1358, beta=63), stopping probability=0.850101 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9985) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.262670, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20406811714214917, EMA_p_value_std_dev: 0.020895170779034074 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1359, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1359, beta=63), stopping probability=0.851484 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7810) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.304222, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20441252167095278, EMA_p_value_std_dev: 0.020687109054360952 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1360, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1360, beta=63), stopping probability=0.852858 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9271) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.306409, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20394202282806217, EMA_p_value_std_dev: 0.020481122743107892 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1361, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1361, beta=63), stopping probability=0.854222 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9112) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.288849, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20285238991486046, EMA_p_value_std_dev: 0.020276827884545257 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1362, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1362, beta=63), stopping probability=0.855578 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9720) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.344417, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.2018839210232717, EMA_p_value_std_dev: 0.02007457086998419 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1363, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1363, beta=63), stopping probability=0.856924 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7924) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.353036, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.20042657737185116, EMA_p_value_std_dev: 0.0198743271454752 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1364, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1364, beta=63), stopping probability=0.858262 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1501) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.262497, p-value=0.000001 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19971218038667207, EMA_p_value_std_dev: 0.01967608517792828 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1365, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1365, beta=63), stopping probability=0.859590 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7023) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.221785, p-value=0.000120 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19937099124299124, EMA_p_value_std_dev: 0.019480349408137212 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1366, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1366, beta=63), stopping probability=0.860909 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7291) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.233151, p-value=0.000035 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19850484273286464, EMA_p_value_std_dev: 0.019286549209559824 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1367, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1367, beta=63), stopping probability=0.862219 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8681) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.239855, p-value=0.000016 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19765197360158893, EMA_p_value_std_dev: 0.01909466273115571 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1368, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1368, beta=63), stopping probability=0.863520 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7917) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.290688, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19675223343971926, EMA_p_value_std_dev: 0.018904690283498497 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1369, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1369, beta=63), stopping probability=0.864812 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.7665) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.251732, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19578254806638493, EMA_p_value_std_dev: 0.018716608154110723 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1370, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1370, beta=63), stopping probability=0.866095 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0187) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.253775, p-value=0.000003 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.19471266800728596, EMA_p_value_std_dev: 0.01853005234882434 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1371, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1371, beta=63), stopping probability=0.867368 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9477) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.290850, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.1940927492279791, EMA_p_value_std_dev: 0.01834528129500981 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1372, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1372, beta=63), stopping probability=0.868633 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4982) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.454601, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.1939156600055606, EMA_p_value_std_dev: 0.018162306144762046 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1373, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1373, beta=63), stopping probability=0.869889 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9604) | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Kuiper statistic=0.425724, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated EMA_relative_change: 0.1934764332014863, EMA_p_value_std_dev: 0.017981156210264738 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Updated Beta parameters: alpha=1374, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Chunk 44: Beta parameters updated (alpha=1374, beta=63), stopping probability=0.871136 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Added chunk 43 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Added chunk 43 to freed chunks list in target_logits.h5. | |
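Once a chunk has been compared, its physical slot in both HDF5 files is pushed onto a freed-chunks list, and the next chunk is written into a recycled slot ("Reusing freed chunk 1 for chunk 45" below), so the files stay at two resident slots instead of growing with every chunk. A sketch of that bookkeeping with hypothetical dataset and attribute names ("logits", "freed_slots"); the tool's real file layout may differ:

```python
import h5py
import numpy as np

N_SLOTS = 2
N_TOKENS, N_LOGITS = 8, 16   # demo sizes; the run above uses 4096 x 151936

with h5py.File("demo_logits.h5", "a") as f:
    if "logits" not in f:
        f.create_dataset("logits", shape=(N_SLOTS, N_TOKENS, N_LOGITS), dtype="float32")
        f.attrs["freed_slots"] = np.arange(N_SLOTS)       # all slots start free

    freed = list(f.attrs["freed_slots"])
    slot = freed.pop(0)                                   # "Reusing freed chunk ..."
    f["logits"][slot] = np.zeros((N_TOKENS, N_LOGITS))    # "Written chunk N at physical slot ..."
    f.attrs["freed_slots"] = np.array(freed, dtype=int)
```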
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Processing chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Generating logits for model, chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:06:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:06:56 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:06:57 - INFO - Processing chunks from 45 to 45 | |
[llama_gguf_optmize v0.6.0] 10:06:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:06:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:06:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Inference time: 5511.72 ms | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Reusing freed chunk 1 for chunk 45. | |
[llama_gguf_optmize v0.6.0] 10:07:02 - DEBUG - Written chunk 45 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[45] 5955.09 ms | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
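This block generates baseline logits for chunk 45: the precomputed token ids are loaded from the .npy file, a 4096-token window is evaluated, and the (4096, 151936) float32 logits go into the recycled HDF5 slot. The slower pass (5.5 s here vs 2.6 s below) is presumably the bf16 baseline and the faster one the quantized target. A hedged sketch assuming the llama-cpp-python binding; the tool itself may drive llama.cpp differently, and it additionally pins the BOS/EOS handling shown in the require_bos/require_eos lines:

```python
# One chunk of logit generation, sketched with llama-cpp-python (an assumption;
# the model path and window arithmetic are illustrative).
import numpy as np
from llama_cpp import Llama

CHUNK, CTX = 45, 4096
tokens = np.load("/Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/"
                 "combined/calibration-dataset.txt.tokens.npy")
window = tokens[CHUNK * CTX:(CHUNK + 1) * CTX].tolist()

llm = Llama(model_path="Sailor2-1B-Chat_bf16.gguf", n_ctx=CTX, logits_all=True)
llm.eval(window)
logits = np.asarray(llm.scores[:len(window)], dtype=np.float32)
print(logits.shape)   # (4096, 151936), as in the "Logits shape" line above
```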
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Generating logits for model, chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:03 - INFO - Processing chunks from 45 to 45 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Inference time: 2553.92 ms | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Reusing freed chunk 1 for chunk 45. | |
[llama_gguf_optmize v0.6.0] 10:07:06 - DEBUG - Written chunk 45 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[45] 3010.06 ms | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
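Both files report the same 4785.13 MB because each holds exactly two resident fp32 slots of shape (4096, 151936). A quick size check; HDF5 metadata and overhead account for the remainder:

```python
slot_bytes = 4096 * 151936 * 4     # tokens x vocab x sizeof(float32)
print(slot_bytes / 2**20)          # 2374.0 MiB per slot
print(2 * slot_bytes / 2**20)      # 4748.0 MiB for two slots, vs 4785.13 MB reported
```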
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Comparing logits for chunk 45 | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:07:06 - INFO - Processing chunks 45 to 45... | |
[llama_gguf_optmize v0.6.0] 10:07:07 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:07:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:07:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:07:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - | |
===== KL-divergence statistics for Chunk 45 ===== | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Average : 0.010650 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - StdDev : 0.035294 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Median : 0.002136 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Maximum : 1.557601 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_99 : 0.109571 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_95 : 0.044345 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_90 : 0.026093 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_10 : 0.000027 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_05 : 0.000010 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - KLD_01 : 0.000001 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Samples seen: 184192 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
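The statistics above are plain summaries of the per-token KL divergence between the baseline and target next-token distributions, computed over the chunk in four parts of 1024 tokens (the kl_values_list line). A sketch of both steps; the tool's exact numerics are an assumption:

```python
import numpy as np
from scipy.special import log_softmax

def kl_per_token(base_logits: np.ndarray, tgt_logits: np.ndarray) -> np.ndarray:
    # KL(baseline || target) per position, from raw logits.
    lp = log_softmax(base_logits.astype(np.float64), axis=-1)
    lq = log_softmax(tgt_logits.astype(np.float64), axis=-1)
    return np.sum(np.exp(lp) * (lp - lq), axis=-1)

def summarize(kl: np.ndarray) -> None:
    print(f"Average : {kl.mean():.6f}")
    print(f"StdDev  : {kl.std():.6f}")
    print(f"Median  : {np.median(kl):.6f}")
    for q in (99, 95, 90, 10, 5, 1):
        print(f"KLD_{q:02d}  : {np.percentile(kl, q):.6f}")
```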
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8674) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.327367, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19373761988344293, EMA_p_value_std_dev: 0.01780180762415367 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1375, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1375, beta=63), stopping probability=0.872375 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2809) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.488927, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19529684681703785, EMA_p_value_std_dev: 0.017624234686671084 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1376, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1376, beta=63), stopping probability=0.873604 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0857) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.515545, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19513301651137888, EMA_p_value_std_dev: 0.017448432946010378 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1377, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1377, beta=63), stopping probability=0.874824 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4604) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.467076, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1950849800113235, EMA_p_value_std_dev: 0.017274384827712763 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1378, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1378, beta=63), stopping probability=0.876036 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3222) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.489094, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1943672228614953, EMA_p_value_std_dev: 0.017102072839395167 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1379, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1379, beta=63), stopping probability=0.877239 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1491) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.453447, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19308063836262665, EMA_p_value_std_dev: 0.016931479662822203 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1380, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1380, beta=63), stopping probability=0.878433 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0880) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.578688, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19218163334778893, EMA_p_value_std_dev: 0.01676258815318555 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1381, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1381, beta=63), stopping probability=0.879618 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5636) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.386481, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1920432222529456, EMA_p_value_std_dev: 0.016595381336357528 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1382, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1382, beta=63), stopping probability=0.880795 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0558) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.443406, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1922167730605071, EMA_p_value_std_dev: 0.01642984240752737 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1383, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1383, beta=63), stopping probability=0.881963 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5466) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.450869, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19231887979298729, EMA_p_value_std_dev: 0.01626595472951229 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1384, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1384, beta=63), stopping probability=0.883122 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2267) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.504228, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19182901826773702, EMA_p_value_std_dev: 0.016103701831085414 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1385, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1385, beta=63), stopping probability=0.884273 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4188) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.539858, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.19082725529634034, EMA_p_value_std_dev: 0.01594306740532034 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1386, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1386, beta=63), stopping probability=0.885415 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1190) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.395500, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18998487102455622, EMA_p_value_std_dev: 0.01578403530795227 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1387, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1387, beta=63), stopping probability=0.886548 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1553) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.441480, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18938853525132646, EMA_p_value_std_dev: 0.015626589555755445 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1388, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1388, beta=63), stopping probability=0.887673 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1460) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.452177, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18856187859899315, EMA_p_value_std_dev: 0.015470714324936784 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1389, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1389, beta=63), stopping probability=0.888790 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2774) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.446688, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18755459934014806, EMA_p_value_std_dev: 0.01531639394954554 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1390, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1390, beta=63), stopping probability=0.889898 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0531) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.402983, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1863004462387506, EMA_p_value_std_dev: 0.015163612919898824 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1391, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1391, beta=63), stopping probability=0.890997 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3102) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.494214, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1851957797405128, EMA_p_value_std_dev: 0.015012355881022833 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1392, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1392, beta=63), stopping probability=0.892088 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2002) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.476660, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18414439509437322, EMA_p_value_std_dev: 0.01486260763110963 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1393, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1393, beta=63), stopping probability=0.893171 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.01 (KL div: 2.0707) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.465702, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18311699470691442, EMA_p_value_std_dev: 0.014714353119989311 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1394, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1394, beta=63), stopping probability=0.894245 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.01 to 0.009999999999999998 (KL div: 1.3616) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.470953, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18180504159279975, EMA_p_value_std_dev: 0.014567577447617419 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1395, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1395, beta=63), stopping probability=0.895311 | |
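The learning-rate line almost always prints 0.009999999999999998, but flips to 0.01 right after the KL spike of 2.0707 above and back again one step later. One plausible reading, and only a guess, is a multiplicative adjustment clamped at a 0.01 cap, with float rounding leaving the stored value a couple of ULPs under the cap most of the time:

```python
# Guess at the learning-rate rule; the real update is not shown in the log.
def update_lr(lr: float, kl: float, gain: float = 1e-3, cap: float = 0.01) -> float:
    return min(cap, lr * (1.0 + gain * (kl - 1.0)))

lr = 0.009999999999999998
print(update_lr(lr, 2.0707))   # hits the cap and prints as 0.01, as logged
```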
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4147) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.414894, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.18037265036275865, EMA_p_value_std_dev: 0.014422265862577435 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1396, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1396, beta=63), stopping probability=0.896369 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7744) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.541431, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17949847589255696, EMA_p_value_std_dev: 0.014278403760598225 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1397, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1397, beta=63), stopping probability=0.897418 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3666) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.571251, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17875314823667607, EMA_p_value_std_dev: 0.01413597668308626 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1398, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1398, beta=63), stopping probability=0.898460 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3465) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.487830, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17841024947310938, EMA_p_value_std_dev: 0.013994970315672473 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1399, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1399, beta=63), stopping probability=0.899493 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.4957) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.486265, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1777668274245904, EMA_p_value_std_dev: 0.013855370486773641 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1400, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1400, beta=63), stopping probability=0.900518 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5842) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.469226, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17651659534918693, EMA_p_value_std_dev: 0.013717163166168073 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1401, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1401, beta=63), stopping probability=0.901534 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5361) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.486709, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1752121287154263, EMA_p_value_std_dev: 0.013580334463585548 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1402, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1402, beta=63), stopping probability=0.902543 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9682) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.250655, p-value=0.000004 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.1743422633959022, EMA_p_value_std_dev: 0.013444890400501276 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1403, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1403, beta=63), stopping probability=0.903544 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9978) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.401300, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17534948176484363, EMA_p_value_std_dev: 0.01331079739194627 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1404, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1404, beta=63), stopping probability=0.904536 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9800) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.349745, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17669089032606594, EMA_p_value_std_dev: 0.013178041961149703 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1405, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1405, beta=63), stopping probability=0.905521 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0637) | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Kuiper statistic=0.432890, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated EMA_relative_change: 0.17854140851653372, EMA_p_value_std_dev: 0.013046610765775332 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Updated Beta parameters: alpha=1406, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Chunk 45: Beta parameters updated (alpha=1406, beta=63), stopping probability=0.906497 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Added chunk 44 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Added chunk 44 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Processing chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Generating logits for model, chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:26 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:26 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:27 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:27 - INFO - Processing chunks from 46 to 46 | |
[llama_gguf_optmize v0.6.0] 10:07:27 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:27 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:27 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Inference time: 5539.19 ms | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Reusing freed chunk 0 for chunk 46. | |
[llama_gguf_optmize v0.6.0] 10:07:32 - DEBUG - Written chunk 46 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[46] 5981.72 ms | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Generating logits for model, chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:33 - INFO - Processing chunks from 46 to 46 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:33 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Inference time: 2526.32 ms | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Reusing freed chunk 0 for chunk 46. | |
[llama_gguf_optmize v0.6.0] 10:07:36 - DEBUG - Written chunk 46 at physical slot 0 | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[46] 2969.42 ms | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Comparing logits for chunk 46 | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:07:36 - INFO - Processing chunks 46 to 46... | |
[llama_gguf_optmize v0.6.0] 10:07:37 - DEBUG - Processing chunk 0, part 0 | |
[llama_gguf_optmize v0.6.0] 10:07:42 - DEBUG - Processing chunk 0, part 1 | |
[llama_gguf_optmize v0.6.0] 10:07:47 - DEBUG - Processing chunk 0, part 2 | |
[llama_gguf_optmize v0.6.0] 10:07:52 - DEBUG - Processing chunk 0, part 3 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - | |
===== KL-divergence statistics for Chunk 46 ===== | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Average : 0.013298 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - StdDev : 0.045913 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Median : 0.003842 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Maximum : 2.100630 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_99 : 0.126697 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_95 : 0.051914 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_90 : 0.030688 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_10 : 0.000059 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_05 : 0.000026 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - KLD_01 : 0.000006 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Samples seen: 188288 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
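"segmentation 0/32 (4096 + 128 - 1/ 128)" is a ceil-division readout: (4096 + 128 - 1) // 128 = 32 segments of 128 tokens per chunk, and each segment drives one Kuiper test and one Beta update, which is why exactly 32 "Beta parameters updated" lines follow each chunk's statistics. A skeleton of that loop with stand-in internals; the pass rule p < theta_P is inferred from the log, where every tiny p-value increments alpha:

```python
import numpy as np

def kuiper_test(x: np.ndarray):            # stub; see the two-sample sketch earlier
    return 0.5, 0.0

def update_beta(a: int, b: int, p: float, theta_p: float = 0.05):
    return (a + 1, b) if p < theta_p else (a, b + 1)   # guess at the pass rule

N_TOKENS, SEG = 4096, 128
n_segments = (N_TOKENS + SEG - 1) // SEG   # 32, the "(4096 + 128 - 1/ 128)" readout

kl_values = np.zeros(N_TOKENS)             # stand-in for the chunk's per-token KL
alpha, beta = 1406, 63                     # posterior state entering chunk 46
for s in range(n_segments):                # "segmentation 0/32", "1/32", ...
    stat, p_value = kuiper_test(kl_values[s * SEG:(s + 1) * SEG])
    alpha, beta = update_beta(alpha, beta, p_value)
```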
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.6374) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.248124, p-value=0.000006 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.18015566282993478, EMA_p_value_std_dev: 0.012916499950107135 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1407, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1407, beta=63), stopping probability=0.907466 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3237) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.443958, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1819879599216119, EMA_p_value_std_dev: 0.01278768475687118 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1408, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1408, beta=63), stopping probability=0.908427 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2777) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.430895, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1836787888473592, EMA_p_value_std_dev: 0.012660154495187754 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1409, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1409, beta=63), stopping probability=0.909380 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0592) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.461061, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.18472483726270453, EMA_p_value_std_dev: 0.012533896347866514 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1410, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1410, beta=63), stopping probability=0.910325 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5845) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.623516, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1854816462235328, EMA_p_value_std_dev: 0.012408897625564807 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1411, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1411, beta=63), stopping probability=0.911263 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1124) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.276803, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1859408645170756, EMA_p_value_std_dev: 0.012285119543779708 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1412, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1412, beta=63), stopping probability=0.912192 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2944) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.369194, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1874334710239158, EMA_p_value_std_dev: 0.012162576148360358 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1413, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1413, beta=63), stopping probability=0.913114 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0662) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.190255, p-value=0.002570 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.18980416857719462, EMA_p_value_std_dev: 0.012052720208882914 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1414, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1414, beta=63), stopping probability=0.914029 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.6823) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.315798, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.1930429796757973, EMA_p_value_std_dev: 0.011943960081858213 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1415, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1415, beta=63), stopping probability=0.914935 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.7128) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.285565, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.19512124753785398, EMA_p_value_std_dev: 0.01183628478733458 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1416, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1416, beta=63), stopping probability=0.915834 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2742) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.445845, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.19803045677756245, EMA_p_value_std_dev: 0.01172968372186652 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1417, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1417, beta=63), stopping probability=0.916726 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0358) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.352422, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.19988061648213415, EMA_p_value_std_dev: 0.011624146002025377 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1418, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1418, beta=63), stopping probability=0.917610 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9999) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.507044, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20113096520459944, EMA_p_value_std_dev: 0.011508195344177498 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1419, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1419, beta=63), stopping probability=0.918486 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2357) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.563951, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20238936274699312, EMA_p_value_std_dev: 0.01139340129467401 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1420, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1420, beta=63), stopping probability=0.919355 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9587) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.466152, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20248951086713773, EMA_p_value_std_dev: 0.011279752116764368 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1421, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1421, beta=63), stopping probability=0.920217 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2355) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.378620, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20250717075894314, EMA_p_value_std_dev: 0.011167236589404363 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1422, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1422, beta=63), stopping probability=0.921071 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8901) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.465080, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20226298536479984, EMA_p_value_std_dev: 0.011055843404425092 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1423, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1423, beta=63), stopping probability=0.921918 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8713) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.441437, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20194486993278105, EMA_p_value_std_dev: 0.010945561366465989 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1424, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1424, beta=63), stopping probability=0.922758 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9222) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.350723, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2017500257387452, EMA_p_value_std_dev: 0.01083637939184187 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1425, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1425, beta=63), stopping probability=0.923591 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0780) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.386131, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2012321042933323, EMA_p_value_std_dev: 0.010728286507414623 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1426, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1426, beta=63), stopping probability=0.924416 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9812) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.429754, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2003445419812342, EMA_p_value_std_dev: 0.010621271849509546 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1427, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1427, beta=63), stopping probability=0.925234 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8973) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.292766, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20006041436519076, EMA_p_value_std_dev: 0.010515324733798317 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1428, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1428, beta=63), stopping probability=0.926045 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9508) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.465415, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2005401406305359, EMA_p_value_std_dev: 0.010410434440566307 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1429, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1429, beta=63), stopping probability=0.926848 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3206) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.361451, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20144335012129755, EMA_p_value_std_dev: 0.010306590428010649 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1430, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1430, beta=63), stopping probability=0.927645 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9614) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.349292, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20223887414048455, EMA_p_value_std_dev: 0.010203782259478184 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1431, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1431, beta=63), stopping probability=0.928435 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2997) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.450469, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2029175274392632, EMA_p_value_std_dev: 0.010101999602426828 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1432, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1432, beta=63), stopping probability=0.929218 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1748) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.337447, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20307110405857418, EMA_p_value_std_dev: 0.010001232156454093 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1433, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1433, beta=63), stopping probability=0.929994 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1143) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.512215, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20375518225947398, EMA_p_value_std_dev: 0.009901469865754936 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1434, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1434, beta=63), stopping probability=0.930762 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0349) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.399761, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.2046636582354639, EMA_p_value_std_dev: 0.009802702703905767 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1435, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1435, beta=63), stopping probability=0.931524 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8302) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.364375, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20513761083995613, EMA_p_value_std_dev: 0.009704920744497449 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1436, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1436, beta=63), stopping probability=0.932280 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0539) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.449156, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20553861540282709, EMA_p_value_std_dev: 0.009608114160134228 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1437, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1437, beta=63), stopping probability=0.933028 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2783) | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Kuiper statistic=0.431671, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated EMA_relative_change: 0.20497948643843966, EMA_p_value_std_dev: 0.009512273221387436 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Updated Beta parameters: alpha=1438, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Chunk 46: Beta parameters updated (alpha=1438, beta=63), stopping probability=0.933770 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - TOTAL CHUNKS processed. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Added chunk 45 to freed chunks list in baseline_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Added chunk 45 to freed chunks list in target_logits.h5. | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Processing chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - INFO - Generating logits for model, chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:07:56 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:07:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:57 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:07:57 - INFO - Processing chunks from 47 to 47 | |
[llama_gguf_optmize v0.6.0] 10:07:57 - DEBUG - Resuming with existing HDF5 file: baseline_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:07:57 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:07:57 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Inference time: 5522.79 ms | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Reusing freed chunk 1 for chunk 47. | |
[llama_gguf_optmize v0.6.0] 10:08:02 - DEBUG - Written chunk 47 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[47] 5973.30 ms | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Generating logits for model, chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - Number of logits: 151936. | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Loaded precomputed tokens from /Users/Shared/Public/huggingface/Sailor2-1B-Chat/imatrix/combined/calibration-dataset.txt.tokens.npy | |
[llama_gguf_optmize v0.6.0] 10:08:03 - INFO - Processing chunks from 47 to 47 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - Resuming with existing HDF5 file: target_logits.h5 | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - require_bos: add_bos_token=false (require_bos=False) | |
[llama_gguf_optmize v0.6.0] 10:08:03 - DEBUG - require_eos: add_eos_token=true (require_eos=True) | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Inference time: 2441.65 ms | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Logits shape (4096, 151936) dtype float32 | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Reusing freed chunk 1 for chunk 47. | |
[llama_gguf_optmize v0.6.0] 10:08:06 - DEBUG - Written chunk 47 at physical slot 1 | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Estimated runtime: 0.00 minutes for 0 remaining chunks | |
[47] 2901.51 ms | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Processed 1 chunks | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Final file size: 4785.13 MB | |
ggml_metal_free: deallocating | |
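The "Reusing freed chunk 1 for chunk 47" lines above show the storage scheme that keeps baseline_logits.h5 and target_logits.h5 bounded: once a chunk has been compared, its physical slot in the HDF5 file goes onto a freed list and is recycled for a later chunk, so only a small window of (4096, 151936) float32 arrays is ever resident on disk. A minimal sketch of that pattern with h5py follows; the dataset name, tiny shapes, and bookkeeping are illustrative stand-ins, not the tool's actual schema.

import h5py
import numpy as np

CTX, VOCAB = 8, 16  # tiny stand-ins; the real arrays are (4096, 151936)

f = h5py.File("logits_demo.h5", "w")
dset = f.create_dataset("logits", shape=(2, CTX, VOCAB), dtype="float32")
slot_of = {}   # logical chunk id -> physical slot
freed = []     # physical slots available for reuse
next_new = 0   # next never-used slot

def write_chunk(chunk_id, logits):
    global next_new
    if freed:
        slot = freed.pop(0)                      # "Reusing freed chunk ..."
    else:
        slot, next_new = next_new, next_new + 1  # grow into a fresh slot
    dset[slot] = logits
    slot_of[chunk_id] = slot
    print(f"Written chunk {chunk_id} at physical slot {slot}")

write_chunk(45, np.zeros((CTX, VOCAB), np.float32))
write_chunk(46, np.zeros((CTX, VOCAB), np.float32))
freed.append(slot_of.pop(45))                       # chunk 45 compared, freed
write_chunk(47, np.ones((CTX, VOCAB), np.float32))  # lands back in slot 0
f.close()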
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Comparing logits for chunk 47 | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Loaded prior distribution, early stopping stats, and Bayesian prior state from file. | |
[llama_gguf_optmize v0.6.0] 10:08:06 - INFO - Processing chunks 47 to 47... | |
[llama_gguf_optmize v0.6.0] 10:08:07 - DEBUG - Processing chunk 1, part 0 | |
[llama_gguf_optmize v0.6.0] 10:08:12 - DEBUG - Processing chunk 1, part 1 | |
[llama_gguf_optmize v0.6.0] 10:08:17 - DEBUG - Processing chunk 1, part 2 | |
[llama_gguf_optmize v0.6.0] 10:08:22 - DEBUG - Processing chunk 1, part 3 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - kl_values_list size [(1024,), (1024,), (1024,), (1024,)] | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - | |
===== KL-divergence statistics for Chunk 47 ===== | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Average : 0.013446 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - StdDev : 0.038014 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Median : 0.004572 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Minimum : 0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Maximum : 1.367855 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_99 : 0.132956 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_95 : 0.051674 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_90 : 0.031284 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_10 : 0.000070 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_05 : 0.000027 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_01 : 0.000005 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Samples seen: 192384 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - segmentation 0/32 (4096 + 128 - 1/ 128) | |
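The per-chunk report above can be reproduced directly from the raw per-token KL values: the kl_values_list line shows chunk 47's 4096 tokens arriving in four (1024,) parts, and the segmentation line shows the same chunk re-split into 32 segments of 128 tokens for the early-stopping test that follows. A numpy sketch of the same statistics; computing KL as baseline || target from log-softmaxed logits is an assumption about the tool's convention, not something confirmed from its source.

import numpy as np

def token_kl(base_logits, tgt_logits):
    # Per-token KL(p || q) from raw logits, via log-softmax for stability.
    lp = base_logits - np.logaddexp.reduce(base_logits, axis=-1, keepdims=True)
    lq = tgt_logits - np.logaddexp.reduce(tgt_logits, axis=-1, keepdims=True)
    return np.sum(np.exp(lp) * (lp - lq), axis=-1)

def kl_stats(kl_values):
    # Same summary lines as the "KL-divergence statistics" block above.
    q = lambda p: float(np.percentile(kl_values, p))
    return {
        "Average": float(kl_values.mean()),
        "StdDev":  float(kl_values.std()),
        "Median":  q(50),
        "Minimum": float(kl_values.min()),
        "Maximum": float(kl_values.max()),
        "KLD_99": q(99), "KLD_95": q(95), "KLD_90": q(90),
        "KLD_10": q(10), "KLD_05": q(5),  "KLD_01": q(1),
    }

# Synthetic stand-in for the four (1024,) parts seen in kl_values_list:
kl_values = np.concatenate([np.random.gamma(0.2, 0.05, 1024) for _ in range(4)])
print(kl_stats(kl_values))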
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8388) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.399639, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20388181698368996, EMA_p_value_std_dev: 0.009417388296004643 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1439, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1439, beta=63), stopping probability=0.934505 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8734) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.324240, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20317003826948501, EMA_p_value_std_dev: 0.009323449848315927 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1440, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1440, beta=63), stopping probability=0.935233 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2912) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.491830, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20281882756056532, EMA_p_value_std_dev: 0.009230448436643043 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1441, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1441, beta=63), stopping probability=0.935955 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1543) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.440519, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20262776469082627, EMA_p_value_std_dev: 0.009138374714051597 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1442, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1442, beta=63), stopping probability=0.936670 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3082) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.492756, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20252943645898308, EMA_p_value_std_dev: 0.009047219426843 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1443, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1443, beta=63), stopping probability=0.937378 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2478) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.511295, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.2020333673031973, EMA_p_value_std_dev: 0.00895697341362431 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1444, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1444, beta=63), stopping probability=0.938080 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1389) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.396545, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20117852315556806, EMA_p_value_std_dev: 0.008867627603823407 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1445, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1445, beta=63), stopping probability=0.938776 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8975) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.401806, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.20005219601702548, EMA_p_value_std_dev: 0.00877917301847527 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1446, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1446, beta=63), stopping probability=0.939465 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.8883) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.448957, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19893248562726049, EMA_p_value_std_dev: 0.008691600767615978 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1447, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1447, beta=63), stopping probability=0.940148 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1712) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.447905, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1977191771819592, EMA_p_value_std_dev: 0.008604902049959008 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1448, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1448, beta=63), stopping probability=0.940824 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3553) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.528435, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19646245424095188, EMA_p_value_std_dev: 0.008519068152010667 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1449, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1449, beta=63), stopping probability=0.941495 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.3431) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.373173, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19598351144640036, EMA_p_value_std_dev: 0.008434090447194469 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1450, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1450, beta=63), stopping probability=0.942158 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1415) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.361916, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1951829581381491, EMA_p_value_std_dev: 0.008349960394984542 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1451, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1451, beta=63), stopping probability=0.942816 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2414) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.418907, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19490159783843797, EMA_p_value_std_dev: 0.008266669540045408 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1452, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1452, beta=63), stopping probability=0.943467 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.2933) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.487322, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1945613540723793, EMA_p_value_std_dev: 0.008184209511384292 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1453, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1453, beta=63), stopping probability=0.944113 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0357) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.402752, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19407491175664904, EMA_p_value_std_dev: 0.008102572021509071 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1454, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1454, beta=63), stopping probability=0.944752 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0752) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.454919, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1937169024396292, EMA_p_value_std_dev: 0.008021748865595377 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1455, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1455, beta=63), stopping probability=0.945385 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.5048) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.433627, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19315495431469856, EMA_p_value_std_dev: 0.007941731920661063 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1456, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1456, beta=63), stopping probability=0.946012 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9912) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.340474, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19268823600263346, EMA_p_value_std_dev: 0.007862513144790289 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1457, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1457, beta=63), stopping probability=0.946633 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9953) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.305864, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1919570421793953, EMA_p_value_std_dev: 0.00778408458628876 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1458, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1458, beta=63), stopping probability=0.947248 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 0.9142) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.235537, p-value=0.000026 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19157126028615618, EMA_p_value_std_dev: 0.007706556065898349 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1459, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1459, beta=63), stopping probability=0.947858 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0974) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.238718, p-value=0.000018 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19114791501163086, EMA_p_value_std_dev: 0.007629808578691157 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1460, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1460, beta=63), stopping probability=0.948461 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0130) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.343926, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.1910475156736969, EMA_p_value_std_dev: 0.007553826647660544 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1461, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1461, beta=63), stopping probability=0.949059 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.1018) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.297309, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19107500060885518, EMA_p_value_std_dev: 0.007478602621892268 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1462, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1462, beta=63), stopping probability=0.949650 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Learning rate updated from 0.009999999999999998 to 0.009999999999999998 (KL div: 1.0897) | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Kuiper statistic=0.318075, p-value=0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Adjusted decay rate: 0.5 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated theta_E 0.30142758880617465 and theta_P 0.050000000000000044 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated EMA_relative_change: 0.19050290709013015, EMA_p_value_std_dev: 0.007404128959205302 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Condition met: Incremented alpha. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - DEBUG - Updated Beta parameters: alpha=1463, beta=63 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Chunk 47: Beta parameters updated (alpha=1463, beta=63), stopping probability=0.950236 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Saved early stopping stats and Bayesian prior state to the output file. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Early stopping at chunk 47, segment 24 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - confidence_level: 0.95 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Saved prior distribution, early stopping stats, and Bayesian prior state to file. | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - TOTAL CHUNKS processed. | |
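The early-stopping machinery visible throughout the log follows a Bayesian pattern: each 128-token segment whose statistics satisfy the acceptance condition increments alpha in a Beta(alpha, beta) posterior over the per-segment success rate ("Condition met: Incremented alpha"), and the reported stopping probability behaves like the posterior mass above a target rate, with the run halting once that mass clears confidence_level (0.95). Here it reaches 0.950236 at chunk 47, segment 24, and the run stops. A scipy sketch of that rule; the 0.95 target rate is inferred from the logged numbers, not taken from the tool's source.

from scipy.stats import beta

CONFIDENCE = 0.95   # logged as confidence_level: 0.95
TARGET_RATE = 0.95  # assumed threshold on the per-segment success rate

def stopping_probability(a, b):
    # Posterior P(success rate > TARGET_RATE) under Beta(a, b)
    return 1.0 - beta.cdf(TARGET_RATE, a, b)

def should_stop(a, b):
    return stopping_probability(a, b) >= CONFIDENCE

print(stopping_probability(1439, 63))  # close to the logged 0.934505
print(should_stop(1463, 63))           # True once the mass clears 0.95,
                                       # matching the early stop above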
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - | |
===== Overall KL-divergence statistics ===== | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Average : 0.023298 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - StdDev : 0.096553 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Minimum : -0.000000 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Maximum : 6.588896 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_99 : 0.204897 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_95 : 0.068418 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_90 : 0.043672 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Median : 0.010238 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_10 : 0.000105 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_05 : 0.000026 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - KLD_01 : 0.000002 | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - Completed processing 2325 chunks. | |
Cumulative statistics stored in kl_divergence.h5. | |
ggml_metal_free: deallocating | |
[llama_gguf_optmize v0.6.0] 10:08:26 - INFO - kl_d_bench completed successfully. |
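All of the cumulative results land in kl_divergence.h5. Before scripting against it, a quick h5py walk shows what the file actually contains; nothing about its internal layout is assumed here.

import h5py

with h5py.File("kl_divergence.h5", "r") as f:
    # Print every group/dataset with its shape and dtype, plus file attrs.
    f.visititems(lambda name, obj: print(
        name, getattr(obj, "shape", ""), getattr(obj, "dtype", "")))
    print(dict(f.attrs))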
python separate_unused_samples.py --calibration-file calibration-dataset.txt --output-dir-250 train250 --output-dir-rest train500 --model ../../Sailor2-1B-Chat_Q4_K_M.gguf | |
llama_load_model_from_file: using device Metal (Apple M3 Max) - 40959 MiB free | |
llama_model_loader: loaded meta data with 33 key-value pairs and 579 tensors from ../../Sailor2-1B-Chat_Q4_K_M.gguf (version GGUF V3 (latest)) | |
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output. | |
llama_model_loader: - kv 0: general.architecture str = qwen2 | |
llama_model_loader: - kv 1: general.type str = model | |
llama_model_loader: - kv 2: general.name str = Sailor2 1B Chat | |
llama_model_loader: - kv 3: general.finetune str = Chat | |
llama_model_loader: - kv 4: general.basename str = Sailor2 | |
llama_model_loader: - kv 5: general.size_label str = 1B | |
llama_model_loader: - kv 6: general.license str = apache-2.0 | |
llama_model_loader: - kv 7: general.base_model.count u32 = 1 | |
llama_model_loader: - kv 8: general.base_model.0.name str = Sailor2 1B | |
llama_model_loader: - kv 9: general.base_model.0.organization str = Sail | |
llama_model_loader: - kv 10: general.base_model.0.repo_url str = https://huggingface.co/sail/Sailor2-1B | |
llama_model_loader: - kv 11: general.tags arr[str,6] = ["multilingual", "sea", "sailor", "sf... | |
llama_model_loader: - kv 12: general.languages arr[str,12] = ["en", "zh", "id", "th", "vi", "ms", ... | |
llama_model_loader: - kv 13: qwen2.block_count u32 = 48 | |
llama_model_loader: - kv 14: qwen2.context_length u32 = 32768 | |
llama_model_loader: - kv 15: qwen2.embedding_length u32 = 896 | |
llama_model_loader: - kv 16: qwen2.feed_forward_length u32 = 4864 | |
llama_model_loader: - kv 17: qwen2.attention.head_count u32 = 14 | |
llama_model_loader: - kv 18: qwen2.attention.head_count_kv u32 = 2 | |
llama_model_loader: - kv 19: qwen2.rope.freq_base f32 = 1000000.000000 | |
llama_model_loader: - kv 20: qwen2.attention.layer_norm_rms_epsilon f32 = 0.000001 | |
llama_model_loader: - kv 21: general.file_type u32 = 15 | |
llama_model_loader: - kv 22: tokenizer.ggml.model str = gpt2 | |
llama_model_loader: - kv 23: tokenizer.ggml.pre str = qwen2 | |
llama_model_loader: - kv 24: tokenizer.ggml.tokens arr[str,151936] = ["!", "\"", "#", "$", "%", "&", "'", ... | |
llama_model_loader: - kv 25: tokenizer.ggml.token_type arr[i32,151936] = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ... | |
llama_model_loader: - kv 26: tokenizer.ggml.merges arr[str,151387] = ["Ġ Ġ", "ĠĠ ĠĠ", "i n", "Ġ t",... | |
llama_model_loader: - kv 27: tokenizer.ggml.eos_token_id u32 = 151645 | |
llama_model_loader: - kv 28: tokenizer.ggml.padding_token_id u32 = 151643 | |
llama_model_loader: - kv 29: tokenizer.ggml.bos_token_id u32 = 151643 | |
llama_model_loader: - kv 30: tokenizer.ggml.add_bos_token bool = false | |
llama_model_loader: - kv 31: tokenizer.chat_template str = {% for message in messages %}{% if lo... | |
llama_model_loader: - kv 32: general.quantization_version u32 = 2 | |
llama_model_loader: - type f32: 241 tensors | |
llama_model_loader: - type q5_0: 265 tensors | |
llama_model_loader: - type q8_0: 25 tensors | |
llama_model_loader: - type q4_K: 24 tensors | |
llama_model_loader: - type q6_K: 24 tensors | |
llm_load_vocab: control token: 151659 '<|fim_prefix|>' is not marked as EOG | |
llm_load_vocab: control token: 151656 '<|video_pad|>' is not marked as EOG | |
llm_load_vocab: control token: 151655 '<|image_pad|>' is not marked as EOG | |
llm_load_vocab: control token: 151653 '<|vision_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151652 '<|vision_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151651 '<|quad_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151649 '<|box_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151648 '<|box_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151646 '<|object_ref_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151644 '<|im_start|>' is not marked as EOG | |
llm_load_vocab: control token: 151661 '<|fim_suffix|>' is not marked as EOG | |
llm_load_vocab: control token: 151647 '<|object_ref_end|>' is not marked as EOG | |
llm_load_vocab: control token: 151660 '<|fim_middle|>' is not marked as EOG | |
llm_load_vocab: control token: 151654 '<|vision_pad|>' is not marked as EOG | |
llm_load_vocab: control token: 151650 '<|quad_start|>' is not marked as EOG | |
llm_load_vocab: special tokens cache size = 22 | |
llm_load_vocab: token to piece cache size = 0.9310 MB | |
llm_load_print_meta: format = GGUF V3 (latest) | |
llm_load_print_meta: arch = qwen2 | |
llm_load_print_meta: vocab type = BPE | |
llm_load_print_meta: n_vocab = 151936 | |
llm_load_print_meta: n_merges = 151387 | |
llm_load_print_meta: vocab_only = 0 | |
llm_load_print_meta: n_ctx_train = 32768 | |
llm_load_print_meta: n_embd = 896 | |
llm_load_print_meta: n_layer = 48 | |
llm_load_print_meta: n_head = 14 | |
llm_load_print_meta: n_head_kv = 2 | |
llm_load_print_meta: n_rot = 64 | |
llm_load_print_meta: n_swa = 0 | |
llm_load_print_meta: n_embd_head_k = 64 | |
llm_load_print_meta: n_embd_head_v = 64 | |
llm_load_print_meta: n_gqa = 7 | |
llm_load_print_meta: n_embd_k_gqa = 128 | |
llm_load_print_meta: n_embd_v_gqa = 128 | |
llm_load_print_meta: f_norm_eps = 0.0e+00 | |
llm_load_print_meta: f_norm_rms_eps = 1.0e-06 | |
llm_load_print_meta: f_clamp_kqv = 0.0e+00 | |
llm_load_print_meta: f_max_alibi_bias = 0.0e+00 | |
llm_load_print_meta: f_logit_scale = 0.0e+00 | |
llm_load_print_meta: n_ff = 4864 | |
llm_load_print_meta: n_expert = 0 | |
llm_load_print_meta: n_expert_used = 0 | |
llm_load_print_meta: causal attn = 1 | |
llm_load_print_meta: pooling type = 0 | |
llm_load_print_meta: rope type = 2 | |
llm_load_print_meta: rope scaling = linear | |
llm_load_print_meta: freq_base_train = 1000000.0 | |
llm_load_print_meta: freq_scale_train = 1 | |
llm_load_print_meta: n_ctx_orig_yarn = 32768 | |
llm_load_print_meta: rope_finetuned = unknown | |
llm_load_print_meta: ssm_d_conv = 0 | |
llm_load_print_meta: ssm_d_inner = 0 | |
llm_load_print_meta: ssm_d_state = 0 | |
llm_load_print_meta: ssm_dt_rank = 0 | |
llm_load_print_meta: ssm_dt_b_c_rms = 0 | |
llm_load_print_meta: model type = 14B | |
llm_load_print_meta: model ftype = Q4_K - Medium | |
llm_load_print_meta: model params = 988.06 M | |
llm_load_print_meta: model size = 698.72 MiB (5.93 BPW) | |
llm_load_print_meta: general.name = Sailor2 1B Chat | |
llm_load_print_meta: BOS token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOS token = 151645 '<|im_end|>' | |
llm_load_print_meta: EOT token = 151645 '<|im_end|>' | |
llm_load_print_meta: PAD token = 151643 '<|endoftext|>' | |
llm_load_print_meta: LF token = 148848 'ÄĬ' | |
llm_load_print_meta: FIM PRE token = 151659 '<|fim_prefix|>' | |
llm_load_print_meta: FIM SUF token = 151661 '<|fim_suffix|>' | |
llm_load_print_meta: FIM MID token = 151660 '<|fim_middle|>' | |
llm_load_print_meta: FIM PAD token = 151662 '<|fim_pad|>' | |
llm_load_print_meta: FIM REP token = 151663 '<|repo_name|>' | |
llm_load_print_meta: FIM SEP token = 151664 '<|file_sep|>' | |
llm_load_print_meta: EOG token = 151643 '<|endoftext|>' | |
llm_load_print_meta: EOG token = 151645 '<|im_end|>' | |
llm_load_print_meta: EOG token = 151662 '<|fim_pad|>' | |
llm_load_print_meta: EOG token = 151663 '<|repo_name|>' | |
llm_load_print_meta: EOG token = 151664 '<|file_sep|>' | |
llm_load_print_meta: max token length = 256 | |
llm_load_tensors: tensor 'token_embd.weight' (q5_0) (and 578 others) cannot be used with preferred buffer type CPU_AARCH64, using CPU instead | |
llm_load_tensors: offloading 0 repeating layers to GPU | |
llm_load_tensors: offloaded 0/49 layers to GPU | |
llm_load_tensors: CPU_Mapped model buffer size = 698.72 MiB | |
...................................................................... | |
llama_new_context_with_model: n_seq_max = 1 | |
llama_new_context_with_model: n_ctx = 512 | |
llama_new_context_with_model: n_ctx_per_seq = 512 | |
llama_new_context_with_model: n_batch = 512 | |
llama_new_context_with_model: n_ubatch = 512 | |
llama_new_context_with_model: flash_attn = 0 | |
llama_new_context_with_model: freq_base = 1000000.0 | |
llama_new_context_with_model: freq_scale = 1 | |
llama_new_context_with_model: n_ctx_per_seq (512) < n_ctx_train (32768) -- the full capacity of the model will not be utilized | |
ggml_metal_init: allocating | |
ggml_metal_init: found device: Apple M3 Max | |
ggml_metal_init: picking default device: Apple M3 Max | |
ggml_metal_init: using embedded metal library | |
ggml_metal_init: GPU name: Apple M3 Max | |
ggml_metal_init: GPU family: MTLGPUFamilyApple9 (1009) | |
ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003) | |
ggml_metal_init: GPU family: MTLGPUFamilyMetal3 (5001) | |
ggml_metal_init: simdgroup reduction = true | |
ggml_metal_init: simdgroup matrix mul. = true | |
ggml_metal_init: has bfloat = true | |
ggml_metal_init: use bfloat = false | |
ggml_metal_init: hasUnifiedMemory = true | |
ggml_metal_init: recommendedMaxWorkingSetSize = 42949.67 MB | |
ggml_metal_init: loaded kernel_add 0x146e2d2c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_add_row 0x146e2d9d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_sub 0x146e2df80 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_sub_row 0x146e2e530 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul 0x146e2eae0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_row 0x146e2f090 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_div 0x146e2f640 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_div_row 0x146e2fbf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_f32 0x146e301a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_f16 0x146e306a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_i32 0x146e30ba0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_repeat_i16 0x146e310a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_scale 0x146e31bc0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_scale_4 0x146e32370 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_clamp 0x146e32b80 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_tanh 0x146e332a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_relu 0x146e339c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_sigmoid 0x146e340e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu 0x146e34800 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu_4 0x146e34fd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu_quick 0x146e356f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_gelu_quick_4 0x146e35e10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_silu 0x146e36530 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_silu_4 0x146e36dd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_elu 0x146e374f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f16 0x146e377b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f16_4 0x146e38420 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f32 0x146e38960 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_soft_max_f32_4 0x146e38ea0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_diag_mask_inf 0x146e39160 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_diag_mask_inf_8 0x146e39600 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_f32 0x146e398c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_f16 0x146e3a150 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: skipping kernel_get_rows_bf16 (not supported) | |
ggml_metal_init: loaded kernel_get_rows_q4_0 0x146e3a410 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q4_1 0x146e3a8b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q5_0 0x146e3ad50 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q5_1 0x146e3b1f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q8_0 0x146e3b690 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q2_K 0x146e3bb30 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q3_K 0x146e3bfd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q4_K 0x146e3c470 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q5_K 0x146e3c910 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_q6_K 0x146e3cdb0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq2_xxs 0x146e3d070 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq2_xs 0x146e3d680 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq3_xxs 0x146e3dc90 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq3_s 0x146e3e2a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq2_s 0x146e3ebc0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq1_s 0x146e3f1d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq1_m 0x146e3f7e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq4_nl 0x146e3fdf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_iq4_xs 0x146e40400 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_get_rows_i32 0x146e40a10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_rms_norm 0x146e41200 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_group_norm 0x146e416a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_norm 0x146e41b40 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_ssm_conv_f32 0x146e41e00 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_ssm_scan_f32 0x146e42410 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f32_f32 0x146e42c00 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: skipping kernel_mul_mv_bf16_f32 (not supported) | |
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row (not supported) | |
ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4 (not supported) | |
ggml_metal_init: skipping kernel_mul_mv_bf16_bf16 (not supported) | |
ggml_metal_init: loaded kernel_mul_mv_f16_f32 0x146e42ec0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f16_f32_1row 0x146e43360 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f16_f32_l4 0x146e43800 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_f16_f16 0x146e43ca0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q4_0_f32 0x146e44140 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q4_1_f32 0x146e445e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q5_0_f32 0x146e44a80 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q5_1_f32 0x146e44f20 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q8_0_f32 0x146e453c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_2 0x146e45910 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_3 0x146e45e60 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_4 0x146e463b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_5 0x146e46900 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_2 0x146e46e50 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_3 0x146e473a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_4 0x146e478f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_5 0x146e47e40 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_2 0x146e48390 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_3 0x146e488e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_4 0x146e48e30 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_5 0x146e49380 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_2 0x146e498d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_3 0x146e49e20 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_4 0x146e4a370 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_5 0x146e4a8c0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_2 0x146e4ae10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_3 0x146e4b360 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_4 0x146e4b8b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_5 0x146e4be00 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_2 0x146e4c350 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_3 0x146e4c8a0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_4 0x146e4cdf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_5 0x146e4d340 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_2 0x146e4d890 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_3 0x146e4dde0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_4 0x146e4e330 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_5 0x146e4e880 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_2 0x146e4edd0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_3 0x146e3e8b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_4 0x146e4f240 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_5 0x146e4f9f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_2 0x146e4ff40 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_3 0x146e50490 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_4 0x146e509e0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_5 0x146e50f30 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_2 0x146e51480 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_3 0x146e519d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_4 0x146e51f20 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_5 0x146e52470 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q2_K_f32 0x146e52910 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q3_K_f32 0x146e52db0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q4_K_f32 0x146e53250 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q5_K_f32 0x146e536f0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_q6_K_f32 0x146e53b90 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq2_xxs_f32 0x146e54030 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq2_xs_f32 0x146e544d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq3_xxs_f32 0x146e54970 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq3_s_f32 0x146e54e10 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq2_s_f32 0x146e552b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq1_s_f32 0x146e55750 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq1_m_f32 0x146e55bf0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq4_nl_f32 0x146e56090 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_iq4_xs_f32 0x146e56530 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_f32_f32 0x146e569d0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_f16_f32 0x146e56e70 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32 (not supported) | |
ggml_metal_init: loaded kernel_mul_mv_id_q4_0_f32 0x146e57310 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_q4_1_f32 0x146e577b0 | th_max = 1024 | th_width = 32 | |
ggml_metal_init: loaded kernel_mul_mv_id_q5_0_f32 0x146e57c50 | th_max = 1024 | th_width = 32 |
[log truncated: GitHub cannot render the remainder of this file]
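The load log above shows the finished Q4_K - Medium file (general.file_type = 15) with a mixed per-tensor layout (q5_0, q8_0, q4_K, q6_K). The gist does not show the quantization command itself, but a minimal sketch of that step, assuming the llama-quantize binary from the same llama.cpp build and hypothetical file names for the imatrix and output, would look like:

# sketch only: file names are assumptions, not taken from the log above
./llama-quantize --imatrix $HF/Sailor2-1B-Chat.imatrix $HF/Sailor2-1B-Chat_bf16.gguf $HF/Sailor2-1B-Chat_Q4_K_M.gguf Q4_K_M

Passing the imatrix lets the k-quant code weight each tensor's quantization error by observed activation importance, which is the point of computing the imatrix in the first place.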