Created
September 14, 2023 03:56
-
-
Save kingbri1/41ce50c1889020cfd9dfe79099d99132 to your computer and use it in GitHub Desktop.
Pyg-13b-supercot2-measurement
This file has been truncated, but you can view the full file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"measurement": [ | |
{ | |
"key": "model.layers.0.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.01824951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0180511474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.009307861328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0092620849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0279388427734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.01788330078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.00928497314453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.00922393798828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0094757080078125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.01071929931640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.00921630859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.00672149658203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0059967041015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0059967041015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00586700439453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0055694580078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.005756378173828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.0.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.01971435546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0194091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0091094970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.00902557373046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.02423095703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.019195556640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.00907135009765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.00899505615234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.00925445556640625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.009979248046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.00896453857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.00547027587890625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0042877197265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0042877197265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00394439697265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00336456298828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.003856658935546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.0.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.06103515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.050872802734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.033966064453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0271453857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.050811767578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.039276123046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0289459228515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0223388671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0231170654296875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.02703857421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0190277099609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01351165771484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.007659912109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.006824493408203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.007015228271484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.004070281982421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00460052490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.0.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.0221405029296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0130462646484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.00966644287109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.009185791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.013214111328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0116729736328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.01013946533203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.006069183349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.00634765625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.006702423095703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.005809783935546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0035686492919921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.003139495849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.00274658203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0022945404052734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0020351409912109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00241851806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.0.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.07269287109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.0677490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.0596923828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.032470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.036285400390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.0335693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.032958984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0294189453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.02789306640625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.018402099609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.01617431640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.00928497314453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.00860595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.00806427001953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.00522613525390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.00507354736328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.004730224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.0.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1103515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.10302734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.09088134765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.04925537109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.054962158203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.05078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.0499267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.044586181640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.042236328125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.0277099609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.0240631103515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.01381683349609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.01216888427734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.01128387451171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.007366180419921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.00714111328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.0052642822265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.0.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.0623779296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.050933837890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.031829833984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.0253753662109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.04730224609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.041656494140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.02734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.0214996337890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.0213775634765625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.02093505859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.01885986328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.01143646240234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.00865936279296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.00812530517578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.007663726806640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0063018798828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.00634002685546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.0157470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0124969482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.008453369140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.006744384765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.01219940185546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.00969696044921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.00732421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.005523681640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.005878448486328125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.006450653076171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.00478363037109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0033206939697265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.001903533935546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0016984939575195312, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0017747879028320312, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0011072158813476562, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.001140594482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.01482391357421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.01154327392578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0078277587890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.00620269775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.01006317138671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.00884246826171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.006801605224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.00496673583984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.00506591796875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.005096435546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.004230499267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.002567291259765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0017156600952148438, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0015039443969726562, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0013866424560546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0009746551513671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.001010894775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.083251953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0701904296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0555419921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.037200927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.04620361328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.04241943359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.03912353515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0301055908203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0280914306640625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.02362060546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0203399658203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01183319091796875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.009490966796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.00811004638671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00626373291015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.005428314208984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0044403076171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.147705078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.09344482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.06488037109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.06329345703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.089111328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0794677734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0704345703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.04052734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.04290771484375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.04595947265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0391845703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0236358642578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01898193359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0154876708984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01372528076171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01151275634765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01265716552734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.11834716796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.11199951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.0999755859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.05230712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.0577392578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.053375244140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.052825927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.04815673828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.045928955078125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.029205322265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.025421142578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0146484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.01336669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0126190185546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.008148193359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.00800323486328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.006740570068359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1494140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.1416015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1270751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.0662841796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.0728759765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.0673828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.06683349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.061065673828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.058197021484375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.036468505859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.03173828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.01812744140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.015869140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.01486968994140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.00940704345703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.00923919677734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00586700439453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.1.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.1392822265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.1268310546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.10498046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.06683349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.0888671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.080078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.068359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.061676025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.05914306640625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.0494384765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.022369384765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.01727294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.0164031982421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.01434326171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.011932373046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.010589599609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.0352783203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.032318115234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0275726318359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.015869140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.020263671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0171661376953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0161895751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.01401519775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0133056640625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.01025390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.00821685791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.005153656005859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.00399017333984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0036296844482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0027408599853515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0023708343505859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.001804351806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.02862548828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.026153564453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.02227783203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0128326416015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.01509857177734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0138092041015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.01309967041015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.01129150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.01064300537109375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.00760650634765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.006557464599609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.00380706787109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0031986236572265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.002895355224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.002017974853515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0018644332885742188, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0013942718505859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1324462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.12310791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.10809326171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.060577392578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.06781005859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.06268310546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0616455078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.05419921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0511474609375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.034271240234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0298004150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01708984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0146636962890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01336669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00881195068359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00847625732421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00542449951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.13232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.09552001953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.07281494140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.09063720703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.08197021484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.07757568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0540771484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.04974365234375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.04681396484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0401611328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.024017333984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.020538330078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0173492431640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0141143798828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.012969970703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.012542724609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.165283203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.1566162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1409912109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.0753173828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.08306884765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.0767822265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.07611083984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.0665283203125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.042266845703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.03656005859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.021148681640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0187835693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0176239013671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0114593505859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01128387451171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00836181640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2005615234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.190185546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1715087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.09149169921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.1009521484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.09320068359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.09246826171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.084228515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.08087158203125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.050994873046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.044189453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0254364013671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0221710205078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0207061767578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0133514404296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01311492919921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.0083465576171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.2.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.1937255859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.177734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.1566162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.086669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.099853515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.0887451171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.0780029296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.07501220703125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.05047607421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.04364013671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.0252685546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.0216064453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.01995849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.01361083984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0129241943359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.00957489013671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.048248291015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.04449462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.03826904296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0219879150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.026580810546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0236968994140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.02239990234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.01953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.018646240234375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0136260986328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.011383056640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.00688934326171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0055084228515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.005031585693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.003665924072265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0032711029052734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0024318695068359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.039794921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.03668212890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.031494140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.018096923828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0213775634765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.01947021484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0184478759765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.01605224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.01525115966796875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.01081085205078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0092926025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0054168701171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.00455474853515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.00415802001953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0028839111328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00267791748046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.002056121826171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.177978515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1658935546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.14599609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0821533203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.092529296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.08489990234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.08349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.073486328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0697021484375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0404052734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0233612060546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.019927978515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0181884765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01212310791015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01165008544921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.007381439208984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1297607421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.11181640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0806884765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.059112548828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0823974609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0704345703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.061309814453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.04779052734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.04315185546875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.042266845703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.035125732421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0225372314453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01837158203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0165863037109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0139007568359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.012176513671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01279449462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1842041015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.173828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.156005859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.085693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.0947265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.08758544921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.086669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0782470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.0748291015625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.04852294921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.042266845703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0243377685546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.02215576171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.020751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.013458251953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.0131988525390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01114654541015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2257080078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.2132568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.19140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.10479736328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.11590576171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.10699462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.10601806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.09564208984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09136962890625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.058868408203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.05108642578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0294189453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0258636474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0240020751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0156402587890625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01531219482421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01067352294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.3.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.0265045166015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.0234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.0204925537109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.0140228271484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.0159759521484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.01415252685546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.01445770263671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.012420654296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.01241302490234375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.01134490966796875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.00939178466796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.00385284423828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.0035552978515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.00324249267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0025882720947265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0020313262939453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.0017747879028320312, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.06341552734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.058074951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0499267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.028900146484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.033447265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.03082275390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0295257568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0254364013671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.024017333984375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.01690673828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.014678955078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.00844573974609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.00714874267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.006465911865234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00441741943359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0041351318359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.002986907958984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.056243896484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.051513671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.044189453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.025634765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0298919677734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0274505615234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0261688232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0225677490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0213470458984375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.01514434814453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.01306915283203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.007572174072265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.00634002685546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.005725860595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0039520263671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0036792755126953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00261688232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1845703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.171630859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.150634765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0855712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0960693359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.08880615234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.08709716796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0762939453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.07196044921875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0487060546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.042205810546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.024261474609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.020782470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0188751220703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0124969482421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01200103759765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.007648468017578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.183349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1605224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1302490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0855712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.10113525390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.08807373046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.06402587890625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.052459716796875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.045135498046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.026580810546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0229034423828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.020050048828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01494598388671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0139312744140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.012451171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.19482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.1837158203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1641845703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.0908203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.1002197265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.0927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.09185791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0823974609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.07830810546875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.05084228515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.044219970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0253753662109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0222930908203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.020599365234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.01329803466796875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.0130157470703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00891876220703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2364501953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.2227783203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.199462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.1102294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.12139892578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11248779296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.11151123046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.09991455078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09503173828125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.061553955078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.05364990234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.030670166015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0266571044921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.02459716796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.01580810546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01544952392578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.009613037109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.4.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.2342529296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.212158203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.1829833984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.10687255859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.1224365234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.11285400390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.109619140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.09356689453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.088134765625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.06219482421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.053863525390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.031097412109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.026580810546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.02398681640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0166473388671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.015777587890625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.01143646240234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.07391357421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0677490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.058013916015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.033782958984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0391845703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.036224365234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.03448486328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.029693603515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.027984619140625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.01983642578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.01727294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.00992584228515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.00844573974609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.00763702392578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00522613525390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0048675537109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.003681182861328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.0643310546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.058929443359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0504150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0294342041015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.03460693359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.031768798828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.030059814453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0258941650390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0245361328125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0175323486328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.01513671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.008758544921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.00732421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.006622314453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.004596710205078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0042572021484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0031280517578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.195068359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.180908203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1585693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.090576171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.10198974609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.093994140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.09210205078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.08050537109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.07598876953125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.051727294921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0447998046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0258026123046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.02203369140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01995849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0133209228515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01275634765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0081634521484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.18115234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1588134765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.12445068359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.08355712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0982666015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0899658203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.08648681640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0679931640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.06005859375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.050689697265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.043731689453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.025726318359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0225067138671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0196533203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01458740234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01374053955078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01244354248046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1812744140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.1705322265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.152099609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.0845947265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.09375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.086669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.085693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0765380859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.07275390625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.0477294921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.04150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.023834228515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.02105712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0194549560546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.01264190673828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.0123443603515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.009002685546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2415771484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.2274169921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.202880859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.11285400390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.12493896484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11553955078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.11431884765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.10205078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09698486328125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.06329345703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.05511474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.03155517578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0273590087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.025146484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.016326904296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01593017578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00989532470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.5.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.21044921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.1802978515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.106201171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.12213134765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.1126708984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.10919189453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.09271240234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.08721923828125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.06207275390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.05389404296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.031097412109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.02667236328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.0240478515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0167236328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0158233642578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.01187896728515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.08935546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0828857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.07208251953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.041351318359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.04718017578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.043609619140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.042083740234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0367431640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.034698486328125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.023956298828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.020843505859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01198577880859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0103302001953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0094146728515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0063323974609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00598907470703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00447845458984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.074951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0694580078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.06036376953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.03466796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0400390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0367431640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.035247802734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.030853271484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.029266357421875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0203094482421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0175323486328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01016998291015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.008575439453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0078277587890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.005329132080078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00502777099609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00356292724609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2110595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1968994140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1734619140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.09844970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.1103515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.101806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.09991455078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.08306884765625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.055938720703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.048492431640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.027862548828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.02386474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.021697998046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01433563232421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01378631591796875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0086669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.18505859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1563720703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1229248046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.08514404296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.1029052734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.09381103515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0882568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.06683349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0611572265625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0528564453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0455322265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0265960693359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0228118896484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0194854736328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01488494873046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01367950439453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0124664306640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1761474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.1651611328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1473388671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.08233642578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.09124755859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.08441162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.08349609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0743408203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.07049560546875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.04638671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.04034423828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0231475830078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0202484130859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.018646240234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.01214599609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.0118255615234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00811767578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2440185546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.2294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.2047119140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.1142578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.1265869140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.1170654296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.11572265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.103271484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.097900390625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.06427001953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.055877685546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.032073974609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.027740478515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0254669189453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.016632080078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.016204833984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01020050048828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.6.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.2381591796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.2149658203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.1845703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.1090087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.125732421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.11572265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.112060546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.09490966796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.0892333984375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.0638427734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.055328369140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.032012939453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.0273284912109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.0245819091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0171966552734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0162506103515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.01218414306640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.08746337890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.08111572265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.070556640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.040435791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0462646484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.042694091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.041107177734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0360107421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0340576171875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0234832763671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0204010009765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01174163818359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.010009765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0091094970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0061492919921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00579833984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.004123687744140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.07427978515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.06903076171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.05987548828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.034332275390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0396728515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.03662109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.03497314453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.03057861328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.02899169921875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.020111083984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0174713134765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01004791259765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0084991455078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.007740020751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0052490234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.004913330078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00348663330078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.207763671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.19384765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1707763671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.09710693359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.10906982421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.10040283203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.09857177734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.086669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.08197021484375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.055419921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0478515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.027618408203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0235595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.02142333984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01422119140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01361846923828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00860595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.20263671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1768798828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1416015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.09271240234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.11114501953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.10137939453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.096923828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.07684326171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0692138671875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.05731201171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.04949951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0289764404296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0251922607421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0223541259765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0165252685546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01534271240234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01428985595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1634521484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.153564453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.136474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.0765380859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.0849609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.07867431640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.07757568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.069091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.0655517578125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.043212890625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.037628173828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.021575927734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0189361572265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.017425537109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0113372802734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.0110321044921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.007778167724609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2384033203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.22412109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.19970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.1119384765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.1240234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.1134033203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.10107421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09588623046875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.06317138671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.054901123046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.031524658203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.027618408203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0254058837890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0165557861328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.0161285400390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01123046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.7.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.2279052734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.2059326171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.1761474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.104248046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.12139892578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.11090087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.107177734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.09088134765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.08538818359375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.06146240234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.05322265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.031005859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.026702880859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.024139404296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.017059326171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0161285400390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.01287078857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.09759521484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.09075927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.045166015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.051605224609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.047637939453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.045928955078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.040252685546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0380859375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0261993408203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.022735595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01308441162109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0111846923828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01018524169921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.006862640380859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.006488800048828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0046234130859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.081298828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0755615234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.06561279296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.037628173828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0433349609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.039947509765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.0382080078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.033538818359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.031768798828125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.021942138671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.019073486328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0109710693359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0092926025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.00848388671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0057373046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.005401611328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0038242340087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2215576171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.20654296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.18212890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10333251953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.116455078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.10699462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.10504150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.09246826171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.08758544921875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.059112548828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.051025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0294952392578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.025054931640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0228271484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01519012451171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0145263671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0091094970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1998291015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.17626953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.142822265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.0927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.10797119140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.09906005859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.09539794921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.07635498046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.06878662109375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.055816650390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.048370361328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.028289794921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0249786376953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0219879150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0161590576171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01532745361328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01380157470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1702880859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.16015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1424560546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.08001708984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.08880615234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.0821533203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.08099365234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.07220458984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.068603515625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.04534912109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.039398193359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.02264404296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0200042724609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0184478759765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0120391845703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01171112060546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.0087432861328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.238037109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.223876953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.199462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.11163330078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.1239013671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.1131591796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.1007080078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.095703125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.0628662109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.054718017578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0313720703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0271759033203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0249176025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0162811279296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01580810546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01004791259765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.8.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.2406005859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.2171630859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.18603515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.1102294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.12744140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.11712646484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.11322021484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.095947265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.090087890625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.06475830078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.05621337890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.032562255859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.0280303955078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.025299072265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0176544189453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0166778564453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.013153076171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.10589599609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.09832763671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.08599853515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.04913330078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.056182861328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0517578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.049957275390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.043853759765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.041534423828125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.028594970703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0248565673828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01432037353515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.012451171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01142120361328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.007640838623046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00724029541015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.005733489990234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.091064453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.08477783203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.07391357421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.042266845703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.048583984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0447998046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.042938232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.037750244140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.035858154296875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.024658203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.021392822265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0123291015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01047515869140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0095672607421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0064697265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.006092071533203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.004352569580078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2197265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.205322265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.180908203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10272216796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.1158447265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.10614013671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.1041259765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0919189453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.08709716796875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.058807373046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.050689697265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.029327392578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.024993896484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.02276611328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01515960693359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.014495849609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0092620849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2117919921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1829833984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.144775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.09722900390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.11602783203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.1063232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.10089111328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.078857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.07110595703125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.05963134765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.05206298828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.030303955078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.02703857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0237579345703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0174407958984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0162811279296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0160064697265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.175537109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.1650390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.14697265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.08245849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.09161376953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.0848388671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.08355712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.074462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.07061767578125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.04669189453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.040679931640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.023345947265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.020599365234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.01898193359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0123748779296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01202392578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00890350341796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.241943359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.227294921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.202392578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.11328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.1258544921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11639404296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.1148681640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.10235595703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.0970458984375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.06390380859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.055572509765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0318603515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.027557373046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.025299072265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0165252685546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.016021728515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01016998291015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.9.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.2476806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.2236328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.191162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.11370849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.1314697265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.12091064453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.116943359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.09881591796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.09271240234375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.06707763671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.058074951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.033660888671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.029022216796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.026153564453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0184173583984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0173492431640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.0137939453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.11126708984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1036376953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0904541015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.05169677734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.058868408203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.0543212890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.052490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0460205078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0435791015625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.029937744140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0260009765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0149688720703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01287078857421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.011749267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00787353515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00746917724609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0054931640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.09185791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.08551025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.074462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.042510986328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.048980712890625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.045257568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.043243408203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0379638671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.035980224609375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0247955322265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0215911865234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01241302490234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0105438232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.009613037109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.006500244140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00611114501953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.004367828369140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2200927734375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.20556640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.1026611328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.11590576171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.10687255859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.10418701171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.09185791015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0867919921875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.05877685546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.050994873046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.029296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.024993896484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.02276611328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01512908935546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01445770263671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00933074951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2244873046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1995849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1636962890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10394287109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.12164306640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.11199951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.1072998046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.08740234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.079833984375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0628662109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0545654296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.031829833984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0281219482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.02508544921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.01806640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0170440673828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0157470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.170654296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.160400390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1424560546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.08026123046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.08935546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.08251953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.08135986328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.072265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.06866455078125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.04559326171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.039642333984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0228271484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0201873779296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.018646240234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.01224517822265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01190185546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.0090789794921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2392578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.199951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.112060546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.12457275390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11505126953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.11370849609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.10101318359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09576416015625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.063232421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.054931640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.031524658203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0272674560546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.024993896484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.016357421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.015899658203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01006317138671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.10.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.2509765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.22705078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.194091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.11529541015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.1322021484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.121826171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.11834716796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.10015869140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.0938720703125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.0673828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.058380126953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.03387451171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.029266357421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.0263671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.01849365234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0175628662109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.013641357421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1094970703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.101806640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.08880615234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.05078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0579833984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.05352783203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.051605224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.045318603515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.042877197265625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.02947998046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0255889892578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0147247314453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01256561279296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01146697998046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.00772857666015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.007297515869140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0052032470703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.09210205078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.08575439453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.07464599609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.042694091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.0491943359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.045440673828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.04345703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.038177490234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.036224365234375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0249176025390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0217132568359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01245880126953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01055145263671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0096435546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.006519317626953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.006130218505859375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.004314422607421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2216796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.2069091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.181884765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10357666015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.1170654296875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.10772705078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.1051025390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.092529296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.08734130859375, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.059326171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0513916015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.02960205078125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.02520751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0229644775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.015289306640625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.01457977294921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.00942230224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2191162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.1954345703125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.16015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10125732421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.12371826171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.11322021484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.1053466796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.08642578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.080078125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0634765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.05499267578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.03192138671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.027191162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.024444580078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0176544189453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0160064697265625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01493072509765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.16162109375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.143310546875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.08087158203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.09002685546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.08331298828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.08197021484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.07275390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.069091796875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.045989990234375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.0400390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.02301025390625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0204620361328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.01885986328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0123443603515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01197052001953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.00933837890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.2381591796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.2236328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.1986083984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.1116943359375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.124267578125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11480712890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.11322021484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.10040283203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09527587890625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.06298828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.054840087890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.03143310546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0272216796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0249481201171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.016265869140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.015777587890625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01018524169921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.11.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.254150390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.229736328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.19580078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.1168212890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.13427734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 221204736.0, | |
"err": 0.12384033203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1715892650462965, | |
"total_bits": 224481536.0, | |
"err": 0.1201171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.523441116898148, | |
"total_bits": 249385216.0, | |
"err": 0.10137939453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6644983362268517, | |
"total_bits": 259369088.0, | |
"err": 0.0947265625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 285332544.0, | |
"err": 0.068603515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 291983616.0, | |
"err": 0.059478759765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 356111424.0, | |
"err": 0.03448486328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.227144820601852, | |
"total_bits": 369971456.0, | |
"err": 0.0296478271484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.523441116898148, | |
"total_bits": 390942976.0, | |
"err": 0.026611328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 426890304.0, | |
"err": 0.0188140869140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.235026945891204, | |
"total_bits": 441308224.0, | |
"err": 0.0178375244140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 575099136.0, | |
"err": 0.01373291015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.self_attn.q_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.1148681640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.10687255859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.09344482421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.053375244140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.060760498046875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.05621337890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.054229736328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.047637939453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.04498291015625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.030853271484375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.026824951171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.01541900634765625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01316070556640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01200103759765625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0080413818359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0076141357421875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0052947998046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.self_attn.k_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.09600830078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.0894775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.0780029296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.04461669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.051300048828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.047454833984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.045379638671875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.039886474609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.0377197265625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.0259552001953125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.02264404296875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.012969970703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.01099395751953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.01003265380859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.006778717041015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.00637054443359375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.004428863525390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.self_attn.v_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.2298583984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.2144775390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.1881103515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10736083984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.12158203125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.11187744140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.10906982421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.09600830078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.09063720703125, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.061737060546875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.053497314453125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0308074951171875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0262451171875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0239410400390625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0159912109375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.0152435302734375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.0100555419921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.self_attn.o_proj", | |
"numel": 26214400, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.17529296875, | |
"total_bits": 57024000.0, | |
"err": 0.230224609375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 62266880.00000001, | |
"err": 0.206298828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.72529296875, | |
"total_bits": 71441920.0, | |
"err": 0.171142578125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.22529296875, | |
"total_bits": 84549120.0, | |
"err": 0.10699462890625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 79464320.0, | |
"err": 0.125244140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, | |
"total_bits": 81927680.0, | |
"err": 0.1146240234375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.17529296875, | |
"total_bits": 83238400.0, | |
"err": 0.1099853515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.52529296875, | |
"total_bits": 92413440.0, | |
"err": 0.0904541015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.662646484375, | |
"total_bits": 96014080.0, | |
"err": 0.08355712890625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.0313232421875, | |
"total_bits": 105678720.0, | |
"err": 0.064208984375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.12529296875, | |
"total_bits": 108142080.0, | |
"err": 0.0555419921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.0313232421875, | |
"total_bits": 131893120.0, | |
"err": 0.0323486328125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.22529296875, | |
"total_bits": 136977920.0, | |
"err": 0.0283203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.52529296875, | |
"total_bits": 144842240.0, | |
"err": 0.0252227783203125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.0313232421875, | |
"total_bits": 158107520.0, | |
"err": 0.0180206298828125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.2313232421875, | |
"total_bits": 163350400.0, | |
"err": 0.016998291015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.12529296875, | |
"total_bits": 212999679.99999997, | |
"err": 0.01508331298828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.mlp.gate_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.1766357421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.165771484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.147216796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.08319091796875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.0926513671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.08563232421875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.0843505859375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.0748291015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.07098388671875, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.04730224609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.04119873046875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.0236968994140625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0210723876953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.019439697265625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0127410888671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.01236724853515625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.0097198486328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.mlp.up_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1751085069444445, | |
"total_bits": 153951744.0, | |
"err": 0.24169921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.3751085069444446, | |
"total_bits": 168107520.0, | |
"err": 0.22705078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.7251085069444443, | |
"total_bits": 192880128.0, | |
"err": 0.20166015625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.2251085069444443, | |
"total_bits": 228269568.0, | |
"err": 0.11334228515625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.031277126736111, | |
"total_bits": 214550400.0, | |
"err": 0.126220703125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.1251085069444446, | |
"total_bits": 221191680.0, | |
"err": 0.11669921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.05:4b/0.95:3b 32g s4", | |
"bpw": 3.1751085069444445, | |
"total_bits": 224730624.0, | |
"err": 0.11505126953125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:4b/0.6:3b 32g s4", | |
"bpw": 3.5251085069444446, | |
"total_bits": 249503232.0, | |
"err": 0.10205078125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.6:4b/0.4:3b 64g s4", | |
"bpw": 3.6625542534722224, | |
"total_bits": 259231488.0, | |
"err": 0.09674072265625, | |
"qparams": { | |
"group_size": 64, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.6, | |
0.4 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 128g s4", | |
"bpw": 4.031277126736111, | |
"total_bits": 285329280.0, | |
"err": 0.06414794921875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:4b 32g s4", | |
"bpw": 4.125108506944445, | |
"total_bits": 291970560.0, | |
"err": 0.0556640625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:5b 128g s4", | |
"bpw": 5.031277126736111, | |
"total_bits": 356108160.0, | |
"err": 0.032012939453125, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
5 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:6b/0.9:5b 32g s4", | |
"bpw": 5.225108506944444, | |
"total_bits": 369827328.0, | |
"err": 0.0276031494140625, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.4:6b/0.6:5b 32g s4", | |
"bpw": 5.525108506944444, | |
"total_bits": 391060992.0, | |
"err": 0.0252532958984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
6, | |
5 | |
], | |
"bits_prop": [ | |
0.4, | |
0.6 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:6b 128g s4", | |
"bpw": 6.031277126736111, | |
"total_bits": 426887040.0, | |
"err": 0.0166015625, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
6 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:8b/0.9:6b 128g s4", | |
"bpw": 6.231277126736111, | |
"total_bits": 441042816.0, | |
"err": 0.016082763671875, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
8, | |
6 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:8b 32g s4", | |
"bpw": 8.125108506944445, | |
"total_bits": 575086080.0, | |
"err": 0.01020050048828125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
8 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
} | |
] | |
}, | |
{ | |
"key": "model.layers.12.mlp.down_proj", | |
"numel": 70778880, | |
"options": [ | |
{ | |
"desc": "0.05:3b/0.95:2b 32g s4", | |
"bpw": 2.1715892650462965, | |
"total_bits": 153702656.0, | |
"err": 0.258544921875, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.05, | |
0.95 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.25:3b/0.75:2b 32g s4", | |
"bpw": 2.37529296875, | |
"total_bits": 168120576.0, | |
"err": 0.2333984375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.25, | |
0.75 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.4:3b/0.5:2b 32g s4", | |
"bpw": 2.727144820601852, | |
"total_bits": 193024256.0, | |
"err": 0.198486328125, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3, | |
2 | |
], | |
"bits_prop": [ | |
0.1, | |
0.4, | |
0.5 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "0.1:4b/0.9:3b 32g s4", | |
"bpw": 3.227144820601852, | |
"total_bits": 228413696.0, | |
"err": 0.118896484375, | |
"qparams": { | |
"group_size": 32, | |
"bits": [ | |
4, | |
3 | |
], | |
"bits_prop": [ | |
0.1, | |
0.9 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 128g s4", | |
"bpw": 3.0313232421875, | |
"total_bits": 214553664.0, | |
"err": 0.136474609375, | |
"qparams": { | |
"group_size": 128, | |
"bits": [ | |
3 | |
], | |
"bits_prop": [ | |
1.0 | |
], | |
"scale_bits": 4 | |
} | |
}, | |
{ | |
"desc": "1.0:3b 32g s4", | |
"bpw": 3.12529296875, |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment