Skip to content

Instantly share code, notes, and snippets.

@adamo1139
Created April 20, 2026 15:38
Show Gist options
  • Select an option

  • Save adamo1139/f9b9cac57c73a5a83e05047a68f10da7 to your computer and use it in GitHub Desktop.

Select an option

Save adamo1139/f9b9cac57c73a5a83e05047a68f10da7 to your computer and use it in GitHub Desktop.
GLM 4.7 EXL3 KLD testing
#ulimit -n 100000
#exllamav3==0.0.28
#flash_attn==2.8.3
#torch==2.8.0+cu128
#ran on a rented 4090 GPU (modded to 48GB) from Vast.AI
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-2bpw_H6
-- A perplexity: 5.44790776
-- B perplexity: 7.56554846
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.5820
K = 2: 0.7017
K = 3: 0.7581
K = 4: 0.7919
K = 5: 0.8151
-- Top-K agreement, A vs B:
K = 1: 0.7596
K = 2: 0.4099
K = 3: 0.1765
K = 4: 0.0649
K = 5: 0.0220
-- KL divergence (A, B): 0.59911569
-- KL divergence (B, A): 0.51078425
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-3bpw_H6
-- A perplexity: 5.44790776
-- B perplexity: 6.01840956
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6212
K = 2: 0.7363
K = 3: 0.7877
K = 4: 0.8188
K = 5: 0.8402
-- Top-K agreement, A vs B:
K = 1: 0.8629
K = 2: 0.5994
K = 3: 0.3556
K = 4: 0.1875
K = 5: 0.0901
-- KL divergence (A, B): 0.21367304
-- KL divergence (B, A): 0.20173767
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-4bpw_H6
-- A perplexity: 5.44790776
-- B perplexity: 5.70074813
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6303
K = 2: 0.7445
K = 3: 0.7948
K = 4: 0.8247
K = 5: 0.8453
-- Top-K agreement, A vs B:
K = 1: 0.9113
K = 2: 0.7162
K = 3: 0.5023
K = 4: 0.3186
K = 5: 0.1892
-- KL divergence (A, B): 0.10793664
-- KL divergence (B, A): 0.10609606
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-5bpw_H8
-- A perplexity: 5.44790776
-- B perplexity: 5.48369300
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6360
K = 2: 0.7486
K = 3: 0.7993
K = 4: 0.8289
K = 5: 0.8490
-- Top-K agreement, A vs B:
K = 1: 0.9358
K = 2: 0.7850
K = 3: 0.6051
K = 4: 0.4321
K = 5: 0.2904
-- KL divergence (A, B): 0.07210313
-- KL divergence (B, A): 0.07154187
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-6bpw_H8
-- A perplexity: 5.44790776
-- B perplexity: 5.46985636
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6364
K = 2: 0.7492
K = 3: 0.7997
K = 4: 0.8290
K = 5: 0.8492
-- Top-K agreement, A vs B:
K = 1: 0.9472
K = 2: 0.8163
K = 3: 0.6543
K = 4: 0.4921
K = 5: 0.3507
-- KL divergence (A, B): 0.05867771
-- KL divergence (B, A): 0.05889685
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-8bpw_H8
-- A perplexity: 5.44790776
-- B perplexity: 5.47170094
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6362
K = 2: 0.7492
K = 3: 0.7995
K = 4: 0.8295
K = 5: 0.8498
-- Top-K agreement, A vs B:
K = 1: 0.9531
K = 2: 0.8342
K = 3: 0.6852
K = 4: 0.5307
K = 5: 0.3931
-- KL divergence (A, B): 0.05094092
-- KL divergence (B, A): 0.05181698
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-210bpw-tuned
-- A perplexity: 5.44790776
-- B perplexity: 6.65374450
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6007
K = 2: 0.7192
K = 3: 0.7736
K = 4: 0.8062
K = 5: 0.8284
-- Top-K agreement, A vs B:
K = 1: 0.7945
K = 2: 0.4672
K = 3: 0.2254
K = 4: 0.0949
K = 5: 0.0365
-- KL divergence (A, B): 0.44456966
-- KL divergence (B, A): 0.38990254
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-257bpw-tuned
-- A perplexity: 5.44790776
-- B perplexity: 6.04089632
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6160
K = 2: 0.7328
K = 3: 0.7860
K = 4: 0.8173
K = 5: 0.8393
-- Top-K agreement, A vs B:
K = 1: 0.8280
K = 2: 0.5291
K = 3: 0.2835
K = 4: 0.1323
K = 5: 0.0577
-- KL divergence (A, B): 0.31167571
-- KL divergence (B, A): 0.28659851
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-315bpw-tuned
-- A perplexity: 5.44790776
-- B perplexity: 5.65554713
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6291
K = 2: 0.7441
K = 3: 0.7949
K = 4: 0.8252
K = 5: 0.8462
-- Top-K agreement, A vs B:
K = 1: 0.8852
K = 2: 0.6495
K = 3: 0.4167
K = 4: 0.2402
K = 5: 0.1274
-- KL divergence (A, B): 0.15319949
-- KL divergence (B, A): 0.15014105
PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python model_diff.py \
-ma /home/ubuntu/workspace/models/glm47-bf16 \
-mb /home/ubuntu/workspace/models/glm47-384bpw-tuned
-- A perplexity: 5.44790776
-- B perplexity: 5.66231484
-- A label in top-K:
K = 1: 0.6365
K = 2: 0.7498
K = 3: 0.7997
K = 4: 0.8296
K = 5: 0.8496
-- B label in top-K:
K = 1: 0.6309
K = 2: 0.7443
K = 3: 0.7951
K = 4: 0.8249
K = 5: 0.8456
-- Top-K agreement, A vs B:
K = 1: 0.9093
K = 2: 0.7117
K = 3: 0.4969
K = 4: 0.3149
K = 5: 0.1856
-- KL divergence (A, B): 0.10877120
-- KL divergence (B, A): 0.10692901
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment