
@N8python
Created April 18, 2025 16:07
from mlx.utils import tree_flatten, tree_map
from mlx_lm import load, generate
import mlx.core as mx
from mlx_lm.utils import (
    dequantize_model,
    fetch_from_hub,
    get_model_path,
    quantize_model,
    save_config,
    save_weights,
    upload_to_hub,
)
model, tokenizer = load("gemma-3-27b-it-qat-4bit")
weights = dict(tree_flatten(model.parameters()))
for k, v in weights.items():
    if v.dtype == mx.float16:
        weights[k] = v.astype(mx.bfloat16)
model.load_weights(list(weights.items()))
save_weights("gemma-3-27b-it-qat-4bit", weights, donate_weights=True)
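The pattern in the script is: flatten the parameter tree into a flat dict, cast only the entries of the source dtype, and leave everything else (e.g. quantized integer weights and their scales) untouched. For readers without MLX installed, here is a plain-dict stand-in that illustrates the same selective-cast logic; `cast_params` and the string dtype tags are hypothetical illustrations, not part of mlx or mlx_lm:

```python
# Hypothetical stand-in for the dtype-cast loop above, using plain
# Python dicts and string tags instead of MLX arrays and dtypes.

def cast_params(params, from_tag, to_tag):
    """Return a copy of `params` where every (tag, value) entry whose
    tag equals `from_tag` is re-tagged as `to_tag`; all other entries
    pass through unchanged."""
    out = {}
    for key, (tag, value) in params.items():
        out[key] = (to_tag, value) if tag == from_tag else (tag, value)
    return out

# A float16 parameter gets cast; a quantized uint32 tensor is untouched.
weights = {
    "layers.0.w": ("float16", [0.1, 0.2]),
    "layers.0.scales": ("uint32", [7]),
}
casted = cast_params(weights, "float16", "bfloat16")
```

The same selectivity is why the real script checks `v.dtype == mx.float16` before calling `astype`: blindly casting every tensor would corrupt the quantized weight buffers.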
awni commented Apr 18, 2025

Nice! FYI you can replace lines 14-19 with:

model.set_dtype(mx.bfloat16)

@N8python (Author)
Woah. That's convenient. Is it new?

awni commented Apr 18, 2025

Not so new :). It does more or less what you wrote; it's just such a common use case that we added a method for it.

@N8python (Author)
Can't believe I didn't know lol.
