Skip to content

Instantly share code, notes, and snippets.

View lunzima's full-sized avatar
🎮
Playing Genshin Impact

Lunzima lunzima

🎮
Playing Genshin Impact
View GitHub Profile
@lunzima
lunzima / quant.patch
Created February 18, 2025 12:14
Patch for llama.cpp's src/llama-quant.cpp adding a dynamic (per-tensor) quantization modification: distinguishes 1-bit ftypes (IQ1_S / IQ1_M) inside llama_tensor_get_type so later type-selection logic can treat them separately.
diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp
index fb798265..cfa73700 100644
--- a/src/llama-quant.cpp
+++ b/src/llama-quant.cpp
@@ -187,6 +187,7 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t
}
} else if (ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S ||
ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || ftype == LLAMA_FTYPE_MOSTLY_IQ2_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) {
+ bool is_one_bit = (ftype == LLAMA_FTYPE_MOSTLY_IQ1_M || ftype == LLAMA_FTYPE_MOSTLY_IQ1_S);
if (name.find("attn_v.weight") != std::string::npos) {