Skip to content

Instantly share code, notes, and snippets.

@Quasimondo
Created March 15, 2026 09:46
Show Gist options
  • Select an option

  • Save Quasimondo/103b7e71e552d077ba2c770e14906417 to your computer and use it in GitHub Desktop.

Select an option

Save Quasimondo/103b7e71e552d077ba2c770e14906417 to your computer and use it in GitHub Desktop.
Convert Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf for ollama (including vision encoder)
"""
Merge: take censored GGUF as base (all metadata + vision tensors),
replace only the language model tensors with the uncensored ones.
KV fields are copied as raw bytes from censored to avoid any serialization
bugs in GGUFWriter (which has broken array handling for multi-element arrays).
The only exception is general.architecture which GGUFWriter writes automatically.
Tensor mapping:
- LM tensors (blk.*, token_embd, output, norm): from uncensored
- ssm_dt.bias (uncensored) → ssm_dt (censored name), same shape [32]
- All other tensors (v.*, mm.*, mtp.*): from censored (vision encoder)
"""
import struct
from gguf import GGUFReader, GGUFWriter, GGUFValueType
####################################################################################################
# NOTE: you will have to fix the paths here to match the ones in your system
####################################################################################################
# I have only tested this with the Qwen3.5-9B-Q4_K_M model. It might work with other versions, too,
# but I can't guarantee that. If you try other models you have to match the correct version of
# the original (censored) Qwen model and find the matching sha256 version in your ollama cache after
# downloading it.
# CENSORED: the original ollama blob (provides all KV metadata + the vision-encoder tensors).
CENSORED = '/usr/share/ollama/.ollama/models/blobs/sha256-dec52a44569a2a25341c4e4d3fee25846eed4f6f0b936278e3a3c900bb99d37c'
# UNCENSORED: the fine-tuned GGUF that supplies the language-model tensors only.
UNCENSORED = '/absolute/path/to/your/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf'
# OUTPUT: where the merged GGUF is written.
OUTPUT = '/absolute/path/to/your/Qwen3.5-9B-Uncensored-HauhauCS-merged.gguf'
####################################################################################################
# Language model tensor prefixes — everything else (v.*, mm.*, mtp.*) stays from censored
LM_PREFIXES = ('blk.', 'token_embd', 'output', 'norm')


def is_lm_tensor(name):
    """Return True if *name* is a language-model tensor (as opposed to vision/other)."""
    # str.startswith accepts a tuple of prefixes, so no explicit loop is needed.
    return name.startswith(LM_PREFIXES)
print('Reading sources...')
r_cen = GGUFReader(CENSORED)
r_unc = GGUFReader(UNCENSORED)

# Partition tensor names: LM tensors vs. everything else (vision encoder etc.).
lm_censored = set()
vis_censored = set()
for tensor in r_cen.tensors:
    target = lm_censored if is_lm_tensor(tensor.name) else vis_censored
    target.add(tensor.name)
lm_uncensored = {tensor.name for tensor in r_unc.tensors if is_lm_tensor(tensor.name)}

print(f' Censored: {len(r_cen.tensors)} tensors ({len(lm_censored)} LM, {len(vis_censored)} vision/other)')
print(f' Uncensored: {len(r_unc.tensors)} tensors ({len(lm_uncensored)} LM)')

# Report name mismatches between the two LM tensor sets so surprises are visible up front.
only_in_unc = lm_uncensored - lm_censored
only_in_cen = lm_censored - lm_uncensored
print(f' LM tensors only in uncensored (will be renamed): {only_in_unc}')
print(f' LM tensors only in censored (will use censored weights): {only_in_cen}')
# Build uncensored tensor lookup: also map ssm_dt.bias -> ssm_dt
unc_tensors = {}
for tensor in r_unc.tensors:
    unc_tensors[tensor.name] = tensor
    if tensor.name.endswith('.ssm_dt.bias'):
        # Also register the tensor under the censored model's naming scheme
        # (same weights, suffix '.bias' dropped) so the merge loop finds it.
        alias = tensor.name[: -len('.bias')]
        unc_tensors[alias] = tensor
# GGUFWriter is used only for tensor writing — it auto-writes general.architecture.
# KV fields will be injected as raw bytes directly.
writer = GGUFWriter(OUTPUT, arch='qwen35')

# --- Tensors: LM from uncensored, vision/other from censored ---
print('Adding tensors...')
lm_count = 0
vis_count = 0
renamed_count = 0
for cen_tensor in r_cen.tensors:
    # Prefer the uncensored weights only for LM tensors that exist on both sides.
    replacement = unc_tensors.get(cen_tensor.name) if is_lm_tensor(cen_tensor.name) else None
    if replacement is None:
        # Vision/other tensors — and any LM tensor missing from the uncensored
        # model — are carried over from the censored base unchanged.
        writer.add_tensor(cen_tensor.name, cen_tensor.data, raw_dtype=cen_tensor.tensor_type)
        vis_count += 1
    else:
        # Always write under the censored name so the metadata stays consistent.
        writer.add_tensor(cen_tensor.name, replacement.data, raw_dtype=replacement.tensor_type)
        lm_count += 1
        if replacement.name != cen_tensor.name:
            renamed_count += 1
print(f' {lm_count} LM tensors from uncensored ({renamed_count} renamed), {vis_count} tensors from censored')
# --- Build raw KV bytes from censored (skip header pseudo-fields and architecture) ---
# GGUFWriter auto-adds: GGUF.version, GGUF.tensor_count, GGUF.kv_count, general.architecture
SKIP_KV = {'GGUF.version', 'GGUF.tensor_count', 'GGUF.kv_count', 'general.architecture'}
print('Building raw KV bytes from censored...')
raw_kv = bytearray()
raw_kv_count = 0
for name, field in r_cen.fields.items():
    if name in SKIP_KV:
        continue
    # Copy the field verbatim, byte for byte, to sidestep GGUFWriter's
    # serialization bugs (broken multi-element array handling).
    for part in field.parts:
        raw_kv += bytes(part)
    raw_kv_count += 1
print(f' {raw_kv_count} KV fields, {len(raw_kv)} bytes total')
# Write header (GGUFWriter handles magic, version, tensor_count, kv_count)
print(f'Writing to {OUTPUT} ...')
writer.write_header_to_file()
# Patch kv_count in header to our raw count + 1 (for general.architecture added by writer).
# Header layout: magic(4) + version(4) + tensor_count(8) + kv_count(8), kv_count at offset 16.
# NOTE(review): writer.fout is indexed as a list here — this matches gguf versions
# that support sharded output; confirm against the installed gguf package.
writer.fout[0].seek(16)
writer.fout[0].write(struct.pack('<Q', raw_kv_count + 1)) # +1 for general.architecture
writer.fout[0].seek(0, 2) # seek to end
# Write general.architecture KV (already in writer.kv_data[0] from GGUFWriter.__init__)
# Use write_kv_data_to_file to emit just that one field
writer.write_kv_data_to_file()
# Now append all other KV fields as raw bytes, bypassing GGUFWriter serialization
# entirely — the bytes were copied verbatim from the censored source file.
writer.fout[0].write(raw_kv)
# Flush before handing the file back to the writer so the raw KV bytes land
# on disk ahead of the tensor data it writes next.
writer.fout[0].flush()
writer.write_tensors_to_file()
writer.close()
print('Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment