Skip to content

Instantly share code, notes, and snippets.

@Quasimondo
Created March 15, 2026 09:46
Show Gist options
  • Select an option

  • Save Quasimondo/103b7e71e552d077ba2c770e14906417 to your computer and use it in GitHub Desktop.

Select an option

Save Quasimondo/103b7e71e552d077ba2c770e14906417 to your computer and use it in GitHub Desktop.
Convert Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf for ollama (including vision encoder)
"""
Merge: take censored GGUF as base (all metadata + vision tensors),
replace only the language model tensors with the uncensored ones.
KV fields are copied as raw bytes from censored to avoid any serialization
bugs in GGUFWriter (which has broken array handling for multi-element arrays).
The only exception is general.architecture which GGUFWriter writes automatically.
Tensor mapping:
- LM tensors (blk.*, token_embd, output, norm): from uncensored
- ssm_dt.bias (uncensored) → ssm_dt (censored name), same shape [32]
- All other tensors (v.*, mm.*, mtp.*): from censored (vision encoder)
"""
import struct
from gguf import GGUFReader, GGUFWriter, GGUFValueType
####################################################################################################
# NOTE: you will have to fix the paths here to match the ones in your system
####################################################################################################
# I have only tested this with the Qwen3.5-9B-Q4_K_M model. It might work with other versions, too,
# but I can't guarantee that. If you try other models you have to match the correct version of
# the original (censored) Qwen model and find the matching sha256 version in your ollama cache after
# downloading it.
# CENSORED: the original ollama blob (provides all KV metadata + the vision-encoder tensors).
CENSORED = '/usr/share/ollama/.ollama/models/blobs/sha256-dec52a44569a2a25341c4e4d3fee25846eed4f6f0b936278e3a3c900bb99d37c'
# UNCENSORED: the fine-tuned GGUF that supplies the language-model tensors only.
UNCENSORED = '/absolute/path/to/your/Qwen3.5-9B-Uncensored-HauhauCS-Aggressive-Q4_K_M.gguf'
# OUTPUT: where the merged GGUF is written.
OUTPUT = '/absolute/path/to/your/Qwen3.5-9B-Uncensored-HauhauCS-merged.gguf'
####################################################################################################
# Language model tensor prefixes — everything else (v.*, mm.*, mtp.*) stays from censored
LM_PREFIXES = ('blk.', 'token_embd', 'output', 'norm')


def is_lm_tensor(name):
    """Return True if *name* is a language-model tensor (as opposed to vision/other)."""
    # str.startswith accepts a tuple of prefixes, so no explicit loop is needed.
    return name.startswith(LM_PREFIXES)
print('Reading sources...')
r_cen = GGUFReader(CENSORED)
r_unc = GGUFReader(UNCENSORED)

# Partition tensor names: LM tensors vs. everything else (vision encoder etc.).
lm_censored = set()
vis_censored = set()
for tensor in r_cen.tensors:
    target = lm_censored if is_lm_tensor(tensor.name) else vis_censored
    target.add(tensor.name)
lm_uncensored = {tensor.name for tensor in r_unc.tensors if is_lm_tensor(tensor.name)}

print(f' Censored: {len(r_cen.tensors)} tensors ({len(lm_censored)} LM, {len(vis_censored)} vision/other)')
print(f' Uncensored: {len(r_unc.tensors)} tensors ({len(lm_uncensored)} LM)')

# Report name mismatches between the two LM tensor sets so surprises are visible up front.
only_in_unc = lm_uncensored - lm_censored
only_in_cen = lm_censored - lm_uncensored
print(f' LM tensors only in uncensored (will be renamed): {only_in_unc}')
print(f' LM tensors only in censored (will use censored weights): {only_in_cen}')
# Build uncensored tensor lookup: also map ssm_dt.bias -> ssm_dt
unc_tensors = {}
for tensor in r_unc.tensors:
    unc_tensors[tensor.name] = tensor
    if tensor.name.endswith('.ssm_dt.bias'):
        # Also register the tensor under the censored model's naming scheme
        # (same weights, suffix '.bias' dropped) so the merge loop finds it.
        alias = tensor.name[: -len('.bias')]
        unc_tensors[alias] = tensor
# GGUFWriter is used only for tensor writing — it auto-writes general.architecture.
# KV fields will be injected as raw bytes directly.
writer = GGUFWriter(OUTPUT, arch='qwen35')

# --- Tensors: LM from uncensored, vision/other from censored ---
print('Adding tensors...')
lm_count = 0
vis_count = 0
renamed_count = 0
for cen_tensor in r_cen.tensors:
    # Prefer the uncensored weights only for LM tensors that exist on both sides.
    replacement = unc_tensors.get(cen_tensor.name) if is_lm_tensor(cen_tensor.name) else None
    if replacement is None:
        # Vision/other tensors — and any LM tensor missing from the uncensored
        # model — are carried over from the censored base unchanged.
        writer.add_tensor(cen_tensor.name, cen_tensor.data, raw_dtype=cen_tensor.tensor_type)
        vis_count += 1
    else:
        # Always write under the censored name so the metadata stays consistent.
        writer.add_tensor(cen_tensor.name, replacement.data, raw_dtype=replacement.tensor_type)
        lm_count += 1
        if replacement.name != cen_tensor.name:
            renamed_count += 1
print(f' {lm_count} LM tensors from uncensored ({renamed_count} renamed), {vis_count} tensors from censored')
# --- Build raw KV bytes from censored (skip header pseudo-fields and architecture) ---
# GGUFWriter auto-adds: GGUF.version, GGUF.tensor_count, GGUF.kv_count, general.architecture
SKIP_KV = {'GGUF.version', 'GGUF.tensor_count', 'GGUF.kv_count', 'general.architecture'}
print('Building raw KV bytes from censored...')
raw_kv = bytearray()
raw_kv_count = 0
for name, field in r_cen.fields.items():
    if name in SKIP_KV:
        continue
    # Copy the field verbatim, byte for byte, to sidestep GGUFWriter's
    # serialization bugs (broken multi-element array handling).
    for part in field.parts:
        raw_kv += bytes(part)
    raw_kv_count += 1
print(f' {raw_kv_count} KV fields, {len(raw_kv)} bytes total')
# Write header (GGUFWriter handles magic, version, tensor_count, kv_count)
print(f'Writing to {OUTPUT} ...')
writer.write_header_to_file()
# Patch kv_count in header to our raw count + 1 (for general.architecture added by writer).
# Header layout: magic(4) + version(4) + tensor_count(8) + kv_count(8), kv_count at offset 16.
# NOTE(review): writer.fout is indexed as a list here — this matches gguf versions
# that support sharded output; confirm against the installed gguf package.
writer.fout[0].seek(16)
writer.fout[0].write(struct.pack('<Q', raw_kv_count + 1)) # +1 for general.architecture
writer.fout[0].seek(0, 2) # seek to end
# Write general.architecture KV (already in writer.kv_data[0] from GGUFWriter.__init__)
# Use write_kv_data_to_file to emit just that one field
writer.write_kv_data_to_file()
# Now append all other KV fields as raw bytes, bypassing GGUFWriter serialization
# entirely — the bytes were copied verbatim from the censored source file.
writer.fout[0].write(raw_kv)
# Flush before handing the file back to the writer so the raw KV bytes land
# on disk ahead of the tensor data it writes next.
writer.fout[0].flush()
writer.write_tensors_to_file()
writer.close()
print('Done!')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment