Skip to content

Instantly share code, notes, and snippets.

@Romern
Created June 10, 2026 06:34
Show Gist options
  • Select an option

  • Save Romern/4131cb00cad2be9b1cb70c0129d668c4 to your computer and use it in GitHub Desktop.

Select an option

Save Romern/4131cb00cad2be9b1cb70c0129d668c4 to your computer and use it in GitHub Desktop.
Wsass dump fix (Claude opus 4.8 high)

WSASS and similar tools dump PPL-protected LSASS by abusing an out-of-date WerFaultSecure.exe (the missing PPL check is the whole point of using the old binary, so a version-matched build is not an option).

On a modern OS that old binary is no longer ABI-compatible with the host's wer.dll / faultrep.dll, and WER's stitched minidump writer mis-drives MiniDumpWriteDump: seeks in the I/O callback are dropped, so memory writes are appended instead of placed. The resulting file keeps a valid MDMP magic but:

  • the MINIDUMP_HEADER's NumberOfStreams / StreamDirectoryRva are stale (they point at offset 0x20, which actually contains SystemInfo stream data);
  • the real stream directory is relocated to the end of the file, and its stream RVAs are logical offsets that don't match the physical layout;
  • process memory is stored as a (VA,size) descriptor array plus a heavily duplicated set of partial passes, interleaved with the WER module-image cache.

Neither pypykatz nor mimikatz can parse it (pypykatz raises a confusing UnicodeDecodeError deep in the minidump library). pypykatz now detects this layout and points you here.

Usage

python3 werfault_decode.py <input.werdump> <output.minidump>
pypykatz lsa minidump <output.minidump>

The decoder rebuilds a standard minidump (SystemInfo + ModuleList + MemoryList), reconstructing the module list from the PE images recovered from memory.

#!/usr/bin/env python3
"""
Decode a WerFaultSecure / WSASS "stitched" LSASS dump into a standard minidump.
Background
----------
The WSASS technique (https://github.com/TwoSevenOneT/WSASS) dumps PPL-protected
LSASS using an out-of-date WerFaultSecure.exe. On a modern OS that binary is no
longer ABI-compatible with the host's wer.dll/faultrep.dll, and the WER
"stitched minidump" writer ends up mis-driving MiniDumpWriteDump: file seeks in
the I/O callback are dropped, so memory writes are *appended* instead of placed.
The result keeps a valid 'MDMP' magic but is otherwise non-standard and cannot be
parsed by pypykatz or mimikatz:
* the MINIDUMP_HEADER's NumberOfStreams / StreamDirectoryRva are stale (point at
offset 0x20, which actually holds SystemInfo stream data);
* the real stream directory is relocated to the end of the file, and its stream
RVAs are *logical* offsets that do not match the physical file layout;
* process memory is written as a (VA,size) descriptor array plus a heavily
duplicated set of partial "passes", interleaved with ~340 MB of WER auxiliary
module-image data.
This tool reconstructs a clean minidump (SystemInfo + ModuleList + MemoryList)
that standard tools can parse. The ModuleList is rebuilt from the PE images found
in the recovered memory. Validated on a Win11 24H2 (Build 26100) sample: the
recovered credentials are byte-identical to a normal MiniDumpWriteDump dump of the
same machine.
Usage: python3 werfault_decode.py <input.werdump> <output.minidump>
"""
import struct, sys, re, bisect
def decode(SRC, DST):
    """Rebuild a standard minidump at ``DST`` from the stitched WER dump ``SRC``.

    The whole input is read into memory, then processed in five steps:

    1. locate the (VA, size) memory descriptor array near the file start;
    2. derive candidate file bases ("passes") at which region data was written;
    3. choose one source file offset for every described region;
    4. rebuild a module list from PE headers found at region starts;
    5. write a minidump holding SystemInfo, ModuleList and MemoryList streams.

    Progress is reported on stdout.

    Args:
        SRC: path of the WerFaultSecure / WSASS dump to read.
        DST: path of the minidump to write (overwritten if it exists).

    Raises:
        ValueError: if no memory descriptor array can be located, or if the
            rebuilt dump would need an RVA that does not fit in 32 bits.
        OSError: if ``SRC`` cannot be read or ``DST`` cannot be written.
    """
    # Local import: the file-level import line does not provide collections.
    from collections import Counter

    with open(SRC, 'rb') as src_file:
        raw = src_file.read()
    sz = len(raw)

    # Little-endian readers at absolute file offsets (no slice copies).
    def u16(o):
        return struct.unpack_from('<H', raw, o)[0]

    def u32(o):
        return struct.unpack_from('<I', raw, o)[0]

    def u64(o):
        return struct.unpack_from('<Q', raw, o)[0]

    if raw[:4] not in (b'MDMP', b'\x89PNG'):
        print("[!] not an MDMP/PNG-magic file, continuing anyway")

    # 1) locate the Memory64 (VA,size) descriptor array: longest ascending run of
    #    16-byte (VA in user space, page-aligned size) records, searched unaligned.
    print("[*] locating memory descriptor array ...")
    # scan front region (descriptors live in the metadata area, well before the bulk)
    limit = min(sz, 0x200000)

    def good(v, d):
        return 0x10000 <= v < 0x7fffffffffff and d and d % 0x1000 == 0 and d <= 0x40000000

    best = (0, 0)  # (record count, file offset) of the longest run seen so far
    o = 0x20
    # `o + 16 <= sz` keeps both 8-byte reads inside the file on short inputs.
    while o < limit and o + 16 <= sz:
        if good(u64(o), u64(o + 8)):
            c = 0
            p = o
            prev = 0
            while p + 16 <= sz:
                v = u64(p)
                d = u64(p + 8)
                if good(v, d) and v >= prev:
                    prev = v
                    c += 1
                    p += 16
                else:
                    break
            if c > best[0]:
                best = (c, o)
        o += 1  # check every offset; the array is not 8-aligned
    run_n, run_off = best
    if run_n == 0:
        raise ValueError(f"{SRC}: no memory descriptor array found")
    run_end = run_off + run_n * 16

    # Anchors: a 'RUUU' tag at +4 and a 'KSSM' tag at +0x24 of a structure whose
    # qword at +16 is a heap-range pointer; (pointer - 0x20) is taken as the
    # structure's own VA. Collected once and reused for both start selection
    # and pass-base detection.
    key_anchors = []  # (file_off, key_va)
    for m in re.finditer(b'RUUU', raw):
        st = m.start() - 4
        if st < 0 or st + 0x28 > sz:
            continue
        kp = u64(st + 16)
        if raw[st + 0x24:st + 0x28] == b'KSSM' and 0x1b0000000000 <= kp < 0x200000000000:
            key_anchors.append((st, kp - 0x20))

    # The longest ascending run may include a few tiny leading system regions that are
    # not part of the real Memory64 array, shifting all cumulative offsets. Pick the
    # start that makes the duplicated-pass key-handle anchors collapse onto the fewest
    # shared bases (i.e. self-consistent passes).
    def start_quality(start_off):
        """Return how many anchors agree on one base if the array began at start_off."""
        n = (run_end - start_off) // 16
        ds_ = [(u64(start_off + k * 16), u64(start_off + k * 16 + 8)) for k in range(n)]
        cu = [0]
        for _va, dd in ds_[:-1]:
            cu.append(cu[-1] + dd)
        st_ = [rec[0] for rec in ds_]
        tally = Counter()
        for fo, kva in key_anchors:
            i = bisect.bisect_right(st_, kva) - 1
            if i >= 0 and ds_[i][0] <= kva < ds_[i][0] + ds_[i][1]:
                tally[fo - cu[i] - (kva - ds_[i][0])] += 1
        return tally.most_common(1)[0][1] if tally else 0

    beststart = (-1, run_off)
    # Starts at or beyond run_end describe no records and always score 0, so they
    # can never beat the first candidate; stop the search at run_end.
    for s in range(run_off, min(run_off + 0x200, run_end), 16):
        q = start_quality(s)
        if q > beststart[0]:
            beststart = (q, s)
    DESC = beststart[1]

    # Trim leading tiny system pages (e.g. KUSER_SHARED_DATA @0x7ffe0000) that precede
    # the real Memory64 array and otherwise skew every cumulative offset.
    while DESC + 16 <= run_end:
        va0 = u64(DESC)
        sz0 = u64(DESC + 8)
        if 0x7ffe0000 <= va0 < 0x7fff0000 and sz0 <= 0x2000:
            DESC += 16
        else:
            break

    ndesc = (run_end - DESC) // 16
    if ndesc <= 0:
        raise ValueError(f"{SRC}: memory descriptor array is empty after trimming")
    descs = [(u64(DESC + k * 16), u64(DESC + k * 16 + 8)) for k in range(ndesc)]
    # cum[k] = sum of the sizes of all regions before k (logical data offset).
    cum = [0]
    for _va, ds in descs[:-1]:
        cum.append(cum[-1] + ds)
    total = cum[-1] + descs[-1][1]
    starts = [rec[0] for rec in descs]
    print(f" descriptor array @0x{DESC:x}: {ndesc} regions, {total/1024/1024:.1f} MB")

    def score(fo, size):
        """Count pointer-looking qwords in the first 0x2000 bytes at fo; -1 if out of file."""
        if fo < 0 or fo + size > sz:
            return -1
        n = min(size, 0x2000)
        c = 0
        for q in range(fo, fo + n, 8):
            v = u64(q)
            if (0x1b0000000000 <= v < 0x200000000000 or 0x3000000000 <= v < 0x3500000000
                    or 0x7ff000000000 <= v < 0x800000000000):
                c += 1
        return c

    # 2) candidate "pass" bases: derived from duplicated copies of a low region, plus
    #    a module end-blob base found by aligning module first-pages to MZ headers.
    print("[*] finding memory-pass bases ...")
    # PE images in the file: 'MZ' whose e_lfanew (at +0x3c) points at 'PE\0\0'.
    pe = []
    for m in re.finditer(b'MZ', raw):
        off = m.start()
        if off + 0x40 <= sz:
            e = u32(off + 0x3c)
            if 0 < e < 0x1000 and off + e + 4 <= sz and raw[off + e:off + e + 4] == b'PE\x00\x00':
                pe.append(off)
    # module-first-page descriptors: size 0x1000, high VA
    modpages = [k for k in range(ndesc)
                if descs[k][1] == 0x1000 and descs[k][0] >= 0x7ff000000000]
    # Every (module page, PE header) pair votes for the base that would align them.
    votes = Counter()
    for k in modpages:
        for po in pe:
            b = po - cum[k]
            if 0x1000000 <= b < sz:
                votes[b] += 1
    endblob = votes.most_common(1)[0][0] if votes else 0

    def di(va):
        """Return the index of the descriptor containing va, or None."""
        i = bisect.bisect_right(starts, va) - 1
        return i if (i >= 0 and descs[i][0] <= va < descs[i][0] + descs[i][1]) else None

    # collect duplicated-pass bases from co-located key handles (robust anchors).
    # Each pass writes the heap regions in cumulative order at its own base; the
    # EARLIEST pass (lowest base) is the complete "block 0" we use for heaps.
    anchor_bases = set()
    for st, kva in key_anchors:
        i = di(kva)
        if i is not None:
            b = st - cum[i] - (kva - descs[i][0])
            if 0 <= b < sz:
                anchor_bases.add(b)
    heapbase = min(anchor_bases) if anchor_bases else (DESC + ndesc * 16 + 0x2000)
    bases = sorted(anchor_bases | {heapbase, endblob})
    print(f" module end-blob base @0x{endblob:x}; heap base @0x{heapbase:x}; {len(bases)} passes")

    # 3) per-region source selection.
    #    * high-VA module/stack regions come from the end-blob (one coherent late pass);
    #    * low heap regions come from the first complete pass (heap base);
    #    * the small band in between (not covered by the first pass) is taken from
    #      whichever pass gives the most valid intra-dump pointers.
    print("[*] selecting per-region sources ...")
    modstart = next((k for k in range(ndesc) if descs[k][0] >= 0x7ff000000000), ndesc)
    # capacity of the first heap pass = distance to the next copy of the descriptor array
    sig = raw[DESC:DESC + 32]
    nxt = raw.find(sig, DESC + 16)
    if nxt != -1:
        heap_cap = nxt - heapbase
    else:
        heap_cap = cum[modstart] if modstart < ndesc else cum[-1]
    src = [0] * ndesc
    for k, (_va, ds) in enumerate(descs):
        if k >= modstart:
            src[k] = endblob + cum[k]
        elif cum[k] < heap_cap:
            src[k] = heapbase + cum[k]
        else:
            pick = (-1, heapbase + cum[k])
            for B in bases:
                s = score(B + cum[k], ds)
                if s > pick[0]:
                    pick = (s, B + cum[k])
            src[k] = pick[1]

    # 4) rebuild ModuleList from PE images (names from export directory)
    def va2off(va):
        """Translate a VA to its chosen source file offset, or None if unmapped."""
        i = di(va)
        return None if i is None else src[i] + (va - descs[i][0])

    def parse_pe(fo, img_va):
        """Return (SizeOfImage, TimeDateStamp, CheckSum, name) for a PE32+ image at fo.

        Returns None when fo does not hold a complete PE32+ header. ``name`` is the
        export-directory name, or None when it cannot be resolved inside the file.
        """
        if fo is None or fo + 0x40 > sz or raw[fo:fo + 2] != b'MZ':
            return None
        e = u32(fo + 0x3c)
        if not (0 < e < 0x1000) or raw[fo + e:fo + e + 4] != b'PE\x00\x00':
            return None
        fh = fo + e + 4   # IMAGE_FILE_HEADER
        opt = fh + 20     # IMAGE_OPTIONAL_HEADER
        if opt + 0x74 > sz:
            return None   # header truncated by end of file
        if u16(opt) != 0x20b:  # PE32+ optional-header magic only
            return None
        ts = u32(fh + 4)
        soi = u32(opt + 0x38)
        cs = u32(opt + 0x40)
        er = u32(opt + 0x70)  # export directory RVA (first data-directory entry)
        name = None
        if er:
            eo = va2off(img_va + er)
            if eo is not None and eo + 0x10 <= sz:
                no = va2off(img_va + u32(eo + 0x0c))  # export directory Name RVA
                if no is not None and no < sz:
                    end = raw.find(b'\x00', no)
                    # find() == -1 would make the slice run to the last byte.
                    if end != -1:
                        name = raw[no:end].decode('latin1')
        return soi, ts, cs, name

    modules = []
    for k in range(ndesc):
        fo = src[k]
        if raw[fo:fo + 2] != b'MZ':
            continue
        pe_ = parse_pe(fo, descs[k][0])
        if not pe_:
            continue
        soi, ts, cs, name = pe_
        modules.append((descs[k][0], soi, ts, cs, name or "mod_%x.dll" % descs[k][0]))
    print(f" reconstructed {len(modules)} modules")

    # SystemInfo lives near file start; the directory's SystemInfo stream is the 56
    # bytes whose ProcessorArchitecture is sane. We scan a small window for it, only
    # at offsets where a full 56-byte record fits in the file.
    sysinfo = None
    for o in range(0x20, min(0x400, sz - 56 + 1)):
        arch = u16(o)
        maj = u32(o + 8)
        build = u32(o + 0x10)
        if arch in (0, 9) and maj == 10 and 1000 < build < 100000:
            sysinfo = bytearray(raw[o:o + 56])
            break
    if sysinfo is None:
        # Fallback record: architecture 9, major version 10, everything else zero.
        sysinfo = bytearray(56)
        struct.pack_into('<H', sysinfo, 0, 9)
        struct.pack_into('<I', sysinfo, 8, 10)
    struct.pack_into('<I', sysinfo, 24, 0)  # zero CSDVersionRva

    # 5) emit standard minidump
    NS = 3
    dir_rva = 32
    out = bytearray(dir_rva + NS * 12)  # header + stream directory, filled in last
    sysinfo_rva = len(out)
    out += sysinfo
    modlist_rva = len(out)
    names_base = modlist_rva + 4 + len(modules) * 108
    # Name strings: u32 byte length, UTF-16LE text, 2-byte terminator.
    nb = bytearray()
    nrvas = []
    for (_va, _soi, _ts, _cs, nm) in modules:
        nrvas.append(names_base + len(nb))
        enc = nm.encode('utf-16-le')
        nb += struct.pack('<I', len(enc)) + enc + b'\x00\x00'
    # Module records are 108 bytes; only base, size, checksum, timestamp and name
    # RVA are filled, the remainder stays zero.
    ml = bytearray(struct.pack('<I', len(modules)))
    for (va, soi, ts, cs, _nm), nr in zip(modules, nrvas):
        rec = bytearray(108)
        struct.pack_into('<QIIII', rec, 0, va, soi, cs, ts, nr)
        ml += rec
    out += ml
    out += nb
    memlist_rva = len(out)
    tbl = memlist_rva + 4
    out += bytes(4 + ndesc * 16)
    struct.pack_into('<I', out, memlist_rva, ndesc)
    for k, (va, ds) in enumerate(descs):
        rva = len(out)
        if rva > 0xffffffff:
            raise ValueError("rebuilt dump exceeds 4 GiB; memory RVA does not fit in 32 bits")
        so = src[k]
        data = raw[so:so + ds]
        if len(data) < ds:
            # Source runs past end of file: pad the region with zeros.
            data = data.ljust(ds, b'\x00')
        struct.pack_into('<QII', out, tbl + k * 16, va, ds, rva)
        out += data
    # Header: 'MDMP' signature, version 0xa793, stream count, directory RVA.
    struct.pack_into('<IIII', out, 0, 0x504d444d, 0xa793, NS, dir_rva)
    # Directory entries: (stream type, size, RVA) for SystemInfo(7), ModuleList(4),
    # MemoryList(5).
    for i, (t, s, r) in enumerate([(7, len(sysinfo), sysinfo_rva),
                                   (4, len(ml) + len(nb), modlist_rva),
                                   (5, 4 + ndesc * 16, memlist_rva)]):
        struct.pack_into('<III', out, dir_rva + i * 12, t, s, r)
    with open(DST, 'wb') as dst_file:
        dst_file.write(out)
    print(f"[+] wrote {DST}: {len(out)/1024/1024:.1f} MB ({len(modules)} modules, {ndesc} memory ranges)")
if __name__ == '__main__':
    # Command-line entry point: exactly two positional arguments are expected,
    # the input dump path followed by the output minidump path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        # Wrong argument count: show the module docstring as usage and fail.
        print(__doc__)
        sys.exit(1)
    input_path, output_path = cli_args
    decode(input_path, output_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment