Created
February 9, 2025 05:01
-
-
Save Hipnosis183/542123a6a61e84e71d1b1c3c9cd596e2 to your computer and use it in GitHub Desktop.
Remove Metadata from ZIP Files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Based on https://github.com/thypon/python-stripzip
import mmap | |
import os | |
import sys | |
from struct import Struct | |
# Remove zip metadata.
def zip_strip(zip):
    """Blank out timestamps and identifying extra fields of a zip, in place.

    Walks the local file headers from the start of the file, then the
    central directory records that follow them.  In every header the
    modification time and date are set to zero, and every extra-field record
    whose ID is in METADATA_EXTRA_IDS has its ID and payload zeroed while its
    length is kept, so no byte of the archive ever moves.

    Args:
        zip: Binary file object opened for update (e.g. mode 'r+b').  It must
            expose fileno(); the file is modified through a memory map.

    Raises:
        SystemExit: With status 1, after printing 'Invalid zip file.', when
            the file is empty or does not start with a recognised zip header.

    NOTE(review): the walk trusts the compressed size recorded in each local
    header (or its ZIP64 extra record).  Entries that record it elsewhere,
    e.g. in a trailing data descriptor, end the walk early and the remaining
    headers are left untouched -- confirm whether such archives need support.
    """
    # Extra-field record IDs, per PKWARE APPNOTE and Info-ZIP extrafld.txt.
    ZIP64_EXTRA_ID = 0x0001
    METADATA_EXTRA_IDS = frozenset((
        0x000a,  # NTFS timestamps
        0x5455,  # extended timestamp ("UT")
        0x7875,  # Info-ZIP Unix UID/GID ("ux")
    ))
    FILE_HEADER_SIGNATURE = 0x04034b50
    CENDIR_HEADER_SIGNATURE = 0x02014b50

    signature_struct = Struct('<L')
    extra_header_struct = Struct('<HH')
    zip64_extra_struct = Struct('<HHQQ')
    local_file_header_struct = Struct('<LHHHHHLLLHH')
    central_directory_header_struct = Struct('<LHHHHHHLLLHHHHHLL')

    # Remove extra data.
    def remove_extra(mm, offset, length, compressed_size=0):
        """Zero the metadata records of one extra field.

        Scans the records in mm[offset:offset + length].  Returns the
        compressed size from a complete ZIP64 record when one is present,
        otherwise the compressed_size that was passed in.
        """
        # Never walk past the declared field or the end of the mapping.
        end = min(offset + length, len(mm))
        while offset + extra_header_struct.size <= end:
            header_id, header_length = extra_header_struct.unpack_from(mm, offset)
            data_start = offset + extra_header_struct.size
            data_end = data_start + header_length
            if data_end > end:
                # The record claims more bytes than the field holds
                # (malformed data or trailing padding): stop here.
                break
            if header_id in METADATA_EXTRA_IDS:
                # Clear metadata: zero the ID and payload, keep the length so
                # the following records stay where they are.
                mm[offset:offset + 2] = b'\x00\x00'
                mm[data_start:data_end] = bytes(header_length)
            elif header_id == ZIP64_EXTRA_ID and header_length >= 16:
                # Layout: id, length, uncompressed size, compressed size.
                # Only trust it when both 8-byte sizes are actually present.
                _, _, _, compressed_size = zip64_extra_struct.unpack_from(mm, offset)
            offset = data_end
        return compressed_size

    archive_size = os.fstat(zip.fileno()).st_size
    offset = 0
    # mmap cannot map an empty file; fall through to the invalid-file exit.
    if archive_size > 0:
        # The context manager flushes and releases the mapping on every path.
        with mmap.mmap(zip.fileno(), 0) as mm:
            # Local file headers, each followed by name, extra field and data.
            while offset + local_file_header_struct.size <= archive_size:
                if signature_struct.unpack_from(mm, offset) != (FILE_HEADER_SIGNATURE,):
                    break
                values = list(local_file_header_struct.unpack_from(mm, offset))
                compressed_size = values[7]
                name_length = values[9]
                extra_field_length = values[10]
                # Reset date and time values.
                values[4] = 0
                values[5] = 0
                local_file_header_struct.pack_into(mm, offset, *values)
                offset += local_file_header_struct.size + name_length
                if extra_field_length != 0:
                    compressed_size = remove_extra(
                        mm, offset, extra_field_length, compressed_size)
                offset += compressed_size + extra_field_length

            # Central directory records: header, name, extra field, comment.
            while offset + central_directory_header_struct.size <= archive_size:
                if signature_struct.unpack_from(mm, offset) != (CENDIR_HEADER_SIGNATURE,):
                    break
                values = list(central_directory_header_struct.unpack_from(mm, offset))
                file_name_length = values[10]
                extra_field_length = values[11]
                file_comment_length = values[12]
                # Reset date and time values.
                values[5] = 0
                values[6] = 0
                central_directory_header_struct.pack_into(mm, offset, *values)
                # The extra field sits between the name and the comment, so
                # its start must be computed before the comment is skipped.
                extra_start = (offset + central_directory_header_struct.size
                               + file_name_length)
                offset = extra_start + extra_field_length + file_comment_length
                if extra_field_length != 0:
                    remove_extra(mm, extra_start, extra_field_length)

    # Abort if file is invalid.
    if offset == 0:
        print('Invalid zip file.')
        sys.exit(1)
def main():
    """Command-line entry point: strip metadata from the zip named in argv[1].

    Prints a hint and returns without touching anything when no path is
    given on the command line.
    """
    if len(sys.argv) < 2:
        print('Select a zip file.')
        return
    # Process zip file.
    # Opened for update because zip_strip rewrites header bytes in place; the
    # context manager closes the handle even if zip_strip exits early.
    with open(sys.argv[1], 'r+b') as archive:
        zip_strip(archive)
# Run script.
# Guarded so that importing this module does not start the command-line tool.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment