Last active
June 18, 2025 11:15
-
-
Save itemir/f5bc9fded6483cd79c89ebf4ca1cfd30 to your computer and use it in GitHub Desktop.
Python script to calculate MD5 hash of a multipart uploaded file (relevant for Object Storages like OCI Object Storage or AWS S3)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python
import argparse
import base64
import hashlib
import sys
def md5(f, count):
    """Hash the next part of ``f`` and report whether the file is exhausted.

    Reads up to ``count`` MiB from ``f`` (as ``count * 16`` reads of 64 KiB)
    and feeds the data to a fresh MD5 hash.

    Args:
        f: File-like object opened in binary mode, positioned at the first
            byte of the part to hash.
        count: Part size in MiB.

    Returns:
        A ``(digest, eof)`` tuple: the raw 16-byte MD5 digest of the bytes
        read, and ``True`` when no data remains in ``f`` after this part.
    """
    chunk_size = 65536   # 64 KiB per read
    chunks_per_mib = 16  # 16 * 64 KiB == 1 MiB

    hash_md5 = hashlib.md5()
    eof = False
    for _ in range(count * chunks_per_mib):
        chunk = f.read(chunk_size)
        # ``not chunk`` matches both '' (Python 2) and b'' (Python 3); an
        # equality test against '' never fires for bytes objects.
        if not chunk:
            eof = True
            break
        hash_md5.update(chunk)

    if not eof:
        # Every read returned data, so the loop cannot tell whether the file
        # ended exactly at the part boundary.  Without this look-ahead the
        # caller would ask for one more part and get the digest of zero
        # bytes, yielding a wrong hash and part count.
        try:
            position = f.tell()
        except (IOError, OSError):
            # Non-seekable stream (e.g. a pipe): a byte read here could not
            # be put back, so leave ``eof`` as it is.
            position = None
        if position is not None:
            if f.read(1):
                f.seek(position)
            else:
                eof = True

    return (hash_md5.digest(), eof)
# Command-line entry point: split the file into parts of ``partsize`` MiB,
# take the MD5 digest of each part, then print the MD5 of the concatenated
# part digests followed by "-<number of parts>".
parser = argparse.ArgumentParser()
parser.add_argument('filename',
                    help='File that will be used to calculate the MD5 sum on')
parser.add_argument('partsize',
                    type=int,
                    help='Size of individual parts in (MiB)')
parser.add_argument('--base64',
                    action='store_true',
                    help='Display in base64 instead of hexadecimal')
cli_options = parser.parse_args()

# With a part size below 1 MiB md5() reads nothing and never reports EOF,
# so the hashing loop below would spin forever.
if cli_options.partsize < 1:
    parser.error('partsize must be a positive integer')

try:
    f = open(cli_options.filename, 'rb')
except IOError:
    print('Cannot open file')
    sys.exit(1)

empty_digest = hashlib.md5().digest()
eof = False
hash_list = []
try:
    while not eof:
        (md5_hash, eof) = md5(f, cli_options.partsize)
        # A digest equal to the MD5 of zero bytes, after at least one part
        # has been hashed, means this call consumed no data: the previous
        # part ended exactly at EOF.  Counting it would give a wrong hash
        # and part count.  An empty file still yields a single empty part.
        if hash_list and md5_hash == empty_digest:
            break
        hash_list.append(md5_hash)
finally:
    # Close the file even if reading or hashing raises.
    f.close()

# The part digests are raw bytes, so join them with a bytes separator.
multipart_md5 = hashlib.md5(b''.join(hash_list))
if cli_options.base64:
    # Base64 of the raw digest, without a trailing newline.
    multipart_hash = base64.b64encode(multipart_md5.digest()).decode('ascii')
else:
    multipart_hash = multipart_md5.hexdigest()

print('%s-%d' % (multipart_hash, len(hash_list)))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi
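There is an issue with this script: it does not show the correct hash for a file whose size is an exact multiple of the part size, i.e. when modulo(<FILE_SIZE>, <PART_SIZE>) = 0.
For example, a 2 GiB file with a part_size of 256 MiB.
This is due to the exit condition in the function md5: it only reports EOF when a read returns no data, so after the last full part it returns eof = False and the main loop hashes one extra, empty part.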
In order to fix this the function needs to be changed to :