#!/usr/bin/python
import argparse
import hashlib
import sys


def md5(f, count):
    # Hash one part: up to `count` MiB, read in 64 KiB chunks (count * 16 reads).
    hash_md5 = hashlib.md5()
    eof = False
    for i in range(count * 16):
        chunk = f.read(65536)
        if chunk == '':
            eof = True
            break
        hash_md5.update(chunk)
    return (hash_md5.digest(), eof)


parser = argparse.ArgumentParser()
parser.add_argument('filename',
                    help='File to calculate the multipart MD5 sum of')
parser.add_argument('partsize',
                    type=int,
                    help='Size of individual parts (in MiB)')
parser.add_argument('--base64',
                    action='store_true',
                    help='Display in base64 instead of hexadecimal')
cli_options = parser.parse_args()

try:
    f = open(cli_options.filename, 'rb')
except IOError:
    print 'Cannot open file'
    sys.exit(1)

# Hash every part, then hash the concatenation of the part digests
# (the S3 multipart ETag scheme).
eof = False
hash_list = []
while eof == False:
    (md5_hash, eof) = md5(f, cli_options.partsize)
    hash_list.append(md5_hash)
f.close()

multipart_hash = hashlib.md5(''.join(hash_list)).hexdigest()
if cli_options.base64 == True:
    multipart_hash = multipart_hash.decode('hex').encode('base64').strip()
print '%s-%d' % (multipart_hash, len(hash_list))
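The printed value has the form <md5 of the concatenated per-part MD5 digests>-<number of parts>, the same shape as an S3 multipart ETag. As a minimal illustration of that scheme (a Python 3 sketch of my own, not part of the original script; the function name, the byte-based part size parameter and the example file name are all assumptions):

import hashlib

def multipart_etag(path, part_size_bytes):
    # Hash each part separately, then hash the concatenation of the part digests.
    part_digests = []
    with open(path, 'rb') as f:
        while True:
            data = f.read(part_size_bytes)  # read one whole part (simpler, but memory-hungry)
            if not data:                    # stop at EOF, even on an exact part boundary
                break
            part_digests.append(hashlib.md5(data).digest())
    combined = hashlib.md5(b''.join(part_digests)).hexdigest()
    return '%s-%d' % (combined, len(part_digests))

# e.g. multipart_etag('bigfile.bin', 256 * 1024 * 1024) for 256 MiB parts

To run the script itself, save it and pass the file name and the part size in MiB that was used for the upload, e.g. python <saved name> bigfile.bin 256, adding --base64 if you need the base64 form.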
Hello,
How can I use this script?
Updated for Python 3. Added verbose option to show md5 checksums in hex for each part of the multipart.
https://gist.github.com/kevco-us/cb408aa4123112a40428c974d74f8918
Thank you for this.
The Linux-based options like s3md5 didn't work for me. Even with WSL Linux installed, it wouldn't see my external drive, which had some very large files I needed to check.
But I can run Python in my Windows PowerShell and that worked well.
Hi
There is an issue with this script. It won't show the correct hash for a file when <FILE_SIZE> mod <PART_SIZE> = 0, i.e. when the file size is an exact multiple of the part size.
For example, a 2 GiB file with a part size of 256 MiB.
This is due to the exit condition in the md5 function:

if chunk == '':
    eof = True
    break

When the file ends exactly on a part boundary, the last real part never reads an empty chunk, so eof stays False and the outer loop calls md5 once more, appending the digest of an empty extra part.
To fix this, the function needs to be changed to:
import os

def md5(f, count):
    hash_md5 = hashlib.md5()
    eof = False
    file_size = os.fstat(f.fileno()).st_size
    for i in range(count * 16):
        chunk = f.read(65536)
        hash_md5.update(chunk)
        if f.tell() == file_size:
            eof = True
            break
    return (hash_md5.digest(), eof)
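An alternative sketch (my own illustration, not the fix above): keep the chunk-based EOF check and avoid os.fstat, but also return how many bytes the part actually consumed so the caller can skip the spurious empty part. It reuses the script's f, cli_options and hashlib; the extra bytes_read return value is an addition that is not in the original script.

def md5(f, count):
    hash_md5 = hashlib.md5()
    eof = False
    bytes_read = 0
    for i in range(count * 16):
        chunk = f.read(65536)
        if not chunk:
            eof = True
            break
        hash_md5.update(chunk)
        bytes_read += len(chunk)
    return (hash_md5.digest(), eof, bytes_read)

eof = False
hash_list = []
while not eof:
    (md5_hash, eof, bytes_read) = md5(f, cli_options.partsize)
    if bytes_read > 0:  # drop the empty "part" produced when the file ends exactly on a part boundary
        hash_list.append(md5_hash)

Either way, the final digest is then computed from hash_list exactly as the script already does.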
Created a Java version here: https://gist.github.com/matthewshannon/891ac1359a540fd8322e6049f153c1b7