Last active
August 12, 2017 12:30
-
-
Save CarstVaartjes/bbeede5b4212aba72457e3dc32b86905 to your computer and use it in GitHub Desktop.
Deduplicate photo directories based on the basename + size
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install exifread
import os
import shutil

import exifread
def get_date(filename):
    """Return the EXIF "Image DateTime" date of *filename* as a compact string.

    The file is opened in binary mode and parsed with ``exifread``.  The
    first ten characters of the tag value are kept and their colons removed,
    so a value starting with ``2017:08:12`` becomes ``20170812``.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The date string, or ``'unknown'`` when no "Image DateTime" tag is
        available (any ``KeyError`` during the lookup is treated that way).

    Raises:
        OSError: If the file cannot be opened; this is not caught here.
    """
    try:
        with open(filename, 'rb') as fh:
            # exifread compares ``stop_tag`` with the bare tag name (without
            # the "Image " IFD prefix), so "DateTime" is what actually stops
            # parsing once the tag has been read.  ``details=False`` skips
            # maker notes and thumbnails, which are not needed for the date.
            tags = exifread.process_file(fh, stop_tag="DateTime", details=False)
        date_taken = tags["Image DateTime"]
        # Keep only the date part and drop the ':' separators.
        return date_taken.values[:10].replace(':', '')
    except KeyError:
        return 'unknown'
# Upper-case file suffixes (including the leading dot) that count as media.
MEDIA_EXTENSIONS = ('.JPG', '.GIF', '.PNG', '.MOV', '.AVI', '.RAW')


def is_media_file(name):
    """Return True when *name* is a non-hidden file with a media suffix.

    Args:
        name: Bare file name (no directory part).

    Returns:
        False for names starting with '.', otherwise whether the name ends,
        case-insensitively, with one of ``MEDIA_EXTENSIONS``.
    """
    if name.startswith('.'):
        return False
    # Whole-suffix comparison so that '.RAW' is matched like the others
    # (a fixed four-character slice can never equal a three-character 'RAW').
    return name.upper().endswith(MEDIA_EXTENSIONS)


# Maps "<stem>_<date>_<size>" to every path found with that signature.
existing_size = {}
for dirpath, dirnames, filenames in os.walk("."):
    for filename in filenames:
        if not is_media_file(filename):
            continue
        full_name = os.path.join(dirpath, filename)
        print(full_name)
        size = os.path.getsize(full_name)
        datestamp = get_date(full_name)
        try:
            # The stem is everything before the first space or dot, so
            # "IMG_1 (1).JPG" and "IMG_1.JPG" share the same signature.
            basename = filename.split(' ')[0].split('.')[0] + '_' + datestamp + '_' + str(size)
        except UnicodeDecodeError:
            # NOTE(review): presumably a Python 2 leftover (byte/unicode
            # mixing); kept so such files are still skipped, not fatal.
            continue
        existing_size.setdefault(basename, []).append(full_name)
# Only signatures seen more than once are duplicates worth reporting.
existing_size = {k: v for k, v in existing_size.items() if len(v) > 1}
# Safety switch: while this is False the loop below only prints what it would
# delete.  Set it to True to delete the redundant copies for real.
remove = False
for group in existing_size.values():
    # The last path of each group is kept; every earlier one is redundant.
    redundant, keeper = group[:-1], group[-1]
    print(redundant, keeper)
    if not remove:
        continue
    for path in redundant:
        os.remove(path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment