Created
December 15, 2019 12:46
-
-
Save fengye/92708a1ae2102fcbf722165d67418396 to your computer and use it in GitHub Desktop.
Parses a Google Photos Takeout export and categorises the photos into date-named folders
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from glob import glob | |
import os.path | |
import os | |
import re | |
import json | |
import datetime | |
import shutil | |
class PhotoEntry:
    """A photo file paired with the JSON sidecar file that describes it.

    Both files live in ``dirname``. The sidecar is only read when
    ``parse()`` is called; until then every ``getPhotoTaken*`` accessor
    returns ``None``.
    """

    def __init__(self, dirname, filename, json_filename):
        """Remember where the photo and its sidecar are; no I/O happens here.

        Args:
            dirname: Directory containing both files.
            filename: File name of the photo inside ``dirname``.
            json_filename: File name of the JSON sidecar inside ``dirname``.
        """
        self.dirname = dirname
        self.filename = filename
        self.json_filename = json_filename
        self.parsed = False
        # Defined up front so the attribute exists before parse() has run.
        self.data = None

    def getPhotoFullname(self):
        """Return the photo path (``dirname`` joined with ``filename``)."""
        return os.path.join(self.dirname, self.filename)

    def getJsonFullname(self):
        """Return the sidecar path (``dirname`` joined with ``json_filename``)."""
        return os.path.join(self.dirname, self.json_filename)

    def parse(self):
        """Load the JSON sidecar into ``self.data`` and mark the entry parsed.

        Raises:
            OSError: If the sidecar cannot be opened.
            json.JSONDecodeError: If the sidecar is not valid JSON.
        """
        # JSON interchange text is UTF-8 (RFC 8259); do not depend on the
        # platform's default encoding, which differs between systems.
        with open(self.getJsonFullname(), encoding="utf-8") as f:
            self.data = json.load(f)
        self.parsed = True

    def isParsed(self):
        """Return whether ``parse()`` has run and produced non-null data."""
        return self.parsed and self.data is not None

    def getPhotoTakenTimeFormatted(self):
        """Return ``data["photoTakenTime"]["formatted"]``, or None if unparsed.

        Raises:
            KeyError: If the parsed sidecar lacks those keys.
        """
        if not self.isParsed():
            return None
        return self.data["photoTakenTime"]["formatted"]

    def getPhotoTakenTimestamp(self):
        """Return ``data["photoTakenTime"]["timestamp"]`` as stored, or None if unparsed.

        Raises:
            KeyError: If the parsed sidecar lacks those keys.
        """
        if not self.isParsed():
            return None
        return self.data["photoTakenTime"]["timestamp"]

    def getPhotoTakenTime(self):
        """Return the taken time as a UTC-aware ``datetime``, or None if unparsed.

        The stored timestamp is converted with ``int()`` and interpreted as
        POSIX seconds.

        Raises:
            KeyError: If the parsed sidecar lacks the timestamp keys.
            ValueError: If the stored timestamp is not an integer value.
        """
        if not self.isParsed():
            return None
        seconds = int(self.data["photoTakenTime"]["timestamp"])
        return datetime.datetime.fromtimestamp(seconds, datetime.timezone.utc)
def main(): | |
print("Google Takeout - Photos JSON parser and categoriser") | |
# list all the .json files | |
filenames = glob("./original_data/*.json", recursive=True) | |
error_filenames = [] | |
entries = [] | |
for json_filename in filenames: | |
dirname = os.path.dirname(json_filename) | |
short_json_filename = os.path.basename(json_filename) | |
version = None | |
error = False | |
m = re.search('^([a-zA-Z0-9\s_\\.\-]+)(\([0-9]+\))*(.json)$', short_json_filename) | |
if m[0] != None and m[3] == '.json': | |
if m[2] != None: | |
mv = re.search('^\(([0-9]+)\)$', m[2]) | |
if mv[0] != None: | |
version = int(mv[1]) | |
else: | |
error = True | |
else: | |
error = True | |
if error: | |
print("Unrecognised file: " + json_filename) | |
error_filenames.append(json_filename) | |
else: | |
if version == None: | |
photo_filename = m[1] | |
else: | |
photo_original_filename = m[1] | |
names = os.path.splitext(photo_original_filename) | |
extension = names[-1] | |
photo_filename_list = [''.join(names[0:-1]), "(", str(version), ")", names[-1]] | |
photo_filename = ''.join(photo_filename_list) | |
if os.path.exists(os.path.join(dirname, photo_filename)): | |
print("Found: " + photo_filename + " -> " + short_json_filename) | |
entries.append(PhotoEntry(dirname, photo_filename, short_json_filename)) | |
else: | |
print("[ERROR] Doesn't exists: " + photo_filename + " -> " + short_json_filename) | |
error_filenames.append(json_filename) | |
# parse each valid json | |
target_dir = "./organised_data" | |
for photo_entry in entries: | |
photo_entry.parse() | |
photo_time = photo_entry.getPhotoTakenTime() | |
print("Processing " + photo_entry.filename + " @ " + str(photo_time)) | |
datefolder = photo_time.date().isoformat() | |
if not os.path.exists(os.path.join(target_dir, datefolder)): | |
os.mkdir(os.path.join(target_dir, datefolder)) | |
print("Copy \"" + photo_entry.filename + "\" to \"" + datefolder + "\"...") | |
shutil.copyfile(photo_entry.getPhotoFullname(), os.path.join(target_dir, datefolder, photo_entry.filename)) | |
print("Copy \"" + photo_entry.json_filename + "\" to \"" + datefolder + "\"...") | |
shutil.copyfile(photo_entry.getJsonFullname(), os.path.join(target_dir, datefolder, photo_entry.json_filename)) | |
print("Processed " + str(len(entries)) + " files") | |
if len(error_filenames) > 0: | |
with open("error.txt", "w") as f: | |
for error_filename in error_filenames: | |
f.write(error_filename + "\n") | |
print("All errors has been written to error.txt") | |
# Run the categoriser only when this file is executed as a script, not when
# it is imported as a module.
if "__main__" == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment