Last active
January 17, 2025 16:02
-
-
Save joltcan/0cdb33f44206a4e0ddcbabbbcf9e28fc to your computer and use it in GitHub Desktop.
Set instagram metadata to images downloaded from Instagram export
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
This script reads a JSON file containing metadata for images and updates the EXIF data of the images accordingly.
I use it on the output from Instagram's data export feature to update the EXIF data of the images with the metadata,
so that I can import them into my own photo library and have the correct metadata.
Dependencies: pip install pillow piexif | |
Usage: | |
python3 dumpjson.py <path_to_json_file> | |
Instagram export: | |
Visit https://accountscenter.instagram.com/info_and_permissions/dyi/ | |
Download or transfer your information -> Instagram -> "some of your information" -> Content -> \ | |
Download to device -> Date range (all time) -> Format: JSON -> Media Quality (high). | |
Press "Create files" and wait until you get a confirmation email. Download and unpack the zip file,
place this script inside the unpacked folder, and run it as described under Usage above.
Author: Fredrik Lundhag | |
Email: [email protected] | |
License: MIT | |
""" | |
# JSON processing | |
import json | |
import argparse | |
# Date/time processing | |
from datetime import datetime, timezone | |
import time | |
# exif processing | |
from PIL import Image | |
import piexif | |
from io import BytesIO | |
# system calls | |
import os | |
import sys | |
# Debug switch: when True, the script prints extra progress details
# (skipped non-image files, timestamps, per-image confirmations).
verbose = False
def to_rational(number):
    """Convert a number into an EXIF-style rational ``(numerator, denominator)``.

    The denominator is fixed at 1,000,000, which gives six decimal places of
    precision.

    Args:
        number: The int or float value to convert.

    Returns:
        A ``(numerator, 1000000)`` tuple of ints.
    """
    # Round instead of truncating: a float product can land just below the
    # intended integer (e.g. 33.3 * 1000000 evaluates to 33299999.999999996),
    # and int() would then silently drop a whole unit.
    return round(number * 1000000), 1000000
def dms_to_rational(degrees):
    """Convert decimal degrees into (degrees, minutes, seconds) rational tuples.

    Each component is returned as ``(numerator, 1000000)``, the same
    six-decimal rational layout that ``to_rational`` produces.

    Args:
        degrees: Angle in decimal degrees. Callers in this file pass the
            absolute value; a negative input yields negative numerators on
            every component.

    Returns:
        A 3-tuple ``(degrees, minutes, seconds)`` of rational tuples.
    """
    scale = 1000000
    sign = -1 if degrees < 0 else 1

    # Round ONCE to whole millionths of an arc-second and split with integer
    # arithmetic. Truncating degrees, minutes and seconds independently on
    # floats lets representation error leak across components: 10.35 used to
    # come out as 20' 59.999999" instead of 21' 0".
    total = round(abs(degrees) * 3600 * scale)
    whole_degrees, remainder = divmod(total, 3600 * scale)
    minutes, seconds_scaled = divmod(remainder, 60 * scale)

    return (
        (sign * whole_degrees * scale, scale),
        (sign * minutes * scale, scale),
        (sign * seconds_scaled, scale),
    )
def _encode_exif_text(text):
    """Encode *text* as bytes for an ASCII-typed EXIF tag."""
    try:
        # Latin-1 first keeps the stored bytes unchanged for text that was
        # already accepted as a plain str.
        # NOTE(review): piexif appears to Latin-1-encode str values for ASCII
        # tags itself, and Instagram exports presumably carry UTF-8 text as
        # Latin-1 code points - confirm both before changing this order.
        return text.encode("latin-1")
    except UnicodeEncodeError:
        # Characters outside Latin-1 (e.g. emoji) fall back to UTF-8 rather
        # than aborting the whole EXIF update.
        return text.encode("utf-8")


def _to_exif_datetime(value):
    """Return *value* as bytes, re-formatted to EXIF's ``YYYY:MM:DD HH:MM:SS``."""
    try:
        parsed = datetime.strptime(value, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Not in the dash-separated layout: store it untouched.
        return value.encode("utf-8")
    return parsed.strftime("%Y:%m:%d %H:%M:%S").encode("utf-8")


def _jpeg_thumbnail(picture, size=(160, 160)):
    """Return JPEG bytes of a copy of *picture* shrunk to fit within *size*."""
    thumb = picture.copy()  # never shrink the caller's full-size image
    thumb.thumbnail(size)
    if thumb.mode not in ("RGB", "L"):
        # JPEG cannot store alpha or palette modes (e.g. RGBA / P from PNGs).
        thumb = thumb.convert("RGB")
    buffer = BytesIO()
    thumb.save(buffer, format="JPEG")
    return buffer.getvalue()


def write_exif_from_json(image_path, exif_data):
    """Write metadata entries into an image's EXIF block and set its mtime.

    Errors are reported with ``print`` and never raised, so one bad image
    does not abort a batch run.

    Args:
        image_path: Path of the image file to update in place.
        exif_data: Iterable of dicts. Recognised keys are ``latitude`` +
            ``longitude``, ``iso``, ``aperture``, ``shutter_speed``,
            ``lens_model``, ``date_time_original``, ``title`` and
            ``DateTimeOriginal`` (a ``YYYY-MM-DD HH:MM:SS`` string).

    Returns:
        None.
    """
    try:
        # Read everything needed while the file is open, then release the
        # handle before the same path is written to.
        with Image.open(image_path) as img:
            img_format = img.format  # e.g. "JPEG", "PNG"
            existing_exif = img.info.get("exif", None)
            picture = img.copy()  # fully decoded, independent of the file handle

        # Start from the existing EXIF data, or from an empty skeleton.
        try:
            exif_dict = piexif.load(existing_exif)
        except (piexif.InvalidImageDataError, FileNotFoundError, TypeError):
            exif_dict = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}

        # Map the JSON entries onto EXIF tags.
        gps_ifd = {}
        for entry in exif_data:
            if "latitude" in entry and "longitude" in entry:
                latitude = float(entry["latitude"])
                longitude = float(entry["longitude"])
                gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = b'N' if latitude >= 0 else b'S'
                gps_ifd[piexif.GPSIFD.GPSLatitude] = dms_to_rational(abs(latitude))
                gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = b'E' if longitude >= 0 else b'W'
                gps_ifd[piexif.GPSIFD.GPSLongitude] = dms_to_rational(abs(longitude))
            if "iso" in entry:
                exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = int(entry["iso"])
            # NOTE(review): aperture and shutter speed are stored verbatim;
            # EXIF defines ApertureValue/ShutterSpeedValue in APEX units -
            # confirm which unit the export provides.
            if "aperture" in entry:
                exif_dict["Exif"][piexif.ExifIFD.ApertureValue] = (int(float(entry["aperture"]) * 10000), 10000)
            if "shutter_speed" in entry:
                exif_dict["Exif"][piexif.ExifIFD.ShutterSpeedValue] = (int(float(entry["shutter_speed"]) * 10000), 10000)
            if "lens_model" in entry:
                exif_dict["Exif"][piexif.ExifIFD.LensModel] = entry["lens_model"].encode("utf-8")
            if "date_time_original" in entry:
                exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = _to_exif_datetime(entry["date_time_original"])
            if "title" in entry:
                exif_dict["0th"][piexif.ImageIFD.ImageDescription] = _encode_exif_text(entry["title"])
            if "DateTimeOriginal" in entry:
                # EXIF dates use colons (YYYY:MM:DD); the JSON-derived string
                # uses dashes, so convert before storing.
                exif_stamp = _to_exif_datetime(entry["DateTimeOriginal"])
                exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = exif_stamp
                exif_dict["0th"][piexif.ImageIFD.DateTime] = exif_stamp

        if gps_ifd:
            exif_dict["GPS"] = gps_ifd

        # Embed a standard-size thumbnail and serialise the EXIF block.
        exif_dict["thumbnail"] = _jpeg_thumbnail(picture)
        exif_bytes = piexif.dump(exif_dict)

        if img_format == "JPEG":
            # Splice the EXIF segment into the existing file; re-saving through
            # Pillow would re-encode the pixels and degrade the photo.
            piexif.insert(exif_bytes, image_path)
        else:
            picture.save(image_path, format=img_format, exif=exif_bytes)
        if verbose: print(f"Updated EXIF for {image_path}")
    except Exception as e:
        print(f"Error updating EXIF for {image_path}: {e}")

    # Set file modification time to the metadata timestamp
    try:
        for entry in exif_data:
            if "DateTimeOriginal" in entry:
                metadata_timestamp = datetime.strptime(entry["DateTimeOriginal"], "%Y-%m-%d %H:%M:%S")
                # parse_json formats this string in UTC, so it must be read
                # back as UTC; time.mktime() would treat it as local time and
                # shift the mtime by the machine's UTC offset.
                epoch_seconds = metadata_timestamp.replace(tzinfo=timezone.utc).timestamp()
                os.utime(image_path, (epoch_seconds, epoch_seconds))
                if verbose: print(f"Setting file modification time to {metadata_timestamp}")
    except Exception as e:
        print(f"Error setting file modification time for {image_path}: {e}")
def parse_json(file_path):
    """Read a metadata JSON file and update every image it references.

    The file holds one entry (a dict) or a list of entries. Each entry may
    have a ``title`` and a ``media`` list. Each media item may have ``uri``,
    ``title``, ``creation_timestamp`` and
    ``media_metadata.photo_metadata.exif_data``. For every media item whose
    ``uri`` ends in .jpg/.jpeg/.png/.tiff, the collected metadata is passed
    to ``write_exif_from_json``.

    Args:
        file_path: Path to the JSON file. Image URIs are used as given, so
            relative URIs resolve against the current working directory.

    Returns:
        None.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Wrap a single entry in a list for uniform handling.
    if not isinstance(data, list):
        data = [data]

    for item in data:
        # Entries that are not JSON objects carry no media; skip them instead
        # of failing on the first .get() call.
        if not isinstance(item, dict):
            continue

        # Parent title, used when a media item has no title of its own.
        parent_title = item.get("title", "Untitled")

        for media_item in item.get("media") or []:
            if not isinstance(media_item, dict):
                continue

            uri = media_item.get("uri", "")
            if not uri:  # Only process if a picture is present
                continue

            # Skip non-image files
            if not uri.lower().endswith((".jpg", ".jpeg", ".png", ".tiff")):
                if verbose: print(f"Skipping non-image file: {uri}")
                continue

            print(f"Processing: {uri}")

            title = media_item.get("title", False) or parent_title

            timestamp = media_item.get("creation_timestamp", 0)
            human_time = datetime.fromtimestamp(timestamp, timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
            if verbose: print(f"Timestamp: {human_time}")

            # Tolerate missing or null levels, and copy the list so the
            # entries appended below do not modify the parsed JSON.
            media_metadata = media_item.get("media_metadata") or {}
            photo_metadata = media_metadata.get("photo_metadata") or {}
            exif_data = list(photo_metadata.get("exif_data") or [])

            if title:
                exif_data.append({"title": title.strip()})

            # Add EXIF DateTimeOriginal
            exif_data.append({"DateTimeOriginal": human_time})

            write_exif_from_json(uri, exif_data)
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the path to the
    # JSON file, which is handed straight to parse_json.
    cli = argparse.ArgumentParser(description="Parse a JSON file and extract image information.")
    cli.add_argument("json_file", help="Path to the JSON file.")
    parse_json(cli.parse_args().json_file)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment