Skip to content

Instantly share code, notes, and snippets.

@joltcan
Last active January 17, 2025 16:02
Show Gist options
  • Save joltcan/0cdb33f44206a4e0ddcbabbbcf9e28fc to your computer and use it in GitHub Desktop.
Save joltcan/0cdb33f44206a4e0ddcbabbbcf9e28fc to your computer and use it in GitHub Desktop.
Set instagram metadata to images downloaded from Instagram export
#!/usr/bin/env python3
"""
This script reads a JSON file containing metadata for images and updates the EXIF data of the images accordingly.
I use it on the output from Instagram's data export feature to write that metadata into the images' EXIF data,
so that I can import them into my own photo library with the correct metadata.
Dependencies: pip install pillow piexif
Usage:
python3 dumpjson.py <path_to_json_file>
Instagram export:
Visit https://accountscenter.instagram.com/info_and_permissions/dyi/
Download or transfer your information -> Instagram -> "some of your information" -> Content -> \
Download to device -> Date range (all time) -> Format: JSON -> Media Quality (high).
Press "Create files" and wait until you get a confirmation email. Download and unpack the zip file,
place this script inside the unpacked folder, and run it as described under Usage above.
Author: Fredrik Lundhag
Email: [email protected]
License: MIT
"""
# JSON processing
import json
import argparse
# Date/time processing
from datetime import datetime, timezone
import time
# exif processing
from PIL import Image
import piexif
from io import BytesIO
# system calls
import os
import sys
# Module-wide switch: when True, the functions in this script print extra
# progress details (skipped non-image files, timestamps, per-file confirmations).
verbose = False
def to_rational(number):
    """Convert a number into a ``(numerator, denominator)`` rational tuple.

    The denominator is fixed at 1,000,000, i.e. six decimal places of
    precision.

    Args:
        number: An int or float value.

    Returns:
        A tuple ``(numerator, 1000000)`` where ``numerator`` is an int.
    """
    # Round to nearest instead of truncating: a float product such as
    # ``x * 1000000`` can land a hair below the intended integer, and int()
    # would then drop a whole millionth.
    return round(number * 1000000), 1000000
def dms_to_rational(degrees):
    """Convert decimal degrees into (degrees, minutes, seconds) rational tuples.

    Args:
        degrees: Angle in decimal degrees. The sign is ignored; callers are
            expected to record the hemisphere separately (as the GPS code in
            this file does with its ``...Ref`` tags).

    Returns:
        A 3-tuple ``((d, 1000000), (m, 1000000), (s, 1000000))`` of integer
        rationals, using the same fixed denominator as ``to_rational``.
    """
    denominator = 1000000
    units_per_minute = 60 * denominator
    units_per_degree = 60 * units_per_minute

    # Work in whole millionths of an arc-second. Doing the split with integer
    # divmod avoids the float drift of repeated subtraction, which could turn
    # e.g. 10.1 degrees into 10 deg 5' 59.999999" instead of 10 deg 6' 0".
    total_units = round(abs(degrees) * units_per_degree)
    whole_degrees, remainder = divmod(total_units, units_per_degree)
    whole_minutes, second_units = divmod(remainder, units_per_minute)

    return (
        (whole_degrees * denominator, denominator),
        (whole_minutes * denominator, denominator),
        (second_units, denominator),
    )
def _encode_exif_text(text):
    """Return *text* as bytes suitable for an EXIF ASCII field."""
    if isinstance(text, bytes):
        return text
    try:
        # Latin-1 first: this keeps the bytes that were written before for
        # text limited to code points 0-255.
        # NOTE(review): piexif appears to encode str values as Latin-1 itself,
        # and Instagram exports reportedly store UTF-8 bytes as Latin-1 code
        # points -- confirm both against real data.
        return text.encode("latin1")
    except UnicodeEncodeError:
        # Anything outside Latin-1 (e.g. emoji) previously made the whole
        # EXIF update fail; fall back to UTF-8 instead.
        return text.encode("utf-8")


def _to_exif_datetime(value):
    """Turn ``YYYY-MM-DD HH:MM:SS`` into the EXIF form ``YYYY:MM:DD HH:MM:SS``."""
    date_part, separator, time_part = value.partition(" ")
    return date_part.replace("-", ":") + separator + time_part


def write_exif_from_json(image_path, exif_data):
    """Write the metadata entries in *exif_data* into the image's EXIF data.

    The image file is updated in place and its access/modification time is
    set from the ``DateTimeOriginal`` entry, if any.

    Args:
        image_path: Path of the image file to update.
        exif_data: Iterable of dicts. Recognised keys are ``latitude`` and
            ``longitude`` (together), ``iso``, ``aperture``,
            ``shutter_speed``, ``lens_model``, ``date_time_original``,
            ``title`` and ``DateTimeOriginal``. The latter is expected as
            ``YYYY-MM-DD HH:MM:SS`` in UTC, which is what ``parse_json``
            produces.

    Returns:
        None. Errors are reported on stdout rather than raised, so a single
        bad file does not abort a batch run.
    """
    try:
        # The context manager guarantees the file handle is released.
        with Image.open(image_path) as img:
            img_format = img.format  # e.g. "JPEG", "PNG"

            # Start from the existing EXIF data, or an empty skeleton.
            empty_exif = {"0th": {}, "Exif": {}, "GPS": {}, "1st": {}, "thumbnail": None}
            raw_exif = img.info.get("exif")
            if raw_exif:
                try:
                    exif_dict = piexif.load(raw_exif)
                except (piexif.InvalidImageDataError, FileNotFoundError, TypeError):
                    exif_dict = empty_exif
            else:
                exif_dict = empty_exif

            # Map each metadata entry to the matching EXIF tags.
            gps_ifd = {}
            for entry in exif_data:
                if "latitude" in entry and "longitude" in entry:
                    latitude = entry["latitude"]
                    longitude = entry["longitude"]
                    gps_ifd[piexif.GPSIFD.GPSLatitudeRef] = b'N' if latitude >= 0 else b'S'
                    gps_ifd[piexif.GPSIFD.GPSLatitude] = dms_to_rational(abs(latitude))
                    gps_ifd[piexif.GPSIFD.GPSLongitudeRef] = b'E' if longitude >= 0 else b'W'
                    gps_ifd[piexif.GPSIFD.GPSLongitude] = dms_to_rational(abs(longitude))
                if "iso" in entry:
                    exif_dict["Exif"][piexif.ExifIFD.ISOSpeedRatings] = int(entry["iso"])
                if "aperture" in entry:
                    exif_dict["Exif"][piexif.ExifIFD.ApertureValue] = (
                        int(float(entry["aperture"]) * 10000), 10000)
                if "shutter_speed" in entry:
                    exif_dict["Exif"][piexif.ExifIFD.ShutterSpeedValue] = (
                        int(float(entry["shutter_speed"]) * 10000), 10000)
                if "lens_model" in entry:
                    exif_dict["Exif"][piexif.ExifIFD.LensModel] = entry["lens_model"].encode("utf-8")
                if "date_time_original" in entry:
                    exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = (
                        entry["date_time_original"].encode("utf-8"))
                if "title" in entry:
                    # Always hand piexif bytes so captions with characters
                    # outside Latin-1 no longer abort the update.
                    exif_dict["0th"][piexif.ImageIFD.ImageDescription] = (
                        _encode_exif_text(entry["title"]))
                if "DateTimeOriginal" in entry:
                    # EXIF date fields use colons in the date part.
                    stamp = _to_exif_datetime(entry["DateTimeOriginal"]).encode("utf-8")
                    exif_dict["Exif"][piexif.ExifIFD.DateTimeOriginal] = stamp
                    exif_dict["0th"][piexif.ImageIFD.DateTime] = stamp

            if gps_ifd:
                exif_dict["GPS"] = gps_ifd

            # Build the embedded thumbnail from a copy so the full-size image
            # is left untouched.
            thumb = img.copy()
            thumb.thumbnail((160, 160))  # Standard thumbnail size
            if thumb.mode not in ("RGB", "L"):
                # JPEG cannot store alpha/palette modes (e.g. RGBA or P PNGs).
                thumb = thumb.convert("RGB")
            buffer = BytesIO()
            thumb.save(buffer, format="JPEG")
            exif_dict["thumbnail"] = buffer.getvalue()

            exif_bytes = piexif.dump(exif_dict)

            if img_format != "JPEG":
                # Non-JPEG formats still go through Pillow's encoder.
                img.save(image_path, format=img_format, exif=exif_bytes)

        if img_format == "JPEG":
            # Insert the EXIF segment directly instead of re-encoding the
            # pixels, which would recompress the photo lossily.
            piexif.insert(exif_bytes, image_path)

        if verbose:
            print(f"Updated EXIF for {image_path}")
    except Exception as e:
        # Deliberate best-effort: report and carry on with the next file.
        print(f"Error updating EXIF for {image_path}: {e}")

    # Set file modification time to the metadata timestamp
    try:
        for entry in exif_data:
            if "DateTimeOriginal" in entry:
                metadata_timestamp = datetime.strptime(
                    entry["DateTimeOriginal"], "%Y-%m-%d %H:%M:%S")
                # The string is in UTC, so it must not be interpreted as
                # local time when converting back to an epoch value.
                epoch = metadata_timestamp.replace(tzinfo=timezone.utc).timestamp()
                os.utime(image_path, (epoch, epoch))
                if verbose:
                    print(f"Setting file modification time to {metadata_timestamp}")
    except Exception as e:
        print(f"Error setting file modification time for {image_path}: {e}")
def parse_json(file_path):
    """Read an export JSON file and write EXIF data for every image it lists.

    The file may hold a single object or a list of objects. Each object may
    have a ``title`` and a ``media`` list; each media entry is read for
    ``uri``, ``title``, ``creation_timestamp`` and
    ``media_metadata.photo_metadata.exif_data``. For every media entry whose
    ``uri`` ends in .jpg/.jpeg/.png/.tiff, ``write_exif_from_json`` is called
    with the collected metadata.

    Args:
        file_path: Path to the JSON file.

    Returns:
        None.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # A single entry is wrapped in a list for uniform handling.
    if isinstance(data, dict):
        data = [data]

    image_suffixes = (".jpg", ".jpeg", ".png", ".tiff")

    for item in data:
        if not isinstance(item, dict):
            # Nothing to read from a non-object entry; skip it rather than
            # crash on the lookups below.
            continue

        parent_title = item.get("title", "Untitled")

        for media_item in item.get("media") or []:
            if not isinstance(media_item, dict):
                continue

            uri = media_item.get("uri", "")
            if not uri:  # Only process if a picture is present
                continue
            if not uri.lower().endswith(image_suffixes):
                if verbose:
                    print(f"Skipping non-image file: {uri}")
                continue

            print(f"Processing: {uri}")

            # Copy the list so the parsed JSON structure is not mutated.
            media_metadata = media_item.get("media_metadata") or {}
            photo_metadata = media_metadata.get("photo_metadata") or {}
            exif_data = list(photo_metadata.get("exif_data") or [])

            # Use the parent title if the media item has no title.
            title = media_item.get("title", False) or parent_title
            if title:
                exif_data.append({"title": title.strip()})

            # Fall back to the parent entry's timestamp, mirroring the title.
            # NOTE(review): assumes the parent may carry its own
            # "creation_timestamp" -- confirm against a real export.
            timestamp = (media_item.get("creation_timestamp")
                         or item.get("creation_timestamp"))
            if timestamp:
                human_time = datetime.fromtimestamp(
                    timestamp, timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
                if verbose:
                    print(f"Timestamp: {human_time}")
                exif_data.append({"DateTimeOriginal": human_time})
            # With no timestamp at all, no date is written: stamping the file
            # as 1970-01-01 would be worse than leaving it alone.

            write_exif_from_json(uri, exif_data)
if __name__ == "__main__":
    # Command-line entry point: one positional argument, the JSON file to process.
    cli = argparse.ArgumentParser(
        description="Parse a JSON file and extract image information.",
    )
    cli.add_argument("json_file", help="Path to the JSON file.")
    parse_json(cli.parse_args().json_file)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment