Created
February 9, 2022 13:54
-
-
Save jswetzen/edd7d553079082df1d440ab07b522187 to your computer and use it in GitHub Desktop.
Python script for importing iCloud notes into Joplin via Gmail
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Import data from iCloud Notes into Joplin
# Developed and tested on Ubuntu only
# 1. Add a Gmail connection to Notes
# 2. Copy all your note folders into that Gmail space in Notes
# 3. Wait for all the notes to sync to your mailbox
# 4. Download all emails tagged with Notes using takeout.google.com
# 5. For each of the resulting mbox files, run this script
# 6. Import the markdown folder (with front matter) into Joplin
# Requires:
# - bs4
# - markdownify
# - python-magic (with required system packages)
# - Wand (with required system packages)
# Configuration: edit these two values before each run.
# The mailbox that is read is <prefix><name>.mbox in the current directory,
# and the markdown notes are written to the directory <name>/.
name = 'Subfolder'
prefix = 'Notes-'
import os | |
import mailbox | |
import email | |
import os | |
import mailbox | |
import email | |
from email import policy | |
from email.parser import BytesParser | |
from dateutil import parser | |
from bs4 import BeautifulSoup | |
from markdownify import markdownify as md | |
import magic | |
from wand.image import Image | |
# Convert every message of <prefix><name>.mbox into <name>/<num>.md (markdown
# with front matter) and store image attachments in _resources/.

# Create both output directories up front; the notes link to their images
# through '../_resources/', so that folder has to exist before anything is
# saved into it.
os.makedirs(name, exist_ok=True)
os.makedirs('_resources', exist_ok=True)

saved_files = []
num = 0
# create=False: a mistyped name must raise instead of silently creating an
# empty mailbox file and exporting nothing.
notes_mailbox = mailbox.mbox(
    f'{prefix}{name}.mbox',
    factory=BytesParser(policy=policy.default).parse,
    create=False,
)
for num, message in enumerate(notes_mailbox, start=1):
    subject = message['Subject']
    if subject is None:
        title = f'Untitled note {num}'
    else:
        title, encoding = email.header.decode_header(subject)[0]
        if isinstance(title, bytes):
            title = title.decode(encoding or 'ascii', errors='replace')
    # The title is written as a double-quoted YAML scalar, where both the
    # backslash and the double quote need escaping (backslash first).
    title = title.replace('\\', '\\\\').replace('"', '\\"')
    date = parser.parse(message['Date']).isoformat()

    # Maps a Content-Id to (original file name, file name in _resources/).
    attachments = {}
    if message.is_multipart():
        html = None
        attachment_index = 0
        # NOTE(review): only the top-level parts are inspected; nested
        # multipart containers are not descended into.
        for part in message.get_payload():
            content_type = part.get_content_type()
            if content_type == 'text/html':
                if html is None:
                    html = BeautifulSoup(part.get_content(), 'html.parser')
                else:
                    print(f'Warning: Multiple html parts for "{title}"')
                continue
            if content_type not in ('image/jpeg', 'image/png', 'application/octet-stream'):
                continue

            # Keep only the final path component so the attachment always
            # lands inside _resources/, and cope with parts that carry no
            # file name at all.
            filename = os.path.basename(part.get_filename() or '') or 'attachment'
            # Separators keep (num, index) pairs such as (1, 12) and (11, 2)
            # from producing the same file name.
            savename = f'{num}-{attachment_index}-{filename}'
            payload = part.get_payload(decode=True)
            if content_type == 'application/octet-stream':
                # Generic binary part: sniff the real type from the bytes.
                mime_type = magic.from_buffer(payload, mime=True)
                if mime_type not in ('image/png', 'image/jpeg', 'image/tiff'):
                    print("Unsupported attachment type: ", mime_type)
                    continue
                savename += '.png'
            saved_files.append(savename)
            # The file written is `img` itself, exactly as before; the PNG
            # clone that used to be built here was never saved, so it is no
            # longer created.
            # NOTE(review): confirm that Wand derives the output format from
            # the file extension, which is what would turn e.g. TIFF data
            # into the '.png' file named above.
            with Image(blob=payload) as img:
                img.save(filename=f'_resources/{savename}')
            attachment_index += 1

            content_id = part.get('Content-Id')
            if content_id is None:
                # Saved, but the html has no way to reference it.
                print(f'Warning: Attachment "{filename}" in "{title}" has no Content-Id')
                continue
            attachments[content_id.strip('<>')] = (filename, savename)

        if html is None:
            print(f'Warning: No html part for "{title}"')
            text = ''
        else:
            # Swap each <object data="cid:..."> for an <img> that points at
            # the exported file.
            for obj in html.find_all('object'):
                cid = str(obj.get('data', ''))
                if cid.startswith('cid:'):
                    cid = cid[4:]
                if cid not in attachments:
                    # Leave the element alone rather than emit an image link
                    # to the bare '../_resources/' directory.
                    print(f'Warning: No saved attachment for object "{cid}" in "{title}"')
                    continue
                original_name, resource_name = attachments[cid]
                img_tag = html.new_tag('img')
                img_tag['alt'] = original_name
                img_tag['src'] = '../_resources/' + resource_name
                obj.replace_with(img_tag)
            text = md(str(html), header_style='ATX')
    else:
        text = md(message.get_content(), header_style='ATX')

    # Explicit UTF-8: the front matter and the note text may contain
    # non-ASCII characters regardless of the system locale.
    with open(f'{name}/{num}.md', 'w', encoding='utf-8') as f:
        f.write('---\n')
        f.write(f'title: "{title}"\n')
        f.write('author: Johan Swetzén\n')
        f.write('source_application: iCloud Notes\n')
        f.write(f'created: {date}\n')
        f.write(f'updated: {date}\n')
        f.write('---\n\n')
        f.write(text)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment