jswetzen · February 9, 2022 13:54
diff --git a/joplin-icloud-gmail-import.py b/joplin-icloud-gmail-import.py
 # Import Data from iCloud Notes to Joplin
 # Developed and tested on Ubuntu only
 # 1. Add a Gmail connection to Notes
 # 2. Copy all your note folders into that Gmail space in Notes
 # 3. Wait for all the notes to sync to your mailbox
 # 4. Download all emails labeled "Notes" from takeout.google.com
 # 5. For each of the resulting mbox files, run this script
 # 6. Import the markdown folder (with front matter) into Joplin

 # Requires
 # - bs4
 # - markdownify
 # - python-dateutil
 # - python-magic (with required system packages)
 # - Wand (with required system packages)

 # Mailbox file name should be <prefix><name>.mbox
 # Resulting directory will be <name>/
 # Folder name: selects which mbox file is read and names the output directory.
 name = 'Subfolder'
 # Text that precedes <name> in the mailbox file name; the file read is
 # f'{prefix}{name}.mbox'.
 prefix = 'Notes-'

 import os
 import mailbox
 import email

 import os
 import mailbox
 import email
 from email import policy
 from email.parser import BytesParser
 from dateutil import parser
 from bs4 import BeautifulSoup
 from markdownify import markdownify as md
 import magic
 from wand.image import Image

 # Convert every message of '<prefix><name>.mbox' into a markdown note with
 # YAML front matter at '<name>/<num>.md'. Image attachments are written to
 # '_resources/' and referenced from the notes as '../_resources/<file>'.

 # Both output directories must exist before anything is written; the
 # attachment directory sits next to '<name>/' to match the relative links.
 os.makedirs(name, exist_ok=True)
 os.makedirs('_resources', exist_ok=True)

 # Names of all attachment files written so far, across all messages.
 saved_files = []

 # 1-based index of the current message; doubles as the note's file name.
 # Stays 0 when the mailbox is empty.
 num = 0
 for num, message in enumerate(
         mailbox.mbox(f'{prefix}{name}.mbox',
                      factory=BytesParser(policy=policy.default).parse),
         start=1):
     # A missing Subject header yields None; treat it as an empty title.
     title, encoding = email.header.decode_header(message['Subject'] or '')[0]
     if isinstance(title, bytes):
         # decode_header may return bytes without naming a charset.
         title = title.decode(encoding or 'ascii', errors='replace')
     # Escape for the double-quoted YAML value: backslashes first, then quotes.
     title = title.replace('\\', '\\\\').replace('"', '\\"')
     date = parser.parse(message['Date']).isoformat()
     # Maps Content-Id -> (original file name, saved file name).
     attachments = {}
     if message.is_multipart():
         html = None
         # Separate counter (not len(attachments)) so that parts without a
         # Content-Id or with a repeated one still get distinct file names.
         attachment_count = 0
         # NOTE: only the direct sub-parts are inspected; nested multiparts
         # are not descended into.
         for part in message.get_payload():
             content_type = part.get_content_type()
             if content_type == 'text/html':
                 if html is None:
                     html = BeautifulSoup(part.get_content(), 'html.parser')
                 else:
                     print(f'Warning: Multiple html parts for "{title}"')
             elif content_type in ('image/jpeg', 'image/png',
                                   'application/octet-stream'):
                 # The file name comes from the message itself: keep only the
                 # base name so it cannot point outside '_resources/', and
                 # fall back to a placeholder when none is given.
                 filename = (os.path.basename(part.get_filename() or '')
                             or 'attachment')
                 # Separators keep (num, index) unambiguous: without them
                 # '1' + '12' and '11' + '2' produce the same prefix.
                 savename = f'{num}-{attachment_count}-{filename}'
                 attachment_count += 1
                 payload = part.get_payload(decode=True)
                 if content_type == 'application/octet-stream':
                     # Generic binary part: sniff the real type and only keep
                     # the image formats handled here.
                     mime_type = magic.from_buffer(payload, mime=True)
                     if mime_type in ('image/png', 'image/jpeg', 'image/tiff'):
                         savename += '.png'
                     else:
                         print("Unsupported attachment type: ", mime_type)
                         continue
                 with Image(blob=payload) as img:
                     with img.convert('png') as output_img:
                         # Save the converted image (previously the unconverted
                         # `img` was written and the conversion was discarded).
                         output_img.save(filename=f'_resources/{savename}')
                 saved_files.append(savename)
                 content_id = part.get('Content-Id')
                 if content_id is None:
                     # Nothing in the html can reference this attachment.
                     print(f'Warning: Attachment "{filename}" in "{title}" '
                           'has no Content-Id')
                     continue
                 attachments[str(content_id).strip('<>')] = (filename, savename)
         if html is None:
             # Multipart message without any text/html part: still emit the
             # note (title and dates) instead of crashing.
             print(f'Warning: No html part for "{title}"')
             text = ''
         else:
             # Replace each <object data="cid:..."> with an <img> pointing at
             # the saved attachment.
             for obj in html.find_all('object'):
                 cid = str(obj.get('data', ''))
                 if cid.startswith('cid:'):
                     cid = cid[4:]
                 img_tag = html.new_tag('img')
                 img_tag['alt'], img_tag['src'] = attachments.get(cid, ('', ''))
                 img_tag['src'] = '../_resources/' + img_tag['src']
                 obj.replace_with(img_tag)
             text = md(str(html), header_style='ATX')
     else:
         text = md(message.get_content(), header_style='ATX')
     # Explicit UTF-8: the front matter and note bodies contain non-ASCII text.
     with open(f'{name}/{num}.md', 'w', encoding='utf-8') as f:
         f.write(
             '---\n'
             f'title: "{title}"\n'
             'author: Johan Swetzén\n'
             'source_application: iCloud Notes\n'
             f'created: {date}\n'
             f'updated: {date}\n'
             '---\n\n'
         )
         f.write(text)
	# Import Data from iCloud Notes to Joplin
	# Developed and tested on Ubuntu only
	# 1. Add a Gmail connection to Notes
	# 2. Copy all your note folders into that Gmail space in Notes
	# 3. Wait for all the notes to sync to your mailbox
	# 4. Download all emails labeled "Notes" from takeout.google.com
	# 5. For each of the resulting mbox files, run this script
	# 6. Import the markdown folder (with front matter) into Joplin

	# Requires
	# - bs4
	# - markdownify
	# - python-dateutil
	# - python-magic (with required system packages)
	# - Wand (with required system packages)

	# Mailbox file name should be <prefix><name>.mbox
	# Resulting directory will be <name>/
	# Folder name: selects which mbox file is read and names the output directory.
	name = 'Subfolder'
	# Text that precedes <name> in the mailbox file name; the file read is
	# f'{prefix}{name}.mbox'.
	prefix = 'Notes-'

	import os
	import mailbox
	import email

	import os
	import mailbox
	import email
	from email import policy
	from email.parser import BytesParser
	from dateutil import parser
	from bs4 import BeautifulSoup
	from markdownify import markdownify as md
	import magic
	from wand.image import Image

	# Convert every message of '<prefix><name>.mbox' into a markdown note with
	# YAML front matter at '<name>/<num>.md'. Image attachments are written to
	# '_resources/' and referenced from the notes as '../_resources/<file>'.

	# Both output directories must exist before anything is written; the
	# attachment directory sits next to '<name>/' to match the relative links.
	os.makedirs(name, exist_ok=True)
	os.makedirs('_resources', exist_ok=True)

	# Names of all attachment files written so far, across all messages.
	saved_files = []

	# 1-based index of the current message; doubles as the note's file name.
	# Stays 0 when the mailbox is empty.
	num = 0
	for num, message in enumerate(
	        mailbox.mbox(f'{prefix}{name}.mbox',
	                     factory=BytesParser(policy=policy.default).parse),
	        start=1):
	    # A missing Subject header yields None; treat it as an empty title.
	    title, encoding = email.header.decode_header(message['Subject'] or '')[0]
	    if isinstance(title, bytes):
	        # decode_header may return bytes without naming a charset.
	        title = title.decode(encoding or 'ascii', errors='replace')
	    # Escape for the double-quoted YAML value: backslashes first, then quotes.
	    title = title.replace('\\', '\\\\').replace('"', '\\"')
	    date = parser.parse(message['Date']).isoformat()
	    # Maps Content-Id -> (original file name, saved file name).
	    attachments = {}
	    if message.is_multipart():
	        html = None
	        # Separate counter (not len(attachments)) so that parts without a
	        # Content-Id or with a repeated one still get distinct file names.
	        attachment_count = 0
	        # NOTE: only the direct sub-parts are inspected; nested multiparts
	        # are not descended into.
	        for part in message.get_payload():
	            content_type = part.get_content_type()
	            if content_type == 'text/html':
	                if html is None:
	                    html = BeautifulSoup(part.get_content(), 'html.parser')
	                else:
	                    print(f'Warning: Multiple html parts for "{title}"')
	            elif content_type in ('image/jpeg', 'image/png',
	                                  'application/octet-stream'):
	                # The file name comes from the message itself: keep only the
	                # base name so it cannot point outside '_resources/', and
	                # fall back to a placeholder when none is given.
	                filename = (os.path.basename(part.get_filename() or '')
	                            or 'attachment')
	                # Separators keep (num, index) unambiguous: without them
	                # '1' + '12' and '11' + '2' produce the same prefix.
	                savename = f'{num}-{attachment_count}-{filename}'
	                attachment_count += 1
	                payload = part.get_payload(decode=True)
	                if content_type == 'application/octet-stream':
	                    # Generic binary part: sniff the real type and only keep
	                    # the image formats handled here.
	                    mime_type = magic.from_buffer(payload, mime=True)
	                    if mime_type in ('image/png', 'image/jpeg', 'image/tiff'):
	                        savename += '.png'
	                    else:
	                        print("Unsupported attachment type: ", mime_type)
	                        continue
	                with Image(blob=payload) as img:
	                    with img.convert('png') as output_img:
	                        # Save the converted image (previously the unconverted
	                        # `img` was written and the conversion was discarded).
	                        output_img.save(filename=f'_resources/{savename}')
	                saved_files.append(savename)
	                content_id = part.get('Content-Id')
	                if content_id is None:
	                    # Nothing in the html can reference this attachment.
	                    print(f'Warning: Attachment "{filename}" in "{title}" '
	                          'has no Content-Id')
	                    continue
	                attachments[str(content_id).strip('<>')] = (filename, savename)
	        if html is None:
	            # Multipart message without any text/html part: still emit the
	            # note (title and dates) instead of crashing.
	            print(f'Warning: No html part for "{title}"')
	            text = ''
	        else:
	            # Replace each <object data="cid:..."> with an <img> pointing at
	            # the saved attachment.
	            for obj in html.find_all('object'):
	                cid = str(obj.get('data', ''))
	                if cid.startswith('cid:'):
	                    cid = cid[4:]
	                img_tag = html.new_tag('img')
	                img_tag['alt'], img_tag['src'] = attachments.get(cid, ('', ''))
	                img_tag['src'] = '../_resources/' + img_tag['src']
	                obj.replace_with(img_tag)
	            text = md(str(html), header_style='ATX')
	    else:
	        text = md(message.get_content(), header_style='ATX')
	    # Explicit UTF-8: the front matter and note bodies contain non-ASCII text.
	    with open(f'{name}/{num}.md', 'w', encoding='utf-8') as f:
	        f.write(
	            '---\n'
	            f'title: "{title}"\n'
	            'author: Johan Swetzén\n'
	            'source_application: iCloud Notes\n'
	            f'created: {date}\n'
	            f'updated: {date}\n'
	            '---\n\n'
	        )
	        f.write(text)
No results found