Created
March 3, 2019 13:36
-
-
Save crepererum/137de687af0f9d8301e907e735d2f516 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import gzip | |
import json | |
import os | |
import os.path | |
import urllib.request | |
# Ghost JSON export file to convert (input).
EXPORT_FILE = 'crepererum-net.ghost.2019-02-23.json'
# Directory that receives one sub-directory per converted post (output).
OUT_PATH = 'out'
# Base URL of the live site; prepended to relative /content/images paths.
SITE_URL = 'https://crepererum.net'
# TOML front-matter template (Zola-style) for each generated index.md.
# Placeholders are filled via str.format in process_post.
TEMPLATE = '''+++
title = "{title}"
date = {date}
[taxonomies]
categories = ["blog"]
tags = [{tags}]
+++
{markdown}
'''
def get_markdown(post):
    """Extract the raw markdown from a Ghost post's mobiledoc payload.

    Returns the markdown string, or ``None`` when the post does not
    consist of exactly one card (i.e. cannot be converted as-is).
    Raises ``AssertionError`` if the single card is not a markdown card.
    """
    mobiledoc = json.loads(post['mobiledoc'])
    cards = mobiledoc['cards']
    # Only posts made of exactly one card are convertible.
    if len(cards) != 1:
        return None
    # (The original also asserted len(cards) == 1 here, which was dead
    # code after the guard above.)
    card0 = cards[0]
    # A mobiledoc card is a [type, payload] pair.
    assert len(card0) == 2
    ctype, cdata = card0
    assert ctype == 'card-markdown'
    return cdata['markdown']
def download_uncompressed(url, target):
    """Download *url* to file path *target*, gunzipping if needed.

    Some servers return gzip-compressed bodies; detect that via the
    gzip magic number and store the decompressed bytes instead.
    """
    # Context manager closes the connection even on error (the
    # original leaked the response object).
    with urllib.request.urlopen(url) as response:
        data = response.read()
    if data.startswith(b'\x1f\x8b'):  # gzip magic number
        data = gzip.decompress(data)
    with open(target, 'wb') as fp:
        fp.write(data)
def fetch_images(md, basedir):
    """Download every ``/content/images...`` asset referenced in *md*.

    Each referenced image is fetched into *basedir* and the reference
    in the markdown is rewritten to the bare file name.  Returns the
    updated markdown text.
    """
    while True:
        start = md.find('/content/images')
        if start == -1:
            # No more image references left to rewrite.
            return md
        # A reference ends at the next ')' (markdown link) or '"'
        # (HTML attribute) — take whichever comes first.
        candidates = [md.find(delim, start) for delim in (')', '"')]
        end = min(pos for pos in candidates if pos != -1)
        path = md[start:end]
        basename = os.path.basename(path)
        outpath = os.path.join(basedir, basename)
        url = SITE_URL + path
        print(f' fetch {url}')
        download_uncompressed(url, outpath)
        # Replace the site-relative path with the local file name.
        md = md[:start] + basename + md[end:]
def get_date(post):
    """Return the date (YYYY-MM-DD) part of the post's publish timestamp."""
    date_part, _, _ = post['published_at'].partition(' ')
    return date_part
def process_post(post, tag_dict):
    """Convert one Ghost post into ``<OUT_PATH>/<slug>/index.md``.

    Skips unpublished posts and posts that are not a single markdown
    card.  Referenced images are downloaded next to the generated
    index.md and their paths rewritten (see fetch_images).

    *tag_dict* maps post id -> list of tag names (see process_tags).
    """
    slug = post['slug']
    print(f'converting {slug}:')
    status = post['status']
    if status != 'published':
        print(' skipping (not published)')
        return
    pid = post['id']
    tags = tag_dict.get(pid, [])
    title = post['title']
    md = get_markdown(post)
    if md is None:
        # Message aligned with the other status lines (original was
        # missing the leading space).
        print(' skipped (not markdown)')
        return
    date = get_date(post)
    basedir = os.path.join(
        OUT_PATH,
        slug,
    )
    # exist_ok replaces the original try/except FileExistsError dance.
    os.makedirs(basedir, exist_ok=True)
    md = fetch_images(md, basedir)
    index_md = TEMPLATE.format(
        markdown=md,
        date=date,
        title=title,
        tags=', '.join(
            f'"{t}"'
            for t in sorted(tags)
        ),
    )
    with open(os.path.join(basedir, 'index.md'), 'w') as fp:
        fp.write(index_md)
    print(' done')
def process_tags(db0_data):
    """Build a mapping of post id -> list of tag names.

    Resolves the posts_tags relation table against the tags table of a
    Ghost export's ``db[0]['data']`` payload.
    """
    names_by_id = {
        entry['id']: entry['name']
        for entry in db0_data['tags']
    }
    posts_to_tags = {}
    for relation in db0_data['posts_tags']:
        post_id = relation['post_id']
        tag_name = names_by_id[relation['tag_id']]
        posts_to_tags.setdefault(post_id, []).append(tag_name)
    return posts_to_tags
# Entry point: load the Ghost export and convert every post.
with open(EXPORT_FILE) as fp:
    data = json.load(fp)
# Ghost exports wrap everything in a single-element 'db' list.
db0 = data['db'][0]
db0_data = db0['data']
posts = db0_data['posts']
# Resolve the posts<->tags relation table once, up front.
tag_dict = process_tags(db0_data)
for post in posts:
    process_post(post, tag_dict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment