Created
April 11, 2020 07:35
-
-
Save AyeGill/72744b0e54ebf898268beb8c992cabd5 to your computer and use it in GitHub Desktop.
Fix roam files exported to org
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import urllib as ul | |
import re | |
import sys | |
import os.path | |
# Pandoc turns this:  [foo]([[My Foo Page]])
# into this:          [[file:%5B%5BMy%20Foo%20Page%5D%5D][foo]]
# We want this:       [[file:My Foo Page.org][foo]]
# So we un-escape the percent-escaped characters, then append ".org" to the
# link target and remove the superfluous brackets.
# This may break links whose targets contain literal "%" characters - sorry.
# It also breaks file links to non-org files, since ".org" is appended to every file link.
# Unaliased link: "[[target]]" where the target contains no "[" character.
# Group 1 is the target text (matched non-greedily).
slink_re = r"\[\[([^\[]*?)\]\]" #unaliased link
# Aliased file link: "[[file:target][label]]".
# Group 1 is the target after "file:", group 2 is the label (both non-greedy).
alink_re = r"\[\[file\:(.*?)\]\[(.*?)\]\]"
def slinkfixr(matchobj):
    """Turn one unaliased ``[[target]]`` match into an aliased file link.

    ``matchobj`` is a regex match whose group 1 is the link target.
    Targets beginning with "http" are returned exactly as matched.
    Anything else becomes ``[[file:target][target]]``.

    The target is neither URL-unquoted nor given a ".org" suffix here;
    the aliased-link pass that runs afterwards appends ".org".
    """
    target = matchobj.group(1)
    if target.startswith("http"):
        # Web links are left untouched: hand back the entire match.
        return matchobj.group(0)
    return "".join(("[[file:", target, "][", target, "]]"))
def alinkfixr(matchobj):
    """Clean up one aliased ``[[file:target][label]]`` match.

    ``matchobj`` is a regex match whose group 1 is the (possibly
    percent-escaped) link target and whose group 2 is the label.
    The target is URL-unquoted, stripped of any leading/trailing "[" and
    "]" characters, and given a ".org" suffix; the label is kept as is.

    Returns the rebuilt ``[[file:target.org][label]]`` string.
    """
    # ``urllib.unquote`` only exists on Python 2; Python 3 moved it to
    # ``urllib.parse``.  Import locally so the function works on both.
    try:
        from urllib.parse import unquote
    except ImportError:
        from urllib import unquote
    target = unquote(matchobj.group(1)).strip("[]")
    return "[[file:" + target + ".org][" + matchobj.group(2) + "]]"
def fix_links(text):
    """Return ``text`` with its org links rewritten.

    Two substitution passes are applied in order: first every unaliased
    link is handled by ``slinkfixr``, then every aliased file link in the
    result (including those the first pass just produced) is handled by
    ``alinkfixr``.
    """
    return re.sub(alink_re, alinkfixr, re.sub(slink_re, slinkfixr, text))
def run(filename):
    """Rewrite the links of ``filename`` in place.

    The file is read, passed through ``fix_links``, the result is printed
    to stdout, and the file content is replaced with the result.
    """
    # ``with`` guarantees the handle is closed (and the write flushed) even
    # if reading or link fixing raises.
    with open(filename, "r+") as f:
        old = f.read()
        new = fix_links(old)
        print(new)
        f.seek(0)
        f.write(new)
        # The fixed text can be shorter than the original (percent escapes
        # and extra brackets are removed), so cut off whatever is left of
        # the old content after the new end of file.
        f.truncate()
def dryrun(filename):
    """Print what ``run`` would write for ``filename`` without modifying it.

    The file is opened read-only, passed through ``fix_links``, and the
    result is printed to stdout.
    """
    # ``with`` closes the handle promptly instead of leaking it.
    with open(filename, "r") as f:
        old = f.read()
    new = fix_links(old)
    print(new)
# Process every command-line argument that names an existing regular file;
# anything else is skipped silently.
for filename in sys.argv[1:]:
    if not os.path.isfile(filename):
        continue
    run(filename)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment