Created
July 16, 2023 15:30
-
-
Save almet/1999c83414d753c281d0a89f536c8f1d to your computer and use it in GitHub Desktop.
Obsidian Pelican
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""A pelican plugin to read Obsidian files and import them as pelican Articles. | |
This reads the tags written as hashtags and renders them as Pelican tags instead
(they won't be present in the output).
It also adds the title to the output and specifies a default dummy date.
""" | |
from pathlib import Path | |
from itertools import chain | |
import os | |
import re | |
from pelican import signals | |
from pelican.readers import MarkdownReader | |
from pelican.contents import Tag | |
from pelican.utils import pelican_open | |
from pelican.utils import get_date, slugify | |
from markdown import Markdown | |
# Lookup tables filled by populate_files_and_articles().
# ARTICLE_PATHS maps a note name (without ".md") to its directory path;
# FILE_PATHS maps an attachment file name (with extension) to its directory path.
ARTICLE_PATHS = {}
FILE_PATHS = {}

# Obsidian wiki link: [[target]] or [[target|label]].
# The label is matched lazily (".+?") so that two links on the same line are
# not merged into a single match ending at the last "]]".
link = r'\[\[\s*(?P<filename>[^|\]]+)(\|\s*(?P<linkname>.+?))?\]\]'
# Embedded file: same syntax prefixed with "!".
file_re = re.compile(r'!' + link)
link_re = re.compile(link)
# Hashtag-style tag, e.g. "#python".
tag_re = re.compile(r'#([\w]+)')

# Test cases
# [[my link]]
# [[ my work ]]
# [[ my work | is finished ]]
# [[ first | one ]] and [[ second ]]
# ![[ a file.jpg ]]
# ![[file.jpg]]
def get_file_and_linkname(match):
    """Extract the link target and its display text from a regex match.

    Args:
        match: A match object exposing the named groups ``filename`` and
            ``linkname`` (the latter may be ``None`` when no alias was given).

    Returns:
        A ``(filename, linkname)`` tuple, both stripped of surrounding
        whitespace. When the alias is missing or blank, the filename is
        used as the display text.
    """
    groups = match.groupdict()
    filename = groups['filename'].strip()
    # Strip before testing so that a whitespace-only alias ("[[note| ]]")
    # falls back to the filename instead of producing an empty label.
    linkname = (groups['linkname'] or '').strip() or filename
    return filename, linkname
class ObsidianMarkdownReader(MarkdownReader):
    """Markdown reader that adapts Obsidian notes to what Pelican accepts.

    Wiki links and embedded files are rewritten to Pelican link syntax,
    hashtags are turned into Pelican tags, and title, date, slug and
    category metadata are filled in.
    """

    # Markdown extensions this reader needs; "toc" provides the ``toc`` and
    # ``toc_tokens`` attributes used in read().
    _REQUIRED_EXTENSIONS = ("markdown.extensions.toc", "sane_lists")
    # Placeholder date used when no date can be determined.
    _DEFAULT_DATE = "2023-01-30"
    # The rendered TOC is only exposed when its HTML is longer than this.
    _MIN_TOC_LENGTH = 300

    def __init__(self, *args, **kwargs):
        super(ObsidianMarkdownReader, self).__init__(*args, **kwargs)
        extensions = self.settings["MARKDOWN"].setdefault("extensions", [])
        # The settings dict is shared between readers: only add what is
        # missing so that repeated instantiation does not pile up duplicates.
        for extension in self._REQUIRED_EXTENSIONS:
            if extension not in extensions:
                extensions.append(extension)

    def replace_obsidian_links(self, text):
        """Rewrite ``[[note]]`` and ``![[file]]`` markup to Pelican links.

        Links to unknown notes are replaced by their plain label; embeds of
        unknown files are removed entirely.
        """

        def link_replacement(match):
            filename, linkname = get_file_and_linkname(match)
            path = ARTICLE_PATHS.get(filename)
            if not path:
                # Unknown note: keep the label as plain text.
                return '{linkname}'.format(linkname=linkname)
            return '[{linkname}]({{filename}}{path}{filename}.md)'.format(
                linkname=linkname, path=path, filename=filename
            )

        def file_replacement(match):
            filename, linkname = get_file_and_linkname(match)
            path = FILE_PATHS.get(filename)
            if not path:
                # don't show it at all since it will be broken
                return ''
            # Markdown image pointing at a static file through Pelican's
            # ``{static}`` link placeholder.
            return '![{linkname}]({{static}}{path}{filename})'.format(
                linkname=linkname, path=path, filename=filename
            )

        # Embedded files first: their syntax is a superset of a plain link.
        text = file_re.sub(file_replacement, text)
        text = link_re.sub(link_replacement, text)
        return text

    def replace_tags(self, text):
        """Return ``text`` with every ``#hashtag`` removed."""
        return tag_re.sub('', text)

    def get_tags(self, text):
        """Return the names (without ``#``) of all hashtags in ``text``."""
        return tag_re.findall(text)

    def read(self, source_path):
        """Parse content and metadata of markdown files.

        It also changes the links to the acceptable format for pelican.

        Args:
            source_path: Path of the markdown file to read.

        Returns:
            A ``(content, metadata)`` tuple: the rendered HTML and the
            metadata dict (tags, title, date, slug, category, ...).
        """
        # Local import: ``datetime`` is not among the module-level imports.
        from datetime import datetime

        self._source_path = source_path
        self._md = Markdown(**self.settings['MARKDOWN'])

        with pelican_open(source_path) as text:
            text = self.replace_obsidian_links(text)
            tags = self.get_tags(text)
            text = self.replace_tags(text)
            content = self._md.convert(text)

        if hasattr(self._md, "Meta"):
            metadata = self._parse_metadata(self._md.Meta)
        else:
            metadata = {}

        if tags:
            # dict.fromkeys drops repeated hashtags while keeping their order.
            metadata['tags'] = [
                Tag(tag, self.settings) for tag in dict.fromkeys(tags)
            ]

        # Add the TOC to the metadata.
        if len(self._md.toc) > self._MIN_TOC_LENGTH:
            metadata["table_of_contents"] = self._md.toc

        # Get the title from the first heading and drop it from the body.
        if "title" not in metadata and len(self._md.toc_tokens):
            first_title = self._md.toc_tokens[0]
            metadata["title"] = first_title["name"]
            content = content.replace(
                '<h1 id="{id}">{name}</h1>'.format(**first_title), ""
            )

        # Date precedence: "read_on" metadata, then a date prefix in the
        # filename, then an explicit "date" from the metadata, then the default.
        stem = os.path.splitext(os.path.basename(source_path))[0]
        parts = stem.split("-")
        filename_date = None
        if len(parts) >= 3:
            try:
                filename_date = get_date("-".join(parts[:3]))
            except ValueError:
                # A hyphenated name that is not date-prefixed
                # ("my-cool-note"): ignore it.
                filename_date = None

        if "read_on" in metadata:
            metadata["date"] = datetime.strptime(metadata["read_on"], "%B %Y")
        elif filename_date is not None:
            metadata["date"] = filename_date
        elif "date" not in metadata:
            metadata["date"] = get_date(self._DEFAULT_DATE)

        if "slug" not in metadata:
            # Fall back to the file name when the note has no title at all.
            slug_source = metadata.get("title") or stem
            metadata["slug"] = slugify(
                slug_source, self.settings.get("SLUG_REGEX_SUBSTITUTIONS", [])
            )

        # The category is the name of the directory containing the file.
        category = os.path.basename(
            os.path.abspath(os.path.join(source_path, os.pardir))
        )
        metadata["category"] = self.process_metadata("category", category)
        return content, metadata
def _relative_dir(file_path, base_path):
    """Return the directory of ``file_path`` relative to ``base_path``.

    The result always uses ``/`` separators and has a leading and trailing
    slash: ``/`` for files directly in ``base_path``, ``/sub/dir/`` otherwise.
    """
    relative = file_path.parent.relative_to(base_path).as_posix()
    return '/' if relative == '.' else '/{}/'.format(relative)


def populate_files_and_articles(article_generator):
    """Index every note and attachment found under the generator's path.

    Fills ``ARTICLE_PATHS`` (note name without extension -> directory) and
    ``FILE_PATHS`` (attachment file name -> directory). Directories are
    expressed relative to ``article_generator.path``.

    Args:
        article_generator: Object whose ``path`` attribute is the root
            directory to scan.
    """
    base_path = Path(article_generator.path)

    for article in base_path.glob('**/*.md'):
        ARTICLE_PATHS[article.stem] = _relative_dir(article, base_path)

    for extension in ('png', 'jpg', 'svg', 'apkg', 'gif'):
        for _file in base_path.glob('**/*.{}'.format(extension)):
            FILE_PATHS[_file.name] = _relative_dir(_file, base_path)
def modify_reader(article_generator):
    """Index the content tree and install the Obsidian reader for ``md`` files.

    Args:
        article_generator: The generator whose ``readers.readers`` mapping
            and ``settings`` are used.
    """
    populate_files_and_articles(article_generator)
    obsidian_reader = ObsidianMarkdownReader(article_generator.settings)
    article_generator.readers.readers['md'] = obsidian_reader
def modify_metadata(article_generator, metadata):
    """Strip ``#`` from tag names so tags can be written the Obsidian way.

    Args:
        article_generator: Unused; present because this function is used
            as a signal handler.
        metadata: Metadata dict; each item under ``'tags'`` (if any) must
            expose a writable ``name`` attribute.
    """
    for tag in metadata.get('tags', []):
        if '#' not in tag.name:
            continue
        tag.name = tag.name.replace('#', '')
def register():
    """Plugin entry point: connect this module's handlers to Pelican signals."""
    # Install the Obsidian reader when the article generator is initialised.
    signals.article_generator_init.connect(modify_reader)
    # Clean up tag names for each article's metadata.
    signals.article_generator_context.connect(modify_metadata)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment