Last active
February 19, 2025 18:56
-
-
Save peter88213/91f4938a39db1b69160f902723e86a9e to your computer and use it in GitHub Desktop.
A class that converts Markdown links to wikilinks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Provide a class that converts Markdown links to wikilinks.

Copyright (c) 2025 Peter Triesberger
For further information see https://github.com/peter88213
License: GNU GPLv3 (https://www.gnu.org/licenses/gpl-3.0.en.html)
"""
class MdLinkParser:
    """Parser implementing a state machine for Markdown link conversion.

    Inline Markdown links ``[description](url)`` are rewritten as
    wikilinks ``[[url|description]]``. The input is processed character
    by character; text that does not form a complete link is passed
    through unchanged.

    Typical use::

        text = MdLinkParser().to_wikilinks('See [docs](./my%20page).')
        # -> 'See [[my page|docs]].'
    """

    # Parser states.
    BODY = 0  # plain text outside of any link
    DESC = 1  # after '[': collecting the link description
    LINK = 2  # after ']': a '(' must follow to make it a link
    URL = 3  # after '(': collecting the link target

    def __init__(self):
        # Dispatch table: markup character -> handler method.
        self.markup = {
            '[': self.handle_desc_start,
            ']': self.handle_desc_end,
            '(': self.handle_url_start,
            ')': self.handle_url_end,
        }
        self.results = []
        # list of characters and strings
        self.descBuffer = []
        # list of characters, buffering the read-in description
        self.urlBuffer = []
        # list of characters, buffering the read-in URL
        self.state = self.BODY

    def to_wikilinks(self, text):
        """Return text with Markdown links converted into wikilinks."""
        self.reset()
        self.feed(text)
        self.close()
        return ''.join(self.results)

    def reset(self):
        """Reset the instance. Loses all unprocessed data."""
        self.results.clear()
        self.descBuffer.clear()
        self.urlBuffer.clear()
        self.state = self.BODY

    def feed(self, data):
        """Feed some text to the parser.

        Each character is dispatched to its markup handler, or to
        handle_data() if it is not a markup character.
        """
        for c in data:
            self.markup.get(c, self.handle_data)(c)

    def handle_desc_start(self, c):
        """Handle '[': start a description when in body text."""
        if self.state == self.BODY:
            self.state = self.DESC
        else:
            self.handle_data(c)

    def handle_desc_end(self, c):
        """Handle ']': finish the description when one is open."""
        if self.state == self.DESC:
            self.state = self.LINK
        else:
            self.handle_data(c)

    def handle_url_start(self, c):
        """Handle '(': start the URL when it directly follows ']'."""
        if self.state == self.LINK:
            self.state = self.URL
        else:
            self.handle_data(c)

    def handle_url_end(self, c):
        """Handle ')': finish the link and append the wikilink.

        With a URL, the result is ``[[url|description]]`` (or ``[[url]]``
        if the description is empty); a leading './' is dropped and
        percent-escapes are decoded. Without a URL, the description
        itself becomes the target, with ':' replaced by '/'.
        """
        if self.state != self.URL:
            self.handle_data(c)
            return

        # Local import: keeps this class usable even if the module does
        # not import unquote at the top.
        from urllib.parse import unquote

        # Create a wikilink and append it to the results.
        self.results.append('[[')
        if self.urlBuffer:
            target = ''.join(self.urlBuffer).removeprefix('./')
            self.results.append(unquote(target))
            if self.descBuffer:
                self.results.append('|')
                self.results.extend(self.descBuffer)
        else:
            # Turn the description into an URL.
            target = ''.join(self.descBuffer).replace(':', '/')
            self.results.append(unquote(target))
        self.results.append(']]')
        self.urlBuffer.clear()
        self.descBuffer.clear()
        self.state = self.BODY

    def handle_data(self, c):
        """Handle a character that is not acting as link markup."""
        if self.state == self.DESC:
            self.descBuffer.append(c)
            return

        if self.state == self.URL:
            self.urlBuffer.append(c)
            return

        if self.state == self.LINK:
            # Expected '(', but got another character:
            # the bracketed text is not a link description, so restore
            # the body text.
            self.results.append('[')
            self.results.extend(self.descBuffer)
            self.results.append(']')
            self.descBuffer.clear()
            self.state = self.BODY
            # Dispatch the character again from the body state, so that
            # a '[' directly after ']' can still open a new link.
            self.markup.get(c, self.handle_data)(c)
            return

        self.results.append(c)

    def close(self):
        """Append all buffered data to the results.

        Incomplete Markdown links are adopted unchanged: the markup
        characters consumed so far are restored according to the parser
        state, not the buffer contents, so that empty descriptions or
        URLs do not lose their brackets. Afterwards the parser is back
        in the body state with empty buffers.
        """
        if self.state != self.BODY:
            self.results.append('[')
            self.results.extend(self.descBuffer)
            if self.state in (self.LINK, self.URL):
                self.results.append(']')
            if self.state == self.URL:
                self.results.append('(')
                self.results.extend(self.urlBuffer)
        self.descBuffer.clear()
        self.urlBuffer.clear()
        self.state = self.BODY
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment