-
-
Save paudirac/7df3d5852c391ab5aa4d5667b14d32a1 to your computer and use it in GitHub Desktop.
Full code for the Markdown parser from the blog post http://self.paudirac.com/blog/classical-web-pages/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Command-line filter: read all of stdin, pass it to ``to_html``, write to stdout."""
import sys

from paudirac.markdown import to_html


def main():
    """Feed the whole of stdin through ``to_html`` with stdout as the writer."""
    to_html(sys.stdin.read(), writer=sys.stdout)


# Guarded so that importing this file does not consume stdin.
if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import abc
import re

# Line-classification patterns.  Each one is applied with ``.match``, so it
# is tested at the start of the line.

# Whitespace only.  The trailing ``$`` matters: without it, any line that
# merely *starts* with whitespace would be classified as blank and dropped.
_IS_BLANK = re.compile(r'\s*$')
# One or more leading '#'.
_IS_H = re.compile(r'^#+')
# A leading '*'.
_IS_LIST = re.compile(r'^\*')
def is_blank(line):
    """Tell whether *line* is empty or consists only of whitespace.

    Returns a match object (truthy) for a blank line and ``None`` otherwise.
    The whole line must be whitespace; a line that only begins with
    whitespace (for example indented text) is not blank.
    """
    return re.fullmatch(r'\s*', line)
def lines(source):
    r"""Yield the lines of *source*, each ending in exactly one ``'\n'``.

    The text is split on ``'\n'`` and the newline is re-attached to every
    piece, so a source that already ends with a newline produces one extra
    ``'\n'`` line at the end, and an empty source produces a single ``'\n'``.
    """
    for line in source.split('\n'):
        yield line + '\n'
class Block:
    """A group of source lines, held as a list in ``contents``.

    The classification helpers look only at the first line.
    """

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        return f'Block(contents={self.contents})'

    def is_h(self):
        """Return the ``_IS_H`` match on the first line, or ``None``.

        An empty block yields ``None`` instead of raising ``IndexError``.
        """
        return _IS_H.match(self.contents[0]) if self.contents else None

    def is_list(self):
        """Return the ``_IS_LIST`` match on the first line, or ``None``.

        An empty block yields ``None`` instead of raising ``IndexError``.
        """
        return _IS_LIST.match(self.contents[0]) if self.contents else None
def blocks(lns):
    """Group an iterable of lines into ``Block`` objects.

    Consecutive non-blank lines are collected into one block; a blank line
    (as judged by ``is_blank``) closes the current block.  Runs of blank
    lines produce no empty blocks.
    """
    buffer = []
    for line in lns:
        if not is_blank(line):
            buffer.append(line)
        elif buffer:
            yield Block(contents=buffer)
            buffer = []
    # Flush whatever is left when the input does not end on a blank line.
    if buffer:
        yield Block(contents=buffer)
class Element(abc.ABC):
    """Abstract base for document nodes that can be walked by a visitor."""

    @abc.abstractmethod
    def accept(self, visitor):
        """Dispatch to the visitor method that handles this node type."""
class Html(Element):
    """Root node of the document; holds a single ``body``."""

    def __init__(self, body):
        self.body = body

    def __repr__(self):
        return f'Html(body={self.body})'

    def accept(self, visitor):
        visitor.visit_html(self)
class Body(Element):
    """Document body; ``contents`` is the sequence of child elements."""

    def __init__(self, contents):
        self.contents = contents

    def __repr__(self):
        return f'Body(contents={self.contents})'

    def accept(self, visitor):
        visitor.visit_body(self)
class Text(Element):
    """Leaf node wrapping a plain string in ``text``."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return f'{self._name}(text="{self.text}")'

    @property
    def _name(self):
        # Taken from the runtime class so a subclass shows its own name.
        return self.__class__.__name__

    def accept(self, visitor):
        visitor.visit_text(self)
class H(Element):
    """Heading node with a numeric ``level`` and a ``Text`` child in ``text``."""

    def __init__(self, level, text):
        self.level = level
        self.text = text

    def __repr__(self):
        return f'H(level={self.level}, text={self.text})'

    @classmethod
    def from_block(cls, blk):
        """Build a heading from the first line of *blk*.

        The level is the number of leading '#' characters and the text is
        the remainder of that line with surrounding whitespace removed.

        NOTE(review): only ``blk.contents[0]`` is read; any further lines in
        the block are not represented in the result.

        Raises:
            ValueError: if the first line does not start with '#'.
        """
        first_line = blk.contents[0]
        m = _IS_H.match(first_line)
        if not m:
            raise ValueError(f'Block {blk} is not a valid H block')
        # The pattern is anchored at the start, so the match end equals the
        # count of '#' characters.
        _, end = m.span()
        # strip() already removes the trailing newline; slicing off the last
        # character would eat real text when the line has no newline.
        text = Text(text=first_line[end:].strip())
        return cls(level=end, text=text)

    def accept(self, visitor):
        visitor.visit_h(self)
class Item(Element):
    """Single list entry; ``text`` holds its ``Text`` child."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        # Same format as the other node classes, so a containing list's repr
        # is readable instead of showing default object addresses.
        return f'Item(text={self.text})'

    def accept(self, visitor):
        visitor.visit_item(self)

    @classmethod
    def from_string(cls, string):
        """Build an item whose text is *string* wrapped in a ``Text`` node."""
        return cls(text=Text(string))
class List(Element):
    """List node; ``items`` is a list of ``Item`` nodes."""

    def __init__(self, items):
        self.items = items

    def __repr__(self):
        return f'List(items={self.items})'

    @classmethod
    def from_block(cls, blk):
        """Build a list with one item per line of *blk*.

        Every line must start with '*'; the marker is dropped and the rest
        of the line is stripped of surrounding whitespace.

        Raises:
            ValueError: if any line does not start with '*'.
        """
        # An explicit raise (not ``assert``) so the check survives ``-O``;
        # startswith also copes with an empty string.
        if not all(line.startswith('*') for line in blk.contents):
            raise ValueError(f'Invalid list: {blk}')
        items = [Item.from_string(line[1:].strip()) for line in blk.contents]
        return cls(items=items)

    def accept(self, visitor):
        visitor.visit_list(self)
class P(Element):
    """Paragraph node; ``text`` holds its ``Text`` child."""

    def __init__(self, text):
        self.text = text

    def __repr__(self):
        return f'P(text={self.text})'

    @classmethod
    def from_block(cls, blk):
        """Build a paragraph by joining the stripped lines of *blk* with spaces."""
        joined = ' '.join(line.strip() for line in blk.contents)
        return cls(text=Text(text=joined))

    def accept(self, visitor):
        visitor.visit_p(self)
def _parse(blks):
    """Yield one element per block: ``H``, ``List`` or, failing both, ``P``.

    The heading test runs first, then the list test.
    """
    for blk in blks:
        if blk.is_h():
            yield H.from_block(blk)
        elif blk.is_list():
            yield List.from_block(blk)
        else:
            yield P.from_block(blk)
def parse(blks):
    """Parse an iterable of blocks into an ``Html`` tree.

    All blocks are consumed eagerly; the resulting elements become the
    contents of a ``Body`` wrapped in the returned ``Html`` root.
    """
    contents = list(_parse(blks))
    return Html(body=Body(contents=contents))
class HtmlVisitor:
    """Visitor that writes a node tree as HTML tags to ``writer``.

    ``writer`` only needs a ``write(str)`` method.  Text is written exactly
    as stored; no HTML escaping is applied.
    """

    def __init__(self, writer):
        self.writer = writer

    def _emit(self, text):
        self.writer.write(text)

    def _wrapped(self, tag, *children):
        """Write ``<tag>``, visit each child in order, then write ``</tag>``."""
        self._emit(f'<{tag}>')
        for child in children:
            child.accept(self)
        self._emit(f'</{tag}>')

    def visit_html(self, html):
        self._wrapped('html', html.body)

    def visit_body(self, body):
        self._wrapped('body', *body.contents)

    def visit_text(self, text):
        self._emit(text.text)

    def visit_h(self, h):
        self._wrapped(f'h{h.level}', h.text)

    def visit_item(self, item):
        self._wrapped('li', item.text)

    def visit_list(self, lst):
        self._wrapped('ul', *lst.items)

    def visit_p(self, p):
        self._wrapped('p', p.text)
class ReprVisitor:
    """Visitor that writes ``repr()`` of the root node instead of rendering it.

    Only ``visit_html`` is defined; the node it receives is not walked any
    further by this visitor.
    """

    def __init__(self, writer):
        self.writer = writer

    def visit_html(self, html):
        self.writer.write(repr(html))
class ScreamingVisitor(HtmlVisitor):
    """``HtmlVisitor`` variant that writes every text node in upper case."""

    def visit_text(self, text):
        self._emit(text.text.upper())
import random


class HoiganVisitor(HtmlVisitor):
    """``HtmlVisitor`` variant that replaces each word of text with a random shout.

    One shout is drawn per whitespace-separated word of the original text,
    and the shouts are joined with single spaces.
    """

    # Kept at class level so the choices are not rebuilt for every word.
    _SHOUTS = ('OIGAN!', 'OYGA!', 'HOIGAN!', 'HOYGAN!')

    def visit_text(self, text):
        oigans = [random.choice(self._SHOUTS) for _ in text.text.split()]
        self._emit(' '.join(oigans))
def to_html(source, writer, visitor_cls=None):
    """Parse *source* and render it to *writer* through a visitor.

    Args:
        source: the input text.
        writer: object with a ``write(str)`` method that receives the output.
        visitor_cls: visitor class, called as ``visitor_cls(writer=writer)``.
            Defaults to ``HoiganVisitor``, which was previously hard-coded;
            ``HtmlVisitor``, ``ReprVisitor`` and ``ScreamingVisitor`` are the
            other visitors defined in this module.
    """
    if visitor_cls is None:
        visitor_cls = HoiganVisitor
    html = parse(blocks(lines(source)))
    html.accept(visitor_cls(writer=writer))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment