Skip to content

Instantly share code, notes, and snippets.

@paudirac
Created May 24, 2020 16:37
Show Gist options
  • Save paudirac/7df3d5852c391ab5aa4d5667b14d32a1 to your computer and use it in GitHub Desktop.
Save paudirac/7df3d5852c391ab5aa4d5667b14d32a1 to your computer and use it in GitHub Desktop.
Full code for the Markdown parser from the blog post http://self.paudirac.com/blog/classical-web-pages/
#!/usr/bin/env python3
import sys
from paudirac.markdown import to_html
# Script entry point: read all of standard input, hand it to to_html, and let
# it write its output to standard output.
to_html(sys.stdin.read(), writer=sys.stdout)
import abc
import re
# Matches a string made up solely of whitespace (the empty string included).
# Meant to be used with ``fullmatch`` so that the whole line must be blank.
_IS_BLANK = re.compile(r'\s*')
# Matches the run of '#' characters that opens a heading line.
_IS_H = re.compile(r'^#+')
# Matches the '*' that opens a list-item line.
_IS_LIST = re.compile(r'^\*')


def is_blank(line):
    """Tell whether *line* contains nothing but whitespace.

    The whole line has to be whitespace: a line that merely *starts* with
    spaces or tabs and then carries text is not blank, so indented content
    is no longer mistaken for a block separator.

    Args:
        line: A single line of text, with or without its trailing newline.

    Returns:
        A truthy ``re.Match`` object when the line is empty or
        whitespace-only, otherwise ``None``.
    """
    return _IS_BLANK.fullmatch(line)
def lines(source):
    """Lazily split *source* on ``'\\n'`` and yield each piece newline-terminated.

    Every yielded string ends with exactly one appended ``'\\n'``, including
    the piece after the final newline of *source* (which is yielded as a
    bare ``'\\n'``).

    Args:
        source: The full input text.

    Yields:
        One string per ``'\\n'``-separated piece of *source*.
    """
    yield from (piece + '\n' for piece in source.split('\n'))
class Block:
    """A group of consecutive non-blank input lines.

    The kind of block (heading, list, or anything else) is decided by
    looking at the first line only.
    """

    def __init__(self, contents):
        # contents: list of the raw lines that make up this block.
        self.contents = contents

    def __repr__(self):
        return 'Block(contents={})'.format(self.contents)

    def _first_line(self):
        """Return the line that determines the block's kind."""
        return self.contents[0]

    def is_h(self):
        """Return a match object if the first line starts with '#', else None."""
        return _IS_H.match(self._first_line())

    def is_list(self):
        """Return a match object if the first line starts with '*', else None."""
        return _IS_LIST.match(self._first_line())
def blocks(lns):
    """Group an iterable of lines into ``Block`` objects.

    Lines for which ``is_blank`` is truthy act as separators and are never
    part of a block; runs of separators produce no empty blocks.

    Args:
        lns: Iterable of line strings.

    Yields:
        A ``Block`` for each run of consecutive non-blank lines.
    """
    pending = []
    for ln in lns:
        if not is_blank(ln):
            pending.append(ln)
            continue
        # Separator reached: flush whatever has been collected so far.
        if pending:
            yield Block(contents=pending)
            pending = []
    # Input may end without a trailing separator.
    if pending:
        yield Block(contents=pending)
class Element(abc.ABC):
    """Abstract base for every node of the document tree.

    Nodes take part in the visitor pattern: each concrete node implements
    ``accept`` by calling the visitor method that corresponds to its type.
    """

    @abc.abstractmethod
    def accept(self, visitor):
        """Dispatch this node to the matching ``visit_*`` method of *visitor*."""
        return None
class Html(Element):
    """Root node of the document tree; holds the single body node."""

    def __init__(self, body):
        # body: the node visited between the opening and closing html tags.
        self.body = body

    def __repr__(self):
        return 'Html(body={})'.format(self.body)

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_html``."""
        visitor.visit_html(self)
class Body(Element):
    """Container node holding the top-level elements of the document."""

    def __init__(self, contents):
        # contents: sequence of child elements, in document order.
        self.contents = contents

    def __repr__(self):
        return 'Body(contents={})'.format(self.contents)

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_body``."""
        visitor.visit_body(self)
class Text(Element):
    """Leaf node wrapping a plain string."""

    def __init__(self, text):
        # text: the raw string carried by this node.
        self.text = text

    @property
    def _name(self):
        """Name of the concrete class, so subclasses get a correct repr."""
        return type(self).__name__

    def __repr__(self):
        return '{}(text="{}")'.format(self._name, self.text)

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_text``."""
        visitor.visit_text(self)
class H(Element):
    """Heading node: a nesting level plus the heading text."""

    def __init__(self, level, text):
        # level: number of leading '#' characters of the source line.
        self.level = level
        # text: ``Text`` node holding the heading title.
        self.text = text

    def __repr__(self):
        return f'H(level={self.level}, text={self.text})'

    @classmethod
    def from_block(cls, blk):
        """Build a heading from the first line of *blk*.

        Only ``blk.contents[0]`` is consulted; any further lines of the
        block are ignored.

        Args:
            blk: A ``Block`` whose first line starts with one or more '#'.

        Returns:
            A new ``H`` whose level is the count of leading '#' characters
            and whose text is the rest of the line, whitespace-trimmed.

        Raises:
            ValueError: If the first line does not start with '#'.
        """
        first_line = blk.contents[0]
        m = _IS_H.match(first_line)
        if not m:
            raise ValueError(f'Block {blk} is not a valid H block')
        # The pattern is anchored at the start, so the end offset of the
        # match equals the number of '#' characters.
        level = m.end()
        # strip() already removes a trailing newline; slicing off the last
        # character unconditionally would eat a real character whenever the
        # line does not end with one.
        # NOTE(review): level is not capped, so more than six '#' yields a
        # tag such as <h7> — confirm whether that should be limited.
        title = Text(text=first_line[level:].strip())
        return cls(level=level, text=title)

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_h``."""
        visitor.visit_h(self)
class Item(Element):
    """A single entry of a ``List``."""

    def __init__(self, text):
        # text: ``Text`` node holding the item's content.
        self.text = text

    def __repr__(self):
        # Mirrors the repr style of the other elements so that the repr of
        # a list shows item contents instead of object addresses.
        return f'Item(text={self.text})'

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_item``."""
        visitor.visit_item(self)

    @classmethod
    def from_string(cls, string):
        """Create an item whose content is *string* wrapped in a ``Text``."""
        return cls(text=Text(string))
class List(Element):
    """Unordered list node holding ``Item`` children."""

    def __init__(self, items):
        # items: sequence of ``Item`` nodes, in document order.
        self.items = items

    def __repr__(self):
        return f'List(items={self.items})'

    @classmethod
    def from_block(cls, blk):
        """Build a list from a block in which every line starts with '*'.

        Args:
            blk: A ``Block``; each of its lines becomes one item, with the
                leading '*' removed and surrounding whitespace trimmed.

        Returns:
            A new ``List`` with one ``Item`` per line of the block.

        Raises:
            ValueError: If any line of the block does not start with '*'.
                Raised explicitly (rather than via ``assert``) so the check
                still runs under ``python -O``, matching ``H.from_block``.
        """
        # startswith() also copes with an empty string, where indexing
        # the first character would raise IndexError.
        if not all(line.startswith('*') for line in blk.contents):
            raise ValueError(f'Block {blk} is not a valid List block')
        items = [Item.from_string(line[1:].strip()) for line in blk.contents]
        return cls(items=items)

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_list``."""
        visitor.visit_list(self)
class P(Element):
    """Paragraph node wrapping a single ``Text``."""

    def __init__(self, text):
        # text: ``Text`` node holding the paragraph content.
        self.text = text

    def __repr__(self):
        return 'P(text={})'.format(self.text)

    @classmethod
    def from_block(cls, blk):
        """Join the trimmed lines of *blk* with single spaces into one paragraph."""
        trimmed = [line.strip() for line in blk.contents]
        return cls(text=Text(text=' '.join(trimmed)))

    def accept(self, visitor):
        """Hand this node to ``visitor.visit_p``."""
        visitor.visit_p(self)
def _parse(blks):
    """Turn each block into the element that matches its kind.

    Heading blocks take precedence over list blocks; every other block
    becomes a paragraph.

    Args:
        blks: Iterable of ``Block`` objects.

    Yields:
        An ``H``, ``List`` or ``P`` element per block, in input order.
    """
    for blk in blks:
        if blk.is_h():
            element_cls = H
        elif blk.is_list():
            element_cls = List
        else:
            element_cls = P
        yield element_cls.from_block(blk)
def parse(blks):
    """Build the full document tree from an iterable of blocks.

    Args:
        blks: Iterable of ``Block`` objects.

    Returns:
        An ``Html`` node whose ``Body`` contains the parsed elements as a
        list, in input order.
    """
    elements = list(_parse(blks))
    return Html(body=Body(contents=elements))
class HtmlVisitor:
    """Visitor that serialises a document tree as HTML markup.

    Output is produced incrementally through ``writer.write``. Text is
    written exactly as stored in the tree; no escaping is applied.
    """

    def __init__(self, writer):
        # writer: any object exposing a ``write(str)`` method.
        self.writer = writer

    def _emit(self, text):
        """Write *text* to the underlying writer."""
        self.writer.write(text)

    def _tagged(self, tag, children):
        """Emit ``<tag>``, visit every child in order, then emit ``</tag>``."""
        self._emit(f'<{tag}>')
        for child in children:
            child.accept(self)
        self._emit(f'</{tag}>')

    def visit_html(self, html):
        self._tagged('html', [html.body])

    def visit_body(self, body):
        self._tagged('body', body.contents)

    def visit_text(self, text):
        self._emit(text.text)

    def visit_h(self, h):
        self._tagged(f'h{h.level}', [h.text])

    def visit_item(self, item):
        self._tagged('li', [item.text])

    def visit_list(self, lst):
        self._tagged('ul', lst.items)

    def visit_p(self, p):
        self._tagged('p', [p.text])
class ReprVisitor:
    """Debugging visitor that dumps the ``repr`` of the root node.

    Only ``visit_html`` is provided, so it can be applied to the root of
    the tree but not to individual child nodes.
    """

    def __init__(self, writer):
        # writer: any object exposing a ``write(str)`` method.
        self.writer = writer

    def visit_html(self, html):
        """Write ``repr(html)`` to the writer in a single call."""
        sink = self.writer.write
        sink(repr(html))
class ScreamingVisitor(HtmlVisitor):
    """HTML visitor that upper-cases every piece of text it emits."""

    def visit_text(self, text):
        shouted = text.text.upper()
        self._emit(shouted)
import random
class HoiganVisitor(HtmlVisitor):
    """HTML visitor that swaps every word of the text for a random shout."""

    def visit_text(self, text):
        """Emit one randomly chosen shout per whitespace-separated word."""
        replaced = []
        for _word in text.text.split():
            shout = random.choice([
                'OIGAN!',
                'OYGA!',
                'HOIGAN!',
                'HOYGAN!',
            ])
            replaced.append(shout)
        self._emit(' '.join(replaced))
def to_html(source, writer, *, visitor_cls=None):
    """Parse *source* and render the resulting tree through a visitor.

    The text is split into lines, grouped into blocks, parsed into a
    document tree, and finally walked by an instance of *visitor_cls*.

    Args:
        source: The input text to convert.
        writer: Object with a ``write(str)`` method that receives the output.
        visitor_cls: Visitor class, instantiated as ``visitor_cls(writer=writer)``.
            Defaults to ``HoiganVisitor``, which replaces every word with a
            random shout; pass ``HtmlVisitor``, ``ScreamingVisitor`` or
            ``ReprVisitor`` to pick a different rendering.

    Returns:
        None. All output goes to *writer*.
    """
    if visitor_cls is None:
        # Resolved at call time to keep the historical default behaviour.
        visitor_cls = HoiganVisitor
    document = parse(blocks(lines(source)))
    document.accept(visitor_cls(writer=writer))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment