Created
December 29, 2022 01:46
-
-
Save aholmes/07ad998b320e34b36e26f2bea3a48375 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import List, Iterator | |
from markdown_it import MarkdownIt | |
from markdown_it.token import Token | |
class Node:
    """A node in a tree built from a flat markdown-it token stream.

    Attributes are documented inline below. The root of a tree is a ``Node``
    created without a token.
    """

    # The token this node wraps, or None (e.g. for the synthetic root).
    token: 'Token | None'
    # Child nodes, in the order they were appended.
    nodes: 'List[Node]'

    def __init__(self, token: 'Token | None' = None) -> None:
        """Create a node wrapping *token* with an empty child list."""
        self.token = token
        self.nodes = []

    def accept(self, visitor: 'NodeVisitor'):
        """Dispatch each direct child of this node to *visitor*.

        Children whose token is ``None`` are skipped entirely. A child whose
        token type is ``'heading_open'`` goes to ``visitor.visit_header``;
        every other child goes to ``visitor.visit``.
        """
        for child in self.nodes:
            if child.token is None:
                continue
            if child.token.type == 'heading_open':
                visitor.visit_header(child)
            else:
                visitor.visit(child)

    # https://stackoverflow.com/a/20242504
    def __str__(self, level=0):
        """Return an indented, one-line-per-node dump of this subtree.

        Each line is ``level`` tab characters followed by the ``repr`` of the
        token type (or of ``'<None>'`` when the node has no token).
        """
        label = self.token.type if self.token is not None else '<None>'
        lines = ['\t' * level + repr(label) + '\n']
        # Collect the pieces and join once instead of growing a string with +=.
        lines.extend(child.__str__(level + 1) for child in self.nodes)
        return ''.join(lines)

    def __repr__(self):
        """Return a short debugging representation (token type, child count)."""
        token_type = self.token.type if self.token is not None else None
        return f'{type(self).__name__}(type={token_type!r}, children={len(self.nodes)})'
class NodeVisitor:
    """Visitor that walks a node tree and prints the text of each header."""

    def visit_header(self, node: 'Node') -> None:
        """Print the content of the header's first child, then descend.

        Nothing is printed when the header has no children or when its first
        child carries no token; the subtree is still traversed in both cases.
        NOTE(review): the first child is presumably the ``inline`` token that
        holds the heading text -- confirm against the parser output.
        """
        # Guard the index: a header node without children would otherwise
        # raise IndexError on node.nodes[0].
        first_token = node.nodes[0].token if node.nodes else None
        if first_token is not None:
            print(first_token.content)
        node.accept(self)

    def visit(self, node: 'Node') -> None:
        """Descend into a non-header node without printing anything."""
        node.accept(self)
def generate_ast(tokens: Iterator[Token], root_node: Node) -> None:
    """Build a tree under *root_node* from a flat stream of tokens.

    *tokens* must be a single shared iterator: each recursive call keeps
    consuming from where its caller stopped, so a nested call reads the
    children of an opening token and returns once it meets the matching
    closing token.

    Args:
        tokens: Iterator over the parser's token stream.
        root_node: Node that receives the tokens found at this nesting level.
    """
    for token in tokens:
        # nesting == -1 marks a closing tag: this level is finished. The
        # closing token itself is not stored in the tree.
        if token.nesting == -1:
            return
        node = Node(token)
        root_node.nodes.append(node)
        # Only opening tags (nesting == 1) have a matching close to recurse
        # until. Self-closing block tokens (nesting == 0, e.g. fences or
        # horizontal rules) must not recurse, or they would swallow the
        # following siblings and consume their parent's closing token.
        if token.nesting == 1:
            generate_ast(tokens, node)
def go():
    """Parse ``README.md`` from the current directory and print its headers.

    The file is tokenized with ``MarkdownIt``, the tokens are arranged into a
    ``Node`` tree, and a ``NodeVisitor`` walks that tree.

    Raises:
        OSError: If ``README.md`` cannot be opened (e.g. it does not exist).
    """
    md = MarkdownIt()
    # Read as UTF-8 explicitly rather than relying on the platform's default
    # encoding, which can mis-decode non-ASCII text.
    with open('README.md', 'rt', encoding='utf-8') as f:
        tokens: List[Token] = md.parse(f.read())
    ast = Node()
    generate_ast(iter(tokens), ast)
    node_visitor = NodeVisitor()
    ast.accept(node_visitor)


# Run the script when the module is executed (or imported).
go()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment