Created
November 16, 2017 07:50
-
-
Save zhu327/4607b63150d8b61b289e906976fa675d to your computer and use it in GitHub Desktop.
imp语言解释器实现 http://python.jobbole.com/82206/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import sys | |
import re | |
def lex(characters, token_exprs): | |
pos = 0 | |
tokens = [] | |
while pos < len(characters): | |
match = None | |
for token_expr in token_exprs: | |
pattern, tag = token_expr | |
regex = re.compile(pattern) | |
match = regex.match(characters, pos) # pos 指定匹配的索引位置 | |
if match: | |
text = match.group(0) # 获取整个匹配到的字符串 | |
if tag: | |
token = (text, tag) | |
tokens.append(token) | |
break # 一旦匹配到了就跳出for, 配置下一个词 | |
if not match: | |
sys.stderr.write('Illegal character: %sn' % characters[pos]) | |
sys.exit(1) | |
else: | |
pos = match.end(0) # 匹配的结果位置 | |
return tokens | |
RESERVED = 'RESERVED' # 操作符 | |
INT = 'INT' # 整数 | |
ID = 'ID' # 标识符 | |
token_exprs = [ | |
(r'[ \n\t]+', None), | |
(r'#[^\n]*', None), | |
(r'\:=', RESERVED), | |
(r'\(', RESERVED), | |
(r'\)', RESERVED), | |
(r';', RESERVED), | |
(r'\+', RESERVED), | |
(r'-', RESERVED), | |
(r'\*', RESERVED), | |
(r'/', RESERVED), | |
(r'<=', RESERVED), | |
(r'<', RESERVED), | |
(r'>=', RESERVED), | |
(r'>', RESERVED), | |
(r'!=', RESERVED), | |
(r'=', RESERVED), | |
(r'and', RESERVED), | |
(r'or', RESERVED), | |
(r'not', RESERVED), | |
(r'if', RESERVED), | |
(r'then', RESERVED), | |
(r'else', RESERVED), | |
(r'while', RESERVED), | |
(r'do', RESERVED), | |
(r'end', RESERVED), | |
(r'[0-9]+', INT), | |
(r'[A-Za-z][A-Za-z0-9_]*', ID), | |
] | |
def imp_lex(characters): | |
return lex(characters, token_exprs) | |
class Result: | |
def __init__(self, value, pos): | |
self.value = value # 值, 作为AST树的一部分 | |
self.pos = pos # 位置 | |
def __repr__(self): | |
return 'Result(%s, %d)' % (self.value, self.pos) | |
class Parser: | |
def __call__(self, tokens, pos): | |
return None # subclasses will override this | |
def __add__(self, other): | |
return Concat(self, other) | |
def __mul__(self, other): | |
return Exp(self, other) | |
def __or__(self, other): | |
return Alternate(self, other) | |
def __xor__(self, function): | |
return Process(self, function) | |
class Reserved(Parser): | |
def __init__(self, value, tag): | |
self.value = value | |
self.tag = tag | |
def __call__(self, tokens, pos): | |
if pos < len(tokens) and \ | |
tokens[pos][0] == self.value and \ | |
tokens[pos][1] is self.tag: | |
return Result(tokens[pos][0], pos + 1) # 找到的同时, 移动到下一个位置 | |
else: | |
return None | |
class Tag(Parser): | |
def __init__(self, tag): | |
self.tag = tag | |
def __call__(self, tokens, pos): | |
if pos < len(tokens) and tokens[pos][1] is self.tag: | |
return Result(tokens[pos][0], pos + 1) | |
else: | |
return None | |
# parser = Concat(Concat(Tag(INT), Reserved('+', RESERVED)), Tag(INT)) 1 + 2 | |
# parser = Tag(INT) + Reserved('+', RESERVED) + Tag(INT) | |
class Concat(Parser): | |
def __init__(self, left, right): | |
self.left = left | |
self.right = right | |
def __call__(self, tokens, pos): | |
left_result = self.left(tokens, pos) | |
if left_result: | |
right_result = self.right(tokens, left_result.pos) | |
if right_result: | |
combined_value = (left_result.value, right_result.value) | |
return Result(combined_value, right_result.pos) | |
return None | |
""" | |
parser = Reserved('+', RESERVED) | | |
Reserved('-', RESERVED) | | |
Reserved('*', RESERVED) | | |
Reserved('/', RESERVED) | |
""" | |
class Alternate(Parser): | |
def __init__(self, left, right): | |
self.left = left | |
self.right = right | |
def __call__(self, tokens, pos): | |
left_result = self.left(tokens, pos) | |
if left_result: | |
return left_result | |
else: | |
right_result = self.right(tokens, pos) | |
return right_result | |
class Opt(Parser): | |
def __init__(self, parser): | |
self.parser = parser | |
def __call__(self, tokens, pos): | |
result = self.parser(tokens, pos) | |
if result: | |
return result | |
else: | |
return Result(None, pos) # 返回一个空的结果 | |
class Rep(Parser): | |
def __init__(self, parser): | |
self.parser = parser | |
def __call__(self, tokens, pos): | |
results = [] | |
result = self.parser(tokens, pos) | |
while result: | |
results.append(result.value) | |
pos = result.pos | |
result = self.parser(tokens, pos) | |
return Result(results, pos) # 重复解析, 直到没有结果 | |
class Lazy(Parser): | |
def __init__(self, parser_func): | |
self.parser = None | |
self.parser_func = parser_func | |
def __call__(self, tokens, pos): | |
if not self.parser: | |
self.parser = self.parser_func() | |
return self.parser(tokens, pos) # 延迟解析, 只有被调用时, 才生成解析器 | |
class Phrase(Parser): # 输入的解析器必须解析到结尾, 否则返回None | |
def __init__(self, parser): | |
self.parser = parser | |
def __call__(self, tokens, pos): | |
result = self.parser(tokens, pos) | |
if result and result.pos == len(tokens): | |
return result | |
else: | |
return None | |
class Exp(Parser): # 组合, 解析器, 分隔符解析器 | |
def __init__(self, parser, separator): | |
self.parser = parser | |
self.separator = separator # 会返回一个函数 | |
def __call__(self, tokens, pos): | |
result = self.parser(tokens, pos) # 正常解析 | |
def process_next(parsed): | |
(sepfunc, right) = parsed | |
return sepfunc(result.value, right) # 使用separator返回的函数处理, 前后2个结果value | |
next_parser = self.separator + self.parser ^ process_next # + 会返回(函数, 解析结果), ^ 会调用process_next | |
next_result = result | |
while next_result: # 迭代, 直到产生最后的结果, 类似reduce | |
next_result = next_parser(tokens, result.pos) | |
if next_result: | |
result = next_result | |
return result | |
class Process(Parser): # 异或处理, 产生结果的同时调用函数, 处理结果 | |
def __init__(self, parser, function): | |
self.parser = parser | |
self.function = function | |
def __call__(self, tokens, pos): | |
result = self.parser(tokens, pos) | |
if result: | |
result.value = self.function(result.value) | |
return result | |
class Equality: # 用于判断是否相等 | |
def __eq__(self, other): | |
return isinstance(other, self.__class__) and \ | |
self.__dict__ == other.__dict__ | |
def __ne__(self, other): | |
return not self.__eq__(other) | |
class Aexp(Equality): # 算术表达式 | |
pass | |
class IntAexp(Aexp): # int值 | |
def __init__(self, i): | |
self.i = i | |
def __repr__(self): | |
return 'IntAexp(%d)' % self.i | |
def eval(self, env): | |
return self.i | |
class VarAexp(Aexp): # 变量 | |
def __init__(self, name): | |
self.name = name | |
def __repr__(self): | |
return 'VarAexp(%s)' % self.name | |
def eval(self, env): | |
if self.name in env: | |
return env[self.name] | |
else: | |
return 0 | |
class BinopAexp(Aexp): # 算术运算 | |
def __init__(self, op, left, right): | |
self.op = op # 操作符 | |
self.left = left # 左边的Aexp | |
self.right = right # 右边的Aexp | |
def __repr__(self): | |
return 'BinopAexp(%s, %s, %s)' % (self.op, self.left, self.right) | |
def eval(self, env): | |
left_value = self.left.eval(env) | |
right_value = self.right.eval(env) | |
if self.op == '+': | |
value = left_value + right_value | |
elif self.op == '-': | |
value = left_value - right_value | |
elif self.op == '*': | |
value = left_value * right_value | |
elif self.op == '/': | |
value = left_value / right_value | |
else: | |
raise RuntimeError('unknown operator: ' + self.op) | |
return value | |
class Bexp(Equality): # 布尔表达式 | |
pass | |
class RelopBexp(Bexp): # 布尔运算 | |
def __init__(self, op, left, right): | |
self.op = op | |
self.left = left | |
self.right = right | |
def __repr__(self): | |
return 'RelopBexp(%s, %s, %s)' % (self.op, self.left, self.right) | |
def eval(self, env): | |
left_value = self.left.eval(env) | |
right_value = self.right.eval(env) | |
if self.op == '<': | |
value = left_value < right_value | |
elif self.op == '<=': | |
value = left_value <= right_value | |
elif self.op == '>': | |
value = left_value > right_value | |
elif self.op == '>=': | |
value = left_value >= right_value | |
elif self.op == '=': | |
value = left_value == right_value | |
elif self.op == '!=': | |
value = left_value != right_value | |
else: | |
raise RuntimeError('unknown operator: ' + self.op) | |
return value | |
class AndBexp(Bexp): # and | |
def __init__(self, left, right): | |
self.left = left | |
self.right = right | |
def __repr__(self): | |
return 'AndBexp(%s, %s)' % (self.left, self.right) | |
def eval(self, env): | |
left_value = self.left.eval(env) | |
right_value = self.right.eval(env) | |
return left_value and right_value | |
class OrBexp(Bexp): # or | |
def __init__(self, left, right): | |
self.left = left | |
self.right = right | |
def __repr__(self): | |
return 'OrBexp(%s, %s)' % (self.left, self.right) | |
def eval(self, env): | |
left_value = self.left.eval(env) | |
right_value = self.right.eval(env) | |
return left_value or right_value | |
class NotBexp(Bexp): # not | |
def __init__(self, exp): | |
self.exp = exp | |
def __repr__(self): | |
return 'NotBexp(%s)' % self.exp | |
def eval(self, env): | |
value = self.exp.eval(env) | |
return not value | |
class Statement(Equality): # 关系表达式 | |
pass | |
class AssignStatement(Statement): # 赋值语句 | |
def __init__(self, name, aexp): | |
self.name = name # 变量 | |
self.aexp = aexp # 算术表达式 | |
def __repr__(self): | |
return 'AssignStatement(%s, %s)' % (self.name, self.aexp) | |
def eval(self, env): | |
value = self.aexp.eval(env) | |
env[self.name] = value | |
class CompoundStatement(Statement): # 复合语句 | |
def __init__(self, first, second): | |
self.first = first | |
self.second = second | |
def __repr__(self): | |
return 'CompoundStatement(%s, %s)' % (self.first, self.second) | |
def eval(self, env): | |
self.first.eval(env) | |
self.second.eval(env) | |
class IfStatement(Statement): # 条件语句 | |
def __init__(self, condition, true_stmt, false_stmt): | |
self.condition = condition # 条件 | |
self.true_stmt = true_stmt # 真块 | |
self.false_stmt = false_stmt # 假块 | |
def __repr__(self): | |
return 'IfStatement(%s, %s, %s)' % (self.condition, self.true_stmt, self.false_stmt) | |
def eval(self, env): | |
condition_value = self.condition.eval(env) | |
if condition_value: | |
self.true_stmt.eval(env) | |
else: | |
if self.false_stmt: | |
self.false_stmt.eval(env) | |
class WhileStatement(Statement): # 循环语句 | |
def __init__(self, condition, body): | |
self.condition = condition # 条件表达式 | |
self.body = body # 循环体 | |
def __repr__(self): | |
return 'WhileStatement(%s, %s)' % (self.condition, self.body) | |
def eval(self, env): | |
condition_value = self.condition.eval(env) | |
while condition_value: | |
self.body.eval(env) | |
condition_value = self.condition.eval(env) | |
def keyword(kw): | |
return Reserved(kw, RESERVED) | |
id = Tag(ID) | |
num = Tag(INT) ^ (lambda i: int(i)) # 转int类型 | |
def aexp_value(): # 算术值 转成IntAexp或VarAexp对象 | |
return (num ^ (lambda i: IntAexp(i))) | \ | |
(id ^ (lambda v: VarAexp(v))) | |
def process_group(parsed): # 在 1 + 2 只取中间的结果 | |
((_, p), _) = parsed | |
return p | |
aexp_precedence_levels = [ # 运算优先级 | |
['*', '/'], | |
['+', '-'], | |
] | |
def precedence(value_parser, precedence_levels, combine): | |
def op_parser(precedence_level): # 最终返回的是combine 生成的函数 | |
return any_operator_in_list(precedence_level) ^ combine # 一个或的paser完成后调用后面的function | |
parser = value_parser * op_parser(precedence_levels[0]) | |
for precedence_level in precedence_levels[1:]: | |
parser = parser * op_parser(precedence_level) # 下一个优先级的分割符号 | |
return parser # 实际上这里会先匹配 * / ,然后再在内部匹配+ 1 | |
def any_operator_in_list(ops): # 输入运算符列表, 组合成|的解析器 | |
op_parsers = [keyword(op) for op in ops] | |
parser = reduce(lambda l, r: l | r, op_parsers) | |
return parser | |
def process_binop(op): # 生成算术运算表达式工厂函数 | |
return lambda l, r: BinopAexp(op, l, r) | |
def aexp(): # 最中能匹配所有表达式的paser | |
return precedence(aexp_term(), | |
aexp_precedence_levels, | |
process_binop) | |
def aexp_group(): # 组表达式, 用括号包住 | |
return keyword('(') + Lazy(aexp) + keyword(')') ^ process_group # 匹配括号中间的内容, 并返回括号中间的表达式 | |
def aexp_term(): # 要么是算术值(值或者变量), 要么用算术运算群 | |
return aexp_value() | aexp_group() | |
""" | |
E0 = value_parser = aexp_term() | |
E1 = E0 * op_parser(precedence_levels[0]) | |
E2 = E1 * op_parser(precedence_levels[1]) | |
4 * a + b / 2 - (6 + c) | |
E0(4) * E0(a) + E0(b) / E0(2) - E0(6+c) | |
E1(4*a) + E1(b/2) - E1(6+c) | |
E2((4*a)+(b/2)-(6+c)) | |
""" | |
def process_relop(parsed): | |
((left, op), right) = parsed | |
return RelopBexp(op, left, right) | |
def bexp_relop(): # bool运算表达式 | |
relops = ['<', '<=', '>', '>=', '=', '!='] | |
return aexp() + any_operator_in_list(relops) + aexp() ^ process_relop | |
def bexp_not(): # not 表达式 | |
return keyword('not') + Lazy(bexp_term) ^ (lambda parsed: NotBexp(parsed[1])) | |
def bexp_group(): # 括号 | |
return keyword('(') + Lazy(bexp) + keyword(')') ^ process_group | |
def bexp_term(): | |
return bexp_not() | \ | |
bexp_relop() | \ | |
bexp_group() | |
bexp_precedence_levels = [ | |
['and'], | |
['or'], | |
] | |
def process_logic(op): | |
if op == 'and': | |
return lambda l, r: AndBexp(l, r) | |
elif op == 'or': | |
return lambda l, r: OrBexp(l, r) | |
else: | |
raise RuntimeError('unknown logic operator: ' + op) | |
def bexp(): | |
return precedence(bexp_term(), | |
bexp_precedence_levels, | |
process_logic) | |
def assign_stmt(): # 赋值语句 | |
def process(parsed): | |
((name, _), exp) = parsed | |
return AssignStatement(name, exp) | |
return id + keyword(':=') + aexp() ^ process | |
def stmt_list(): | |
separator = keyword(';') ^ (lambda x: lambda l, r: CompoundStatement(l, r)) | |
return Exp(stmt(), separator) # 复合语句 | |
def if_stmt(): # if 语句 | |
def process(parsed): | |
(((((_, condition), _), true_stmt), false_parsed), _) = parsed | |
if false_parsed: | |
(_, false_stmt) = false_parsed | |
else: | |
false_stmt = None | |
return IfStatement(condition, true_stmt, false_stmt) | |
return keyword('if') + bexp() + \ | |
keyword('then') + Lazy(stmt_list) + \ | |
Opt(keyword('else') + Lazy(stmt_list)) + \ | |
keyword('end') ^ process # bexp 条件语句 | |
def while_stmt(): # 循环语句 | |
def process(parsed): | |
((((_, condition), _), body), _) = parsed | |
return WhileStatement(condition, body) | |
return keyword('while') + bexp() + \ | |
keyword('do') + Lazy(stmt_list) + \ | |
keyword('end') ^ process | |
def stmt(): | |
return assign_stmt() | \ | |
if_stmt() | \ | |
while_stmt() | |
def parser(): | |
return Phrase(stmt_list()) | |
def imp_parse(tokens): | |
ast = parser()(tokens, 0) | |
return ast | |
""" | |
if __name__ == '__main__': | |
if len(sys.argv) != 3: | |
sys.stderr.write('usage: %s filename parsername' % sys.argv[0]) | |
sys.exit(1) | |
filename = sys.argv[1] | |
file = open(filename) | |
characters = file.read() | |
file.close() | |
tokens = imp_lex(characters) | |
parser = globals()[sys.argv[2]]() | |
result = parser(tokens, 0) | |
print result | |
""" | |
def usage(): | |
sys.stderr.write('Usage: imp filenamen') | |
sys.exit(1) | |
if __name__ == '__main__': | |
if len(sys.argv) != 2: | |
usage() | |
filename = sys.argv[1] | |
text = open(filename).read() | |
tokens = imp_lex(text) | |
parse_result = imp_parse(tokens) | |
if not parse_result: | |
sys.stderr.write('Parse error!n') | |
sys.exit(1) | |
ast = parse_result.value | |
env = {} | |
ast.eval(env) | |
sys.stdout.write('Final variable values:\n') | |
for name in env: | |
sys.stdout.write('%s: %s\n' % (name, env[name])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment