Last active
December 8, 2022 00:49
-
-
Save aholmes/d86153f62093a6f257cb792ae9a228df to your computer and use it in GitHub Desktop.
A compiler generated by ChatGPT.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is a tokenizer, parser, and compiler that has been mostly generated by ChatGPT. | |
# A handful of errors have been corrected, as well as the addition of the ";" token | |
# and the ability to write multiple statements in a single line. This last part needed | |
# to be done by hand because ChatGPT lost the context of the BNF it created somewhere | |
# along the way, and was not able to regenerate a tokenizer/parser/compiler that | |
# worked with the original BNF. | |
# | |
# 94.42% of this code was created by ChatGPT (see differences.patch for more information). | |
# | |
# The conversation can be read here: | |
# https://gpt.best/dUkxp2UA | |
# | |
def tokenize(code): | |
tokens = [] | |
pos = 0 | |
while pos < len(code): | |
if code[pos] in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
tokens.append(code[pos]) | |
pos += 1 | |
elif code[pos] == " ": | |
pos += 1 | |
elif code[pos] == ";": | |
tokens.append(code[pos]) | |
pos += 1 | |
elif code[pos:pos+5].lower() == "print": | |
tokens.append("print") | |
pos += 5 | |
else: | |
value = "" | |
while pos < len(code) and code[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")", " ", ";"]: | |
value += code[pos] | |
pos += 1 | |
tokens.append(value) | |
return tokens | |
def is_integer(n): | |
try: | |
float(n) | |
except ValueError: | |
return False | |
else: | |
return float(n).is_integer() | |
def parse(tokens): | |
pos = 0 | |
def parse_program(): | |
nonlocal pos | |
statements = [] | |
while pos < len(tokens): | |
st = parse_statement() | |
if st is not None: | |
statements.append(st) | |
return statements | |
def parse_statement(): | |
nonlocal pos | |
if tokens[pos] == "print": | |
pos += 1 | |
return ("print", parse_expression()) | |
elif tokens[pos] == ";": | |
pos += 1 | |
result = parse_program() | |
if len(result) == 0: | |
return None | |
return ("statement", result) | |
elif tokens[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
name = tokens[pos] | |
pos += 1 | |
if tokens[pos] == "=": | |
pos += 1 | |
return ("assign", name, parse_expression()) | |
def parse_expression(): | |
nonlocal pos | |
return parse_sum() | |
def parse_sum(): | |
nonlocal pos | |
result = parse_product() | |
while pos < len(tokens) and tokens[pos] in ["+", "-"]: | |
operator = tokens[pos] | |
pos += 1 | |
right = parse_product() | |
result = (operator, result, right) | |
return result | |
def parse_product(): | |
nonlocal pos | |
result = parse_value() | |
while pos < len(tokens) and tokens[pos] in ["*", "/", "%"]: | |
operator = tokens[pos] | |
pos += 1 | |
right = parse_value() | |
result = (operator, result, right) | |
return result | |
def parse_value(): | |
nonlocal pos | |
type = tokens[pos] | |
if type in ["+", "-", "*", "/", "%"]: | |
pos += 1 | |
return (type, parse_value()) | |
elif type == "(": | |
pos += 1 | |
result = parse_expression() | |
pos += 1 | |
return result | |
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
pos += 1 | |
return type | |
elif type == 'print': | |
pos += 1 | |
result = ('print', parse_expression()) | |
pos += 1 | |
return result | |
elif is_integer(type): | |
pos += 1 | |
return int(type) | |
else: | |
pos += 1 | |
return type | |
return parse_program() | |
def compile(parse_tree): | |
def compile_program(parse_tree): | |
# create a dictionary to store variable values | |
variables = {} | |
results = [] | |
for st in parse_tree: | |
result = compile_statement(st, variables) | |
if result is not None: | |
results.append(result) | |
return results | |
def compile_statement(parse_tree, variables): | |
type = parse_tree[0] | |
if type == "print": | |
result = compile_expression(parse_tree[1], variables) | |
print(result) | |
return result | |
elif type == "assign": | |
# store the value of the expression in the variable | |
name = parse_tree[1] | |
value = compile_expression(parse_tree[2], variables) | |
variables[name] = value | |
return value | |
elif type == "statement": | |
result = compile_statement(parse_tree[1][0], variables) | |
return result | |
def compile_expression(parse_tree, variables): | |
if isinstance(parse_tree, (list, tuple)): | |
if len(parse_tree) == 0: | |
return None | |
# parse_tree is subscriptable, so we can access its elements | |
type = parse_tree[0] | |
if type in ["+", "-", "*", "/", "%"]: | |
# perform the mathematical operation and return the result | |
if type == "+": | |
return compile_expression(parse_tree[1], variables) + compile_expression(parse_tree[2], variables) | |
elif type == "-": | |
return compile_expression(parse_tree[1], variables) - compile_expression(parse_tree[2], variables) | |
elif type == "*": | |
return compile_expression(parse_tree[1], variables) * compile_expression(parse_tree[2], variables) | |
elif type == "/": | |
return compile_expression(parse_tree[1], variables) / compile_expression(parse_tree[2], variables) | |
elif type == "%": | |
return compile_expression(parse_tree[1], variables) % compile_expression(parse_tree[2], variables) | |
elif type == "assign": | |
# extract the name of the variable and the value of the expression | |
name = parse_tree[1] | |
value = compile_expression(parse_tree[2], variables) | |
# return the value of the expression | |
return value | |
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
return type | |
elif type in ["num", "var"]: | |
if type == "num": | |
# return the numeric value directly | |
return parse_tree[1] | |
elif type == "var": | |
# return the value of the variable | |
name = parse_tree[1] | |
if name in variables: | |
return variables[name] | |
else: | |
print(f"Error: variable {name} is not defined") | |
return None | |
elif type == "print": | |
return compile_statement(parse_tree, variables) | |
else: | |
return variables[parse_tree] | |
else: | |
# parse_tree is not subscriptable, so we can't access its elements | |
if variables.get(parse_tree): | |
return variables[parse_tree] | |
else: | |
return parse_tree | |
return compile_program(parse_tree) | |
# test the compile function with a simple expression | |
def test_compile_expression(): | |
# compile the expression "10 + 30" | |
result = compile(parse(tokenize("print 10 + 30"))) | |
# check the result | |
assert result == [40] | |
# test the compile function with a simple assignment | |
def test_compile_assignment(): | |
# compile the expression "result = 10 + 30" | |
result = compile(parse(tokenize("result = 10 + 30"))) | |
# check the result | |
assert result == [40] | |
# test the compile function with multiple statements | |
def test_compile_statements(): | |
# compile the expression "result = 10 + 30 print result" | |
result = compile(parse(tokenize("result = 10 + 30; print result;"))) | |
# check the result | |
assert result == [40, 40] | |
# test the compile function with nested expressions | |
def test_compile_nested_expressions(): | |
# compile the expression "result = (10 + 30) * 2" | |
result = compile(parse(tokenize("result = (10 + 30) * 2;"))) | |
# check the result | |
assert result == [80] | |
# test the compile function with a complex expression | |
def test_compile_complex_expression(): | |
# compile the expression "result = ((10 + 30) * 2) / 6" | |
result = compile(parse(tokenize("result = ((10 + 30) * 2) / 6"))) | |
# check the result | |
assert result == [13.333333333333334] | |
test_compile_expression() | |
test_compile_assignment() | |
test_compile_statements() | |
test_compile_nested_expressions() | |
test_compile_complex_expression() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is the script assembled from the different responses ChatGPT gave | |
# | |
def tokenize(code): | |
tokens = [] | |
pos = 0 | |
while pos < len(code): | |
if code[pos] in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
tokens.append(code[pos]) | |
pos += 1 | |
elif code[pos] == " ": | |
pos += 1 | |
elif code[pos] == ";": | |
tokens.append(code[pos]) | |
pos += 1 | |
elif code[pos:pos+5].lower() == "print": | |
tokens.append("print") | |
pos += 5 | |
else: | |
value = "" | |
while pos < len(code) and code[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")", " ", ";"]: | |
value += code[pos] | |
pos += 1 | |
tokens.append(value) | |
return tokens | |
def is_integer(n): | |
try: | |
float(n) | |
except ValueError: | |
return False | |
else: | |
return float(n).is_integer() | |
def parse(tokens): | |
pos = 0 | |
def parse_program(): | |
nonlocal pos | |
statements = [] | |
while pos < len(tokens): | |
st = parse_statement() | |
if st is not None: | |
statements.append(st) | |
return statements | |
def parse_statement(): | |
nonlocal pos | |
if tokens[pos] == "print": | |
pos += 1 | |
return ("print", parse_expression()) | |
elif tokens[pos] == ";": | |
pos += 1 | |
result = parse_program() | |
if len(result) == 0: | |
return None | |
return ("statement", result) | |
elif tokens[pos] not in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
name = tokens[pos] | |
pos += 1 | |
if tokens[pos] == "=": | |
pos += 1 | |
return ("assign", name, parse_expression()) | |
def parse_expression(): | |
nonlocal pos | |
return parse_sum() | |
def parse_sum(): | |
nonlocal pos | |
result = parse_product() | |
while pos < len(tokens) and tokens[pos] in ["+", "-"]: | |
operator = tokens[pos] | |
pos += 1 | |
right = parse_product() | |
result = (operator, result, right) | |
return result | |
def parse_product(): | |
nonlocal pos | |
result = parse_value() | |
while pos < len(tokens) and tokens[pos] in ["*", "/", "%"]: | |
operator = tokens[pos] | |
pos += 1 | |
right = parse_value() | |
result = (operator, result, right) | |
return result | |
def parse_value(): | |
nonlocal pos | |
type = tokens[pos] | |
if type in ["+", "-", "*", "/", "%"]: | |
pos += 1 | |
return (type, parse_value()) | |
elif type == "(": | |
pos += 1 | |
result = parse_expression() | |
pos += 1 | |
return result | |
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
pos += 1 | |
return type | |
elif type == 'print': | |
pos += 1 | |
result = ('print', parse_expression()) | |
pos += 1 | |
return result | |
elif is_integer(type): | |
pos += 1 | |
return int(type) | |
else: | |
pos += 1 | |
return type | |
return parse_program() | |
def compile(parse_tree): | |
def compile_program(parse_tree): | |
# create a dictionary to store variable values | |
variables = {} | |
results = [] | |
for st in parse_tree: | |
result = compile_statement(st, variables) | |
if result is not None: | |
results.append(result) | |
return results | |
def compile_statement(parse_tree, variables): | |
type = parse_tree[0] | |
if type == "print": | |
result = compile_expression(parse_tree[1], variables) | |
print(result) | |
return result | |
elif type == "assign": | |
# store the value of the expression in the variable | |
name = parse_tree[1] | |
value = compile_expression(parse_tree[2], variables) | |
variables[name] = value | |
return value | |
def compile_expression(parse_tree, variables): | |
if isinstance(parse_tree, (list, tuple)): | |
# parse_tree is subscriptable, so we can access its elements | |
type = parse_tree[0] | |
if type in ["+", "-", "*", "/", "%"]: | |
# perform the mathematical operation and return the result | |
if type == "+": | |
return compile_expression(parse_tree[1], variables) + compile_expression(parse_tree[2], variables) | |
elif type == "-": | |
return compile_expression(parse_tree[1], variables) - compile_expression(parse_tree[2], variables) | |
elif type == "*": | |
return compile_expression(parse_tree[1], variables) * compile_expression(parse_tree[2], variables) | |
elif type == "/": | |
return compile_expression(parse_tree[1], variables) / compile_expression(parse_tree[2], variables) | |
elif type == "%": | |
return compile_expression(parse_tree[1], variables) % compile_expression(parse_tree[2], variables) | |
elif type == "assign": | |
# extract the name of the variable and the value of the expression | |
name = parse_tree[1] | |
value = compile_expression(parse_tree[2], variables) | |
# return the value of the expression | |
return value | |
elif type in ["+", "-", "*", "/", "%", "=", "(", ")"]: | |
return type | |
elif type in ["num", "var"]: | |
if type == "num": | |
# return the numeric value directly | |
return parse_tree[1] | |
elif type == "var": | |
# return the value of the variable | |
name = parse_tree[1] | |
if name in variables: | |
return variables[name] | |
else: | |
print(f"Error: variable {name} is not defined") | |
return None | |
else: | |
# parse_tree is not subscriptable, so we can't access its elements | |
return parse_tree | |
return compile_program(parse_tree) | |
# test the compile function with a simple expression | |
def test_compile_expression(): | |
# compile the expression "10 + 30" | |
result = compile("print 10 + 30;") | |
# check the result | |
assert result == [40] | |
# test the compile function with a simple assignment | |
def test_compile_assignment(): | |
# compile the expression "result = 10 + 30" | |
result = compile("result = 10 + 30;") | |
# check the result | |
assert result == [40] | |
# test the compile function with multiple statements | |
def test_compile_statements(): | |
# compile the expression "result = 10 + 30; print result;" | |
result = compile("result = 10 + 30; print result;") | |
# check the result | |
assert result == [40, 40] | |
# test the compile function with nested expressions | |
def test_compile_nested_expressions(): | |
# compile the expression "result = (10 + 30) * 2;" | |
result = compile("result = (10 + 30) * 2;") | |
# check the result | |
assert result == [80] | |
# test the compile function with a complex expression | |
def test_compile_complex_expression(): | |
# compile the expression "result = ((10 + 30) * 2) / 6;" | |
result = compile("result = ((10 + 30) * 2) / 6;") | |
# check the result | |
assert result == [13.333333333333334] | |
test_compile_expression() | |
test_compile_assignment() | |
test_compile_statements() | |
test_compile_nested_expressions() | |
test_compile_complex_expression() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
These are the differences between the original and the fixed code. | |
Setting aside the differences in what method the tests are calling, likely caused | |
by a poor prompt from myself, the original and the fixed code differ by 5.57%. | |
This means 94.42% of this code is created by ChatGPT. | |
--- chatgpt_compiler_original.py 2022-12-07 16:24:38.209911500 -0800 | |
+++ chatgpt_compiler.py 2022-12-07 16:25:15.979911500 -0800 | |
@@ -136,9 +136,16 @@ | |
value = compile_expression(parse_tree[2], variables) | |
variables[name] = value | |
return value | |
+ elif type == "statement": | |
+ result = compile_statement(parse_tree[1][0], variables) | |
+ return result | |
+ | |
def compile_expression(parse_tree, variables): | |
if isinstance(parse_tree, (list, tuple)): | |
+ if len(parse_tree) == 0: | |
+ return None | |
+ | |
# parse_tree is subscriptable, so we can access its elements | |
type = parse_tree[0] | |
if type in ["+", "-", "*", "/", "%"]: | |
@@ -173,9 +180,17 @@ | |
else: | |
print(f"Error: variable {name} is not defined") | |
return None | |
+ elif type == "print": | |
+ return compile_statement(parse_tree, variables) | |
+ else: | |
+ return variables[parse_tree] | |
else: | |
# parse_tree is not subscriptable, so we can't access its elements | |
- return parse_tree | |
+ if variables.get(parse_tree): | |
+ return variables[parse_tree] | |
+ else: | |
+ return parse_tree | |
+ | |
return compile_program(parse_tree) | |
@@ -197,39 +212,38 @@ | |
# test the compile function with a simple expression | |
def test_compile_expression(): | |
# compile the expression "10 + 30" | |
- result = compile("print 10 + 30;") | |
+ result = compile(parse(tokenize("print 10 + 30"))) | |
# check the result | |
assert result == [40] | |
# test the compile function with a simple assignment | |
def test_compile_assignment(): | |
# compile the expression "result = 10 + 30" | |
- result = compile("result = 10 + 30;") | |
+ result = compile(parse(tokenize("result = 10 + 30"))) | |
# check the result | |
assert result == [40] | |
# test the compile function with multiple statements | |
def test_compile_statements(): | |
- # compile the expression "result = 10 + 30; print result;" | |
- result = compile("result = 10 + 30; print result;") | |
+ # compile the expression "result = 10 + 30 print result" | |
+ result = compile(parse(tokenize("result = 10 + 30; print result;"))) | |
# check the result | |
assert result == [40, 40] | |
# test the compile function with nested expressions | |
def test_compile_nested_expressions(): | |
- # compile the expression "result = (10 + 30) * 2;" | |
- result = compile("result = (10 + 30) * 2;") | |
+ # compile the expression "result = (10 + 30) * 2" | |
+ result = compile(parse(tokenize("result = (10 + 30) * 2;"))) | |
# check the result | |
assert result == [80] | |
# test the compile function with a complex expression | |
def test_compile_complex_expression(): | |
- # compile the expression "result = ((10 + 30) * 2) / 6;" | |
- result = compile("result = ((10 + 30) * 2) / 6;") | |
+ # compile the expression "result = ((10 + 30) * 2) / 6" | |
+ result = compile(parse(tokenize("result = ((10 + 30) * 2) / 6"))) | |
# check the result | |
assert result == [13.333333333333334] | |
- | |
test_compile_expression() | |
test_compile_assignment() | |
test_compile_statements() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment