Created
November 27, 2015 15:27
-
-
Save an-OK-squirrel/8e0088539d1ee876d74d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import re | |
# Character classes consulted by the tokenizer below.
L_LETTERS = 'abcdefghijklmnopqrstuvwxyz'
U_LETTERS = L_LETTERS.upper()
DIGITS = '0123456789'
ALL_LETTERS = L_LETTERS + U_LETTERS
# Every lowercase letter counts as a one-character operator, alongside the
# arithmetic/assignment symbols.
OPERATORS = L_LETTERS + '+*/-='
# Characters skipped between tokens.
WHITESPACE = ' \t'
def does_regex_match(regex, string):
    """Return True when *regex* matches the whole of *string*.

    The pattern is applied with ``re.match`` (anchored at the start of
    *string*) and the text of that match must equal *string* exactly.
    Only the first match the regex engine finds is considered, so an
    alternation whose earlier branch matches a shorter prefix (for
    example ``'a|ab'`` against ``'ab'``) yields False.

    Args:
        regex: Pattern string (or compiled pattern) accepted by ``re.match``.
        string: Text to test.

    Returns:
        bool: True if the match consumed all of *string*, False otherwise
        (including when nothing matched at all).
    """
    match = re.match(regex, string)
    # re.match returns None when nothing matches; test for that explicitly
    # instead of relying on an AttributeError from None.group().
    return match is not None and match.group() == string
def split_into_st(program):
    """Split *program* into a list of ``[text, type_code]`` tokens.

    Type codes: 0 = none, 1 = operator, 2 = number, 3 = string.

    * A character from ``OPERATORS`` is a complete operator token.
    * ``.`` starts a two-character operator; whatever character follows
      is appended to it.
    * A run of digits forms a number; a ``.`` met inside the run is
      dropped and the number continues.
    * ``"`` opens a string that runs until the next ``"``.  The token
      text keeps the opening quote but not the closing one.
    * In the idle state, whitespace and any other unrecognised character
      are skipped.

    Whatever token is pending when the input ends is always appended, so
    the result may finish with an empty ``['', 0]`` entry.

    split_into_st('a 12')
    [['a', 1], ['12', 2]]
    """
    NONE, OPERATOR, NUMBER, STRING = 0, 1, 2, 3

    tokens = []
    pending = ''
    state = NONE
    pos = 0
    length = len(program)

    while pos < length:
        ch = program[pos]

        if state == STRING:
            if ch == '"':
                # Closing quote: emit without it, i.e. "abc rather than "abc"
                tokens.append([pending, state])
                pending, state = '', NONE
            else:
                pending += ch
        elif state == NUMBER:
            if ch in DIGITS:
                pending += ch
            elif ch != '.':
                # The number is over; emit it and look at this same
                # character again from the idle state.
                tokens.append([pending, state])
                pending, state = '', NONE
                continue
            # A '.' inside a number is silently ignored.
        elif state == OPERATOR:
            # Second character of a dot-operator, whatever it is.
            tokens.append([pending + ch, state])
            pending, state = '', NONE
        elif ch in WHITESPACE:
            # Nothing is pending in the idle state, so just move on.
            pending, state = '', NONE
        elif ch in OPERATORS:
            # Single-character operator: emit immediately.
            tokens.append([pending + ch, OPERATOR])
            pending, state = '', NONE
        elif ch == '.':
            pending, state = pending + '.', OPERATOR
        elif ch in DIGITS:
            pending, state = pending + ch, NUMBER
        elif ch == '"':
            pending, state = '"', STRING

        pos += 1

    # Flush whatever is still pending (possibly the empty "none" token).
    tokens.append([pending, state])
    return tokens
# Token type names. NOTE(review): not referenced anywhere in the visible
# code; presumably indexed by token type code -- confirm before relying on it.
types = [
    'none',
    'operator',
    '.operator',
]
def parse_token_st(tokens):
    """Convert raw ``[text, type_code]`` tokens into token dictionaries.

    Each output entry has the keys ``'token_type'`` and ``'token_value'``:

    * code 1 -> ``'operator'``, value is the token text unchanged
    * code 2 -> ``'integer'``, value is the text converted with ``int``
    * code 3 -> ``'string'``, value is the text minus its first character
      (the opening quote)

    Tokens with code 0, or any other code, produce no output.

    Args:
        tokens: Iterable of indexable ``[text, type_code]`` pairs.

    Returns:
        list: One dictionary per recognised token, in input order.
    """
    parsed = []
    for entry in tokens:
        code = entry[1]
        if code == 0:
            # "none" tokens carry no information.
            continue
        if code == 1:
            parsed.append(dict(token_type='operator', token_value=entry[0]))
            continue
        if code == 2:
            parsed.append(dict(token_type='integer',
                               token_value=int(entry[0])))
            continue
        if code == 3:
            # Drop the leading quote kept by the tokenizer.
            without_quote = entry[0][1:]
            parsed.append(dict(token_type='string',
                               token_value=str(without_quote)))
    return parsed
# Single-character substitutions applied to source text before tokenizing:
# each key is replaced by its value.
replace_chars = {
    'à': ' 0',
}


def char_replace(code):
    """Return *code* with every character in ``replace_chars`` expanded.

    Characters that have no entry in ``replace_chars`` are copied through
    unchanged.

    Args:
        code: Source text to preprocess.

    Returns:
        str: The text after substitution (``''`` for empty input).
    """
    # Build the result in one pass with join; the earlier per-character
    # debug printing has been removed so the function no longer writes to
    # stdout.
    return ''.join(replace_chars.get(char, char) for char in code)
def fully_parse(code):
    """Run *code* through the full pipeline and return the parsed tokens.

    Stages, in order: ``char_replace`` (character substitution),
    ``split_into_st`` (tokenizing) and ``parse_token_st`` (conversion to
    token dictionaries).
    """
    substituted = char_replace(code)
    raw_tokens = split_into_st(substituted)
    return parse_token_st(raw_tokens)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment