Created
April 22, 2026 07:47
-
-
Save deniska/bd0efc089b705a8d845842b740e11065 to your computer and use it in GitHub Desktop.
Simple assembler for uxn, written in Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import sys | |
| from dataclasses import dataclass | |
| import collections | |
| import enum | |
| import string | |
@enum.unique
class TokenType(enum.Enum):
    """Kinds of token the tokenizer can emit."""

    # Word made of letters, digits and dots (mnemonic, directive or label).
    identifier = enum.auto()
    # Decimal or 0x-prefixed hexadecimal integer literal.
    number = enum.auto()
    # Double-quoted string literal.
    string = enum.auto()
    # Punctuation.
    comma = enum.auto()
    colon = enum.auto()
    # Line break; the assembler treats it as the end of a statement.
    newline = enum.auto()
@dataclass
class Token:
    """A single lexical token: its kind plus the payload that was read."""

    # Which kind of token this is.
    token_type: TokenType
    # Payload: the identifier or string text, the parsed integer for a
    # number, or the literal character for punctuation and newlines.
    value: object
# Characters that may begin an identifier: ASCII letters, plus '.' which
# marks a label local to the most recent non-dotted label.
identifier_start = {*string.ascii_letters, '.'}
# Characters allowed after the first one: the above plus decimal digits.
identifier_cont = identifier_start.union(string.digits)
class Tokenizer:
    """Character-level lexer that turns assembler source into Token objects.

    ``input_file`` is any object whose ``read(1)`` returns one character, or
    ``''`` once the input is exhausted.
    """

    def __init__(self, input_file):
        self.input_file = input_file
        # Single character of look-ahead pushed back by putback(), or None.
        self.stash = None

    def next(self):
        """Return the next input character, or ``''`` at end of input."""
        if self.stash is None:
            return self.input_file.read(1)
        c = self.stash
        self.stash = None
        return c

    def putback(self, c):
        """Push ``c`` back so that the following next() call returns it."""
        self.stash = c

    def tokenize(self):
        """Yield the Token objects found in the input, in order.

        Spaces, tabs and carriage returns are skipped; ``;`` starts a comment
        that runs up to (but not including) the end of the line.

        Raises:
            ValueError: on an unknown character, an unterminated string, an
                unknown string escape, or a ``0x`` prefix without digits.
        """
        while True:
            c = self.next()
            if c == '':
                break
            elif c in ' \t\r':
                continue
            elif c == '\n':
                yield Token(TokenType.newline, '\n')
            elif c in identifier_start:
                cur_token = [c]
                while (c := self.next()) in identifier_cont:
                    cur_token.append(c)
                self.putback(c)
                yield Token(TokenType.identifier, ''.join(cur_token))
            elif c in string.digits:
                digits = [c]
                base = 10
                digit_chars = string.digits
                if c == '0':
                    if (c := self.next()) == 'x':
                        base = 16
                        digit_chars = string.hexdigits
                    else:
                        self.putback(c)
                # The explicit '' test is required: the empty string is "in"
                # every str, so without it a number at end of input would
                # keep this loop spinning forever.
                while (c := self.next()) != '' and c in digit_chars:
                    digits.append(c)
                self.putback(c)
                if base == 16 and len(digits) == 1:
                    # Only the leading '0' was collected: "0x" had no digits.
                    raise ValueError('Hex literal needs at least one digit')
                yield Token(TokenType.number, int(''.join(digits), base))
            elif c == ':':
                yield Token(TokenType.colon, ':')
            elif c == ',':
                yield Token(TokenType.comma, ',')
            elif c == ';':
                while (c := self.next()) not in ('\n', ''):
                    pass
                # Leave the newline (or EOF) for the main loop to handle.
                self.putback(c)
            elif c == '"':
                chars = []
                while True:
                    c = self.next()
                    if c == '"':
                        break
                    if c in ('\n', ''):
                        raise ValueError('Unclosed string')
                    if c == '\\':
                        # Only \" and \\ are recognised escapes.
                        c = self.next()
                        if c not in ('"', '\\'):
                            raise ValueError(f'Unknown string escape: {c!r}')
                    # Every character, escaped or not, is added exactly once.
                    chars.append(c)
                yield Token(TokenType.string, ''.join(chars))
            else:
                raise ValueError(f'Unknown character: {c!r}')
class Assembler:
    """Assemble tokenized uxn source into a byte image, back-patching labels.

    Statements are read one per line. Bytes are written into ``out_buf`` at
    the address ``cur``; label operands are emitted as zero placeholders and
    filled in once the whole input has been read. Only the part of the image
    from address 0x100 onwards is written to ``output_file``
    (presumably because uxn loads ROMs at 0x0100 - confirm).
    """

    def __init__(self, input_file, output_file):
        self.cur = 0                      # address of the next byte to write
        self.out_buf = bytearray()        # image being built, indexed by address
        self.output_file = output_file    # binary sink for the finished image
        self.tokens = Tokenizer(input_file).tokenize()
        self.stash = None                 # one token of look-ahead, or None
        self.prev_label = ''              # last non-local label, prefix for '.x'
        self.labels = {}                  # fully qualified label -> address
        # label -> addresses of 2-byte / 1-byte placeholders still to patch
        self.pending_labels_wide = collections.defaultdict(list)
        self.pending_labels = collections.defaultdict(list)
        # placeholder addresses that take an offset instead of an address
        self.relative_addrs = set()

    def next(self):
        """Return the next token, or None once the input is exhausted."""
        if self.stash is not None:
            tok, self.stash = self.stash, None
            return tok
        return next(self.tokens, None)

    def putback(self, tok):
        """Push ``tok`` back so that the following next() call returns it."""
        self.stash = tok

    def write_byte(self, b):
        """Store ``b`` at ``cur`` (zero-padding any gap) and advance ``cur``.

        Raises:
            ValueError: if ``b`` does not fit in one byte.
        """
        if b > 255:
            raise ValueError(f'Numeric immediate too big for byte: {b}')
        shortfall = self.cur + 1 - len(self.out_buf)
        if shortfall > 0:
            self.out_buf.extend(bytes(shortfall))
        self.out_buf[self.cur] = b
        self.cur += 1

    def write_short(self, num):
        """Store ``num`` as two bytes, high byte first.

        Raises:
            ValueError: if ``num`` does not fit in 16 bits.
        """
        if num > 65535:
            raise ValueError(f'Numeric immediate too big for short: {num}')
        self.write_byte(num >> 8)
        self.write_byte(num & 0xff)

    def _qualify(self, name):
        """Prefix a local ('.'-leading) name with the last non-local label."""
        if name[0] == '.':
            return self.prev_label + name
        return name

    def add_label(self, tok):
        """Bind the label named by ``tok`` to the current address.

        Raises:
            ValueError: if the (qualified) label is already defined.
        """
        name = self._qualify(tok.value)
        if tok.value[0] != '.':
            self.prev_label = name
        if name in self.labels:
            raise ValueError(f'Label {name!r} already defined')
        self.labels[name] = self.cur

    def assemble(self):
        """Assemble the whole input and write the image to ``output_file``.

        Raises:
            ValueError: on any syntax error, unknown mnemonic, out-of-range
                value, duplicate label or reference to an undefined label.
        """
        while True:
            tok = self.next()
            if tok is None:
                break
            if tok.token_type == TokenType.newline:
                continue
            if tok.token_type != TokenType.identifier:
                raise ValueError(f'Dunno what to do with {tok}')
            following = self.next()
            # `following` is None when the identifier is the very last token
            # (input without a trailing newline).
            if following is not None and following.token_type == TokenType.colon:
                self.add_label(tok)
                tok = self.next()
                if tok is None or tok.token_type == TokenType.newline:
                    continue  # a label alone on its line
                if tok.token_type != TokenType.identifier:
                    raise ValueError(f'Dunno what to do with {tok}')
            else:
                self.putback(following)
            self.parse_identifier(tok)
        self._patch_labels(self.pending_labels_wide, self.write_short, 0xffff)
        self._patch_labels(self.pending_labels, self.write_byte, 0xff)
        self.output_file.write(self.out_buf[0x100:])

    def _patch_labels(self, pending, write, mask):
        """Overwrite every placeholder in ``pending`` with its label's value.

        ``write`` is write_byte or write_short; ``mask`` wraps a relative
        offset to the width of the placeholder.
        """
        for label, addrs in pending.items():
            if label not in self.labels:
                raise ValueError(f'Label not found: {label!r}')
            label_addr = self.labels[label]
            for addr in addrs:
                self.cur = addr
                if addr in self.relative_addrs:
                    # Distance from two bytes past the placeholder's address.
                    write((label_addr - addr - 2) & mask)
                else:
                    write(label_addr)

    def expect(self, token_type):
        """Consume and return the next token, which must be of ``token_type``.

        Raises:
            ValueError: at end of input or on a token of another type.
        """
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected {token_type}, got EOF')
        if tok.token_type != token_type:
            raise ValueError(f'Expected {token_type}, got {tok.token_type}')
        return tok

    def expectmany(self, *token_types):
        """Consume and return the next token if its type is in ``token_types``.

        Raises:
            ValueError: at end of input or on a token of another type.
        """
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected one of: {token_types}, got EOF')
        if tok.token_type in token_types:
            return tok
        raise ValueError(f'Expected one of: {token_types}, got {tok.token_type}')

    def _expect_end_of_line(self):
        """Consume the statement terminator: a newline or the end of input."""
        tok = self.next()
        if tok is not None and tok.token_type != TokenType.newline:
            raise ValueError(f'Expected {TokenType.newline}, got {tok.token_type}')

    def parse_immediate(self, is_wide, is_relative):
        """Read one operand (number or label) and emit it after the opcode.

        A number is written directly. A label is recorded for later patching
        and a zero placeholder is written; with ``is_relative`` the
        placeholder is additionally marked to receive an offset.
        ``is_wide`` selects a two-byte operand instead of one byte.
        """
        tok = self.expectmany(TokenType.number, TokenType.identifier)
        if tok.token_type == TokenType.identifier:
            if is_relative:
                self.relative_addrs.add(self.cur)
            name = self._qualify(tok.value)
            if is_wide:
                self.pending_labels_wide[name].append(self.cur)
                self.write_short(0)
            else:
                self.pending_labels[name].append(self.cur)
                self.write_byte(0)
        elif is_wide:
            self.write_short(tok.value)
        else:
            self.write_byte(tok.value)

    def parse_identifier(self, tok):
        """Assemble the statement that starts with the identifier ``tok``.

        Handles the ``org`` (set address), ``rb`` (skip bytes) and ``bytes``
        (raw data) directives as well as every mnemonic in ``opcode_to_byte``
        (matched case-insensitively).

        Raises:
            ValueError: on an unknown word or a malformed statement.
        """
        ident = tok.value
        identl = ident.lower()
        if ident == 'org':
            self.cur = self.expect(TokenType.number).value
            self._expect_end_of_line()
        elif ident == 'rb':
            self.cur += self.expect(TokenType.number).value
            self._expect_end_of_line()
        elif ident == 'bytes':
            while True:
                tok = self.next()
                if tok is None:
                    raise ValueError('Expected number or string, got EOF')
                if tok.token_type == TokenType.string:
                    for b in tok.value.encode('ascii'):
                        self.write_byte(b)
                elif tok.token_type == TokenType.number:
                    self.write_byte(tok.value)
                else:
                    raise ValueError(f'Expected number or string, got {tok.token_type}')
                # Items are comma separated; newline or EOF ends the list.
                tok = self.next()
                if tok is None or tok.token_type == TokenType.newline:
                    break
                if tok.token_type != TokenType.comma:
                    raise ValueError(f'Expected comma, got {tok.token_type}')
        elif identl in opcode_to_byte:
            self.write_byte(opcode_to_byte[identl])
            if identl in have_immediate:
                is_relative = identl.startswith(tuple(relative))
                self.parse_immediate(identl in imm2, is_relative)
            self._expect_end_of_line()
        else:
            raise ValueError(f'Unknown instruction or directive: {ident!r}')
def main():
    """Assemble the file named by argv[1] into the binary file argv[2].

    Exits with a usage message when fewer than two paths are given. Both
    files are closed when assembly finishes or fails.
    """
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} INPUT OUTPUT')
    with open(sys.argv[1]) as source, open(sys.argv[2], 'wb') as rom:
        Assembler(source, rom).assemble()
# Mnemonics listed in opcode order: the position of a name in this list is
# the byte that encodes it.
opcodes = [
    'BRK', 'INC', 'POP', 'NIP', 'SWP', 'ROT', 'DUP', 'OVR',
    'EQU', 'NEQ', 'GTH', 'LTH', 'JMP', 'JCN', 'JSR', 'STH',
    'LDZ', 'STZ', 'LDR', 'STR', 'LDA', 'STA', 'DEI', 'DEO',
    'ADD', 'SUB', 'MUL', 'DIV', 'AND', 'ORA', 'EOR', 'SFT',
    'JCI', 'INC2', 'POP2', 'NIP2', 'SWP2', 'ROT2', 'DUP2', 'OVR2',
    'EQU2', 'NEQ2', 'GTH2', 'LTH2', 'JMP2', 'JCN2', 'JSR2', 'STH2',
    'LDZ2', 'STZ2', 'LDR2', 'STR2', 'LDA2', 'STA2', 'DEI2', 'DEO2',
    'ADD2', 'SUB2', 'MUL2', 'DIV2', 'AND2', 'ORA2', 'EOR2', 'SFT2',
    'JMI', 'INCr', 'POPr', 'NIPr', 'SWPr', 'ROTr', 'DUPr', 'OVRr',
    'EQUr', 'NEQr', 'GTHr', 'LTHr', 'JMPr', 'JCNr', 'JSRr', 'STHr',
    'LDZr', 'STZr', 'LDRr', 'STRr', 'LDAr', 'STAr', 'DEIr', 'DEOr',
    'ADDr', 'SUBr', 'MULr', 'DIVr', 'ANDr', 'ORAr', 'EORr', 'SFTr',
    'JSI', 'INC2r', 'POP2r', 'NIP2r', 'SWP2r', 'ROT2r', 'DUP2r', 'OVR2r',
    'EQU2r', 'NEQ2r', 'GTH2r', 'LTH2r', 'JMP2r', 'JCN2r', 'JSR2r', 'STH2r',
    'LDZ2r', 'STZ2r', 'LDR2r', 'STR2r', 'LDA2r', 'STA2r', 'DEI2r', 'DEO2r',
    'ADD2r', 'SUB2r', 'MUL2r', 'DIV2r', 'AND2r', 'ORA2r', 'EOR2r', 'SFT2r',
    'LIT', 'INCk', 'POPk', 'NIPk', 'SWPk', 'ROTk', 'DUPk', 'OVRk',
    'EQUk', 'NEQk', 'GTHk', 'LTHk', 'JMPk', 'JCNk', 'JSRk', 'STHk',
    'LDZk', 'STZk', 'LDRk', 'STRk', 'LDAk', 'STAk', 'DEIk', 'DEOk',
    'ADDk', 'SUBk', 'MULk', 'DIVk', 'ANDk', 'ORAk', 'EORk', 'SFTk',
    'LIT2', 'INC2k', 'POP2k', 'NIP2k', 'SWP2k', 'ROT2k', 'DUP2k', 'OVR2k',
    'EQU2k', 'NEQ2k', 'GTH2k', 'LTH2k', 'JMP2k', 'JCN2k', 'JSR2k', 'STH2k',
    'LDZ2k', 'STZ2k', 'LDR2k', 'STR2k', 'LDA2k', 'STA2k', 'DEI2k', 'DEO2k',
    'ADD2k', 'SUB2k', 'MUL2k', 'DIV2k', 'AND2k', 'ORA2k', 'EOR2k', 'SFT2k',
    'LITr', 'INCkr', 'POPkr', 'NIPkr', 'SWPkr', 'ROTkr', 'DUPkr', 'OVRkr',
    'EQUkr', 'NEQkr', 'GTHkr', 'LTHkr', 'JMPkr', 'JCNkr', 'JSRkr', 'STHkr',
    'LDZkr', 'STZkr', 'LDRkr', 'STRkr', 'LDAkr', 'STAkr', 'DEIkr', 'DEOkr',
    'ADDkr', 'SUBkr', 'MULkr', 'DIVkr', 'ANDkr', 'ORAkr', 'EORkr', 'SFTkr',
    'LIT2r', 'INC2kr', 'POP2kr', 'NIP2kr', 'SWP2kr', 'ROT2kr', 'DUP2kr', 'OVR2kr',
    'EQU2kr', 'NEQ2kr', 'GTH2kr', 'LTH2kr', 'JMP2kr', 'JCN2kr', 'JSR2kr', 'STH2kr',
    'LDZ2kr', 'STZ2kr', 'LDR2kr', 'STR2kr', 'LDA2kr', 'STA2kr', 'DEI2kr', 'DEO2kr',
    'ADD2kr', 'SUB2kr', 'MUL2kr', 'DIV2kr', 'AND2kr', 'ORA2kr', 'EOR2kr', 'SFT2kr',
]

# Lower-cased mnemonic -> opcode byte; lookups lower-case the source word
# first, so mnemonics are case-insensitive.
opcode_to_byte = {name.lower(): code for code, name in enumerate(opcodes)}
# Mnemonics followed by a one-byte immediate operand.
imm1 = {'lit', 'litr'}

# Mnemonics followed by a two-byte immediate operand.
imm2 = {'jci', 'jsi', 'jmi', 'lit2', 'lit2r'}

# Every mnemonic that takes an immediate operand.
have_immediate = imm1.union(imm2)

# Mnemonic prefixes whose label operand is encoded as an offset from the
# operand's position rather than as an absolute address.
relative = {'jci', 'jsi', 'jmi', 'ldr', 'str'}
# Run the assembler only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment