Skip to content

Instantly share code, notes, and snippets.

@deniska
Created April 22, 2026 07:47
Show Gist options
  • Select an option

  • Save deniska/bd0efc089b705a8d845842b740e11065 to your computer and use it in GitHub Desktop.

Select an option

Save deniska/bd0efc089b705a8d845842b740e11065 to your computer and use it in GitHub Desktop.
Simple assembler for uxn, written in Python
import sys
from dataclasses import dataclass
import collections
import enum
import string
class TokenType(enum.Enum):
    """Kinds of lexical token the tokenizer can emit."""

    # The numeric values carry no meaning of their own; they are the same
    # 1-based sequence that enum.auto() would assign in this order.
    identifier = 1
    number = 2
    string = 3
    comma = 4
    colon = 5
    newline = 6
@dataclass
class Token:
    """One lexical token: its kind plus the payload read from the source."""

    token_type: TokenType
    # Payload as produced by Tokenizer.tokenize: the name for identifiers,
    # an int for numbers, the decoded text for strings, and the literal
    # character for commas, colons and newlines.
    value: object
# Characters that may begin an identifier: ASCII letters, plus '.' which
# introduces a label local to the preceding non-dotted label.
identifier_start = set(string.ascii_letters) | {'.'}
# Characters that may follow the first one: all of the above plus digits.
identifier_cont = identifier_start.union(string.digits)
class Tokenizer:
    """Split assembly source text into a stream of ``Token`` objects.

    Characters are pulled one at a time from ``input_file`` (any object whose
    ``read(1)`` returns one character, or ``''`` at end of input).  A
    one-character push-back slot provides a single character of look-ahead.
    """

    def __init__(self, input_file):
        self.input_file = input_file
        # Pushed-back character, or None when the slot is empty.  The empty
        # string is a valid stashed value: it represents end of input.
        self.stash = None

    def next(self):
        """Return the next character, or ``''`` at end of input."""
        if self.stash is None:
            return self.input_file.read(1)
        c = self.stash
        self.stash = None
        return c

    def putback(self, c):
        """Push ``c`` back so that the following ``next()`` call returns it."""
        self.stash = c

    def _read_run(self, first, allowed):
        """Return ``first`` plus the following run of characters in ``allowed``.

        The character that ends the run is pushed back.  End of input is
        tested explicitly: ``'' in some_str`` is always True, so a plain
        membership test against a string of digits would never terminate.
        """
        chars = [first]
        while (c := self.next()) != '' and c in allowed:
            chars.append(c)
        self.putback(c)
        return ''.join(chars)

    def tokenize(self):
        """Yield the tokens of the input until end of input.

        Spaces, tabs, carriage returns and ``;`` comments (up to, but not
        including, the end of the line) produce no tokens.

        Raises:
            ValueError: on an unknown character, an unknown string escape,
                or a string literal not closed before the end of its line.
        """
        while (c := self.next()) != '':
            if c in ' \t\r':
                continue
            if c == '\n':
                yield Token(TokenType.newline, '\n')
            elif c in identifier_start:
                yield Token(TokenType.identifier, self._read_run(c, identifier_cont))
            elif c in string.digits:
                base = 10
                digit_chars = string.digits
                if c == '0':
                    # A leading "0x" switches to hexadecimal; the leading
                    # zero is kept, which does not change the parsed value.
                    peek = self.next()
                    if peek == 'x':
                        base = 16
                        digit_chars = string.hexdigits
                    else:
                        self.putback(peek)
                yield Token(TokenType.number, int(self._read_run(c, digit_chars), base))
            elif c == ':':
                yield Token(TokenType.colon, ':')
            elif c == ',':
                yield Token(TokenType.comma, ',')
            elif c == ';':
                # Comment: discard up to the newline, which is pushed back so
                # that it is still reported as a token.
                while (c := self.next()) not in ('\n', ''):
                    pass
                self.putback(c)
            elif c == '"':
                chars = []
                while (c := self.next()) != '"':
                    if c in ('\n', ''):
                        raise ValueError('Unclosed string')
                    if c == '\\':
                        # Only \" and \\ are recognised; the escaped
                        # character is appended exactly once, below.
                        c = self.next()
                        if c not in ('"', '\\'):
                            raise ValueError(f'Unknown string escape: {c!r}')
                    chars.append(c)
                yield Token(TokenType.string, ''.join(chars))
            else:
                raise ValueError(f'Unknown character: {c!r}')
class Assembler:
    """Assemble a token stream into a byte image and write it out.

    Bytes are placed into ``out_buf`` at absolute addresses.  Operands that
    name a label are first written as zero placeholders and patched once the
    whole input has been read, so labels may be used before their definition.
    """

    def __init__(self, input_file, output_file):
        """Read source text from ``input_file``; write the image to ``output_file``."""
        self.cur = 0  # address at which the next byte is written
        self.out_buf = bytearray()  # image, indexed by absolute address
        self.output_file = output_file
        self.tokens = Tokenizer(input_file).tokenize()
        self.stash = None  # single pushed-back token, or None
        self.prev_label = ''  # last non-local label; prefix for '.name' labels
        self.labels = {}  # fully-qualified label name -> address
        # label name -> addresses of two-byte / one-byte placeholders to patch
        self.pending_labels_wide = collections.defaultdict(list)
        self.pending_labels = collections.defaultdict(list)
        # placeholder addresses that receive an offset rather than an address
        self.relative_addrs = set()

    def next(self):
        """Return the next token, or None at end of input."""
        if self.stash is not None:
            tok, self.stash = self.stash, None
            return tok
        return next(self.tokens, None)

    def putback(self, tok):
        """Push ``tok`` back so that the following ``next()`` returns it."""
        self.stash = tok

    def write_byte(self, b):
        """Store ``b`` at the current address and advance by one.

        Raises:
            ValueError: if ``b`` does not fit in a byte.
        """
        if b > 255:
            raise ValueError(f'Numeric immediate too big for byte: {b}')
        # Zero-fill any gap between the end of the image and the write address.
        shortfall = self.cur + 1 - len(self.out_buf)
        if shortfall > 0:
            self.out_buf.extend(bytes(shortfall))
        self.out_buf[self.cur] = b
        self.cur += 1

    def write_short(self, num):
        """Store ``num`` as two bytes, high byte first.

        Raises:
            ValueError: if ``num`` exceeds 65535.
        """
        if num > 65535:
            raise ValueError(f'Numeric immediate too big for short: {num}')
        self.write_byte(num >> 8)
        self.write_byte(num & 0xff)

    def add_label(self, tok):
        """Bind the label named by ``tok`` to the current address.

        A name starting with '.' is local: it is stored prefixed with the
        most recent non-local label.  Any other name becomes that prefix.

        Raises:
            ValueError: if the resulting name is already defined.
        """
        name = tok.value
        if name[0] == '.':
            name = self.prev_label + name
        else:
            self.prev_label = name
        if name in self.labels:
            raise ValueError(f'Label {name!r} already defined')
        self.labels[name] = self.cur

    def assemble(self):
        """Assemble the whole input, patch label operands, write the output.

        Only the image from address 0x100 onward is written to
        ``output_file`` (presumably because uxn ROM files omit the zero
        page -- confirm against the uxn documentation).

        Raises:
            ValueError: on any syntax error, oversized value, duplicate
                label or reference to an undefined label.
        """
        while (tok := self.next()) is not None:
            if tok.token_type == TokenType.newline:
                continue
            if tok.token_type != TokenType.identifier:
                raise ValueError(f'Dunno what to do with {tok}')
            # "name:" defines a label; the statement, if any, follows it.
            # The look-ahead token may be None when the input ends here.
            following = self.next()
            if following is not None and following.token_type == TokenType.colon:
                self.add_label(tok)
                tok = self.next()
            else:
                self.putback(following)
            if tok is not None and tok.token_type != TokenType.newline:
                self.parse_identifier(tok)
        self._patch_labels(self.pending_labels_wide, self.write_short, 0xffff)
        self._patch_labels(self.pending_labels, self.write_byte, 0xff)
        self.output_file.write(self.out_buf[0x100:])

    def _patch_labels(self, pending, write, mask):
        """Overwrite each placeholder in ``pending`` with its label's value.

        ``write`` is ``write_short`` or ``write_byte``; ``mask`` wraps a
        negative relative offset to the operand's width.
        """
        for label, addrs in pending.items():
            if label not in self.labels:
                raise ValueError(f'Label not found: {label!r}')
            label_addr = self.labels[label]
            for addr in addrs:
                self.cur = addr
                if addr in self.relative_addrs:
                    # Offset is counted from two bytes past the operand start.
                    write((label_addr - addr - 2) & mask)
                else:
                    write(label_addr)

    def expect(self, token_type):
        """Return the next token, which must be of ``token_type``.

        Raises:
            ValueError: on end of input or a token of another type.
        """
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected {token_type}, got EOF')
        if tok.token_type != token_type:
            raise ValueError(f'Expected {token_type}, got {tok.token_type}')
        return tok

    def expectmany(self, *token_types):
        """Return the next token, which must be of one of ``token_types``.

        Raises:
            ValueError: on end of input or a token of any other type.
        """
        tok = self.next()
        if tok is None:
            raise ValueError(f'Expected one of: {token_types}, got EOF')
        if tok.token_type in token_types:
            return tok
        raise ValueError(f'Expected one of: {token_types}, got {tok.token_type}')

    def _expect_line_end(self):
        """Consume the newline that ends a statement; end of input also counts."""
        tok = self.next()
        if tok is not None and tok.token_type != TokenType.newline:
            raise ValueError(f'Expected {TokenType.newline}, got {tok.token_type}')

    def parse_immediate(self, is_wide, is_relative):
        """Read one operand (number or label) and emit it.

        A number is written directly.  A label is recorded as pending and a
        zero placeholder is written in its place; when ``is_relative`` is
        set the placeholder is later patched with an offset instead of the
        label's address.  ``is_wide`` selects a two-byte operand.
        """
        tok = self.expectmany(TokenType.number, TokenType.identifier)
        if tok.token_type == TokenType.identifier:
            name = tok.value
            if is_relative:
                self.relative_addrs.add(self.cur)
            if name[0] == '.':
                name = self.prev_label + name
            if is_wide:
                self.pending_labels_wide[name].append(self.cur)
                self.write_short(0)
            else:
                self.pending_labels[name].append(self.cur)
                self.write_byte(0)
        elif tok.token_type == TokenType.number:
            num = tok.value
            if is_wide:
                self.write_short(num)
            else:
                self.write_byte(num)

    def _parse_bytes(self):
        """Emit the comma-separated numbers and strings of a ``bytes`` directive."""
        while True:
            tok = self.next()
            if tok is None:
                raise ValueError('Expected number or string, got EOF')
            if tok.token_type == TokenType.string:
                for b in tok.value.encode('ascii'):
                    self.write_byte(b)
            elif tok.token_type == TokenType.number:
                self.write_byte(tok.value)
            else:
                raise ValueError(f'Expected number or string, got {tok.token_type}')
            tok = self.next()
            if tok is None or tok.token_type == TokenType.newline:
                break
            if tok.token_type != TokenType.comma:
                raise ValueError(f'Expected comma, got {tok.token_type}')

    def parse_identifier(self, tok):
        """Handle one statement whose first token is ``tok``.

        Directives (matched case-sensitively): ``org N`` sets the current
        address, ``rb N`` advances it by N without writing, ``bytes ...``
        emits raw data.  Anything else is looked up case-insensitively as an
        opcode mnemonic.

        Raises:
            ValueError: if ``tok`` is not an identifier, names no known
                instruction or directive, or the statement is malformed.
        """
        if tok.token_type != TokenType.identifier:
            raise ValueError(f'Expected instruction or directive, got {tok.token_type}')
        ident = tok.value
        identl = ident.lower()
        if ident == 'org':
            self.cur = self.expect(TokenType.number).value
            self._expect_line_end()
        elif ident == 'rb':
            self.cur += self.expect(TokenType.number).value
            self._expect_line_end()
        elif ident == 'bytes':
            self._parse_bytes()
        elif identl in opcode_to_byte:
            self.write_byte(opcode_to_byte[identl])
            if identl in have_immediate:
                is_relative = identl.startswith(tuple(relative))
                self.parse_immediate(identl in imm2, is_relative)
            self._expect_line_end()
        else:
            raise ValueError(f'Unknown instruction or directive: {ident!r}')
def main():
    """Assemble the source file named by argv[1] into the file named by argv[2].

    Both files are closed when assembly finishes or fails, so the output is
    flushed deterministically rather than at interpreter shutdown.
    """
    if len(sys.argv) < 3:
        sys.exit(f'usage: {sys.argv[0]} INPUT OUTPUT')
    with open(sys.argv[1]) as source, open(sys.argv[2], 'wb') as output:
        Assembler(source, output).assemble()
# All 256 opcode mnemonics; a mnemonic's position in the list is its byte
# value.  The table is eight groups of 32: each group applies one mode suffix
# to the 32 base mnemonics, except that the first slot of every group holds a
# dedicated mnemonic instead of a suffixed 'BRK'.
opcodes = [
    first_slot if slot == 0 else mnemonic + suffix
    for first_slot, suffix in (
        ('BRK', ''),
        ('JCI', '2'),
        ('JMI', 'r'),
        ('JSI', '2r'),
        ('LIT', 'k'),
        ('LIT2', '2k'),
        ('LITr', 'kr'),
        ('LIT2r', '2kr'),
    )
    for slot, mnemonic in enumerate((
        'BRK', 'INC', 'POP', 'NIP', 'SWP', 'ROT', 'DUP', 'OVR',
        'EQU', 'NEQ', 'GTH', 'LTH', 'JMP', 'JCN', 'JSR', 'STH',
        'LDZ', 'STZ', 'LDR', 'STR', 'LDA', 'STA', 'DEI', 'DEO',
        'ADD', 'SUB', 'MUL', 'DIV', 'AND', 'ORA', 'EOR', 'SFT',
    ))
]
# Lower-cased mnemonic -> opcode byte (the mnemonic's index in ``opcodes``),
# so that mnemonics can be matched case-insensitively.
opcode_to_byte = {mnemonic.lower(): code for code, mnemonic in enumerate(opcodes)}
# Mnemonics (lower-case) followed by a one-byte immediate operand.
imm1 = {'lit', 'litr'}
# Mnemonics (lower-case) followed by a two-byte immediate operand.
imm2 = {'jci', 'jsi', 'jmi', 'lit2', 'lit2r'}
# Every mnemonic that takes an immediate operand, of either width.
have_immediate = imm1.union(imm2)
# Mnemonic prefixes whose label operand is encoded as an offset from the
# operand's position rather than as the label's address.
relative = {'jci', 'jsi', 'jmi', 'ldr', 'str'}
# Run the assembler only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment