Skip to content

Instantly share code, notes, and snippets.

@recmo
Created April 10, 2025 08:24
Show Gist options
  • Save recmo/56ccd445210e26b6e92f2574f93cb0b3 to your computer and use it in GitHub Desktop.
Save recmo/56ccd445210e26b6e92f2574f93cb0b3 to your computer and use it in GitHub Desktop.
#!/usr/bin/env rust-script
//!
//! Implements a tail-calling interpreter in Rust.
//!
// Ideally we would create a State struct to hold the state variables and pass that along,
// but the compiler won't pass it through registers. We also can't use a macro to avoid
// repeating all the registers, as this would
// We want the dispatch table, program counter and vm state to be passed in registers as
// much as possible. This introduces some complications. The code would be cleanest
// if we created a `State` struct, to hold the state variables, and passed that along.
// But structs larger than ~16 bytes cause the compiler to allocate on the stack instead
// of splatting it over the registers.
// So we need all our opcode functions to have many arguments, which is repetitive. We
// also cannot use a macro to avoid repeating all the registers, as this violates
// the macro hygiene rules.
use std::mem::transmute;
// Passing the dispatch table along has the additional problem that this creates a recursive type.
// To handle this we create an erased type that replaces the recursion with a similarly sized dummy,
// and use transmute to add more levels of recursion to the type as necessary.
type Erased = fn(&[fn(); 256], *const u8, *mut i32, u64) -> u64;
type Handler = fn(&[Erased; 256], *const u8, *mut i32, u64) -> u64;
const DISPATCH: [Handler; 256] = [
add, sub, mul, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop,
];
/// Increment program counter and execute instruction.
///
/// This should compile to the minimal inlined asm sequence
///
/// x86_64:
/// ```asm
/// movzx eax, byte ptr [rsi]
/// inc rsi
/// mov rax, qword ptr [rdi + 8*rax]
/// jmp rax
/// ```
///
/// aarch64:
/// ```asm
/// ldrb w8, [x1], #1
/// ldr x4, [x0, x8, lsl #3]
/// br x4
/// ```
#[inline(always)]
pub fn dispatch(d: &[Erased; 256], mut pc: *const u8, sp: *mut i32, counter: u64) -> u64 {
// let unerased: &[Handler; 256] = unsafe { transmute(d) };
let opcode = unsafe { *pc };
pc = unsafe { pc.offset(1) };
DISPATCH[opcode as usize](d, pc, sp, counter)
}
///
/// x86_64:
/// ```asm
/// movzx eax, byte ptr [rsi]
/// inc rsi
/// lea r8, [rip + .Lanon.5790076eeabd1277365ad2c26eb1b477.0]
/// jmp qword ptr [r8 + 8*rax]
/// ```
/// aarch64:
///
/// ```asm
/// ldrb w8, [x1], #1
/// adrp x9, l_anon.5790076eeabd1277365ad2c26eb1b477.0@PAGE
/// add x9, x9, l_anon.5790076eeabd1277365ad2c26eb1b477.0@PAGEOFF
/// ldr x4, [x9, x8, lsl #3]
/// br x4
/// ```
// #[inline(always)]
// pub fn dispatch(mut pc: *const u8, sp: *mut i32, counter: u64) -> u64 {
// let opcode = unsafe { *pc };
// pc = unsafe { pc.offset(1) };
// DISPATCH[opcode as usize](d, pc, sp, counter)
// }
pub fn stop(d: &[Erased; 256], pc: *const u8, sp: *mut i32, counter: u64) -> u64 {
counter
}
pub fn add(d: &[Erased; 256], pc: *const u8, sp: *mut i32, mut counter: u64) -> u64 {
counter += 1;
dispatch(d, pc, sp, counter)
}
pub fn sub(d: &[Erased; 256], pc: *const u8, sp: *mut i32, mut counter: u64) -> u64 {
counter -= 1;
dispatch(d, pc, sp, counter)
}
pub fn mul(d: &[Erased; 256], pc: *const u8, sp: *mut i32, mut counter: u64) -> u64 {
counter *= 1337;
dispatch(d, pc, sp, counter)
}
pub fn main() {
// add sub add stop
let code = [0, 1, 0, 2];
let mut stack = [0; 32];
let erased: &[Erased; 256] = unsafe { transmute(&DISPATCH) };
let pc = code[..].as_ptr();
let sp = stack[..].as_mut_ptr();
let counter = 0;
dispatch(erased, pc, sp, counter);
assert_eq!(stack[0], 1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment