Created
April 10, 2025 08:24
-
-
Save recmo/56ccd445210e26b6e92f2574f93cb0b3 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env rust-script | |
//! | |
//! Implements a tail-calling interpreter in Rust. | |
//! | |
// Ideally we would create a State struct to hold the state variables and pass that along, | |
// but the compiler won't pass it through registers. We also can't use a macro to avoid | |
// repeating all the registers, as this would | |
// We want the dispatch table, program counter and vm state to be passed in registers as | |
// much as possible. This introduces some complications. The code would be cleanest | |
// if we created a `State` struct, to hold the state variables, and passed that along. | |
// But structs larger than ~16 bytes cause the compiler to allocate on the stack instead | |
// of splatting it over the registers. | |
// So we need all our opcode functions to have many arguments, which is repetitive. We | |
// also cannot use a macro to avoid repeating all the registers, as this violates | |
// the macro hygiene rules. | |
use std::mem::transmute; | |
// Passing the dispatch table along has the additional problem that this creates a recursive type. | |
// To handle this we create an erased type that replaces the recursion with a similarly sized dummy, | |
// and use transmute to add more levels of recursion to the type as necessary. | |
type Erased = fn(&[fn(); 256], *const u8, *mut i32, u64) -> u64; | |
type Handler = fn(&[Erased; 256], *const u8, *mut i32, u64) -> u64; | |
const DISPATCH: [Handler; 256] = [ | |
add, sub, mul, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, stop, | |
]; | |
/// Increment program counter and execute instruction. | |
/// | |
/// This should compile to the minimal inlined asm sequence | |
/// | |
/// x86_64: | |
/// ```asm | |
/// movzx eax, byte ptr [rsi] | |
/// inc rsi | |
/// mov rax, qword ptr [rdi + 8*rax] | |
/// jmp rax | |
/// ``` | |
/// | |
/// aarch64: | |
/// ```asm | |
/// ldrb w8, [x1], #1 | |
/// ldr x4, [x0, x8, lsl #3] | |
/// br x4 | |
/// ``` | |
#[inline(always)] | |
pub fn dispatch(d: &[Erased; 256], mut pc: *const u8, sp: *mut i32, counter: u64) -> u64 { | |
// let unerased: &[Handler; 256] = unsafe { transmute(d) }; | |
let opcode = unsafe { *pc }; | |
pc = unsafe { pc.offset(1) }; | |
DISPATCH[opcode as usize](d, pc, sp, counter) | |
} | |
/// | |
/// x86_64: | |
/// ```asm | |
/// movzx eax, byte ptr [rsi] | |
/// inc rsi | |
/// lea r8, [rip + .Lanon.5790076eeabd1277365ad2c26eb1b477.0] | |
/// jmp qword ptr [r8 + 8*rax] | |
/// ``` | |
/// aarch64: | |
/// | |
/// ```asm | |
/// ldrb w8, [x1], #1 | |
/// adrp x9, l_anon.5790076eeabd1277365ad2c26eb1b477.0@PAGE | |
/// add x9, x9, l_anon.5790076eeabd1277365ad2c26eb1b477.0@PAGEOFF | |
/// ldr x4, [x9, x8, lsl #3] | |
/// br x4 | |
/// ``` | |
// #[inline(always)] | |
// pub fn dispatch(mut pc: *const u8, sp: *mut i32, counter: u64) -> u64 { | |
// let opcode = unsafe { *pc }; | |
// pc = unsafe { pc.offset(1) }; | |
// DISPATCH[opcode as usize](d, pc, sp, counter) | |
// } | |
pub fn stop(d: &[Erased; 256], pc: *const u8, sp: *mut i32, counter: u64) -> u64 { | |
counter | |
} | |
pub fn add(d: &[Erased; 256], pc: *const u8, sp: *mut i32, mut counter: u64) -> u64 { | |
counter += 1; | |
dispatch(d, pc, sp, counter) | |
} | |
pub fn sub(d: &[Erased; 256], pc: *const u8, sp: *mut i32, mut counter: u64) -> u64 { | |
counter -= 1; | |
dispatch(d, pc, sp, counter) | |
} | |
pub fn mul(d: &[Erased; 256], pc: *const u8, sp: *mut i32, mut counter: u64) -> u64 { | |
counter *= 1337; | |
dispatch(d, pc, sp, counter) | |
} | |
pub fn main() { | |
// add sub add stop | |
let code = [0, 1, 0, 2]; | |
let mut stack = [0; 32]; | |
let erased: &[Erased; 256] = unsafe { transmute(&DISPATCH) }; | |
let pc = code[..].as_ptr(); | |
let sp = stack[..].as_mut_ptr(); | |
let counter = 0; | |
dispatch(erased, pc, sp, counter); | |
assert_eq!(stack[0], 1); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment