Skip to content

Instantly share code, notes, and snippets.

@rrampage
Last active June 12, 2025 15:18
Show Gist options
  • Save rrampage/43c56d4a56f8f73320d17ff7b3a49be6 to your computer and use it in GitHub Desktop.
Save rrampage/43c56d4a56f8f73320d17ff7b3a49be6 to your computer and use it in GitHub Desktop.
ARM64 threads using Linux clone3 syscall
// Build-time constants for the clone3 thread demo (Linux, AArch64, GNU as).

// Syscall numbers used by this file.
.equ SYS_write, 64
.equ SYS_exit, 93
.equ SYS_clone3, 435

.equ CHILD_STACK_SIZE, 0x4000 // 16 KB per child

// Individual clone flag bits.
.equ CLONE_VM, 0x00000100
.equ CLONE_FS, 0x00000200
.equ CLONE_FILES, 0x00000400
.equ CLONE_SIGHAND, 0x00000800
.equ CLONE_PARENT, 0x00008000
.equ CLONE_THREAD, 0x00010000
.equ CLONE_IO, 0x80000000

// Flag word stored into clone_args. Derived from the named bits above instead
// of being repeated as a literal, so the two cannot drift apart.
// Evaluates to 0x80018f00.
.equ CLONE_FLAGS, (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_PARENT | CLONE_THREAD | CLONE_IO)

.equ STDOUT_FILENO, 1
.equ NUM_THREADS, 32 // number of threads _start creates
// ---------------------------------------------------------------------------
// Initialised data: the two messages written to stdout. Each string carries
// ANSI colour escape sequences; the *_len symbols are assemble-time byte counts.
// ---------------------------------------------------------------------------
.data
hello_msg:
    .ascii "Hello from \x1b[1;31mthread\x1b[0m!\n"
.equ hello_msg_len, . - hello_msg

parent_msg:
    .ascii "Hello from \x1b[1;33mmain\x1b[0m!\n"
.equ parent_msg_len, . - parent_msg

// ---------------------------------------------------------------------------
// Zero-initialised data.
// ---------------------------------------------------------------------------
.section .bss
// NOTE: with GNU as on AArch64 the .align operand is a power of two, so this
// requests a 2^16-byte (64 KiB) boundary, not 16 bytes.
.align 16
// One CHILD_STACK_SIZE-byte stack region per thread, laid out back to back;
// region i starts at child_stacks + i * CHILD_STACK_SIZE.
child_stacks:
    .space (NUM_THREADS * CHILD_STACK_SIZE)
// Argument block handed to the clone3 syscall (88 bytes). Fields that the code
// does not store to remain zero.
clone_args:
    .space 88
.text
.global _start
// ---------------------------------------------------------------------------
// _start — program entry point.
//
// Calls create_thread_clone3 once for every ID in 0 .. NUM_THREADS-1, writes
// parent_msg to stdout, spins for a fixed number of iterations so the threads
// get time to print, then issues the exit syscall with status 0.
//
// x19 = next thread ID to create (lives across the calls).
// The value returned by create_thread_clone3 is not examined.
// ---------------------------------------------------------------------------
_start:
    mov     x19, xzr                    // first thread ID = 0
    b       1f                          // test the bound before the first call

create_thread_loop:
    mov     x0, x19                     // argument: thread ID
    bl      create_thread_clone3
    add     x19, x19, #1
1:
    cmp     x19, #NUM_THREADS
    b.lt    create_thread_loop          // signed compare, as in the exit test it replaces

wait_loop:
    // write(STDOUT_FILENO, parent_msg, parent_msg_len)
    mov     x8, #SYS_write
    mov     x0, #STDOUT_FILENO
    adr     x1, parent_msg
    mov     x2, #parent_msg_len
    svc     #0

    // Crude delay: count 0x20000000 down to zero.
    mov     x0, #(1 << 29)
wait_inner:
    subs    x0, x0, #1
    b.ne    wait_inner

    // exit(0)
    mov     x8, #SYS_exit
    mov     x0, xzr
    svc     #0
// ---------------------------------------------------------------------------
// create_thread_clone3 — start one thread with the clone3 syscall.
//
// In:    x0 = thread ID; selects the stack region
//             child_stacks + ID * CHILD_STACK_SIZE, so it must be < NUM_THREADS
// Out:   x0 = value clone3 returned in the parent
//             (child TID on success, negative errno on failure)
// Clobb: x1, x2, x8, x9, x10, flags
//
// The new thread does not return from this routine. It branches to
// child_process with x20 = lowest address of its stack region, where the
// thread ID has been stored. The parent's x20 is saved and restored, as
// AAPCS64 requires for a callee-saved register.
//
// clone_args is a single shared static buffer, so this routine must not run
// concurrently on several threads.
// ---------------------------------------------------------------------------

// Offsets into the clone_args structure and its total size.
.equ CLONE_ARGS_FLAGS, 0
.equ CLONE_ARGS_STACK, 40
.equ CLONE_ARGS_STACK_SIZE, 48
.equ CLONE_ARGS_SIZE, 88

create_thread_clone3:
    stp     x20, x30, [sp, #-16]!       // save callee-saved x20 and the return address

    // x20 = child_stacks + ID * CHILD_STACK_SIZE (lowest address of the region).
    // adrp/add is used instead of adr: the .bss symbols can be more than 1 MiB
    // from the code once NUM_THREADS or CHILD_STACK_SIZE is raised.
    mov     x1, #CHILD_STACK_SIZE
    adrp    x2, child_stacks
    add     x2, x2, :lo12:child_stacks
    madd    x20, x0, x1, x2

    // Leave the thread ID where the child can load it via x20.
    str     x0, [x20]

    // Fill in the clone_args fields used here; the rest stay zero (.bss).
    adrp    x9, clone_args
    add     x9, x9, :lo12:clone_args
    ldr     x10, =CLONE_FLAGS
    str     x10, [x9, #CLONE_ARGS_FLAGS]
    str     x20, [x9, #CLONE_ARGS_STACK]        // lowest address of the stack region
    mov     x10, #CHILD_STACK_SIZE
    str     x10, [x9, #CLONE_ARGS_STACK_SIZE]   // size of the region in bytes

    // clone3(&clone_args, sizeof clone_args)
    mov     x0, x9
    mov     x1, #CLONE_ARGS_SIZE
    mov     x8, #SYS_clone3
    svc     #0

    // clone3 returns 0 in the new thread.
    cbz     x0, child_process

    // Parent (or failed call): hand x0 back to the caller unchanged.
    ldp     x20, x30, [sp], #16
    ret
// ---------------------------------------------------------------------------
// child_process — first code run by a newly created thread. Reached from
// create_thread_clone3 when clone3 returns 0.
//
// In:  x20 = address at which the parent stored this thread's ID
// Never returns: the thread ends through the exit syscall.
//
// sp is not modified here; nothing in this routine sets it up.
// ---------------------------------------------------------------------------
child_process:
    ldr     x19, [x20]                  // x19 = thread ID, the register thread_func reads
    bl      thread_func

    // exit(0). The status used to be whatever address x20 held, which is not a
    // meaningful exit code.
    mov     x0, #0
    mov     x8, #SYS_exit
    svc     #0
// ---------------------------------------------------------------------------
// thread_func — write the thread ID in decimal, followed by hello_msg, to
// stdout (two separate write syscalls).
//
// In:    x19 = thread ID (low 32 bits are used, treated as unsigned)
// Out:   x0  = result of the second write syscall
// Clobb: x0-x2, x8-x12, flags
// Stack: 16 bytes of scratch, released before returning; sp stays 16-byte aligned
//
// The ID is converted with a divide-by-10 loop, so IDs of 10 and above print
// as proper decimal numbers rather than as the single byte ID + '0'.
// The scratch buffer is allocated by moving sp, because AArch64 Linux has no
// red zone and memory below sp must not be relied upon.
// ---------------------------------------------------------------------------
thread_func:
    sub     sp, sp, #16                 // 16-byte buffer; a 32-bit value needs at most 10 digits
    add     x1, sp, #16                 // x1 = one past the end of the buffer
    mov     w9, #10
    mov     w10, w19                    // w10 = remaining value

1:  // Emit digits from least to most significant, filling the buffer backwards.
    udiv    w11, w10, w9                // w11 = value / 10
    msub    w12, w11, w9, w10           // w12 = value % 10
    add     w12, w12, #'0'
    strb    w12, [x1, #-1]!
    mov     w10, w11
    cbnz    w10, 1b

    // write(STDOUT_FILENO, first digit, number of digits)
    add     x2, sp, #16
    sub     x2, x2, x1
    mov     x0, #STDOUT_FILENO
    mov     x8, #SYS_write
    svc     #0

    // write(STDOUT_FILENO, hello_msg, hello_msg_len)
    mov     x0, #STDOUT_FILENO
    adr     x1, hello_msg
    mov     x2, #hello_msg_len
    mov     x8, #SYS_write
    svc     #0

    add     sp, sp, #16
    ret                                 // lr is untouched: no calls are made above
// ---------------------------------------------------------------------------
// Build-time constants (Linux, AArch64, GNU as).
// ---------------------------------------------------------------------------

// Syscall numbers.
.equ SYS_write, 64
.equ SYS_exit, 93
.equ SYS_mmap, 222
.equ SYS_clone3, 435

// mmap protection bits and mapping flags.
.equ PROT_READ, 0x1
.equ PROT_WRITE, 0x2
.equ MAP_PRIVATE, 0x02
.equ MAP_ANONYMOUS, 0x20
.equ MAP_GROWSDOWN, 0x100

// Size of each mmap'd thread stack.
.equ STACK_SIZE, (4 << 20) // 4 MB mmap stack

// clone flag bits, written as bit positions.
.equ CLONE_VM, (1 << 8)
.equ CLONE_FS, (1 << 9)
.equ CLONE_FILES, (1 << 10)
.equ CLONE_SIGHAND, (1 << 11)
.equ CLONE_PARENT, (1 << 15)
.equ CLONE_THREAD, (1 << 16)
.equ CLONE_IO, (1 << 31)

// Flag word stored into clone_args; evaluates to 0x80018f00.
.equ CLONE_FLAGS, (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_PARENT | CLONE_THREAD | CLONE_IO)

.equ STDOUT_FILENO, 1
.equ NUM_THREADS, 32 // number of threads _start creates
// Messages written to stdout. Each string carries ANSI colour escape
// sequences; the *_len symbols are assemble-time byte counts.
.section .data
hello_msg: .ascii "Hello from \x1b[1;31mthread\x1b[0m!\n"
hello_msg_len = . - hello_msg
parent_msg: .ascii "Hello from \x1b[1;33mmain\x1b[0m!\n"
parent_msg_len = . - parent_msg

.section .bss
// clone_args is accessed with 64-bit stores, so give it 8-byte alignment
// explicitly. Without a directive nothing in this section requests any
// alignment, and the buffer could land on an odd boundary after .data.
.balign 8
clone_args: .space 88 // clone_args structure; fields not written by the code stay zero
.text
.global _start
// ---------------------------------------------------------------------------
// _start — program entry point.
//
// Creates NUM_THREADS threads (IDs 0 .. NUM_THREADS-1) by calling
// create_thread_clone3, prints parent_msg, burns a fixed number of loop
// iterations as a crude wait, and finally exits with status 0.
//
// x19 = ID of the next thread to create; it must survive each call.
// The return value of create_thread_clone3 is ignored.
// ---------------------------------------------------------------------------
_start:
    mov     x19, xzr                    // thread counter starts at 0
    b       1f                          // check the bound before creating anything

create_thread_loop:
    mov     x0, x19                     // pass the current ID
    bl      create_thread_clone3
    add     x19, x19, #1
1:
    cmp     x19, #NUM_THREADS
    b.lt    create_thread_loop

wait_loop:
    // write(STDOUT_FILENO, parent_msg, parent_msg_len)
    adr     x1, parent_msg
    mov     x2, #parent_msg_len
    mov     x0, #STDOUT_FILENO
    mov     x8, #SYS_write
    svc     #0

    // Busy wait: decrement 0x20000000 to zero.
    mov     x0, #(1 << 29)
wait_inner:
    subs    x0, x0, #1
    b.ne    wait_inner

    // exit(0)
    mov     x0, xzr
    mov     x8, #SYS_exit
    svc     #0
// ---------------------------------------------------------------------------
// stack_create — map STACK_SIZE bytes of anonymous, private, read/write memory
// for use as a thread stack.
//
// In:    none
// Out:   x0 = address returned by mmap, or -1 if the kernel reported an error
// Clobb: x1-x5, x8, flags
//
// The raw syscall reports failure as a negative errno in [-4095, -1], never as
// exactly -1 unless the error happens to be errno 1. The result is therefore
// normalised to -1 so a caller testing for -1 catches every failure.
// ---------------------------------------------------------------------------
stack_create:
    // mmap(NULL, STACK_SIZE, PROT_READ|PROT_WRITE,
    //      MAP_PRIVATE|MAP_ANONYMOUS|MAP_GROWSDOWN, -1, 0)
    mov     x0, #0                      // addr = NULL: let the kernel choose
    mov     x1, #STACK_SIZE             // length
    mov     x2, #(PROT_READ | PROT_WRITE)
    mov     x3, #(MAP_PRIVATE | MAP_ANONYMOUS | MAP_GROWSDOWN)
    mov     x4, #-1                     // fd = -1 for an anonymous mapping
    mov     x5, #0                      // offset
    mov     x8, #SYS_mmap
    svc     #0

    cmn     x0, #4095                   // carry set <=> x0 is in [-4095, -1]
    csinv   x0, x0, xzr, lo             // success: keep the address; error: x0 = ~0 = -1
    ret
// ---------------------------------------------------------------------------
// create_thread_clone3 — map a fresh stack and start one thread on it with the
// clone3 syscall.
//
// In:    x0 = thread ID (stored at the lowest address of the new stack)
// Out:   x0 = child TID on success, or a negative value if stack_create or
//             clone3 failed
// Clobb: x1-x5, x8-x10, flags
// Stack: 32-byte frame; sp stays 16-byte aligned
//
// The new thread does not return from this routine. It branches to
// child_process with x20 = lowest address of its stack mapping. The parent's
// x20 is saved and restored, as AAPCS64 requires for a callee-saved register.
//
// Fixes relative to the earlier version:
//   * the failure path reloads lr before `ret`; it used to return to the
//     instruction after `bl stack_create` and spin forever while popping sp;
//   * every syscall error (any value in [-4095, -1]) is detected, not only -1;
//   * the stack mapping is released again when clone3 itself fails.
//
// clone_args is a single shared static buffer, so this routine must not run
// concurrently on several threads.
// ---------------------------------------------------------------------------

.equ SYS_munmap, 215
.equ MAX_ERRNO, 4095

// Offsets into the clone_args structure and its total size.
.equ CLONE_ARGS_FLAGS, 0
.equ CLONE_ARGS_STACK, 40
.equ CLONE_ARGS_STACK_SIZE, 48
.equ CLONE_ARGS_SIZE, 88

create_thread_clone3:
    stp     x20, x30, [sp, #-32]!       // frame: [sp]=x20, [sp+8]=lr, [sp+16]=scratch
    str     x0, [sp, #16]               // thread ID must survive the call below

    bl      stack_create
    cmn     x0, #MAX_ERRNO              // carry set <=> x0 is in [-4095, -1]
    b.hs    2f                          // no stack: return the negative value as-is

    mov     x20, x0                     // x20 = lowest address of the mapping
    ldr     x9, [sp, #16]
    str     x9, [x20]                   // leave the thread ID where the child can load it

    // Fill in the clone_args fields used here; the rest stay zero (.bss).
    adrp    x9, clone_args
    add     x9, x9, :lo12:clone_args
    ldr     x10, =CLONE_FLAGS
    str     x10, [x9, #CLONE_ARGS_FLAGS]
    str     x20, [x9, #CLONE_ARGS_STACK]        // lowest address of the stack
    mov     x10, #STACK_SIZE
    str     x10, [x9, #CLONE_ARGS_STACK_SIZE]   // size of the stack in bytes

    // clone3(&clone_args, sizeof clone_args)
    mov     x0, x9
    mov     x1, #CLONE_ARGS_SIZE
    mov     x8, #SYS_clone3
    svc     #0

    // clone3 returns 0 in the new thread.
    cbz     x0, child_process

    cmn     x0, #MAX_ERRNO
    b.lo    2f                          // success: x0 = child TID

    // clone3 failed: munmap(x20, STACK_SIZE) so the stack does not leak, then
    // report the original error.
    str     x0, [sp, #16]
    mov     x0, x20
    mov     x1, #STACK_SIZE
    mov     x8, #SYS_munmap
    svc     #0
    ldr     x0, [sp, #16]

2:  // Common exit for success and failure.
    ldp     x20, x30, [sp], #32
    ret
// ---------------------------------------------------------------------------
// child_process — first code run by a newly created thread. Reached from
// create_thread_clone3 when clone3 returns 0.
//
// In:  x20 = address at which the parent stored this thread's ID
//            (the lowest address of the thread's stack mapping)
// Never returns: the thread ends through the exit syscall.
//
// sp is not modified here; nothing in this routine sets it up.
// NOTE: the stack mapping is not unmapped when the thread exits.
// ---------------------------------------------------------------------------
child_process:
    ldr     x19, [x20]                  // x19 = thread ID, the register thread_func reads
    bl      thread_func

    // exit(0). The status used to be the stack address held in x20, which is
    // not a meaningful exit code.
    mov     x0, #0
    mov     x8, #SYS_exit
    svc     #0
// ---------------------------------------------------------------------------
// thread_func — write the thread ID in decimal, followed by hello_msg, to
// stdout (two separate write syscalls).
//
// In:    x19 = thread ID (low 32 bits are used, treated as unsigned)
// Out:   x0  = result of the second write syscall
// Clobb: x0-x2, x8-x12, flags
// Stack: 16 bytes of scratch, released before returning; sp stays 16-byte aligned
//
// The ID is converted with a divide-by-10 loop, so IDs of 10 and above print
// as proper decimal numbers rather than as the single byte ID + '0'.
// The scratch buffer is allocated by moving sp, because AArch64 Linux has no
// red zone and memory below sp must not be relied upon.
// ---------------------------------------------------------------------------
thread_func:
    sub     sp, sp, #16                 // 16-byte buffer; a 32-bit value needs at most 10 digits
    add     x1, sp, #16                 // x1 = one past the end of the buffer
    mov     w9, #10
    mov     w10, w19                    // w10 = remaining value

1:  // Emit digits from least to most significant, filling the buffer backwards.
    udiv    w11, w10, w9                // w11 = value / 10
    msub    w12, w11, w9, w10           // w12 = value % 10
    add     w12, w12, #'0'
    strb    w12, [x1, #-1]!
    mov     w10, w11
    cbnz    w10, 1b

    // write(STDOUT_FILENO, first digit, number of digits)
    add     x2, sp, #16
    sub     x2, x2, x1
    mov     x0, #STDOUT_FILENO
    mov     x8, #SYS_write
    svc     #0

    // write(STDOUT_FILENO, hello_msg, hello_msg_len)
    mov     x0, #STDOUT_FILENO
    adr     x1, hello_msg
    mov     x2, #hello_msg_len
    mov     x8, #SYS_write
    svc     #0

    add     sp, sp, #16
    ret                                 // lr is untouched: no calls are made above
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment