#gpu #kernel #rust
- GPU kernels in Rust
- Comptime
- Automatic vectorization
- Instruction and shape specialization
- Loop unrolling
use cranelift::prelude::*; | |
use cranelift_module::{Linkage, Module}; | |
use cranelift_object::{ObjectBuilder, ObjectModule}; | |
fn main() { | |
// create a settings builder to configure the opt level | |
let mut settings_builder = settings::builder(); | |
// disable optimizations | |
// TODO: take the opt level from the CLI. | |
settings_builder.set("opt_level", "none").unwrap(); |
# Install xpm using npm | |
$ npm install --global xpm@latest | |
# Install xPack distribution of Arm toolchain with Apple M1 Silicon support | |
$ xpm install @xpack-dev-tools/arm-none-eabi-gcc@10.3.1-2.3.1 | |
# Add Arm toolchain binaries to PATH in your .bashrc or .zshrc | |
$ echo 'export PATH="$HOME/Library/xPacks/@xpack-dev-tools/arm-none-eabi-gcc/10.3.1-2.3.1/.content/bin:$PATH"' >> ~/.bashrc | |
$ source ~/.bashrc |
Use the following commands to compile and link the examples:
$ gcc -std=c17 -pedantic-errors -O0 -g -S mutex.c
$ as --gstabs -o mutex.o mutex.s
$ gcc -o mutex mutex.o -lpthread
This implementation makes use of the C11 Atomic Operations Library.