Created
June 26, 2025 15:12
-
-
Save googlefan256/c4ea9f71ccb4b0a45f52e78b9a684115 to your computer and use it in GitHub Desktop.
Working example of tinygrad in Rust
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::ffi::c_int; | |
fn inner() -> anyhow::Result<()> { | |
use tinygrad_in_rust::backend::cuda; | |
cuda::cuda_init("/usr/lib/x86_64-linux-gnu/libcuda.so")?; | |
cuda::nvrtc_init("/usr/local/cuda/lib64/libnvrtc.so")?; | |
cuda::nvjitlink_init("/usr/local/cuda/lib64/libnvJitLink.so")?; | |
let res = cuda::CuCompiler::new_default()?.compile( | |
r#" | |
extern "C" __global__ void add(int *a, int *b, int *c, int size) { | |
int idx = blockIdx.x * blockDim.x + threadIdx.x; | |
if (idx < size) | |
{ | |
c[idx] = a[idx] + b[idx]; | |
} | |
} | |
"#, | |
"test.cu", | |
true, | |
)?; | |
println!("Compiled successfully, size: {}", res.len()); | |
let arch = cuda::CudaArch::new_least()?; | |
println!("Using CUDA architecture: {:?}", arch); | |
let res = cuda::NvPTXCompiler::new(&arch)?.compile(&String::from_utf8(res)?, "test.cu")?; | |
println!("PTX compiled to binary successfully, size: {}", res.len()); | |
let context = cuda::CudaContext::new()?; | |
context.set_current()?; | |
let stream = cuda::CudaStream::new(&context)?; | |
let alloc = cuda::CudaAllocator::new(&context, &stream); | |
let m = cuda::CudaProgram::new(&res, "add", Some(200), &stream)?; | |
let mut a = alloc.alloc(1024 * size_of::<c_int>(), false)?; | |
alloc.copy_in(&a, &(0..1024).map(|x| x / 2 as c_int).collect::<Vec<_>>())?; | |
let mut b = alloc.alloc(1024 * size_of::<c_int>(), false)?; | |
alloc.copy_in(&b, &(0..1024).map(|x| 512 - x as c_int).collect::<Vec<_>>())?; | |
let mut o = alloc.alloc(1024 * size_of::<c_int>(), false)?; | |
m.run( | |
cuda::kernel_args!(&mut a.ptr, &mut b.ptr, &mut o.ptr, &mut c_int::from(1024)), | |
((1024 / size_of::<c_int>() as u32 + 255) / 256, 1, 1), | |
(256, 1, 1), | |
)?; | |
println!("Kernel executed successfully"); | |
let mut c = vec![c_int::default(); 1024]; | |
alloc.copy_out(&o, &mut c)?; | |
println!("Copied data to device, first 10 bytes: {:?}", &c[..10]); | |
let mut d = vec![c_int::default(); 1024]; | |
alloc.transfer(&o, &b, 1024 * size_of::<c_int>())?; | |
alloc.copy_out(&b, &mut d)?; | |
println!("Copied data to device, first 10 bytes: {:?}", &d[..10]); | |
alloc.free(a)?; | |
alloc.free(b)?; | |
alloc.free(o)?; | |
stream.drop()?; | |
context.drop()?; | |
Ok(()) | |
} | |
fn main() -> anyhow::Result<()> { | |
if let Err(e) = inner() { | |
eprintln!("Error: {}", e); | |
} | |
Ok(()) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment