// cuDNN on manjaro
// Gist delijati/200b1b8edfee4227da560b83e0d1c563, created November 22, 2022 12:21.
#include <cstdlib>
#include <iostream>

#include <cuda_runtime.h>
#include <cudnn.h>
/**
 * Minimal example that applies the sigmoid activation to a tensor
 * using cuDNN.
 **/
int main(int argc, char** argv) | |
{ | |
int numGPUs; | |
int driverVersion = 0, runtimeVersion = 0; | |
cudaGetDeviceCount(&numGPUs); | |
std::cout << "Found " << numGPUs << " GPUs." << std::endl; | |
cudaSetDevice(0); // use GPU0 | |
int device; | |
struct cudaDeviceProp devProp; | |
cudaGetDevice(&device); | |
cudaGetDeviceProperties(&devProp, device); | |
cudaDriverGetVersion(&driverVersion); | |
cudaRuntimeGetVersion(&runtimeVersion); | |
std::cout << "Device: " << devProp.name << std::endl; | |
std::cout << "Driver Version: " << driverVersion<<"\n"; | |
std::cout << "Runtime Version: " << runtimeVersion<<"\n"; | |
std::cout << "Compute capability:" << devProp.major << "." << devProp.minor << std::endl; | |
std::cout << "Total amount of global memory: "<<(unsigned long long)devProp.totalGlobalMem<<" bytes\n"; | |
std::cout << "Total amount of constant memory: "<<devProp.totalConstMem<<"bytes\n"; | |
std::cout << "Total amount of shared memory per block: "<<devProp.sharedMemPerBlock<<" bytes\n"; | |
std::cout << "Total number of registers available per block: "<<devProp.regsPerBlock<<"\n"; | |
std::cout << "Warp size: "<<devProp.warpSize<<"\n"; | |
cudnnHandle_t handle_; | |
cudnnCreate(&handle_); | |
std::cout << "Created cuDNN handle" << std::endl; | |
// create the tensor descriptor | |
cudnnDataType_t dtype = CUDNN_DATA_FLOAT; | |
cudnnTensorFormat_t format = CUDNN_TENSOR_NCHW; | |
int n = 1, c = 1, h = 1, w = 10; | |
int NUM_ELEMENTS = n*c*h*w; | |
cudnnTensorDescriptor_t x_desc; | |
cudnnCreateTensorDescriptor(&x_desc); | |
cudnnSetTensor4dDescriptor(x_desc, format, dtype, n, c, h, w); | |
// create the tensor | |
float *x; | |
cudaMallocManaged(&x, NUM_ELEMENTS * sizeof(float)); | |
for(int i=0;i<NUM_ELEMENTS;i++) x[i] = i * 1.00f; | |
std::cout << "Original array: "; | |
for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " "; | |
// create activation function descriptor | |
float alpha[1] = {1}; | |
float beta[1] = {0.0}; | |
cudnnActivationDescriptor_t sigmoid_activation; | |
cudnnActivationMode_t mode = CUDNN_ACTIVATION_SIGMOID; | |
cudnnNanPropagation_t prop = CUDNN_NOT_PROPAGATE_NAN; | |
cudnnCreateActivationDescriptor(&sigmoid_activation); | |
cudnnSetActivationDescriptor(sigmoid_activation, mode, prop, 0.0f); | |
cudnnActivationForward( | |
handle_, | |
sigmoid_activation, | |
alpha, | |
x_desc, | |
x, | |
beta, | |
x_desc, | |
x | |
); | |
cudnnDestroy(handle_); | |
std::cout << std::endl << "Destroyed cuDNN handle." << std::endl; | |
std::cout << "New array: "; | |
for(int i=0;i<NUM_ELEMENTS;i++) std::cout << x[i] << " "; | |
std::cout << std::endl; | |
cudaFree(x); | |
return 0; | |
} | |
/*
Info:
$ lsb_release -a
LSB Version: n/a
Distributor ID: ManjaroLinux
Description: Manjaro Linux
Release: 22.0.0
Codename: Sikaris
$ uname -a
Linux papagayo 5.15.78-1-MANJARO #1 SMP PREEMPT Thu Nov 10 20:50:09 UTC 2022 x86_64 GNU/Linux
$ nvidia-smi -L
GPU 0: NVIDIA GeForce MX450
Build:
NOTE: the commands below are recorded as they were run. The ppc64le-linux
paths and the compute_80 flag do not match this x86_64 machine and its
compute-capability-7.5 GPU, so they are presumably unnecessary; adjust the
paths and architecture for your own installation.
$ g++ -I/opt/cuda/include -I/opt/cuda/targets/ppc64le-linux/include -o hw.o -c hw.cpp
$ nvcc -ccbin g++ -m64 -gencode arch=compute_80,code=sm_80 -o hw hw.o -I/opt/cuda/include -I/opt/cuda/targets/ppc64le-linux/include -L/opt/cuda/lib64 -L/opt/cuda/targets/ppc64le-linux/lib -lcublasLt -lcudart -lcublas -lcudnn -lstdc++ -lm
$ ./hw
Found 1 GPUs.
Device: NVIDIA GeForce MX450
Driver Version: 11080
Runtime Version: 11080
Compute capability:7.5
Total amount of global memory: 1969815552 bytes
Total amount of constant memory: 65536bytes
Total amount of shared memory per block: 49152 bytes
Total number of registers available per block: 65536
Warp size: 32
Created cuDNN handle
Original array: 0 1 2 3 4 5 6 7 8 9
Destroyed cuDNN handle.
New array: 0.5 0.731059 0.880797 0.952574 0.982014 0.993307 0.997527 0.999089 0.999665 0.999877
*/
// End of gist listing (GitHub sign-up / sign-in page footer omitted).