This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1D grid of 1D blocks
// Returns the calling thread's flat global index when both the grid and the
// block use only their x dimension: the block's offset (blockIdx.x * blockDim.x)
// plus the thread's position inside its block (threadIdx.x).
// The value is returned as int, so the launch is assumed to contain no more
// threads than an int can represent.
__device__ int getGlobalIdx_1D_1D()
{
    return blockIdx.x * blockDim.x + threadIdx.x;
}
// 1D grid of 2D blocks | |
__device__ int getGlobalIdx_1D_2D() | |
{ | |
return blockIdx.x * blockDim.x * blockDim.y + threadIdx.y * blockDim.x + threadIdx.x; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// GPU timing example
// uses CUDA_SAFE_CALL [https://gist.github.com/waltner/ece68738c42d38c5e9bd1862c43b1146.js]
// Timing result, initialised to zero with a float literal to match its type.
// NOTE(review): presumably filled in later by cudaEventElapsedTime -- that
// call is not part of this excerpt; confirm against the full file.
float elapsed_time = 0.0f;
// Event pair intended to bracket the region being timed.
cudaEvent_t start, stop;
CUDA_SAFE_CALL(cudaEventCreate(&start));
CUDA_SAFE_CALL(cudaEventCreate(&stop));
// Record the start marker on stream 0 (the default stream).
CUDA_SAFE_CALL(cudaEventRecord(start, 0));
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#ifndef CUDABASE_H_ | |
#define CUDABASE_H_ | |
#include <cuda.h> | |
#include <iostream> | |
#include <stdio.h> | |
# define CUDA_SAFE_CALL_NO_SYNC( call) do { \ | |
cudaError err = call; \ | |
if( cudaSuccess != err) { \ |