Created
May 31, 2019 22:54
-
-
Save PWhiddy/791d868929536f852fe2376bfd40d684 to your computer and use it in GitHub Desktop.
options for speeding up the stamp generation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// original function, generates single trajectory | |
RawImage KBMOSearch::stackedStamps(trajectory t, int radius, std::vector<RawImage*> imgs) | |
{ | |
if (radius<0) throw std::runtime_error("stamp radius must be at least 0"); | |
int dim = radius*2+1; | |
RawImage stamp(dim, dim); | |
std::vector<float> times = stack.getTimes(); | |
for (int i=0; i<imgs.size(); ++i) | |
{ | |
for (int x=0; x<dim; ++x) | |
{ | |
for (int y=0; y<dim; ++y) | |
{ | |
float pixVal = imgs[i]->getPixelInterp( | |
t.x + times[i] * t.xVel + static_cast<float>(x-radius), | |
t.y + times[i] * t.yVel + static_cast<float>(y-radius)); | |
if (pixVal == NO_DATA) pixVal = 0.0; | |
stamp.addToPixel(static_cast<int>(x),static_cast<int>(y), pixVal); | |
} | |
} | |
} | |
return stamp; | |
} | |
// batch (this is just the general idea jotted down, imagine it as untested psuedo-code. Not tested at all!) | |
std::vector<RawImage> KBMOSearch::stackedStampsBatch(std::vector<trajectory> t, int radius, std::vector<RawImage*> imgs) | |
{ | |
if (radius<0) throw std::runtime_error("stamp radius must be at least 0"); | |
int dim = radius*2+1; | |
std::vector<RawImage> stamps; | |
stamps.reserve(t.size()); | |
std::vector<float> times = stack.getTimes(); | |
#pragma omp parallel for | |
for (int s=0; s<t.size(); s++) { | |
stamps[i] = RawImage(dim,dim); | |
for (int i=0; i<imgs.size(); ++i) | |
{ | |
for (int x=0; x<dim; ++x) | |
{ | |
for (int y=0; y<dim; ++y) | |
{ | |
float pixVal = imgs[i]->getPixelInterp( | |
t.x + times[i] * t.xVel + static_cast<float>(x-radius), | |
t.y + times[i] * t.yVel + static_cast<float>(y-radius)); | |
if (pixVal == NO_DATA) pixVal = 0.0; | |
stamps[i].addToPixel(static_cast<int>(x),static_cast<int>(y), pixVal); | |
} | |
} | |
} | |
} | |
return stamps; | |
} | |
// gpu batch (also treat this only as a rough guideline) | |
std::vector<RawImage> KBMOSearch::stackedStampsBatchGPU(std::vector<trajectory> t, int radius, std::vector<RawImage*> imgs) | |
{ | |
if (radius<0) throw std::runtime_error("stamp radius must be at least 0"); | |
int dim = radius*2+1; | |
std::vector<RawImage> stamps; | |
stamps.reserve(t.size()); | |
std::vector<float> times = stack.getTimes(); | |
// 1. allocate GPU memory for trajctories, images, and stamps (cudaMalloc) | |
// 2. Then copy images and trajectories into that gpu memory (cudaMemcpy) | |
// 3. create thread blocks ( if the stamps are larger than 8*8, | |
// making each block have the dimensions of the stamp would probably work nice) | |
// 4. make a 1D grid that just has one threadblock for each stamp | |
// 5. launch a kernel that stacks together a single pixel (a block of these together makes one stamp), | |
// and writes the result into the gpu stamp memory allocated earlier | |
// stackStampKernel<<<grid,threads>>>(gpuTrajs, gpuImages, gpuStamps, radius, imageDims... imageCount...) | |
// 6. copy the stamps back to cpu | |
// 7. free gpu memory | |
// 8. return results | |
// the memory copying could be simpler using an api that handles it automatically, but I haven't used those features before | |
return stamps; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment