Skip to content

Instantly share code, notes, and snippets.

@tomilov
Created October 31, 2020 20:20

Revisions

  1. tomilov created this gist Oct 31, 2020.
    40 changes: 40 additions & 0 deletions StdDevs for 8x8 tiles of input image.cu
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,40 @@
    // Side length, in pixels, of the square tiles the image is split into.
    constexpr size_t tileSize = 8;

    const size_t width = ...;
    const size_t height = ...;
    // Input pixels. NOTE(review): assumed to be width * height float3 values in
    // row-major order with a row stride of `width` elements - confirm with caller.
    thrust::cuda::pointer<const float3> p = ...;

    // The tiling below only covers the image if both sides are whole multiples
    // of the tile size.
    assert(width % tileSize == 0);
    assert(height % tileSize == 0);

    // One accumulated float3 per tile, tiles numbered left-to-right, top-to-bottom.
    thrust::cuda::vector<float3> sums;
    sums.resize((width / tileSize) * (height / tileSize));
    auto c = thrust::make_counting_iterator(0u);
    // Maps the i-th element of the tile-by-tile traversal (all tileSize * tileSize
    // pixels of tile 0, then of tile 1, ...) to that pixel's linear index in the image.
    //
    // The tile origin has to be expressed in pixel coordinates: scaling the tile
    // number by tileSize * tileSize is only right for the first tile of a row and
    // runs past the end of the image for the last tiles.
    auto getTileIndex = [width] __device__ (uint i) -> uint
    {
        const uint tilesPerRow = width / tileSize;
        const uint tileId = i / (tileSize * tileSize);
        const uint local = i % (tileSize * tileSize); // row-major offset inside the tile
        const uint x = (tileId % tilesPerRow) * tileSize + local % tileSize;
        const uint y = (tileId / tilesPerRow) * tileSize + local / tileSize;
        return y * width + x;
    };
    auto tile = thrust::make_transform_iterator(c, getTileIndex);
    // Key sequence: the tile number of every traversed element, so that each run of
    // tileSize * tileSize equal keys forms one reduction segment.
    auto block = thrust::make_transform_iterator(c, [] __device__ (uint i) -> uint { return i / (tileSize * tileSize); });
    // Pixels gathered in tile-by-tile order.
    auto input = thrust::make_permutation_iterator(p, tile);
    // Per-tile sums; the keys themselves are not needed, hence the discard iterator.
    thrust::reduce_by_key(block, thrust::next(block, width * height), input, thrust::make_discard_iterator(), sums.begin());

    // Second pass: one standard deviation per tile, computed from the per-tile sums.
    thrust::cuda::vector<float> stdDevs;
    stdDevs.resize(sums.size());
    // Pairs every pixel (in tile-by-tile order) with the sum of the tile it belongs to.
    // The tuple form of make_zip_iterator is used so that this does not depend on the
    // variadic overload being available in the installed Thrust version.
    auto sumAndInput = thrust::make_zip_iterator(thrust::make_tuple(thrust::make_permutation_iterator(sums.cbegin(), block), input));
    // Per-channel squared deviation of a pixel from its tile mean.
    auto sqr = thrust::make_transform_iterator(sumAndInput, [] __device__ (thrust::tuple<float3, float3> tileSumAndPixel) -> float3
    {
        constexpr float pixelsPerTile = tileSize * tileSize;
        const float3 mean = thrust::get<0>(tileSumAndPixel) / pixelsPerTile;
        const float3 diff = thrust::get<1>(tileSumAndPixel) - mean;
        return diff * diff;
    });
    // Turns a tile's summed squared deviations into a single standard deviation:
    // the sum is divided by the number of samples (pixels per tile times three
    // channels) before taking the root. Without the per-pixel normalisation the
    // result would be tileSize times too large.
    auto toStdDev = [] __device__ (float3 sumSqr) -> float
    {
        constexpr float sampleCount = 3.0f * (tileSize * tileSize);
        return sqrtf((sumSqr.x + sumSqr.y + sumSqr.z) / sampleCount);
    };
    // Applies toStdDev to each reduced value as it is written into stdDevs.
    auto output = thrust::make_transform_output_iterator(stdDevs.begin(), toStdDev);
    thrust::reduce_by_key(block, thrust::next(block, width * height), sqr, thrust::make_discard_iterator(), output);