Skip to content

Instantly share code, notes, and snippets.

@JnCrMx
Last active January 26, 2021 10:29
Show Gist options
  • Save JnCrMx/b4b0a3e0705f358afa722e94f1109a6c to your computer and use it in GitHub Desktop.
Save JnCrMx/b4b0a3e0705f358afa722e94f1109a6c to your computer and use it in GitHub Desktop.
An extremely simple and basic 3D renderer written in C++ using HIP and lodepng.
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <limits>
#include <string>
#include <thread>
#include <vector>
#include <hip/hip_runtime.h>
#include <hip/hip_vector_types.h>
#include "lodepng.h"
// Evaluates the HIP call `x` exactly once and aborts with a diagnostic if it
// does not return hipSuccess. Unlike a plain assert(), the call is still
// executed when NDEBUG is defined, so release builds keep their allocations,
// copies and synchronisation. Usable wherever a single statement is expected.
#define HIP_ASSERT(x) \
    do { \
        const hipError_t hip_assert_status_ = (x); \
        if(hip_assert_status_ != hipSuccess) \
        { \
            std::cerr << "HIP error: " << hipGetErrorString(hip_assert_status_) \
                      << " in " << #x \
                      << " at " << __FILE__ << ":" << __LINE__ << std::endl; \
            std::abort(); \
        } \
    } while(0)

// Dot product of two values exposing .x/.y/.z members. Arguments are
// parenthesised so arbitrary expressions (e.g. *ptr) can be passed; note that
// each argument is evaluated three times.
#define DOT(a, b) ((a).x*(b).x + (a).y*(b).y + (a).z*(b).z)
// One facet of the mesh. Member order matters: instances are built with
// aggregate initialisation as {a, b, c, normal, color} and the array of
// triangles is copied to the device byte-for-byte.
struct triangle {
    double3 a, b, c;    // the three corner points
    double3 normal;     // facet normal as read from the mesh file
    float3 color;       // per-channel factor applied to the shading value
};
// Reads three consecutive 32-bit floats (x, y, z) from `stream` and widens
// them to a double3.
//
// The buffer is zero-initialised, so a short or failed read returns defined
// values (zeros for every component that was not read) instead of
// indeterminate ones; the stream's fail state tells the caller that happened.
// NOTE(review): the bytes are interpreted in host byte order and host float
// format - confirm this matches the files being loaded.
double3 read_vector(std::ifstream& stream)
{
    float components[3] = {0.0f, 0.0f, 0.0f};
    stream.read(reinterpret_cast<char *>(components), sizeof(components));
    return make_double3(components[0], components[1], components[2]);
}
int read_stl(std::string fname, std::vector<triangle> &v)
{
std::ifstream file(fname.c_str(), std::ios::in | std::ios::binary);
if(!file)
{
std::cerr << "File not found: " << fname << std::endl;
return 2;
}
file.ignore(80);
int triangle_count;
file.read(reinterpret_cast<char *>(&triangle_count), sizeof(triangle_count));
v.resize(triangle_count);
for(int i=0; i<triangle_count; i++)
{
double3 normal = read_vector(file);
double3 a = read_vector(file);
double3 b = read_vector(file);
double3 c = read_vector(file);
v.push_back(triangle{a, b, c, normal, make_float3(1.0f, 1.0f, 1.0f)});
file.ignore(2);
}
return 0;
}
// Intersects the ray pos + t * direction with the plane of all points p that
// satisfy DOT(normal, p) == dotPos.
//
// Returns false without touching *storeT when DOT(normal, direction) >= 0,
// i.e. when the ray runs parallel to the plane or in the direction the normal
// points. Otherwise the ray parameter t is always written to *storeT and the
// result says whether t lies within [minT, maxT].
__device__ bool intersect(double3 normal, double dotPos,
        double3 pos, double3 direction,
        double minT, double maxT, double* storeT)
{
    const double denominator = DOT(normal, direction);
    if(denominator >= 0.0)
        return false;

    const double rayParam = (dotPos - DOT(normal, pos)) / denominator;
    *storeT = rayParam;

    // Written as a negated rejection test so that a NaN parameter is treated
    // exactly as in a "reject if below or above" check.
    const bool outsideRange = (rayParam < minT) || (rayParam > maxT);
    return !outsideRange;
}
// Solves point = base + r * vec1 + s * vec2 for r and s.
//
// The solve uses all three coordinates through dot products. Projecting onto
// a fixed pair of axes instead breaks for triangles whose projection onto
// that pair collapses to a line (for example a triangle with constant y),
// producing 0/0. If `point` is slightly off the plane spanned by vec1 and
// vec2, the result is the least-squares best fit.
//
// When vec1 and vec2 are parallel or zero-length no unique solution exists;
// *r and *s are then both set to -1 so that range checks on them fail.
__device__ void point_parameters(double3 base, double3 vec1, double3 vec2, double3 point,
        double* r, double* s)
{
    // Offset of the point from the triangle's base corner.
    const double wx = point.x - base.x;
    const double wy = point.y - base.y;
    const double wz = point.z - base.z;

    const double d11 = vec1.x*vec1.x + vec1.y*vec1.y + vec1.z*vec1.z;
    const double d12 = vec1.x*vec2.x + vec1.y*vec2.y + vec1.z*vec2.z;
    const double d22 = vec2.x*vec2.x + vec2.y*vec2.y + vec2.z*vec2.z;
    const double dw1 = wx*vec1.x + wy*vec1.y + wz*vec1.z;
    const double dw2 = wx*vec2.x + wy*vec2.y + wz*vec2.z;

    // Determinant of the 2x2 system
    //   dw1 = r*d11 + s*d12
    //   dw2 = r*d12 + s*d22
    // It is positive for any non-degenerate pair of edges.
    const double det = d11*d22 - d12*d12;
    if(!(det > 0.0))
    {
        *r = -1.0;
        *s = -1.0;
        return;
    }

    *r = (d22*dw1 - d12*dw2) / det;
    *s = (d11*dw2 - d12*dw1) / det;
}
// Tests the ray pos + t * direction against one triangle.
//
// First the ray is intersected with the triangle's plane (intersect() applies
// the [minT, maxT] range and facing test and writes t to *storeT). Then the
// hit point is expressed as a + r*(b-a) + s*(c-a) and accepted only if r, s
// and r+s all lie in [0, 1]. *storeT may have been written even when the
// function returns false.
__device__ bool hit_triangle(triangle triangle, double3 pos, double3 direction,
        double minT, double maxT, double* storeT)
{
    const double planeOffset = DOT(triangle.normal, triangle.a);
    if(!intersect(triangle.normal, planeOffset, pos, direction, minT, maxT, storeT))
        return false;

    double3 edgeAB = triangle.b - triangle.a;
    double3 edgeAC = triangle.c - triangle.a;
    double3 hitPoint = pos + *storeT * direction;

    double alongAB, alongAC;
    point_parameters(triangle.a, edgeAB, edgeAC, hitPoint, &alongAB, &alongAC);

    if(!(0.0 <= alongAB && alongAB <= 1.0))
        return false;
    if(!(0.0 <= alongAC && alongAC <= 1.0))
        return false;

    const double total = alongAB + alongAC;
    return 0.0 <= total && total <= 1.0;
}
// Renders one pixel per thread by casting a ray against every triangle.
//
// Launch layout: 2-D grid of 2-D blocks; thread (x, y) of the whole grid maps
// to pixel (sx, sy). Threads that fall outside width x height render nothing.
//
// screen       RGBA output, 4 bytes per pixel, row-major, width*height pixels.
//              Pixels whose ray hits nothing are left untouched.
// triangles    triangleCount triangles to test.
// viewMin*/viewStep*  pixel (sx, sy) maps to the plane coordinates
//              (viewMinX + sx*viewStepX, viewMinY + sy*viewStepY).
// viewBase/viewX/viewY  origin and axes of the view plane; the ray starts at
//              the plane point and points away from viewPoint.
// light        shading is 0.5 - DOT(normal, light)/2 times the triangle colour.
// progress     counter incremented once per block by the block's (0, 0)
//              thread when that thread finishes, followed by a system-wide
//              fence.
__global__ void render_pixel(uint8_t* __restrict__ screen, const triangle* __restrict__ triangles,
        int width, int height, int triangleCount,
        double viewMinX, double viewMinY,
        double viewStepX, double viewStepY,
        double3 viewPoint, double3 viewX, double3 viewY, double3 viewBase,
        double3 light,
        volatile int* progress)
{
    const int sx = hipBlockDim_x * hipBlockIdx_x + hipThreadIdx_x;
    const int sy = hipBlockDim_y * hipBlockIdx_y + hipThreadIdx_y;

    const bool reportsProgress = (hipThreadIdx_x == 0) && (hipThreadIdx_y == 0);
    const bool onScreen = (sx >= 0) && (sx < width) && (sy >= 0) && (sy < height);

    if(onScreen)
    {
        const double planeX = viewMinX + sx * viewStepX;
        const double planeY = viewMinY + sy * viewStepY;

        double3 origin = viewBase + planeX * viewX + planeY * viewY;
        double3 ray = origin - viewPoint;

        // Keep the nearest hit; the current nearest distance is passed as the
        // upper bound so farther triangles are rejected by the range test.
        bool found = false;
        double nearestT = std::numeric_limits<double>::max();
        triangle nearest;
        for(int index = 0; index < triangleCount; ++index)
        {
            triangle candidate = triangles[index];

            double candidateT;
            if(hit_triangle(candidate, origin, ray, 0.0, nearestT, &candidateT))
            {
                nearestT = candidateT;
                nearest = candidate;
                found = true;
            }
        }

        if(found)
        {
            float3 pixel = nearest.color * (0.5 - DOT(nearest.normal, light)/2);

            uint8_t* const rgba = screen + (4*sy*width + 4*sx);
            rgba[0] = (uint8_t) (pixel.x * 255.0f);
            rgba[1] = (uint8_t) (pixel.y * 255.0f);
            rgba[2] = (uint8_t) (pixel.z * 255.0f);
            rgba[3] = (uint8_t) (255);
        }
    }

    // Single exit path: on-screen and off-screen threads report the same way.
    if(reportsProgress)
    {
        atomicAdd((int *)progress, 1);
        __threadfence_system();
    }
}
int main(int argc, char *argv[])
{
if(argc < 5)
{
std::cerr << "./simple_renderer INPUT OUTPUT WIDTH HEIGHT" << std::endl;
return 2;
}
std::vector<triangle> mesh;
std::cout << "Reading mesh from file..." << std::flush;
read_stl(argv[1], mesh);
std::cout << " Done!" << std::endl;
double3 viewPoint = make_double3(0, -100, 0);
double3 viewX = make_double3(1, 0, 0);
double3 viewY = make_double3(0, 0, -1);
double3 viewBase = make_double3(-0.5, -1, 0.5);
double viewMinX = 0.0, viewMaxX = 1.0, viewResolutionX = std::stoi(argv[3]);
double viewMinY = 0.0, viewMaxY = 1.0, viewResolutionY = std::stoi(argv[4]);
double viewWidth = (viewMaxX-viewMinX);
double viewHeight = (viewMaxY-viewMinY);
double viewStepX = (viewMaxX-viewMinX)/viewResolutionX;
double viewStepY = (viewMaxY-viewMinY)/viewResolutionY;
unsigned width = (int)viewResolutionX, height = (int)viewResolutionY;
double3 light = make_double3(1, 0, -1);
light /= sqrt(light.x*light.x + light.y*light.y + light.z*light.z);
std::cout << std::endl;
std::cout << "Model properties:" << std::endl;
std::cout << " Triangle count: " << mesh.size() << std::endl;
std::cout << "Render properties:" << std::endl;
std::cout << " Resolution: " << viewResolutionX << "x" << viewResolutionY << std::endl;
std::cout << " Plane: " << viewWidth << "x" << viewHeight << std::endl;
std::cout << " Plane steps: " << viewStepX << "x" << viewStepY << std::endl;
std::cout << " Light: <" << light.x << ", " << light.y << ", " << light.z << ">" << std::endl;
hipDeviceProp_t devProp;
hipGetDeviceProperties(&devProp, 0);
std::cout << "HIP properties:" << std::endl;
std::cout << " System minor: " << devProp.minor << std::endl;
std::cout << " System major: " << devProp.major << std::endl;
std::cout << " agent prop name: " << devProp.name << std::endl;
std::cout << std::endl;
int triangleCount = mesh.size();
triangle* hostTriangles = mesh.data();
triangle* deviceTriangles;
int pixelCount = width * height;
uint8_t* hostScreen = (uint8_t*) malloc(pixelCount * 4 * sizeof(uint8_t));
uint8_t* deviceScreen;
volatile int *hostProgress, *deviceProgress;
HIP_ASSERT(hipHostMalloc((void**)&hostProgress, sizeof(int), hipHostMallocMapped));
HIP_ASSERT(hipHostGetDevicePointer((void**)&deviceProgress, (void*)hostProgress, 0));
*hostProgress = 0;
HIP_ASSERT(hipMalloc((void**)&deviceTriangles, triangleCount * sizeof(triangle)));
HIP_ASSERT(hipMalloc((void**)&deviceScreen, pixelCount * 4 * sizeof(uint8_t)));
std::cout << "Uploading mesh to GPU..." << std::flush;
HIP_ASSERT(hipMemcpy(deviceTriangles, hostTriangles,
triangleCount * sizeof(triangle), hipMemcpyHostToDevice));
HIP_ASSERT(hipMemset(deviceScreen, 0, pixelCount * 4 * sizeof(uint8_t)));
std::cout << " Done!" << std::endl;
hipStream_t stream;
HIP_ASSERT(hipStreamCreate(&stream));
std::cout << "Rendering..." << std::endl;
auto t1 = std::chrono::high_resolution_clock::now();
int blockWidth = 16, blockHeight = 16;
int gridWidth = width/blockWidth + 1, gridHeight = height/blockHeight + 1;
hipLaunchKernelGGL(render_pixel,
dim3(gridWidth, gridHeight),
dim3(blockWidth, blockHeight),
0, stream,
deviceScreen, deviceTriangles,
width, height, triangleCount,
viewMinX, viewMinY,
viewStepX, viewStepY,
viewPoint, viewX, viewY, viewBase,
light,
deviceProgress);
int maxProgress = gridWidth * gridHeight;
int progress = 0;
float lastPercent = -10.0f;
std::cout << std::fixed << std::setprecision(2);
while(progress < maxProgress)
{
// keep CPU load down and don't add extra time to the end
std::this_thread::sleep_for(std::chrono::milliseconds(1));
progress = *hostProgress;
float percent = (progress*100.0f)/((float)maxProgress);
if((percent - lastPercent) >= 0.01f)
{
std::cout << ' ' << percent << "% " << progress << "/" << maxProgress << " ";
lastPercent = percent;
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = t2 - t1;
auto d1 = std::chrono::duration_cast<std::chrono::seconds>( duration ).count();
auto d2 = 100.0 * (d1/percent) - d1;
std::cout << d1 << "s elapsed " << (int)d2 << "s remaining" << std::string(20, ' ') << '\r' << std::flush;
}
}
HIP_ASSERT(hipStreamSynchronize(stream));
auto t2 = std::chrono::high_resolution_clock::now();
auto duration = t2 - t1;
auto seconds = std::chrono::duration_cast<std::chrono::seconds>(duration);
std::chrono::duration<double, std::milli> durationMillis(duration-seconds);
std::cout << 100.0f << "% " << "Done! This took " << seconds.count() << " seconds and " << durationMillis.count() << " milliseconds!" << std::endl;
HIP_ASSERT(hipStreamDestroy(stream));
std::cout << "Downloading image from GPU..." << std::flush;
HIP_ASSERT(hipMemcpy(hostScreen, deviceScreen,
pixelCount * 4 * sizeof(uint8_t), hipMemcpyDeviceToHost));
std::cout << " Done!" << std::endl;
HIP_ASSERT(hipFree(deviceTriangles));
HIP_ASSERT(hipFree(deviceScreen));
std::cout << "Preparing PNG..." << std::flush;
std::vector<uint8_t> image(hostScreen, hostScreen + pixelCount * 4 * sizeof(uint8_t));
std::cout << " Done!" << std::endl;
free(hostScreen);
std::cout << "Encoding PNG..." << std::flush;
lodepng::encode(argv[2], image, width, height);
std::cout << " Done!" << std::endl;
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment