-
-
Save futureengine2/7c8fbc6fefce1818ff1edcd4d7e7bfcf to your computer and use it in GitHub Desktop.
/*
 * Runs the GI shader for one frame: uploads `in_bitmap` into the input
 * texture, builds all cascades from the top one down to cascade 0 and then
 * issues the final render draw call.
 *
 * in_bitmap  pixel data handed to gpu_texture_set_data() for the input
 *            texture (created below as a w x h rgb8 texture).
 * w, h       size of the input texture; also written into the `resolution`
 *            uniform.
 *
 * All GPU objects are created on the first call and kept in function-scope
 * statics. NOTE(review): the uniforms and texture sizes are derived from the
 * `w`/`h` of that first call only; later calls with a different size reuse
 * them unchanged -- confirm callers always pass the same size.
 */
static void gi_on_gpu(u8* in_bitmap, int w, int h) {
#define num_cascades 7
    static bool initialized;
    static gpu_bindgroup_t texture_bindgroup[2];
    static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades];
    static gpu_bindgroup_t render_uniform_bindgroup;
    static gpu_buffer_t uniform_buffer;
    static gpu_pipeline_t pipeline;
    static gpu_bindgroup_layout_t uniform_bindgroup_layout;
    static gpu_bindgroup_layout_t texture_bindgroup_layout;
    static gpu_texture_t textures[2];
    static gpu_texture_t input_texture;

    lifetime_t* lifetime = g_platform->lifetime;

    f32 d0 = 1.f;                  // distance between probes in cascade 0
    int r0 = 4;                    // number of rays in cascade 0
    int n0 = (int)floorf(2*w/d0);  // number of probes in cascade 0 per dimension
    int cn = num_cascades;

    // CPU-side mirror of the shader's std140 `Uniform` block; the explicit
    // padding members keep `resolution` on a 16-byte boundary.
    typedef struct {
        f32 d0;
        int r0;
        int n0;
        int ci;
        int cn;
        int do_render;
        int add_sky_light;
        int padding;
        v2 resolution;
        v2 padding2;
    } uniform_t;

    if (!initialized) {
        // scratch lifetime, only used for the shader source read below
        lifetime_t temp_lifetime = {0};
        initialized = true;

        // create bindgroup layouts
        uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi uniform bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_buffer,
                    .buffer.type = gpu_buffer_binding_type_uniform,
                },
            },
        });
        texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi texture bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
            },
        });

        // create pipeline
        pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){
            .name = "gi render shader",
            .code = file_read("shaders/gi.glsl", &temp_lifetime).bytes,
            .bgls = {
                uniform_bindgroup_layout,
                texture_bindgroup_layout,
            },
        });

        // create uniform buffer (we pack all our different uniforms in one buffer),
        // one slot per cascade and one for rendering
        {
            gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime);
            uniform_buffer = p.handle;

            // set cascade uniforms
            for (int i = 0; i < num_cascades; ++i) {
                *(uniform_t*)p.data = (uniform_t){
                    .d0 = d0,
                    .r0 = r0,
                    .n0 = n0,
                    .ci = i,
                    .cn = num_cascades,
                    .add_sky_light = 1,
                    .resolution = {(f32)w,(f32)h},
                };
                cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                    .name = "gi",
                    .layout = uniform_bindgroup_layout,
                    .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
                });
                gpu_uniform_packer_next(&p);
            }

            // set render uniform (last slot of the packed buffer)
            *(uniform_t*)p.data = (uniform_t){
                .d0 = d0,
                .r0 = r0,
                .n0 = n0,
                .ci = 0,
                .cn = num_cascades,
                .do_render = 1,
                .resolution = {(f32)w,(f32)h},
            };
            render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                .name = "gi",
                .layout = uniform_bindgroup_layout,
                .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
            });
            gpu_uniform_packer_end(&p);
        }

        // create textures
        input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime);
        // white border: the fragment shader treats pure white texels as empty space
        gpu_texture_set_border(input_texture, (color_t){1,1,1,1});
        // two cascade targets of r0*n0 x n0 texels, used as a ping-pong pair
        textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
        textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
        // texture_bindgroup[k] exposes textures[k] as the "previous cascade" sampler
        texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[0]}},
            },
        });
        texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[1]}},
            },
        });

        lifetime_destroy(&temp_lifetime);
    }

    // update input texture
    gpu_texture_set_data(input_texture, in_bitmap);

    // clear texture for pingponging: this is the texture the topmost cascade
    // (i == cn-1) binds as its "previous cascade" input
    gpu_texture_clear(textures[(cn-1)%2], (color_t){0});

    // build cascades, topmost first: pass i samples textures[i%2] and writes
    // textures[(i+1)%2]
    for (int i = cn-1; i >= 0; --i) {
        drawcall_render(&(drawcall_t){
            .pipeline = pipeline,
            .last_vertex = 6,
            .bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]},
            .outputs = {textures[(i+1)%2]},
        });
    }

    // render: no .outputs given; samples the texture cascade 0 was written to
    drawcall_render(&(drawcall_t){
        .pipeline = pipeline,
        .last_vertex = 6,
        .bindgroups = {render_uniform_bindgroup, texture_bindgroup[cn%2]},
    });
#undef num_cascades
}
#ifdef VERTEX_SHADER
out vec2 fuv;

// Emits a quad covering all of clip space from gl_VertexID alone; no vertex
// attributes are read. Draw with vertex count = 6 (two triangles).
void main(void) {
    const vec2 corners[6] = vec2[6](
        vec2(-1,-1), vec2(1,-1), vec2(1,1),   // first triangle
        vec2(-1,-1), vec2(1,1),  vec2(-1,1)   // second triangle
    );
    vec2 corner = corners[gl_VertexID%6];
    gl_Position = vec4(corner, 0, 1);

    // map [-1,1] to [0,1], then flip vertically
    vec2 uv = corner*0.5+0.5;
    fuv = vec2(uv.x, 1 - uv.y);
}
#endif /* VERTEX_SHADER */
#ifdef FRAGMENT_SHADER
// Per-draw parameters. Member order and padding must stay in sync with the
// struct the host code writes into the uniform buffer.
layout (std140, binding = 0) uniform Uniform
{
    float d0;               // spacing between neighbouring probes in cascade 0
    int r0;                 // ray count per probe in cascade 0
    int n0;                 // probes per dimension in cascade 0
    int ci;                 // index of the cascade being built
    int cn;                 // how many cascades there are in total
    int should_do_render;   // 1 = final render pass, otherwise a cascade is built
    int add_sky_light;      // non-zero = topmost cascade receives sky lighting on a miss
    int padding;
    vec2 u_resolution;      // size of the input texture
    vec2 padding4;
};

in vec2 fuv;
layout(location = 0) out vec4 ocolor;

layout(binding = 1) uniform sampler2D u_input; // world data that gets raytraced
layout(binding = 2) uniform sampler2D u_prev;  // previously built cascade (ping-ponged with the output texture)

const float PI = 3.1415927;
// raymarch2d: Implementation of Amanatides & Woo voxel marching algo.
// Traversal state; create with raymarch2d_make, advance with raymarch2d_next.
struct raymarch2d_t {
    int x, y;        // cell the traversal is currently in
    int sx, sy;      // per-axis step direction: +1, -1 or 0
    int ex, ey;      // per-axis sentinel cell index; reaching it ends the traversal
    float tmx, tmy;  // per-axis accumulated ray parameter; the smaller one decides which axis steps next
    float tdx, tdy;  // per-axis amount added to tmx/tmy on each step
};
// Prepares a cell-by-cell traversal of the ray from (x0,y0) to (x1,y1).
// Coordinates are in cell units: cell i covers [i, i+1) on each axis.
// The returned state starts in the cell containing (x0,y0); each call to
// raymarch2d_next moves it to the next cell along the ray.
raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
    raymarch2d_t res;
    res.x = int(floor(x0));
    res.y = int(floor(y0));
    res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
    res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;
    // sentinel: two steps past the cell that contains the end point
    res.ex = int(floor(x1)) + 2*res.sx;
    res.ey = int(floor(y1)) + 2*res.sy;

    // unit direction; a zero-length ray keeps (0,0) instead of becoming NaN
    float dx = x1 - x0;
    float dy = y1 - y0;
    float len = sqrt(dx*dx + dy*dy);
    if (len > 0.0) {
        float inv_len = 1.0/len;
        dx *= inv_len;
        dy *= inv_len;
    }

    // First boundary the ray crosses on each axis: the far edge of the start
    // cell when moving in +, its near edge when moving in -. Measuring to the
    // edge behind the ray (or with the wrong sign) skews the x/y step order.
    float bx = float(res.x) + (res.sx > 0 ? 1.0 : 0.0);
    float by = float(res.y) + (res.sy > 0 ? 1.0 : 0.0);

    // an axis the ray does not move along gets a huge tMax so it is never chosen
    res.tmx = dx == 0.0 ? 1e7 : (bx - x0)/dx;
    res.tmy = dy == 0.0 ? 1e7 : (by - y0)/dy;
    // ray length needed to cross one whole cell on each axis
    res.tdx = dx == 0.0 ? 0.0 : float(res.sx)/dx;
    res.tdy = dy == 0.0 ? 0.0 : float(res.sy)/dy;
    return res;
}
// Moves the traversal one cell along whichever axis has the smaller
// accumulated ray parameter. Returns false once the stepped axis has reached
// its sentinel index (r.ex / r.ey), true otherwise.
bool raymarch2d_next(inout raymarch2d_t r) {
    bool step_along_x = r.tmx < r.tmy;
    if (!step_along_x) {
        r.y += r.sy;
        r.tmy += r.tdy;
        return r.y != r.ey;
    }
    r.x += r.sx;
    r.tmx += r.tdx;
    return r.x != r.ex;
}
// Tone-maps `color` with the rational curve x(ax+b) / (x(cx+d)+e), clamped to
// [0,1]. The same curve is applied to the colour's luma, and the result is
// blended towards that grey value by luma^2 / (slope + luma).
vec3 tonemap_aces(vec3 color) {
    const float slope = 12.0;
    const float a = 2.51f;
    const float b = 0.03f;
    const float c = 2.43f;
    const float d = 0.59f;
    const float e = 0.14f;

    // rgb plus a weighted luma in the fourth component
    float luma = (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114);
    vec4 x = vec4(color.r, color.g, color.b, luma);

    vec4 numerator = x * (a * x + b);
    vec4 denominator = x * (c * x + d) + e;
    vec4 mapped = clamp(numerator / denominator, 0.0, 1.0);

    float grey_mix = x.a * x.a / (slope + x.a);
    return mix(mapped.rgb, mapped.aaa, grey_mix);
}
// Sky + sun radiance integrated over the direction interval
// [angle[0], angle[1]] (radians). Does not handle wrap-around; see sky().
vec3 sky_(vec2 angle) {
    float from = angle[0];
    float to = angle[1];

    // Sky integral formula taken from
    // Analytic Direct Illumination - Mathis
    // https://www.shadertoy.com/view/NttSW7
    const vec3 sky_color = vec3(0.2,0.5,1.);
    const vec3 sun_color = vec3(1.,0.7,0.1)*10.;
    const float sun_angle = 2.0;
    const float sun_s = 64.0; // NOTE(review): presumably controls how narrow the sun lobe is -- confirm against the reference
    const float sqrt_sun_s = sqrt(sun_s);
    const float inv_sqrt_sun_s = 1./sqrt_sun_s;

    vec3 sky_part = sky_color*(to-from-0.5*(cos(to)-cos(from)));
    vec3 sun_part = sun_color*(atan(sqrt_sun_s*(sun_angle-from))-atan(sqrt_sun_s*(sun_angle-to)))*inv_sqrt_sun_s;
    return (sky_part + sun_part) / 6.0;
}
// Integrate the radiance from the sky over an interval of directions.
// An interval whose end reaches past a full turn is split at 2*PI and the
// two pieces are summed.
vec3 sky(vec2 angle) {
    const float full_turn = 2.0 * PI;
    bool fits_in_one_turn = angle[1] < full_turn;
    if (!fits_in_one_turn) {
        vec3 up_to_turn = sky_(vec2(angle[0], full_turn));
        vec3 past_turn = sky_(vec2(0.0, angle[1] - full_turn));
        return up_to_turn + past_turn;
    }
    return sky_(angle);
}
// Reads u_prev at the centre of the texel that contains `texel`
// (given in pixel units).
vec4 gi_fetch_prev(vec2 texel) {
    vec2 centre = floor(texel) + 0.5;
    return texture(u_prev, centre / textureSize(u_prev, 0));
}

// Final pass: average all r0 rays of one cascade-0 probe and tone-map.
void gi_render_cascade0() {
    // sample probe in cascade 0
    float px = fuv.x * u_resolution.x;
    float py = fuv.y * u_resolution.y;
    float probe_x = round(px/d0);
    float probe_y = round(py/d0);

    vec3 sum = vec3(0,0,0);
    for (int ray = 0; ray < r0; ++ray) {
        sum += gi_fetch_prev(vec2(probe_x*r0 + ray, probe_y)).rgb;
    }
    ocolor = vec4(tonemap_aces(sum/r0),1);
}

// Cascade pass: every output texel is one ray of one probe of cascade `ci`.
// Texel (u, v) holds ray (u mod rn) of probe (u / rn, v).
void gi_build_cascade() {
    const int lm = 2; // ray distance branching factor. ray distance = 2^(lm*ci)
    const int rm = 1; // ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase
    const float intensity = 1.0;

    int u = int(gl_FragCoord.x);
    int v = int(gl_FragCoord.y);

    int n = n0 >> ci;         // number of probes in one dimension
    float d = d0*(1 << ci);   // distance between probes
    int rn = r0 << (rm*ci);   // number of pixels/rays per probe
    int xi = u/rn;            // probe index
    int yi = v;               // probe index
    int r = u - xi*rn;        // ray index

    // texels outside this cascade's probe grid stay empty
    if (xi >= n || xi < 0 || yi >= n || yi < 0) {
        ocolor = vec4(0,0,0,0);
        return;
    }

    // probe centre
    float half_d = d0*0.5f*(1 << ci);
    vec2 probe = vec2(xi * d + half_d, yi * d + half_d);

    // ray segment [ra, rb] along direction alpha
    float l = 0.5 * d0; // length of ray
    float ra = ci == 0 ? 0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
    float rb = l*(1 << (ci*lm));                   // end of ray length interval
    float alpha = 2*PI*(float(r)+0.5)/rn;
    vec2 dir = vec2(cos(alpha), sin(alpha));
    vec2 a = probe + dir*ra; // start of ray
    vec2 b = probe + dir*rb; // end of ray

    // walk the input texture cell by cell; the first texel that is not pure white is a hit
    vec4 col = vec4(0,0,0,0);
    raymarch2d_t march = raymarch2d_make(a.x, a.y, b.x, b.y);
    while (raymarch2d_next(march)) {
        vec2 uv = vec2((march.x+0.5)/u_resolution.x, (march.y+0.5)/u_resolution.y);
        vec3 texel = texture(u_input, uv).rgb;
        if (texel != vec3(1,1,1)) {
            col = vec4(texel*intensity,1);
            break;
        }
    }

    // if no hit, get from upper cascade
    // TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
    bool missed = col.a == 0;
    if (missed && ci == cn-1) {
        // topmost cascade: nothing above it, so fall back to the sky (or black)
        if (add_sky_light != 0)
            col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
        else
            col = vec4(0,0,0,0);
    }
    else if (missed) {
        int xi2 = (xi+1)/2; // probe index in upper
        int yi2 = (yi+1)/2; // probe index in upper
        int r2 = r << rm;   // ray index in upper
        int rn2 = rn << rm; // num rays in upper
        int n2 = n >> 1;    // num probes in upper

        // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
        float tx = 0.75 - 0.5*float(xi%2);
        float ty = 0.75 - 0.5*float(yi%2);

        // the four surrounding upper probes, clamped to the upper grid
        int x_lo = clamp(xi2-1, 0, n2-1);
        int x_hi = clamp(xi2, 0, n2-1);
        int y_lo = clamp(yi2-1, 0, n2-1);
        int y_hi = clamp(yi2, 0, n2-1);

        // loop through all the nearby rays in the upper cascade
        // TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
        // we should choose a better weighting than just treating them all equally
        vec4 upper = vec4(0,0,0,0);
        float frac = 1.0 / (1 << rm);
        for (int ri = 0; ri < (1 << rm); ++ri) {
            // texels of upper ray r2+ri in each of the four probes
            vec4 c00 = gi_fetch_prev(vec2(x_lo*rn2 + r2 + ri, y_lo));
            vec4 c10 = gi_fetch_prev(vec2(x_hi*rn2 + r2 + ri, y_lo));
            vec4 c01 = gi_fetch_prev(vec2(x_lo*rn2 + r2 + ri, y_hi));
            vec4 c11 = gi_fetch_prev(vec2(x_hi*rn2 + r2 + ri, y_hi));
            vec4 c = mix(mix(c00, c10, tx), mix(c01, c11, tx), ty);
            upper += c*frac;
        }
        col = upper;
    }

    ocolor = vec4(col.rgb, 1);
}

// Fragment entry point: the same shader either renders the final image or
// builds one cascade, selected by the should_do_render uniform.
void main(void) {
    if (should_do_render == 1) {
        gi_render_cascade0();
    }
    else {
        gi_build_cascade();
    }
}
#endif /* FRAGMENT_SHADER */
I was wondering, do you have a complete implementation of this? Particularly how you defined the opaque occluder geometry for this.
Also, what is the minimum geometry size?
> I was wondering, do you have a complete implementation of this? Particularly how you defined the opaque occluder geometry for this.
Hey Shaded, this is my recreation of Alexander's 2d implementation here where he draws on the screen to create occluders and emitters.
I did the same, and just put that info in a texture (called `u_input` in the shader) using the following scheme:
black = black occluder,
white = transparent (just air)
any other color = an emitter
But the algorithm is kind of agnostic to how you do the raytracing. You can do cone tracing through a voxel hierarchy, or sphere-trace through SDF, or whatever.
> Also, what is the minimum geometry size?
Depending on your raytrace method, the algorithm can be independent of the amount of geometry in terms of performance.
I believe in Path of Exile 2, Alexander uses something like a Gbuffer for raytracing (which works since it's top-down, so called 2.5D) so as long as you can render your geometry to a Gbuffer you're good.
I've seen other solutions that precompute a voxelization of the scene and cone-trace through that, which also makes performance independent of the amount of geometry.
Can this idea be merged with, or at least partly reused for, what Godot's HDDA does?
I'm not super familiar with HDDA, but if I understand correctly it's a data structure that allows for efficient raytracing. So yeah, you could use this method to decide which rays to sample and how to combine the results, and then use HDDA to trace those rays through the scene.
Does this idea have to be screen-space, or can it work in world space so that there aren't visual artifacts when merging?
Yes, you can definitely do this in 3D world space. I haven't done it myself, but Alexander has a world-space demo on his YouTube channel.
I can't think of any reason artefacts should be bad (in fact, I believe some artefacts, like ringing in 2D, don't appear in 3D). I don't know of anyone implementing a production-ready 3D version though.
There's still more research going on to reduce artefacts on the Graphics Programming Discord, recommend you check it out
I saw it, but to me it looks like it can't produce data on its own outside the camera view (turning the camera away from the light loses the data on the wall). That's why I am trying to see if it can be merged with HDDAGI to help it work in world space, like AMD Brixelizer's caching idea does.
Looks like he's got two 3D videos, one in screen space and one in world space.
Here's an example of the world space, you can see that he gets light from the models to the left and right outside of the view frustum:
https://youtu.be/5Ua-h1pg6yM?si=c6wdsT-LzlQTPC_l&t=37
There's some other artifacts going on that are probably coming from things like number of cascades being low, or low ray multiplication factor, probably could've used more parameter tweaking.
This is a neat website that tmpvar made; it lets you play with some of the parameters (screen-space only, though): https://tmpvar.com/poc/radiance-cascades/#flatland-2d
Btw this method is literally just a cleverer way of laying out and combining the results of your probes. How you calculate the value of your rays is entirely up to you.
awesome thanks for chatting
Thanks for the explanation! I totally figured the lifetime_t was an arena of some sort. Like I said, it was all so clear and obvious just from reading this small bit of it. If you ever did even release just the headers any nerds like me would love to read them!
I’m actually using WebGPU for my renderer as well so I am familiar with it.