-
-
Save futureengine2/7c8fbc6fefce1818ff1edcd4d7e7bfcf to your computer and use it in GitHub Desktop.
/*
 * Runs one frame of 2D radiance-cascades global illumination on the GPU.
 *
 * in_bitmap: pixel data uploaded to the w x h input texture every call
 *            (the texture is created as rgb8, so presumably w*h*3 bytes --
 *            TODO confirm against gpu_texture_set_data).
 * w, h:      size of the input bitmap in pixels.
 *
 * All GPU objects are created lazily on the first call and kept in
 * function-static storage. NOTE(review): they are sized from the w/h of that
 * first call and never recreated, so later calls appear to assume the same
 * dimensions -- confirm with callers.
 *
 * Per frame: upload the bitmap, build cascades from the highest index down to
 * cascade 0 while ping-ponging between two textures, then issue the render
 * pass that samples cascade 0.
 */
static void gi_on_gpu(u8* in_bitmap, int w, int h) {
#define num_cascades 7
    static bool initialized;
    static gpu_bindgroup_t texture_bindgroup[2];
    static gpu_bindgroup_t cascade_uniform_bindgroup[num_cascades];
    static gpu_bindgroup_t render_uniform_bindgroup;
    static gpu_buffer_t uniform_buffer;
    static gpu_pipeline_t pipeline;
    static gpu_bindgroup_layout_t uniform_bindgroup_layout;
    static gpu_bindgroup_layout_t texture_bindgroup_layout;
    static gpu_texture_t textures[2];
    static gpu_texture_t input_texture;

    lifetime_t* lifetime = g_platform->lifetime;

    f32 d0 = 1.f;                 // distance between probes in cascade 0
    int r0 = 4;                   // number of rays in cascade 0
    int n0 = (int)floorf(2*w/d0); // number of probes in cascade 0 per dimension
    int cn = num_cascades;

    // CPU-side image of one uniform slot. Field order and padding are written
    // to line up with the std140 `Uniform` block declared by the fragment
    // shader (eight 4-byte scalars, then two vec2s).
    typedef struct {
        f32 d0;
        int r0;
        int n0;
        int ci;
        int cn;
        int do_render;
        int add_sky_light;
        int padding;
        v2 resolution;
        v2 padding2;
    } uniform_t;

    if (!initialized) {
        // Scratch lifetime for data only needed during setup (the shader source).
        lifetime_t temp_lifetime = {0};
        initialized = true;

        // create bindgroup layouts
        uniform_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi uniform bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_buffer,
                    .buffer.type = gpu_buffer_binding_type_uniform,
                },
            },
        });
        texture_bindgroup_layout = gpu_bindgroup_layout_make(lifetime, &(gpu_bindgroup_layout_desc_t){
            .name = "gi texture bgl",
            .entries = {
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
                {
                    .visibility = gpu_visibility_fragment,
                    .type = gpu_binding_type_sampler,
                },
            },
        });

        // create pipeline
        pipeline = gpu_pipeline_make(lifetime, &(gpu_pipeline_desc_t){
            .name = "gi render shader",
            .code = file_read("shaders/gi.glsl", &temp_lifetime).bytes,
            .bgls = {
                uniform_bindgroup_layout,
                texture_bindgroup_layout,
            },
        });

        // create uniform buffer (we pack all our different uniforms in one buffer),
        // one slot per cascade and one for rendering
        {
            gpu_uniform_packer_t p = gpu_uniform_packer_begin(sizeof(uniform_t), num_cascades+1, lifetime);
            uniform_buffer = p.handle;

            // set cascade uniforms
            for (int i = 0; i < num_cascades; ++i) {
                *(uniform_t*)p.data = (uniform_t){
                    .d0 = d0,
                    .r0 = r0,
                    .n0 = n0,
                    .ci = i,
                    .cn = num_cascades,
                    .add_sky_light = 1,
                    .resolution = {(f32)w,(f32)h},
                };
                cascade_uniform_bindgroup[i] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                    .name = "gi",
                    .layout = uniform_bindgroup_layout,
                    .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
                });
                gpu_uniform_packer_next(&p);
            }

            // set render uniform (do_render = 1 switches the shader to its render branch)
            *(uniform_t*)p.data = (uniform_t){
                .d0 = d0,
                .r0 = r0,
                .n0 = n0,
                .ci = 0,
                .cn = num_cascades,
                .do_render = 1,
                .resolution = {(f32)w,(f32)h},
            };
            render_uniform_bindgroup = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
                .name = "gi",
                .layout = uniform_bindgroup_layout,
                .entries = {gpu_uniform_packer_bindgroup_entry(&p)},
            });
            gpu_uniform_packer_end(&p);
        }

        // create textures
        input_texture = gpu_texture_make(w, h, gpu_texture_format_rgb8, filter_type_nearest, false, lifetime);
        // The fragment shader treats pure white as "no hit", so a white border
        // lets rays that leave the bitmap keep going.
        gpu_texture_set_border(input_texture, (color_t){1,1,1,1});
        textures[0] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);
        textures[1] = gpu_texture_make(r0*n0, n0, gpu_texture_format_rgba8, filter_type_nearest, false, lifetime);

        // texture_bindgroup[k] samples the input bitmap and textures[k]
        texture_bindgroup[0] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[0]}},
            },
        });
        texture_bindgroup[1] = gpu_bindgroup_make(lifetime, &(gpu_bindgroup_desc_t){
            .name = "gi",
            .layout = texture_bindgroup_layout,
            .entries = {
                {.sampler = {input_texture}},
                {.sampler = {textures[1]}},
            },
        });

        lifetime_destroy(&temp_lifetime);
    }

    // update input texture
    gpu_texture_set_data(input_texture, in_bitmap);

    // clear texture for pingponging: this is the texture the topmost cascade
    // pass has bound as its "previous cascade" input
    gpu_texture_clear(textures[(cn-1)%2], (color_t){0});

    // build cascades, highest first: pass i samples textures[i%2] and writes
    // textures[(i+1)%2]
    for (int i = cn-1; i >= 0; --i) {
        drawcall_render(&(drawcall_t){
            .pipeline = pipeline,
            .last_vertex = 6,
            .bindgroups = {cascade_uniform_bindgroup[i], texture_bindgroup[i%2]},
            .outputs = {textures[(i+1)%2]},
        });
    }

    // render
    // The last pass above (i == 0) always writes textures[(0+1)%2] == textures[1],
    // whatever the cascade count is, so the render pass must sample
    // texture_bindgroup[1]. (Indexing with cn%2 was only correct for odd cn.)
    drawcall_render(&(drawcall_t){
        .pipeline = pipeline,
        .last_vertex = 6,
        .bindgroups = {render_uniform_bindgroup, texture_bindgroup[1]},
    });
#undef num_cascades
}
#ifdef VERTEX_SHADER

out vec2 fuv;

// Vertex stage: emits a screen-covering quad as two triangles, picking the
// corner purely from gl_VertexID. Issue the draw with vertex count = 6.
void main(void) {
    vec2 corners[6] = vec2[6](
        vec2(-1,-1),
        vec2(1,-1),
        vec2(1,1),
        vec2(-1,-1),
        vec2(1,1),
        vec2(-1,1)
    );
    vec2 ndc = corners[gl_VertexID%6];
    gl_Position = vec4(ndc, 0, 1);

    // Map clip space [-1,1] to [0,1] and flip the vertical axis.
    vec2 uv = ndc*0.5+0.5;
    fuv = vec2(uv.x, 1 - uv.y);
}

#endif /* VERTEX_SHADER */
#ifdef FRAGMENT_SHADER | |
// Per-pass parameters. Member order and the explicit padding members must stay
// in sync with the host-side struct that fills this buffer.
layout (std140, binding = 0) uniform Uniform
{
    float d0;               // spacing between neighbouring probes in cascade 0
    int r0;                 // ray count per probe in cascade 0
    int n0;                 // probes per dimension in cascade 0
    int ci;                 // index of the cascade being built
    int cn;                 // total cascade count
    int should_do_render;   // 1 = run the render branch instead of building a cascade
    int add_sky_light;      // 1 = add sky lighting in the uppermost cascade
    int padding;
    vec2 u_resolution;      // size of the input texture
    vec2 padding4;
};

layout(binding = 1) uniform sampler2D u_input;  // world data the rays are marched through
layout(binding = 2) uniform sampler2D u_prev;   // previous cascade (ping-ponged with the output texture)

in vec2 fuv;
layout(location = 0) out vec4 ocolor;

const float PI = 3.1415927;
// raymarch2d: state for an Amanatides & Woo style voxel (grid cell) march.
struct raymarch2d_t {
    int x, y;         // cell currently visited
    int sx, sy;       // per-axis step (-1, 0 or +1)
    int ex, ey;       // per-axis cell index at which the march stops
    float tmx, tmy;   // ray parameter at the next cell boundary on each axis
    float tdx, tdy;   // ray parameter increment per cell on each axis
};
// Sets up a grid march from (x0,y0) towards (x1,y1), both in cell units.
//
// The returned state starts in the cell containing (x0,y0). The stop indices
// ex/ey are placed two cells past the cell containing (x1,y1) along the step
// direction, so raymarch2d_next() keeps returning true slightly beyond the
// end point.
//
// tmx/tmy are the distances along the normalized ray to the first cell
// boundary AHEAD of the start on each axis:
//   stepping +: (floor(p0) + 1 - p0) / |d|
//   stepping -: (p0 - floor(p0))     / |d|
// Measuring from the lower boundary for both directions would give the
// distance to the boundary behind the ray for positive steps and a negative
// value for negative steps, which picks the wrong axis to step on.
raymarch2d_t raymarch2d_make(float x0, float y0, float x1, float y1) {
    raymarch2d_t res;
    res.x = int(floor(x0));
    res.y = int(floor(y0));
    res.sx = x0 < x1 ? 1 : x1 < x0 ? -1 : 0;
    res.sy = y0 < y1 ? 1 : y1 < y0 ? -1 : 0;
    res.ex = int(floor(x1)) + 2*res.sx;
    res.ey = int(floor(y1)) + 2*res.sy;

    float dx = x1 - x0;
    float dy = y1 - y0;
    float len2 = dx*dx + dy*dy;
    // A zero-length ray would otherwise produce inf/NaN here; with l = 0 both
    // axes take the "no movement" path below and the march ends on its first step.
    float l = len2 > 0.0 ? 1.0/sqrt(len2) : 0.0;
    dx *= l;
    dy *= l;

    // Distance (in cell units, along the axis) to the first boundary ahead.
    float bx = res.sx > 0 ? float(res.x + 1) - x0 : x0 - float(res.x);
    float by = res.sy > 0 ? float(res.y + 1) - y0 : y0 - float(res.y);

    res.tmx = dx == 0.0 ? 10000000.0 : bx/abs(dx);
    res.tmy = dy == 0.0 ? 10000000.0 : by/abs(dy);
    res.tdx = dx == 0.0 ? 0.0 : 1.0/abs(dx);
    res.tdy = dy == 0.0 ? 0.0 : 1.0/abs(dy);
    return res;
}
// Advances the march by one cell along whichever axis has the nearer boundary
// (ties go to the y axis). Returns false once the stepped axis reaches its
// stop index, true otherwise.
bool raymarch2d_next(inout raymarch2d_t r) {
    bool step_x = r.tmx < r.tmy;
    if (!step_x) {
        r.y += r.sy;
        r.tmy += r.tdy;
        return r.y != r.ey;
    }
    r.x += r.sx;
    r.tmx += r.tdx;
    return r.x != r.ex;
}
// Rational tonemapping curve applied per channel and to the luminance; the
// result blends from the per-channel value towards the tonemapped luminance
// as the input gets brighter.
vec3 tonemap_aces(vec3 color) {
    const float slope = 12.0;
    // curve coefficients
    const float A = 2.51;
    const float B = 0.03;
    const float C = 2.43;
    const float D = 0.59;
    const float E = 0.14;

    float luma = (color.r * 0.299) + (color.g * 0.587) + (color.b * 0.114);
    vec4 v = vec4(color.r, color.g, color.b, luma);

    vec4 numer = v * (A * v + B);
    vec4 denom = v * (C * v + D) + E;
    vec4 mapped = clamp(numer / denom, 0.0, 1.0);

    // blend weight grows with luminance
    float w = luma * luma / (slope + luma);
    return mix(mapped.rgb, mapped.aaa, w);
}
// Sky + sun radiance integrated over the direction interval
// [angle[0], angle[1]] (radians).
//
// Sky integral formula taken from
// Analytic Direct Illumination - Mathis
// https://www.shadertoy.com/view/NttSW7
vec3 sky_(vec2 angle) {
    const vec3 SkyColor = vec3(0.2,0.5,1.);
    const vec3 SunColor = vec3(1.,0.7,0.1)*10.;
    const float SunA = 2.0;
    const float SunS = 64.0;
    const float SSunS = sqrt(SunS);
    const float ISSunS = 1./SSunS;

    float lo = angle[0];
    float hi = angle[1];

    vec3 sky_term = SkyColor*(hi-lo-0.5*(cos(hi)-cos(lo)));
    vec3 sun_term = SunColor*(atan(SSunS*(SunA-lo))-atan(SSunS*(SunA-hi)))*ISSunS;
    return (sky_term + sun_term) / 6.0;
}
// Integrates the sky radiance over an interval of directions. An interval
// whose upper end reaches past a full turn is split at 2*PI and the
// overshoot is integrated starting again from angle 0.
vec3 sky(vec2 angle) {
    float full_turn = 2.0 * PI;
    float lo = angle[0];
    float hi = angle[1];

    if (hi < full_turn) {
        return sky_(angle);
    }
    vec3 before_wrap = sky_(vec2(lo, full_turn));
    vec3 after_wrap = sky_(vec2(0.0, hi - full_turn));
    return before_wrap + after_wrap;
}
// Fragment entry point with two modes, selected by should_do_render:
//  - render: average the rays of the cascade-0 probe covering this fragment
//    (read from u_prev), tonemap, and output the colour;
//  - build:  compute one texel of cascade `ci`. Each texel is one ray of one
//    probe: texel column = probe_x * rays_per_probe + ray_index, row = probe_y.
//    The ray is marched through u_input over this cascade's distance interval;
//    on a miss the value is merged from the next-higher cascade in u_prev
//    (or taken from the sky for the uppermost cascade).
void main(void) {
    if (should_do_render == 1) {
        // sample probe in cascade 0
        float x = fuv.x * u_resolution.x;
        float y = fuv.y * u_resolution.y;
        // Cascade-0 probe i sits at (i + 0.5)*d0 (see the build branch), so it
        // covers [i*d0, (i+1)*d0) and the covering probe index is floor(x/d0).
        // round() is wrong here: it is off by half a probe, and at fragment
        // centres x/d0 lands on .5 where GLSL leaves the rounding direction
        // to the implementation.
        float xi = floor(x/d0);
        float yi = floor(y/d0);
        vec3 c = vec3(0,0,0);
        for (int r = 0; r < r0; ++r) {
            vec2 pixelcoord = floor(vec2(xi*r0 + r, yi)) + 0.5;
            c += texture(u_prev, pixelcoord / textureSize(u_prev, 0)).rgb;
        }
        ocolor = vec4(tonemap_aces(c/r0),1);
    }
    else {
        // build cascade
        int u = int(gl_FragCoord.x);
        int v = int(gl_FragCoord.y);
        int lm = 2; // ray distance branching factor. ray distance = 2^(lm*ci)
        int rm = 1; // ray count branching factor. Num rays for cascade ci = r0*2^(rm*ci) = r0*(1 << rm*ci). NOTE: increasing this removes the property that total size of all cascades converges to 2x size of cascade 0, and instead leads to linear size increase
        int n = n0 >> ci;        // number of probes in one dimension
        float d = d0*(1 << ci);  // distance between probes
        int rn = r0 << (rm*ci);  // number of pixels/rays per probe
        int yi = v;              // probe index
        int xi = u/rn;           // probe index
        int r = u - xi*rn;       // ray index
        float dx = d0*0.5f*(1 << ci); // half the probe spacing: probes sit at cell centres
        float x = xi * d + dx;   // probe pos
        float y = yi * d + dx;   // probe pos
        float l = 0.5 * d0;      // length of ray
        float intensity = 1.0;

        // texels outside this cascade's probe grid carry no data
        if (xi >= n || xi < 0 || yi >= n || yi < 0) {
            ocolor = vec4(0,0,0,0);
            return;
        }

        float ra = ci == 0 ? 0 : l*(1 << ((ci-1)*lm)); // start of ray length interval
        float rb = l*(1 << (ci*lm));                    // end of ray length interval
        float alpha = 2*PI*(float(r)+0.5)/rn;
        vec2 rot = vec2(cos(alpha), sin(alpha));
        vec2 a = vec2(x,y) + rot*ra; // start of ray
        vec2 b = vec2(x,y) + rot*rb; // end of ray

        raymarch2d_t raym = raymarch2d_make(a.x, a.y, b.x, b.y);
        vec4 col = vec4(0,0,0,0);
        // Note: the march steps before the first sample, so the cell containing
        // the ray start is not tested.
        while (raymarch2d_next(raym)) {
            vec3 texel = texture(u_input, vec2((raym.x+0.5)/u_resolution.x, (raym.y+0.5)/u_resolution.y)).rgb;
            // pure white means "empty"; anything else is a hit
            if (texel != vec3(1,1,1)) {
                col = vec4(texel*intensity,1);
                break;
            }
        }

        // if no hit, get from upper cascade
        // TODO: do proper alpha blending to support transparent materials. Since we're only dealing with opaque materials for now it's fine
        if (col.a == 0) {
            if (ci == cn-1) {
                if (add_sky_light != 0)
                    col = vec4(sky(vec2(alpha, alpha + 2*PI/rn)) / (2*PI/rn), 1);
                else
                    col = vec4(0,0,0,0);
            }
            else {
                int xi2 = (xi+1)/2; // probe index in upper
                int yi2 = (yi+1)/2; // probe index in upper
                int r2 = r << rm;   // ray index in upper
                int rn2 = rn << rm; // num rays in upper
                int n2 = n >> 1;    // num probes in upper
                float tx = 0.75 - 0.5*float(xi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid
                float ty = 0.75 - 0.5*float(yi%2); // weighting of upper cascade. we can do this magic because we know how the probes are laid out in the grid

                // loop through all the nearby rays in the upper cascade
                // TODO: in the case where there are >2 rays in the upper cascade for each ray in this cascade (i.e. rm > 1),
                // we should choose a better weighting than just treating them all equally
                vec4 upper = vec4(0,0,0,0);
                float frac = 1.0 / (1 << rm);
                for (int ri = 0; ri < (1 << rm); ++ri) {
                    // pixel coordinates of ray r2+ri in the four surrounding upper probes
                    // (probe indices clamped to the upper grid)
                    vec2 pc1 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5;
                    vec2 pc2 = floor(vec2(clamp(xi2, 0, n2-1)*rn2 + r2 + ri, clamp(yi2-1, 0, n2-1))) + 0.5;
                    vec2 pc3 = floor(vec2(clamp(xi2-1, 0, n2-1)*rn2 + r2 + ri, clamp(yi2, 0, n2-1))) + 0.5;
                    vec2 pc4 = floor(vec2(clamp(xi2, 0, n2-1)*rn2 + r2 + ri, clamp(yi2, 0, n2-1))) + 0.5;
                    // bilinear blend of the four probes
                    vec4 c = mix(
                        mix(texture(u_prev, pc1 / textureSize(u_prev, 0)), texture(u_prev, pc2 / textureSize(u_prev, 0)), tx),
                        mix(texture(u_prev, pc3 / textureSize(u_prev, 0)), texture(u_prev, pc4 / textureSize(u_prev, 0)), tx),
                        ty
                    );
                    upper += c*frac;
                }
                col = upper;
            }
        }
        ocolor = vec4(col.rgb, 1);
    }
}
#endif /* FRAGMENT_SHADER */ |
Looks like he's got two 3D videos, one in screen space and one in world space.
Here's an example of the world space, you can see that he gets light from the models to the left and right outside of the view frustum:
https://youtu.be/5Ua-h1pg6yM?si=c6wdsT-LzlQTPC_l&t=37
There are some other artifacts going on that probably come from things like the number of cascades being low or a low ray multiplication factor; it probably could have used more parameter tweaking.
This is a neat website that tmpvar made that lets you play with some of the parameters (screen space only, though): https://tmpvar.com/poc/radiance-cascades/#flatland-2d
Btw this method is literally just a cleverer way of laying out and combining the results of your probes. How you calculate the value of your rays is entirely up to you.
awesome thanks for chatting
I saw it, but to me it looks like it can't produce data on its own outside the camera view (turning the camera away from the light loses the data on the wall). That's why I am trying to see if it can be merged with HDDAGI to help it with world space, like AMD Brixelizer's caching idea does.