diff --git a/Cargo.toml b/Cargo.toml index 03c104ca..681d4619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ gltf = { version = "1.1", default-features = false } log = "0.4" mint = "0.5" naga = { version = "22", features = ["wgsl-in"] } +nanorand = { version = "0.7", default-features = false } profiling = "1" slab = "0.4" strum = { version = "0.25", features = ["derive"] } @@ -79,7 +80,7 @@ glam = { workspace = true } log = { workspace = true } mint = { workspace = true, features = ["serde"] } naga = { workspace = true } -nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } +nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } ron = "0.8" serde = { version = "1", features = ["serde_derive"] } @@ -95,7 +96,6 @@ egui-winit = "0.28" console_error_panic_hook = "0.1.7" console_log = "1" web-sys = { workspace = true, features = ["Window"] } -getrandom = { version = "0.2", features = ["js"] } [target.'cfg(any(target_os = "windows", target_os = "linux"))'.dev-dependencies] renderdoc = "0.12" diff --git a/blade-graphics/src/util.rs b/blade-graphics/src/util.rs index 23e876e0..f0d2013e 100644 --- a/blade-graphics/src/util.rs +++ b/blade-graphics/src/util.rs @@ -94,14 +94,19 @@ impl super::TextureFormat { } } +impl super::Extent { + pub fn group_by(&self, size: [u32; 3]) -> [u32; 3] { + [ + (self.width + size[0] - 1) / size[0], + (self.height + size[1] - 1) / size[1], + (self.depth + size[2] - 1) / size[2], + ] + } +} + impl super::ComputePipeline { /// Return the dispatch group counts sufficient to cover the given extent. pub fn get_dispatch_for(&self, extent: super::Extent) -> [u32; 3] { - let wg_size = self.get_workgroup_size(); - [ - (extent.width + wg_size[0] - 1) / wg_size[0], - (extent.height + wg_size[1] - 1) / wg_size[1], - (extent.depth + wg_size[2] - 1) / wg_size[2], - ] + extent.group_by(self.get_workgroup_size()) } } diff --git a/blade-helpers/src/camera.rs b/blade-helpers/src/camera.rs index b866ca7f..246d014b 100644 --- a/blade-helpers/src/camera.rs +++ b/blade-helpers/src/camera.rs @@ -1,5 +1,7 @@ use super::ExposeHud; +const MAX_FLY_SPEED: f32 = 1000000.0; + pub struct ControlledCamera { pub inner: blade_render::Camera, pub fly_speed: f32, @@ -86,6 +88,14 @@ impl ControlledCamera { true } + + pub fn on_wheel(&mut self, delta: winit::event::MouseScrollDelta) { + let shift = match delta { + winit::event::MouseScrollDelta::LineDelta(_, lines) => lines, + winit::event::MouseScrollDelta::PixelDelta(position) => position.y as f32, + }; + self.fly_speed = (self.fly_speed * shift.exp()).clamp(1.0, MAX_FLY_SPEED); + } } impl ExposeHud for ControlledCamera { @@ -105,7 +115,7 @@ impl ExposeHud for ControlledCamera { }); ui.add(egui::Slider::new(&mut self.inner.fov_y, 0.5f32..=2.0f32).text("FOV")); ui.add( - egui::Slider::new(&mut self.fly_speed, 1f32..=100000f32) + egui::Slider::new(&mut self.fly_speed, 1f32..=MAX_FLY_SPEED) .text("Fly speed") .logarithmic(true), ); diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index a135a337..8940825f 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -15,27 +15,34 @@ impl ExposeHud for blade_render::RayConfig { ); ui.checkbox(&mut self.temporal_tap, "Temporal tap"); ui.add( - egui::widgets::Slider::new(&mut self.temporal_history, 0..=50).text("Temporal history"), + egui::widgets::Slider::new(&mut self.temporal_confidence, 0.0..=50.0) + .text("Temporal confidence"), ); ui.add(egui::widgets::Slider::new(&mut self.spatial_taps, 0..=10).text("Spatial taps")); ui.add( - egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) - .text("Spatial tap history"), + egui::widgets::Slider::new(&mut self.spatial_confidence, 0.0..=50.0) + .text("Spatial confidence"), ); + ui.add(egui::widgets::Slider::new(&mut self.group_mixer, 1..=10).text("Group mixer")); ui.add( - egui::widgets::Slider::new(&mut self.spatial_radius, 1..=50) - .text("Spatial radius (px)"), + egui::widgets::Slider::new(&mut self.spatial_min_distance, 1..=10) + .text("Spatial minimum distance (px)"), ); ui.add( egui::widgets::Slider::new(&mut self.t_start, 0.001..=0.5) .text("T min") .logarithmic(true), ); + ui.checkbox(&mut self.pairwise_mis, "Pairwise MIS"); + ui.add( + egui::widgets::Slider::new(&mut self.defensive_mis, 0.0..=1.0).text("Defensive MIS"), + ); } } impl ExposeHud for blade_render::DenoiserConfig { fn populate_hud(&mut self, ui: &mut egui::Ui) { + ui.checkbox(&mut self.enabled, "Enable denoiser"); ui.add(egui::Slider::new(&mut self.temporal_weight, 0.0..=1.0f32).text("Temporal weight")); ui.add(egui::Slider::new(&mut self.num_passes, 0..=5u32).text("A-trous passes")); } @@ -57,6 +64,15 @@ impl ExposeHud for blade_render::PostProcConfig { } } +impl ExposeHud for blade_render::FrameConfig { + fn populate_hud(&mut self, ui: &mut egui::Ui) { + ui.horizontal(|ui| { + self.reset_reservoirs |= ui.button("Reset Accumulation").clicked(); + ui.toggle_value(&mut self.frozen, "Freeze"); + }); + } +} + impl ExposeHud for blade_render::DebugConfig { fn populate_hud(&mut self, ui: &mut egui::Ui) { use strum::IntoEnumIterator as _; diff --git a/blade-render/code/a-trous.wgsl b/blade-render/code/a-trous.wgsl new file mode 100644 index 00000000..d4eda9e6 --- /dev/null +++ b/blade-render/code/a-trous.wgsl @@ -0,0 +1,79 @@ +#include "quaternion.inc.wgsl" +#include "surface.inc.wgsl" + +// Spatio-temporal variance-guided filtering +// https://research.nvidia.com/sites/default/files/pubs/2017-07_Spatiotemporal-Variance-Guided-Filtering%3A//svgf_preprint.pdf + +// Note: using "ilm" in place of "illumination and the 2nd moment of its luminance" + +struct Params { + extent: vec2, + iteration: u32, +} + +var params: Params; +var t_depth: texture_2d; +var t_flat_normal: texture_2d; +var input: texture_2d; +var output: texture_storage_2d; + +const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); +const MIN_WEIGHT: f32 = 0.01; + +fn read_surface(pixel: vec2) -> Surface { + var surface = Surface(); + surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); + surface.depth = textureLoad(t_depth, pixel, 0).x; + return surface; +} + +const GAUSSIAN_WEIGHTS = vec2(0.44198, 0.27901); +const SIGMA_L: f32 = 4.0; +const EPSILON: f32 = 0.001; + +fn compare_luminance(a_lum: f32, b_lum: f32, variance: f32) -> f32 { + return exp(-abs(a_lum - b_lum) / (SIGMA_L * variance + EPSILON)); +} + +fn w4(w: f32) -> vec4 { + return vec4(vec3(w), w * w); +} + +@compute @workgroup_size(8, 8) +fn atrous3x3(@builtin(global_invocation_id) global_id: vec3) { + let center = vec2(global_id.xy); + if (any(center >= params.extent)) { + return; + } + + let center_ilm = textureLoad(input, center, 0); + let center_luma = dot(center_ilm.xyz, LUMA); + let variance = sqrt(center_ilm.w); + let center_suf = read_surface(center); + var sum_weight = GAUSSIAN_WEIGHTS[0] * GAUSSIAN_WEIGHTS[0]; + var sum_ilm = w4(sum_weight) * center_ilm; + + for (var yy=-1; yy<=1; yy+=1) { + for (var xx=-1; xx<=1; xx+=1) { + let p = center + vec2(xx, yy) * (1i << params.iteration); + if (all(p == center) || any(p < vec2(0)) || any(p >= params.extent)) { + continue; + } + + //TODO: store in group-shared memory + let surface = read_surface(p); + var weight = GAUSSIAN_WEIGHTS[abs(xx)] * GAUSSIAN_WEIGHTS[abs(yy)]; + //TODO: make it stricter on higher iterations + weight *= compare_flat_normals(surface.flat_normal, center_suf.flat_normal); + //Note: should we use a projected depth instead of the surface one? + weight *= compare_depths(surface.depth, center_suf.depth); + let other_ilm = textureLoad(input, p, 0); + weight *= compare_luminance(center_luma, dot(other_ilm.xyz, LUMA), variance); + sum_ilm += w4(weight) * other_ilm; + sum_weight += weight; + } + } + + let filtered_ilm = select(center_ilm, sum_ilm / w4(sum_weight), sum_weight > MIN_WEIGHT); + textureStore(output, global_id.xy, filtered_ilm); +} diff --git a/blade-render/code/accum.inc.wgsl b/blade-render/code/accum.inc.wgsl new file mode 100644 index 00000000..5b30c0a2 --- /dev/null +++ b/blade-render/code/accum.inc.wgsl @@ -0,0 +1,21 @@ +const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); +const MOTION_FACTOR: f32 = 0.1; + +var inout_diffuse: texture_storage_2d; + +fn accumulate_temporal( + pixel: vec2, cur_illumination: vec3, + temporal_weight: f32, prev_pixel: vec2, + motion_sqr: f32, +) { + var illumination = cur_illumination; + if (prev_pixel.x >= 0 && temporal_weight < 1.0) { + let factor = mix(temporal_weight, 1.0, min(pow(motion_sqr, 0.25) * MOTION_FACTOR, 1.0)); + let prev_illumination = textureLoad(inout_diffuse, prev_pixel).xyz; + illumination = mix(prev_illumination, illumination, factor); + } + + let luminocity = dot(illumination, LUMA); + let ilm = vec4(illumination, luminocity * luminocity); + textureStore(inout_diffuse, pixel, ilm); +} diff --git a/blade-render/code/blur.wgsl b/blade-render/code/blur.wgsl deleted file mode 100644 index 3207ef60..00000000 --- a/blade-render/code/blur.wgsl +++ /dev/null @@ -1,162 +0,0 @@ -#include "camera.inc.wgsl" -#include "gbuf.inc.wgsl" -#include "quaternion.inc.wgsl" -#include "surface.inc.wgsl" - -// Spatio-temporal variance-guided filtering -// https://research.nvidia.com/sites/default/files/pubs/2017-07_Spatiotemporal-Variance-Guided-Filtering%3A//svgf_preprint.pdf - -// Note: using "ilm" in place of "illumination and the 2nd moment of its luminance" - -struct Params { - extent: vec2, - temporal_weight: f32, - iteration: u32, - use_motion_vectors: u32, -} - -var camera: CameraParams; -var prev_camera: CameraParams; -var params: Params; -var t_depth: texture_2d; -var t_prev_depth: texture_2d; -var t_flat_normal: texture_2d; -var t_prev_flat_normal: texture_2d; -var t_motion: texture_2d; -var input: texture_2d; -var prev_input: texture_2d; -var output: texture_storage_2d; - -const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); -const MIN_WEIGHT: f32 = 0.01; - -fn read_surface(pixel: vec2) -> Surface { - var surface = Surface(); - surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_depth, pixel, 0).x; - return surface; -} -fn read_prev_surface(pixel: vec2) -> Surface { - var surface = Surface(); - surface.flat_normal = normalize(textureLoad(t_prev_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_prev_depth, pixel, 0).x; - return surface; -} - -fn get_prev_pixel(pixel: vec2, pos_world: vec3) -> vec2 { - if (USE_MOTION_VECTORS && params.use_motion_vectors != 0u) { - let motion = textureLoad(t_motion, pixel, 0).xy / MOTION_SCALE; - return vec2(pixel) + 0.5 + motion; - } else { - return get_projected_pixel_float(prev_camera, pos_world); - } -} - -@compute @workgroup_size(8, 8) -fn temporal_accum(@builtin(global_invocation_id) global_id: vec3) { - let pixel = vec2(global_id.xy); - if (any(pixel >= params.extent)) { - return; - } - - let surface = read_surface(pixel); - let pos_world = camera.position + surface.depth * get_ray_direction(camera, pixel); - // considering all samples in 2x2 quad, to help with edges - var center_pixel = get_prev_pixel(pixel, pos_world); - var prev_pixels = array, 4>( - vec2(vec2(center_pixel.x - 0.5, center_pixel.y - 0.5)), - vec2(vec2(center_pixel.x + 0.5, center_pixel.y - 0.5)), - vec2(vec2(center_pixel.x + 0.5, center_pixel.y + 0.5)), - vec2(vec2(center_pixel.x - 0.5, center_pixel.y + 0.5)), - ); - //Note: careful about the pixel center when there is a perfect match - let w_bot_right = fract(center_pixel + vec2(0.5)); - var prev_weights = vec4( - (1.0 - w_bot_right.x) * (1.0 - w_bot_right.y), - w_bot_right.x * (1.0 - w_bot_right.y), - w_bot_right.x * w_bot_right.y, - (1.0 - w_bot_right.x) * w_bot_right.y, - ); - - var sum_weight = 0.0; - var sum_ilm = vec4(0.0); - //TODO: optimize depth load with a gather operation - for (var i = 0; i < 4; i += 1) { - let prev_pixel = prev_pixels[i]; - if (all(prev_pixel >= vec2(0)) && all(prev_pixel < params.extent)) { - let prev_surface = read_prev_surface(prev_pixel); - if (compare_flat_normals(surface.flat_normal, prev_surface.flat_normal) < 0.5) { - continue; - } - let projected_distance = length(pos_world - prev_camera.position); - if (compare_depths(prev_surface.depth, projected_distance) < 0.5) { - continue; - } - let w = prev_weights[i]; - sum_weight += w; - let illumination = w * textureLoad(prev_input, prev_pixel, 0).xyz; - let luminocity = dot(illumination, LUMA); - sum_ilm += vec4(illumination, luminocity * luminocity); - } - } - - let cur_illumination = textureLoad(input, pixel, 0).xyz; - let cur_luminocity = dot(cur_illumination, LUMA); - var mixed_ilm = vec4(cur_illumination, cur_luminocity * cur_luminocity); - if (sum_weight > MIN_WEIGHT) { - let prev_ilm = sum_ilm / vec4(vec3(sum_weight), max(0.001, sum_weight*sum_weight)); - mixed_ilm = mix(mixed_ilm, prev_ilm, sum_weight * (1.0 - params.temporal_weight)); - } - textureStore(output, global_id.xy, mixed_ilm); -} - -const GAUSSIAN_WEIGHTS = vec2(0.44198, 0.27901); -const SIGMA_L: f32 = 4.0; -const EPSILON: f32 = 0.001; - -fn compare_luminance(a_lum: f32, b_lum: f32, variance: f32) -> f32 { - return exp(-abs(a_lum - b_lum) / (SIGMA_L * variance + EPSILON)); -} - -fn w4(w: f32) -> vec4 { - return vec4(vec3(w), w * w); -} - -@compute @workgroup_size(8, 8) -fn atrous3x3(@builtin(global_invocation_id) global_id: vec3) { - let center = vec2(global_id.xy); - if (any(center >= params.extent)) { - return; - } - - let center_ilm = textureLoad(input, center, 0); - let center_luma = dot(center_ilm.xyz, LUMA); - let variance = sqrt(center_ilm.w); - let center_suf = read_surface(center); - var sum_weight = GAUSSIAN_WEIGHTS[0] * GAUSSIAN_WEIGHTS[0]; - var sum_ilm = w4(sum_weight) * center_ilm; - - for (var yy=-1; yy<=1; yy+=1) { - for (var xx=-1; xx<=1; xx+=1) { - let p = center + vec2(xx, yy) * (1i << params.iteration); - if (all(p == center) || any(p < vec2(0)) || any(p >= params.extent)) { - continue; - } - - //TODO: store in group-shared memory - let surface = read_surface(p); - var weight = GAUSSIAN_WEIGHTS[abs(xx)] * GAUSSIAN_WEIGHTS[abs(yy)]; - //TODO: make it stricter on higher iterations - weight *= compare_flat_normals(surface.flat_normal, center_suf.flat_normal); - //Note: should we use a projected depth instead of the surface one? - weight *= compare_depths(surface.depth, center_suf.depth); - let other_ilm = textureLoad(input, p, 0); - weight *= compare_luminance(center_luma, dot(other_ilm.xyz, LUMA), variance); - sum_ilm += w4(weight) * other_ilm; - sum_weight += weight; - } - } - - let filtered_ilm = select(center_ilm, sum_ilm / w4(sum_weight), sum_weight > MIN_WEIGHT); - textureStore(output, global_id.xy, filtered_ilm); -} diff --git a/blade-render/code/color.inc.wgsl b/blade-render/code/color.inc.wgsl new file mode 100644 index 00000000..8e84cfda --- /dev/null +++ b/blade-render/code/color.inc.wgsl @@ -0,0 +1,19 @@ +fn hsv_to_rgb(h: f32, s: f32, v: f32) -> vec3 { + let c = v * s; + let x = c * (1.0 - abs((h / 60.0) % 2.0 - 1.0)); + var q = vec3(v - c); + if (h < 60.0) { + q.r += c; q.g += x; + } else if (h < 120.0) { + q.g += c; q.r += x; + } else if (h < 180.0) { + q.g += c; q.b += x; + } else if (h < 240.0) { + q.b += c; q.g += x; + } else if (h < 300.0) { + q.b += c; q.r += x; + } else { + q.r += c; q.b += x; + } + return q; +} diff --git a/blade-render/code/env-importance.inc.wgsl b/blade-render/code/env-importance.inc.wgsl index 43421fce..f5a77914 100644 --- a/blade-render/code/env-importance.inc.wgsl +++ b/blade-render/code/env-importance.inc.wgsl @@ -11,7 +11,7 @@ fn compute_texel_solid_angle(itc: vec2, dim: vec2) -> f32 { return meridian_solid_angle * meridian_part; } -fn generate_environment_sample(rng: ptr, dim: vec2) -> EnvImportantSample { +fn generate_environment_sample(rng: ptr, dim: vec2) -> EnvImportantSample { var es = EnvImportantSample(); es.pdf = 1.0; var mip = i32(textureNumLevels(env_weights)); diff --git a/blade-render/code/fill-gbuf.wgsl b/blade-render/code/fill-gbuf.wgsl deleted file mode 100644 index 346edf51..00000000 --- a/blade-render/code/fill-gbuf.wgsl +++ /dev/null @@ -1,204 +0,0 @@ -#include "quaternion.inc.wgsl" -#include "camera.inc.wgsl" -#include "debug.inc.wgsl" -#include "debug-param.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: use proper WGSL -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; - -// Has to match the host! -struct Vertex { - pos: vec3, - bitangent_sign: f32, - tex_coords: vec2, - normal: u32, - tangent: u32, -} -struct VertexBuffer { - data: array, -} -struct IndexBuffer { - data: array, -} -var vertex_buffers: binding_array; -var index_buffers: binding_array; -var textures: binding_array>; -var sampler_linear: sampler; -var sampler_nearest: sampler; - -struct HitEntry { - index_buf: u32, - vertex_buf: u32, - winding: f32, - // packed quaternion - geometry_to_world_rotation: u32, - geometry_to_object: mat4x3, - prev_object_to_world: mat4x3, - base_color_texture: u32, - // packed color factor - base_color_factor: u32, - normal_texture: u32, -} -var hit_entries: array; - -var camera: CameraParams; -var prev_camera: CameraParams; -var debug: DebugParams; -var acc_struct: acceleration_structure; - -var out_depth: texture_storage_2d; -var out_flat_normal: texture_storage_2d; -var out_basis: texture_storage_2d; -var out_albedo: texture_storage_2d; -var out_motion: texture_storage_2d; -var out_debug: texture_storage_2d; - -fn decode_normal(raw: u32) -> vec3 { - return unpack4x8snorm(raw).xyz; -} - -fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { - let nw = normalize(qrot(rotation, decode_normal(normal_raw))); - debug_line(pos, pos + debug_len * nw, color); -} - -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { - if (any(global_id.xy >= camera.target_size)) { - return; - } - - var rq: ray_query; - let ray_dir = get_ray_direction(camera, vec2(global_id.xy)); - rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); - rayQueryProceed(&rq); - let intersection = rayQueryGetCommittedIntersection(&rq); - - var depth = 0.0; - var basis = vec4(0.0); - var flat_normal = vec3(0.0); - var albedo = vec3(1.0); - var motion = vec2(0.0); - let enable_debug = all(global_id.xy == debug.mouse_pos); - - if (intersection.kind != RAY_QUERY_INTERSECTION_NONE) { - let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; - depth = intersection.t; - - var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); - if (entry.index_buf != ~0u) { - let iptr = &index_buffers[entry.index_buf].data; - indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); - } - - let vptr = &vertex_buffers[entry.vertex_buf].data; - let vertices = array( - (*vptr)[indices.x], - (*vptr)[indices.y], - (*vptr)[indices.z], - ); - - let positions_object = entry.geometry_to_object * mat3x4( - vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) - ); - let positions = intersection.object_to_world * mat3x4( - vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) - ); - flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); - - let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); - let position_object = vec4(positions_object * barycentrics, 1.0); - let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; - let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); - let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); - let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; - - let lod = 0.0; //TODO: this is actually complicated - - let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); - let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); - var normal_local: vec3; - if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { - normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map - } else { - let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; - normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); - } - var normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); - basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); - - let hit_position = camera.position + intersection.t * ray_dir; - if (enable_debug) { - debug_buf.entry.custom_index = intersection.instance_custom_index; - debug_buf.entry.depth = intersection.t; - debug_buf.entry.tex_coords = tex_coords; - debug_buf.entry.base_color_texture = entry.base_color_texture; - debug_buf.entry.normal_texture = entry.normal_texture; - debug_buf.entry.position = hit_position; - debug_buf.entry.flat_normal = flat_normal; - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { - let normal_len = 0.15 * intersection.t; - let side = 0.05 * intersection.t; - debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); - debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); - debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { - let debug_len = intersection.t * 0.2; - debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); - debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); - debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); - let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; - debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); - // note: dynamic indexing into positions isn't allowed by WGSL yet - debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - // draw tangent space - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); - } - - let base_color_factor = unpack4x8unorm(entry.base_color_factor); - if ((debug.texture_flags & DebugTextureFlags_ALBEDO) != 0u) { - albedo = base_color_factor.xyz; - } else { - let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); - albedo = (base_color_factor * base_color_sample).xyz; - } - - if (debug.view_mode == DebugMode_HitConsistency) { - let reprojected = get_projected_pixel(camera, hit_position); - let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; - let camera_projection_diff = vec2(global_id.xy) - vec2(reprojected); - let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); - textureStore(out_debug, global_id.xy, consistency); - } - - let prev_position = (entry.prev_object_to_world * position_object).xyz; - let prev_screen = get_projected_pixel_float(prev_camera, prev_position); - //TODO: consider just storing integers here? - //TODO: technically this "0.5" is just a waste compute on both packing and unpacking - motion = prev_screen - vec2(global_id.xy) - 0.5; - if (debug.view_mode == DebugMode_Motion) { - textureStore(out_debug, global_id.xy, vec4(motion * MOTION_SCALE + vec2(0.5), 0.0, 1.0)); - } - } else { - if (enable_debug) { - debug_buf.entry = DebugEntry(); - } - if (debug.view_mode != DebugMode_Final) { - textureStore(out_debug, global_id.xy, vec4(0.0)); - } - } - - // TODO: option to avoid writing data for the sky - textureStore(out_depth, global_id.xy, vec4(depth, 0.0, 0.0, 0.0)); - textureStore(out_basis, global_id.xy, basis); - textureStore(out_flat_normal, global_id.xy, vec4(flat_normal, 0.0)); - textureStore(out_albedo, global_id.xy, vec4(albedo, 0.0)); - textureStore(out_motion, global_id.xy, vec4(motion * MOTION_SCALE, 0.0, 0.0)); -} diff --git a/blade-render/code/gbuf.inc.wgsl b/blade-render/code/gbuf.inc.wgsl deleted file mode 100644 index ecb4642d..00000000 --- a/blade-render/code/gbuf.inc.wgsl +++ /dev/null @@ -1,2 +0,0 @@ -const MOTION_SCALE: f32 = 0.02; -const USE_MOTION_VECTORS: bool = true; \ No newline at end of file diff --git a/blade-render/code/geometry.inc.wgsl b/blade-render/code/geometry.inc.wgsl new file mode 100644 index 00000000..2ba8994c --- /dev/null +++ b/blade-render/code/geometry.inc.wgsl @@ -0,0 +1,187 @@ +//TODO: https://github.com/gfx-rs/wgpu/pull/5429 +const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; + +// Has to match the host! +struct Vertex { + pos: vec3, + bitangent_sign: f32, + tex_coords: vec2, + normal: u32, + tangent: u32, +} +struct VertexBuffer { + data: array, +} +struct IndexBuffer { + data: array, +} +var vertex_buffers: binding_array; +var index_buffers: binding_array; +var textures: binding_array>; + +struct HitEntry { + index_buf: u32, + vertex_buf: u32, + winding: f32, + // packed quaternion + geometry_to_world_rotation: u32, + geometry_to_object: mat4x3, + prev_object_to_world: mat4x3, + base_color_texture: u32, + // packed color factor + base_color_factor: u32, + normal_texture: u32, +} +var hit_entries: array; + +fn decode_normal(raw: u32) -> vec3 { + return unpack4x8snorm(raw).xyz; +} + +fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { + let nw = normalize(qrot(rotation, decode_normal(normal_raw))); + debug_line(pos, pos + debug_len * nw, color); +} + +struct RichSurface { + inner: Surface, + position: vec3, + albedo: vec3, + motion: vec2, +} + +fn fetch_geometry(pixel_coord: vec2, is_primary: bool, enable_debug: bool) -> RichSurface { + var rq: ray_query; + let ray_dir = get_ray_direction(camera, pixel_coord); + rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); + rayQueryProceed(&rq); + let intersection = rayQueryGetCommittedIntersection(&rq); + + var rs = RichSurface(); + rs.albedo = vec3(1.0); + + if (intersection.kind == RAY_QUERY_INTERSECTION_NONE) { + if (enable_debug) { + debug_buf.entry = DebugEntry(); + } + return rs; + } + + let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; + + var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); + if (entry.index_buf != ~0u) { + let iptr = &index_buffers[entry.index_buf].data; + indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); + } + + let vptr = &vertex_buffers[entry.vertex_buf].data; + let vertices = array( + (*vptr)[indices.x], + (*vptr)[indices.y], + (*vptr)[indices.z], + ); + + let positions_object = entry.geometry_to_object * mat3x4( + vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) + ); + let positions = intersection.object_to_world * mat3x4( + vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) + ); + let flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); + + let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); + let position_object = vec4(positions_object * barycentrics, 1.0); + let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; + let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); + let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); + let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; + + let lod = 0.0; //TODO: this is actually complicated + + let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); + let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); + var normal_local: vec3; + if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { + normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map + } else { + let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; + normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); + } + let normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); + let basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); + + let hit_position = camera.position + intersection.t * ray_dir; + if (enable_debug && is_primary) { + debug_buf.entry.custom_index = intersection.instance_custom_index; + debug_buf.entry.depth = intersection.t; + debug_buf.entry.tex_coords = tex_coords; + debug_buf.entry.base_color_texture = entry.base_color_texture; + debug_buf.entry.normal_texture = entry.normal_texture; + debug_buf.entry.position = hit_position; + debug_buf.entry.flat_normal = flat_normal; + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { + let normal_len = 0.15 * intersection.t; + let side = 0.05 * intersection.t; + debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); + debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); + debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { + let debug_len = intersection.t * 0.2; + debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); + debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); + debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); + let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; + debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); + // note: dynamic indexing into positions isn't allowed by WGSL yet + debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + // draw tangent space + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); + } + + rs.albedo = unpack4x8unorm(entry.base_color_factor).xyz; + if ((debug.texture_flags & DebugTextureFlags_ALBEDO) == 0u) { + let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); + rs.albedo *= base_color_sample.xyz; + } + + let prev_position = (entry.prev_object_to_world * position_object).xyz; + let prev_screen = get_projected_pixel_float(prev_camera, prev_position); + //TODO: consider just storing integers here? + //TODO: technically this "0.5" is just a waste compute on both packing and unpacking + rs.motion = prev_screen - vec2(pixel_coord) - 0.5; + rs.position = hit_position; + + if (is_primary) { + if (debug.view_mode == DebugMode_Depth) { + let value = 1.0 / intersection.t; + textureStore(out_debug, pixel_coord, vec4(value)); + } + if (debug.view_mode == DebugMode_Normal) { + textureStore(out_debug, pixel_coord, vec4(normal, 0.0)); + } + if (debug.view_mode == DebugMode_HitConsistency) { + let reprojected = get_projected_pixel(camera, hit_position); + let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; + let camera_projection_diff = vec2(pixel_coord - reprojected); + let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); + textureStore(out_debug, pixel_coord, consistency); + } + if (debug.view_mode == DebugMode_Motion) { + let motion = rs.motion * MOTION_SCALE; + textureStore(out_debug, pixel_coord, vec4(motion, 0.0, 0.0)); + } + } + + // Write down the Surface + rs.inner.basis = basis; + rs.inner.flat_normal = flat_normal; + rs.inner.depth = intersection.t; + return rs; +} diff --git a/blade-render/code/motion.inc.wgsl b/blade-render/code/motion.inc.wgsl new file mode 100644 index 00000000..3e721716 --- /dev/null +++ b/blade-render/code/motion.inc.wgsl @@ -0,0 +1 @@ +const MOTION_SCALE: f32 = 0.02; diff --git a/blade-render/code/random.inc.wgsl b/blade-render/code/random.inc.wgsl index 3f68478c..1bce9317 100644 --- a/blade-render/code/random.inc.wgsl +++ b/blade-render/code/random.inc.wgsl @@ -28,7 +28,7 @@ fn rot32(x: u32, bits: u32) -> u32 { } // https://en.wikipedia.org/wiki/MurmurHash -fn murmur3(rng: ptr) -> u32 { +fn murmur3(rng: ptr) -> u32 { let c1 = 0xcc9e2d51u; let c2 = 0x1b873593u; let r1 = 15u; @@ -56,7 +56,11 @@ fn murmur3(rng: ptr) -> u32 { return hash; } -fn random_gen(rng: ptr) -> f32 { +fn random_u32(rng: ptr) -> u32 { + return murmur3(rng); +} + +fn random_gen(rng: ptr) -> f32 { let v = murmur3(rng); let one = bitcast(1.0); let mask = (1u << 23u) - 1u; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index b27b1065..93cacd15 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -1,3 +1,4 @@ +#include "color.inc.wgsl" #include "quaternion.inc.wgsl" #include "random.inc.wgsl" #include "env-importance.inc.wgsl" @@ -5,36 +6,45 @@ #include "debug-param.inc.wgsl" #include "camera.inc.wgsl" #include "surface.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: use proper WGSL -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; +#include "geometry.inc.wgsl" +#include "motion.inc.wgsl" +#include "accum.inc.wgsl" const PI: f32 = 3.1415926; -const MAX_RESERVOIRS: u32 = 2u; -// See "9.1 pairwise mis for robust reservoir reuse" -// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" -const PAIRWISE_MIS: bool = true; -// Base MIS for canonical samples. The constant isolates a critical difference between -// Bitterli's pseudocode (where it's 1) and NVidia's RTXDI implementation (where it's 0). -// With Bitterli's 1 we have MIS not respecting the prior history enough. -const BASE_CANONICAL_MIS: f32 = 0.05; +const MAX_RESAMPLE: u32 = 4u; + +const DRAW_DEBUG: bool = false; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; +const WRITE_DEBUG_IMAGE: bool = false; +//TODO: currently unused +const WRITE_MOTION_VECTORS: bool = false; + +//TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 +const GROUP_SIZE: vec2 = vec2(8, 4); +const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; + +var p_debug_len: f32; +var p_rng: RandomState; struct MainParams { frame_index: u32, num_environment_samples: u32, environment_importance_sampling: u32, temporal_tap: u32, - temporal_history: u32, + temporal_tap_confidence: f32, spatial_taps: u32, - spatial_tap_history: u32, - spatial_radius: i32, + spatial_tap_confidence: f32, + spatial_min_distance: i32, t_start: f32, + use_pairwise_mis: u32, + defensive_mis: f32, use_motion_vectors: u32, -}; + temporal_accumulation_weight: f32, + pad: f32, + grid_scale: vec2, +} var camera: CameraParams; var prev_camera: CameraParams; @@ -54,7 +64,14 @@ struct StoredReservoir { confidence: f32, } var reservoirs: array; -var prev_reservoirs: array; + +struct PixelCache { + surface: Surface, + reservoir: StoredReservoir, + //Note: we could store direction XY in local camera space instead + world_pos: vec3, +} +var pixel_cache: array; struct LightSample { radiance: vec3, @@ -83,12 +100,6 @@ fn get_reservoir_index(pixel: vec2, camera: CameraParams) -> i32 { } } -fn get_pixel_from_reservoir_index(index: i32, camera: CameraParams) -> vec2 { - let y = index / i32(camera.target_size.x); - let x = index - y * i32(camera.target_size.x); - return vec2(x, y); -} - fn bump_reservoir(r: ptr, history: f32) { (*r).history += history; } @@ -102,9 +113,11 @@ fn make_reservoir(ls: LightSample, light_index: u32, brdf: vec3) -> LiveRes r.history = 1.0; return r; } -fn merge_reservoir(r: ptr, other: LiveReservoir, random: f32) -> bool { + +fn merge_reservoir(r: ptr, other: LiveReservoir) -> bool { (*r).weight_sum += other.weight_sum; (*r).history += other.history; + let random = random_gen(&p_rng); if ((*r).weight_sum * random < other.weight_sum) { (*r).selected_light_index = other.selected_light_index; (*r).selected_uv = other.selected_uv; @@ -115,24 +128,24 @@ fn merge_reservoir(r: ptr, other: LiveReservoir, random return false; } } -fn unpack_reservoir(f: StoredReservoir, max_history: u32) -> LiveReservoir { +fn unpack_reservoir(f: StoredReservoir, max_confidence: f32, radiance: vec3) -> LiveReservoir { var r: LiveReservoir; r.selected_light_index = f.light_index; r.selected_uv = f.light_uv; r.selected_target_score = f.target_score; - r.radiance = vec3(0.0); // to be continued... - let history = min(f.confidence, f32(max_history)); + r.radiance = radiance; + let history = min(f.confidence, max_confidence); r.weight_sum = f.contribution_weight * f.target_score * history; r.history = history; return r; } -fn pack_reservoir_detail(r: LiveReservoir, denom_factor: f32) -> StoredReservoir { +fn pack_reservoir_detail(r: LiveReservoir, effective_history: f32) -> StoredReservoir { var f: StoredReservoir; f.light_index = r.selected_light_index; f.light_uv = r.selected_uv; f.target_score = r.selected_target_score; f.confidence = r.history; - let denom = f.target_score * denom_factor; + let denom = f.target_score * effective_history; f.contribution_weight = select(0.0, r.weight_sum / denom, denom > 0.0); return f; } @@ -140,14 +153,11 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { return pack_reservoir_detail(r, r.history); } -var t_depth: texture_2d; -var t_prev_depth: texture_2d; -var t_basis: texture_2d; -var t_prev_basis: texture_2d; -var t_flat_normal: texture_2d; -var t_prev_flat_normal: texture_2d; -var t_motion: texture_2d; -var out_diffuse: texture_storage_2d; +var inout_depth: texture_storage_2d; +var inout_basis: texture_storage_2d; +var inout_flat_normal: texture_storage_2d; +var out_albedo: texture_storage_2d; +var out_motion: texture_storage_2d; var out_debug: texture_storage_2d; fn sample_circle(random: f32) -> vec2 { @@ -176,9 +186,9 @@ fn evaluate_environment(dir: vec3) -> vec3 { return textureSampleLevel(env_map, sampler_linear, uv, 0.0).xyz; } -fn sample_light_from_sphere(rng: ptr) -> LightSample { - let a = random_gen(rng); - let h = 1.0 - 2.0 * random_gen(rng); // make sure to allow h==1 +fn sample_light_from_sphere() -> LightSample { + let a = random_gen(&p_rng); + let h = 1.0 - 2.0 * random_gen(&p_rng); // make sure to allow h==1 let tangential = sqrt(1.0 - square(h)) * sample_circle(a); let dir = vec3(tangential.x, h, tangential.y); var ls = LightSample(); @@ -188,33 +198,33 @@ fn sample_light_from_sphere(rng: ptr) -> LightSample { return ls; } -fn sample_light_from_environment(rng: ptr) -> LightSample { +fn sample_light_from_environment() -> LightSample { let dim = textureDimensions(env_map, 0); - let es = generate_environment_sample(rng, dim); + let es = generate_environment_sample(&p_rng, dim); var ls = LightSample(); ls.pdf = es.pdf; // sample the incoming radiance ls.radiance = textureLoad(env_map, es.pixel, 0).xyz; // for determining direction - offset randomly within the texel // Note: this only works if the texels are sufficiently small - ls.uv = (vec2(es.pixel) + vec2(random_gen(rng), random_gen(rng))) / vec2(dim); + ls.uv = (vec2(es.pixel) + vec2(random_gen(&p_rng), random_gen(&p_rng))) / vec2(dim); return ls; } -fn read_surface(pixel: vec2) -> Surface { +fn read_prev_surface(pixel: vec2) -> Surface { var surface: Surface; - surface.basis = normalize(textureLoad(t_basis, pixel, 0)); - surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_depth, pixel, 0).x; + surface.basis = normalize(textureLoad(inout_basis, pixel)); + surface.flat_normal = normalize(textureLoad(inout_flat_normal, pixel).xyz); + surface.depth = textureLoad(inout_depth, pixel).x; return surface; } -fn read_prev_surface(pixel: vec2) -> Surface { - var surface: Surface; - surface.basis = normalize(textureLoad(t_prev_basis, pixel, 0)); - surface.flat_normal = normalize(textureLoad(t_prev_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_prev_depth, pixel, 0).x; - return surface; +fn thread_index_to_coord(thread_index: u32, group_id: vec3) -> vec2 { + let cluster_id = group_id.xy / parameters.grid_scale; + let cluster_offset = group_id.xy - cluster_id * parameters.grid_scale; + let local_id = vec2(thread_index % GROUP_SIZE.x, thread_index / GROUP_SIZE.x); + let global_id = (cluster_id * GROUP_SIZE + local_id) * parameters.grid_scale + cluster_offset; + return vec2(global_id); } fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { @@ -224,7 +234,7 @@ fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { return lambert_brdf * max(0.0, lambert_term); } -fn check_ray_occluded(acs: acceleration_structure, position: vec3, direction: vec3, debug_len: f32) -> bool { +fn check_ray_occluded(acs: acceleration_structure, position: vec3, direction: vec3) -> bool { var rq: ray_query; let flags = RAY_FLAG_TERMINATE_ON_FIRST_HIT | RAY_FLAG_CULL_NO_OPAQUE; rayQueryInitialize(&rq, acs, @@ -234,9 +244,9 @@ fn check_ray_occluded(acs: acceleration_structure, position: vec3, directio let intersection = rayQueryGetCommittedIntersection(&rq); let occluded = intersection.kind != RAY_QUERY_INTERSECTION_NONE; - if (debug_len != 0.0) { + if (DRAW_DEBUG && p_debug_len != 0.0) { let color = select(0xFFFFFFu, 0x0000FFu, occluded); - debug_line(position, position + debug_len * direction, color); + debug_line(position, position + p_debug_len * direction, color); } return occluded; } @@ -255,15 +265,6 @@ fn evaluate_reflected_light(surface: Surface, light_index: u32, light_uv: vec2, pos_world: vec3) -> vec2 { - if (USE_MOTION_VECTORS && parameters.use_motion_vectors != 0u) { - let motion = textureLoad(t_motion, pixel, 0).xy / MOTION_SCALE; - return vec2(pixel) + 0.5 + motion; - } else { - return get_projected_pixel_float(prev_camera, pos_world); - } -} - struct TargetScore { color: vec3, score: f32, @@ -274,7 +275,7 @@ fn make_target_score(color: vec3) -> TargetScore { } fn estimate_target_score_with_occlusion( - surface: Surface, position: vec3, light_index: u32, light_uv: vec2, acs: acceleration_structure, debug_len: f32 + surface: Surface, position: vec3, light_index: u32, light_uv: vec2, acs: acceleration_structure, ) -> TargetScore { if (light_index != 0u) { return TargetScore(); @@ -288,16 +289,16 @@ fn estimate_target_score_with_occlusion( return TargetScore(); } - if (check_ray_occluded(acs, position, direction, debug_len)) { + if (check_ray_occluded(acs, position, direction)) { return TargetScore(); - } else { - //Note: same as `evaluate_reflected_light` - let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; - return make_target_score(brdf * radiance); } + + //Note: same as `evaluate_reflected_light` + let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; + return make_target_score(brdf * radiance); } -fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debug_len: f32) -> f32 { +fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3) -> f32 { let dir = map_equirect_uv_to_dir(ls.uv); if (dot(dir, surface.flat_normal) <= 0.0) { return 0.0; @@ -313,211 +314,352 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return 0.0; } - if (check_ray_occluded(acc_struct, start_pos, dir, debug_len)) { + if (check_ray_occluded(acc_struct, start_pos, dir)) { return 0.0; } return brdf; } -struct HeuristicFactors { - weight: f32, - //history: f32, -} - -fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors { - var hf: HeuristicFactors; - let balance_denom = h0 * w0 + h1 * w1; - hf.weight = select(h0 * w0 / balance_denom, 0.0, balance_denom <= 0.0); - //hf.history = select(pow(clamp(w1 / w0, 0.0, 1.0), 8.0), 1.0, w0 <= 0.0); - return hf; -} - -struct RestirOutput { - radiance: vec3, -} - -fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, enable_debug: bool) -> RestirOutput { - if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); - } - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - if (surface.depth == 0.0) { - reservoirs[pixel_index] = StoredReservoir(); - let env = evaluate_environment(ray_dir); - return RestirOutput(env); - } - - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - let position = camera.position + surface.depth * ray_dir; - let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); - if (debug.view_mode == DebugMode_Normal) { - textureStore(out_debug, pixel, vec4(normal, 0.0)); - } - - var canonical = LiveReservoir(); +fn produce_canonical( + surface: Surface, position: vec3, +) -> LiveReservoir { + var reservoir = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; if (parameters.environment_importance_sampling != 0u) { - ls = sample_light_from_environment(rng); + ls = sample_light_from_environment(); } else { - ls = sample_light_from_sphere(rng); + ls = sample_light_from_sphere(); } - let brdf = evaluate_sample(ls, surface, position, debug_len); + let brdf = evaluate_sample(ls, surface, position); if (brdf > 0.0) { let other = make_reservoir(ls, 0u, vec3(brdf)); - merge_reservoir(&canonical, other, random_gen(rng)); + merge_reservoir(&reservoir, other); } else { - bump_reservoir(&canonical, 1.0); + bump_reservoir(&reservoir, 1.0); } } + return reservoir; +} - //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(pixel, position)); +struct TemporalReprojection { + is_valid: bool, + pixel: vec2, + surface: Surface, + reservoir: StoredReservoir, +} - // First, gather the list of reservoirs to merge with - var accepted_reservoir_indices = array(); - var accepted_count = 0u; - var temporal_index = ~0u; - for (var tap = 0u; tap <= parameters.spatial_taps; tap += 1u) { - var other_pixel = prev_pixel; - if (tap != 0u) { - let r0 = max(prev_pixel - vec2(parameters.spatial_radius), vec2(0)); - let r1 = min(prev_pixel + vec2(parameters.spatial_radius + 1), vec2(prev_camera.target_size)); - other_pixel = vec2(mix(vec2(r0), vec2(r1), vec2(random_gen(rng), random_gen(rng)))); - } else if (parameters.temporal_tap == 0u) - { - continue; - } +fn find_temporal(surface: Surface, pixel: vec2, center_coord: vec2) -> TemporalReprojection { + var tr = TemporalReprojection(); + tr.is_valid = false; + if (surface.depth == 0.0) { + return tr; + } + + // Find best match in a 2x2 grid + let center_pixel = vec2(center_coord); + // Trick to start with closer pixels + let center_sum = vec2(center_coord - 0.5) + vec2(center_coord + 0.5); + var prev_pixels = array, 4>( + center_pixel.xy, + vec2(center_sum.x - center_pixel.x, center_pixel.y), + center_sum - center_pixel, + vec2(center_pixel.x, center_sum.y - center_pixel.y), + ); - let other_index = get_reservoir_index(other_pixel, prev_camera); - if (other_index < 0) { + for (var i = 0; i < 4 && !tr.is_valid; i += 1) { + tr.pixel = prev_pixels[i]; + let prev_reservoir_index = get_reservoir_index(tr.pixel, prev_camera); + if (prev_reservoir_index < 0) { continue; } - if (prev_reservoirs[other_index].confidence == 0.0) { + tr.reservoir = reservoirs[prev_reservoir_index]; + if (tr.reservoir.confidence == 0.0) { continue; } - - let other_surface = read_prev_surface(other_pixel); - let compatibility = compare_surfaces(surface, other_surface); - if (compatibility < 0.1) { - // if the surfaces are too different, there is no trust in this sample + tr.surface = read_prev_surface(tr.pixel); + if (compare_surfaces(surface, tr.surface) < 0.1) { continue; } - - if (tap == 0u) { - temporal_index = accepted_count; + tr.is_valid = true; + + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Reprojection) { + var colors = array, 4>( + vec3(1.0, 1.0, 1.0), + vec3(1.0, 0.0, 0.0), + vec3(0.0, 1.0, 0.0), + vec3(0.0, 0.0, 1.0), + ); + textureStore(out_debug, pixel, vec4(colors[i], 1.0)); } - accepted_reservoir_indices[accepted_count] = other_index; - if (accepted_count < MAX_RESERVOIRS) { - accepted_count += 1u; + } + return tr; +} + +struct ResampleBase { + surface: Surface, + canonical: LiveReservoir, + world_pos: vec3, + accepted_count: f32, +} +struct ResampleResult { + selected: bool, + mis_canonical: f32, + mis_sample: f32, +} + +fn ratio(a: f32, b: f32) -> f32 { + return select(0.0, a / (a+b), a+b > 0.0); +} + +// Resample following Algorithm 8 in section 9.1 of Bitterli thesis +fn resample( + dst: ptr, color_and_weight: ptr>, + base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, + max_confidence: f32, +) -> ResampleResult { + var src: LiveReservoir; + let neighbor = other.reservoir; + var rr = ResampleResult(); + if (parameters.use_pairwise_mis != 0u) { + let canonical = base.canonical; + let neighbor_history = min(neighbor.confidence, max_confidence); + let mis_scale = 1.0 / (base.accepted_count + parameters.defensive_mis); + { // scoping this to hint the register allocation + let t_canonical_at_neighbor = estimate_target_score_with_occlusion( + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs); + let r_canonical = ratio(canonical.history * canonical.selected_target_score / base.accepted_count, neighbor_history * t_canonical_at_neighbor.score); + rr.mis_canonical = mis_scale * (parameters.defensive_mis / base.accepted_count + r_canonical); } + + let t_neighbor_at_canonical = estimate_target_score_with_occlusion( + base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct); + let r_neighbor = ratio(neighbor_history * neighbor.target_score, canonical.history * t_neighbor_at_canonical.score / base.accepted_count); + rr.mis_sample = mis_scale * r_neighbor; + + src.history = neighbor_history; + src.selected_light_index = neighbor.light_index; + src.selected_uv = neighbor.light_uv; + src.selected_target_score = t_neighbor_at_canonical.score; + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * rr.mis_sample; + src.radiance = t_neighbor_at_canonical.color; + } else { + rr.mis_canonical = 1.0; + rr.mis_sample = 1.0; + let radiance = evaluate_reflected_light(base.surface, neighbor.light_index, neighbor.light_uv); + src = unpack_reservoir(neighbor, max_confidence, radiance); + } + + if (DECOUPLED_SHADING) { + *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); + } + if (src.weight_sum <= 0.0) { + bump_reservoir(dst, src.history); + } else { + merge_reservoir(dst, src); + rr.selected = true; + } + return rr; +} + +struct ResampleOutput { + reservoir: StoredReservoir, + color: vec3, +} + +fn revive_canonical(ro: ResampleOutput) -> LiveReservoir { + let radiance = select(vec3(0.0), ro.color / ro.reservoir.contribution_weight, ro.reservoir.contribution_weight > 0.0); + return unpack_reservoir(ro.reservoir, 100.0, radiance); +} + +fn finalize_canonical(reservoir: LiveReservoir) -> ResampleOutput { + var ro = ResampleOutput(); + ro.reservoir = pack_reservoir(reservoir); + ro.color = ro.reservoir.contribution_weight * reservoir.radiance; + return ro; +} + +fn finalize_resampling( + reservoir: ptr, color_and_weight: ptr>, + base: ResampleBase, mis_canonical: f32, +) -> ResampleOutput { + var canonical = base.canonical; + if (parameters.use_pairwise_mis != 0u) { + canonical.weight_sum *= mis_canonical / canonical.history; + } + merge_reservoir(reservoir, canonical); + + let effective_history = select((*reservoir).history, 1.0, parameters.use_pairwise_mis != 0u); + var ro = ResampleOutput(); + ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); + + if (DECOUPLED_SHADING) { + if (canonical.selected_target_score > 0.0) { + let contribution_weight = canonical.weight_sum / canonical.selected_target_score; + *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + } + ro.color = (*color_and_weight).xyz / max((*color_and_weight).w, 0.001); + } else { + ro.color = ro.reservoir.contribution_weight * (*reservoir).radiance; + } + return ro; +} + +fn resample_temporal( + surface: Surface, cur_pixel: vec2, position: vec3, + local_index: u32, tr: TemporalReprojection, +) -> ResampleOutput { + if (surface.depth == 0.0) { + return ResampleOutput(); + } + + let canonical = produce_canonical(surface, position); + if (parameters.temporal_tap == 0u || !tr.is_valid) { + return finalize_canonical(canonical); } - // Next, evaluate the MIS of each of the samples versus the canonical one. var reservoir = LiveReservoir(); - var shaded_color = vec3(0.0); - var mis_canonical = BASE_CANONICAL_MIS; var color_and_weight = vec4(0.0); - for (var rid = 0u; rid < accepted_count; rid += 1u) { - let neighbor_index = accepted_reservoir_indices[rid]; - let neighbor = prev_reservoirs[neighbor_index]; - - let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, rid == temporal_index); - var other: LiveReservoir; - if (PAIRWISE_MIS) { - let neighbor_pixel = get_pixel_from_reservoir_index(neighbor_index, prev_camera); - let neighbor_history = min(neighbor.confidence, f32(max_history)); - { // scoping this to hint the register allocation - let neighbor_surface = read_prev_surface(neighbor_pixel); - let neighbor_dir = get_ray_direction(prev_camera, neighbor_pixel); - let neighbor_position = prev_camera.position + neighbor_surface.depth * neighbor_dir; - - let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - neighbor_surface, neighbor_position, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); - let mis_sub_canonical = balance_heuristic( - t_canonical_at_neighbor.score, canonical.selected_target_score, - neighbor_history * f32(accepted_count), canonical.history); - mis_canonical += 1.0 - mis_sub_canonical.weight; - } - - // Notes about t_neighbor_at_neighbor: - // 1. we assume lights aren't moving. Technically we should check if the - // target light has moved, and re-evaluate the occlusion. - // 2. we can use the cached target score, and there is no use of the target color - //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); - let t_neighbor_at_canonical = estimate_target_score_with_occlusion( - surface, position, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let mis_neighbor = balance_heuristic( - neighbor.target_score, t_neighbor_at_canonical.score, - neighbor_history * f32(accepted_count), canonical.history); - - other.history = neighbor_history; - other.selected_light_index = neighbor.light_index; - other.selected_uv = neighbor.light_uv; - other.selected_target_score = t_neighbor_at_canonical.score; - other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; - //Note: should be needed according to the paper - // other.history *= min(mis_neighbor.history, mis_sub_canonical.history); - other.radiance = t_neighbor_at_canonical.color; - } else { - other = unpack_reservoir(neighbor, max_history); - other.radiance = evaluate_reflected_light(surface, other.selected_light_index, other.selected_uv); - } + let base = ResampleBase(surface, canonical, position, 1.0); - if (DECOUPLED_SHADING) { - color_and_weight += other.weight_sum * vec4(neighbor.contribution_weight * other.radiance, 1.0); + let prev_dir = get_ray_direction(prev_camera, tr.pixel); + let prev_world_pos = prev_camera.position + tr.surface.depth * prev_dir; + let other = PixelCache(tr.surface, tr.reservoir, prev_world_pos); + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence); + let mis_canonical = rr.mis_canonical; + + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMatch) { + textureStore(out_debug, cur_pixel, vec4(1.0)); + } + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical)); + } + + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); +} + +fn resample_spatial( + surface: Surface, cur_pixel: vec2, position: vec3, + group_id: vec3, canonical: LiveReservoir, +) -> ResampleOutput { + if (surface.depth == 0.0) { + let dir = normalize(position - camera.position); + var ro = ResampleOutput(); + ro.color = evaluate_environment(dir); + return ro; + } + + // gather the list of neighbors (within the workgroup) to resample. + var accepted_count = 0u; + var accepted_local_indices = array(); + let max_accepted = min(MAX_RESAMPLE, parameters.spatial_taps); + let num_candidates = parameters.spatial_taps * 4u; + for (var i = 0u; i < num_candidates && accepted_count < max_accepted; i += 1u) { + let other_cache_index = random_u32(&p_rng) % GROUP_SIZE_TOTAL; + let diff = thread_index_to_coord(other_cache_index, group_id) - cur_pixel; + if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { + continue; } - if (other.weight_sum <= 0.0) { - bump_reservoir(&reservoir, other.history); - } else { - merge_reservoir(&reservoir, other, random_gen(rng)); + let other = pixel_cache[other_cache_index]; + // if the surfaces are too different, there is no trust in this sample + if (other.reservoir.confidence > 0.0 && compare_surfaces(surface, other.surface) > 0.1) { + accepted_local_indices[accepted_count] = other_cache_index; + accepted_count += 1u; } } - // Finally, merge in the canonical sample - if (PAIRWISE_MIS) { - canonical.weight_sum *= mis_canonical / canonical.history; - } - if (DECOUPLED_SHADING) { - //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + var reservoir = LiveReservoir(); + var color_and_weight = vec4(0.0); + let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + var mis_canonical = f32(accepted_count == 0u); + + // evaluate the MIS of each of the samples versus the canonical one. + for (var lid = 0u; lid < accepted_count; lid += 1u) { + let other = pixel_cache[accepted_local_indices[lid]]; + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_confidence); + mis_canonical += rr.mis_canonical; } - merge_reservoir(&reservoir, canonical, random_gen(rng)); - let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS); - let stored = pack_reservoir_detail(reservoir, effective_history); - reservoirs[pixel_index] = stored; - var ro = RestirOutput(); - if (DECOUPLED_SHADING) { - ro.radiance = color_and_weight.xyz / max(color_and_weight.w, 0.001); - } else { - ro.radiance = stored.contribution_weight * reservoir.radiance; + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SpatialMatch) { + let value = base.accepted_count / max(1.0, f32(parameters.spatial_taps)); + textureStore(out_debug, cur_pixel, vec4(value)); } - return ro; + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SpatialMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical)); + } + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); } -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { - if (any(global_id.xy >= camera.target_size)) { +fn compute_restir( + rs: RichSurface, pixel: vec2, local_index: u32, group_id: vec3, +) -> vec3 { + let center_coord = vec2(pixel) + 0.5 + select(vec2(0.0), rs.motion, parameters.use_motion_vectors != 0u); + let tr = find_temporal(rs.inner, pixel, center_coord); + let motion_sqr = dot(rs.motion, rs.motion); + + let temporal = resample_temporal(rs.inner, pixel, rs.position, local_index, tr); + pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); + var prev_pixel = select(vec2(-1), tr.pixel, tr.is_valid); + + // sync with the workgroup to ensure all reservoirs are available. + workgroupBarrier(); + + let temporal_live = revive_canonical(temporal); + let spatial = resample_spatial(rs.inner, pixel, rs.position, group_id, temporal_live); + + let pixel_index = get_reservoir_index(pixel, camera); + reservoirs[pixel_index] = spatial.reservoir; + + accumulate_temporal(pixel, spatial.color, parameters.temporal_accumulation_weight, prev_pixel, motion_sqr); + return spatial.color; +} + +@compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) +fn main( + @builtin(workgroup_id) group_id: vec3, + @builtin(local_invocation_index) local_index: u32, +) { + pixel_cache[local_index] = PixelCache(); + let pixel_coord = thread_index_to_coord(local_index, group_id); + if (any(vec2(pixel_coord) >= camera.target_size)) { return; } - let global_index = global_id.y * camera.target_size.x + global_id.x; - var rng = random_init(global_index, parameters.frame_index); + if (WRITE_DEBUG_IMAGE) { + var default_color = vec3(0.0); + if (debug.view_mode == DebugMode_Grouping) { + p_rng = random_init(group_id.y * 1000u + group_id.x, 0u); + let h = random_gen(&p_rng) * 360.0; + default_color = hsv_to_rgb(h, 1.0, 1.0); + } + textureStore(out_debug, pixel_coord, vec4(default_color, 0.0)); + } + + let enable_debug = DRAW_DEBUG && all(pixel_coord == vec2(debug.mouse_pos)); + let rs = fetch_geometry(pixel_coord, true, enable_debug); + + let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); + p_rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(vec2(global_id.xy)); - let enable_debug = all(global_id.xy == debug.mouse_pos); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(global_id.xy), &rng, enable_restir_debug); - let color = ro.radiance; + p_debug_len = select(0.0, rs.inner.depth * 0.2, enable_restir_debug); + let color = compute_restir(rs, pixel_coord, local_index, group_id); + + //Note: important to do this after the temporal pass specifically + // TODO: option to avoid writing data for the sky + textureStore(inout_depth, pixel_coord, vec4(rs.inner.depth, 0.0, 0.0, 0.0)); + textureStore(inout_basis, pixel_coord, rs.inner.basis); + textureStore(inout_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); + textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); + if (WRITE_MOTION_VECTORS) { + textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); + } + if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; debug_buf.variance.count += 1u; } - textureStore(out_diffuse, global_id.xy, vec4(color, 1.0)); } diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index c856538f..4dd56365 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -51,7 +51,13 @@ pub enum DebugMode { Normal = 2, Motion = 3, HitConsistency = 4, - Variance = 5, + Grouping = 5, + Reprojection = 6, + TemporalMatch = 10, + TemporalMisCanonical = 11, + SpatialMatch = 12, + SpatialMisCanonical = 13, + Variance = 100, } impl Default for DebugMode { @@ -90,15 +96,24 @@ pub struct RayConfig { pub num_environment_samples: u32, pub environment_importance_sampling: bool, pub temporal_tap: bool, - pub temporal_history: u32, + pub temporal_confidence: f32, pub spatial_taps: u32, - pub spatial_tap_history: u32, - pub spatial_radius: u32, + pub spatial_confidence: f32, + /// Minimal distance to a spatially reused pixel (in the current frame). + pub spatial_min_distance: u32, + /// Scale and mix the groups into clusters, to allow spatial samples to mix + /// outside of the original workgroup pixel bounds. + pub group_mixer: u32, pub t_start: f32, + /// See "9.1 pairwise mis for robust reservoir reuse" + /// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" + pub pairwise_mis: bool, + pub defensive_mis: f32, } #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] pub struct DenoiserConfig { + pub enabled: bool, pub num_passes: u32, pub temporal_weight: f32, } @@ -199,13 +214,15 @@ impl RenderTarget { } struct RestirTargets { - reservoir_buf: [blade_graphics::Buffer; 2], + reservoir_buf: blade_graphics::Buffer, debug: RenderTarget<1>, - depth: RenderTarget<2>, - basis: RenderTarget<2>, - flat_normal: RenderTarget<2>, + depth: RenderTarget<1>, + basis: RenderTarget<1>, + flat_normal: RenderTarget<1>, albedo: RenderTarget<1>, motion: RenderTarget<1>, + // One stores the ReSTIR output color, + // another 2 are used for a-trous ping-pong. light_diffuse: RenderTarget<3>, camera_params: [CameraParams; 2], } @@ -218,19 +235,16 @@ impl RestirTargets { gpu: &blade_graphics::Context, ) -> Self { let total_reservoirs = size.width as usize * size.height as usize; - let mut reservoir_buf = [blade_graphics::Buffer::default(); 2]; - for (i, rb) in reservoir_buf.iter_mut().enumerate() { - *rb = gpu.create_buffer(blade_graphics::BufferDesc { - name: &format!("reservoirs{i}"), - size: reservoir_size as u64 * total_reservoirs as u64, - memory: blade_graphics::Memory::Device, - }); - } + let reservoir_buf = gpu.create_buffer(blade_graphics::BufferDesc { + name: "reservoirs", + size: reservoir_size as u64 * total_reservoirs as u64, + memory: blade_graphics::Memory::Device, + }); Self { reservoir_buf, debug: RenderTarget::new( - "deubg", + "debug", blade_graphics::TextureFormat::Rgba8Unorm, size, encoder, @@ -277,9 +291,7 @@ impl RestirTargets { } fn destroy(&self, gpu: &blade_graphics::Context) { - for rb in self.reservoir_buf.iter() { - gpu.destroy_buffer(*rb); - } + gpu.destroy_buffer(self.reservoir_buf); self.debug.destroy(gpu); self.depth.destroy(gpu); self.basis.destroy(gpu); @@ -291,7 +303,6 @@ impl RestirTargets { } struct Blur { - temporal_accum_pipeline: blade_graphics::ComputePipeline, atrous_pipeline: blade_graphics::ComputePipeline, } @@ -307,8 +318,6 @@ struct Blur { pub struct Renderer { shaders: Shaders, targets: RestirTargets, - post_proc_input_index: usize, - fill_pipeline: blade_graphics::ComputePipeline, main_pipeline: blade_graphics::ComputePipeline, post_proc_pipeline: blade_graphics::RenderPipeline, blur: Blur, @@ -360,57 +369,43 @@ struct MainParams { num_environment_samples: u32, environment_importance_sampling: u32, temporal_tap: u32, - temporal_history: u32, + temporal_confidence: f32, spatial_taps: u32, - spatial_tap_history: u32, - spatial_radius: u32, + spatial_confidence: f32, + spatial_min_distance: u32, t_start: f32, + use_pairwise_mis: u32, + defensive_mis: f32, use_motion_vectors: u32, + temporal_accumulation_weight: f32, + pad: u32, + grid_scale: [u32; 2], } #[derive(blade_macros::ShaderData)] -struct FillData<'a> { +struct MainData<'a> { camera: CameraParams, prev_camera: CameraParams, debug: DebugParams, + parameters: MainParams, acc_struct: blade_graphics::AccelerationStructure, + prev_acc_struct: blade_graphics::AccelerationStructure, hit_entries: blade_graphics::BufferPiece, index_buffers: &'a blade_graphics::BufferArray, vertex_buffers: &'a blade_graphics::BufferArray, textures: &'a blade_graphics::TextureArray, sampler_linear: blade_graphics::Sampler, - debug_buf: blade_graphics::BufferPiece, - out_depth: blade_graphics::TextureView, - out_basis: blade_graphics::TextureView, - out_flat_normal: blade_graphics::TextureView, - out_albedo: blade_graphics::TextureView, - out_motion: blade_graphics::TextureView, - out_debug: blade_graphics::TextureView, -} - -#[derive(blade_macros::ShaderData)] -struct MainData { - camera: CameraParams, - prev_camera: CameraParams, - debug: DebugParams, - parameters: MainParams, - acc_struct: blade_graphics::AccelerationStructure, - prev_acc_struct: blade_graphics::AccelerationStructure, - sampler_linear: blade_graphics::Sampler, sampler_nearest: blade_graphics::Sampler, env_map: blade_graphics::TextureView, env_weights: blade_graphics::TextureView, - t_depth: blade_graphics::TextureView, - t_prev_depth: blade_graphics::TextureView, - t_basis: blade_graphics::TextureView, - t_prev_basis: blade_graphics::TextureView, - t_flat_normal: blade_graphics::TextureView, - t_prev_flat_normal: blade_graphics::TextureView, - t_motion: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, - prev_reservoirs: blade_graphics::BufferPiece, - out_diffuse: blade_graphics::TextureView, + inout_depth: blade_graphics::TextureView, + inout_basis: blade_graphics::TextureView, + inout_flat_normal: blade_graphics::TextureView, + out_albedo: blade_graphics::TextureView, + out_motion: blade_graphics::TextureView, + inout_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -418,27 +413,10 @@ struct MainData { #[derive(Clone, Copy, bytemuck::Zeroable, bytemuck::Pod)] struct BlurParams { extent: [u32; 2], - temporal_weight: f32, iteration: i32, - use_motion_vectors: u32, pad: u32, } -#[derive(blade_macros::ShaderData)] -struct TemporalAccumData { - camera: CameraParams, - prev_camera: CameraParams, - params: BlurParams, - input: blade_graphics::TextureView, - prev_input: blade_graphics::TextureView, - t_depth: blade_graphics::TextureView, - t_prev_depth: blade_graphics::TextureView, - t_flat_normal: blade_graphics::TextureView, - t_prev_flat_normal: blade_graphics::TextureView, - t_motion: blade_graphics::TextureView, - output: blade_graphics::TextureView, -} - #[derive(blade_macros::ShaderData)] struct AtrousData { params: BlurParams, @@ -487,9 +465,8 @@ struct HitEntry { #[derive(Clone, PartialEq)] pub struct Shaders { env_prepare: blade_asset::Handle, - fill_gbuf: blade_asset::Handle, ray_trace: blade_asset::Handle, - blur: blade_asset::Handle, + a_trous: blade_asset::Handle, post_proc: blade_asset::Handle, debug_draw: blade_asset::Handle, debug_blit: blade_asset::Handle, @@ -500,9 +477,8 @@ impl Shaders { let mut ctx = asset_hub.open_context(path, "shader finish"); let shaders = Self { env_prepare: ctx.load_shader("env-prepare.wgsl"), - fill_gbuf: ctx.load_shader("fill-gbuf.wgsl"), ray_trace: ctx.load_shader("ray-trace.wgsl"), - blur: ctx.load_shader("blur.wgsl"), + a_trous: ctx.load_shader("a-trous.wgsl"), post_proc: ctx.load_shader("post-proc.wgsl"), debug_draw: ctx.load_shader("debug-draw.wgsl"), debug_blit: ctx.load_shader("debug-blit.wgsl"), @@ -512,29 +488,14 @@ impl Shaders { } struct ShaderPipelines { - fill: blade_graphics::ComputePipeline, main: blade_graphics::ComputePipeline, - temporal_accum: blade_graphics::ComputePipeline, - atrous: blade_graphics::ComputePipeline, + a_trous: blade_graphics::ComputePipeline, post_proc: blade_graphics::RenderPipeline, env_prepare: blade_graphics::ComputePipeline, reservoir_size: u32, } impl ShaderPipelines { - fn create_gbuf_fill( - shader: &blade_graphics::Shader, - gpu: &blade_graphics::Context, - ) -> blade_graphics::ComputePipeline { - shader.check_struct_size::(); - shader.check_struct_size::(); - let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "fill-gbuf", - data_layouts: &[&layout], - compute: shader.at("main"), - }) - } fn create_ray_trace( shader: &blade_graphics::Shader, gpu: &blade_graphics::Context, @@ -545,32 +506,26 @@ impl ShaderPipelines { shader.check_struct_size::(); shader.check_struct_size::(); let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { + let pipeline = gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { name: "ray-trace", data_layouts: &[&layout], compute: shader.at("main"), - }) - } + }); - fn create_temporal_accum( - shader: &blade_graphics::Shader, - gpu: &blade_graphics::Context, - ) -> blade_graphics::ComputePipeline { - let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "temporal-accum", - data_layouts: &[&layout], - compute: shader.at("temporal_accum"), - }) + let pl_struct_size = shader.get_struct_size("PixelCache"); + let group_size = pipeline.get_workgroup_size(); + let wg_required = pl_struct_size * group_size[0] * group_size[1]; + log::info!("Using {} workgroup memory for RT", wg_required); + pipeline } - fn create_atrous( + fn create_a_trous( shader: &blade_graphics::Shader, gpu: &blade_graphics::Context, ) -> blade_graphics::ComputePipeline { let layout = ::layout(); gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "atrous", + name: "a-trous", data_layouts: &[&layout], compute: shader.at("atrous3x3"), }) @@ -604,12 +559,10 @@ impl ShaderPipelines { shader_man: &blade_asset::AssetManager, ) -> Result { let sh_main = shader_man[shaders.ray_trace].raw.as_ref().unwrap(); - let sh_blur = shader_man[shaders.blur].raw.as_ref().unwrap(); + let sh_atrous = shader_man[shaders.a_trous].raw.as_ref().unwrap(); Ok(Self { - fill: Self::create_gbuf_fill(shader_man[shaders.fill_gbuf].raw.as_ref().unwrap(), gpu), main: Self::create_ray_trace(sh_main, gpu), - temporal_accum: Self::create_temporal_accum(sh_blur, gpu), - atrous: Self::create_atrous(sh_blur, gpu), + a_trous: Self::create_a_trous(sh_atrous, gpu), post_proc: Self::create_post_proc( shader_man[shaders.post_proc].raw.as_ref().unwrap(), config.surface_info, @@ -639,6 +592,11 @@ pub struct FrameResources { pub acceleration_structures: Vec, } +#[derive(Debug, Default)] +pub struct FrameKey { + post_proc_input_index: usize, +} + impl Renderer { /// Create a new renderer with a given configuration. /// @@ -696,13 +654,10 @@ impl Renderer { Self { shaders, targets, - post_proc_input_index: 0, - fill_pipeline: sp.fill, main_pipeline: sp.main, post_proc_pipeline: sp.post_proc, blur: Blur { - temporal_accum_pipeline: sp.temporal_accum, - atrous_pipeline: sp.atrous, + atrous_pipeline: sp.a_trous, }, acceleration_structure: blade_graphics::AccelerationStructure::default(), prev_acceleration_structure: blade_graphics::AccelerationStructure::default(), @@ -742,9 +697,7 @@ impl Renderer { gpu.destroy_sampler(self.samplers.nearest); gpu.destroy_sampler(self.samplers.linear); // pipelines - gpu.destroy_compute_pipeline(&mut self.blur.temporal_accum_pipeline); gpu.destroy_compute_pipeline(&mut self.blur.atrous_pipeline); - gpu.destroy_compute_pipeline(&mut self.fill_pipeline); gpu.destroy_compute_pipeline(&mut self.main_pipeline); gpu.destroy_render_pipeline(&mut self.post_proc_pipeline); } @@ -759,9 +712,8 @@ impl Renderer { let mut tasks = Vec::new(); let old = self.shaders.clone(); - tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.fill_gbuf)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.ray_trace)); - tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.blur)); + tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.a_trous)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.post_proc)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.debug_draw)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.debug_blit)); @@ -776,11 +728,6 @@ impl Renderer { let _ = task.join(); } - if self.shaders.fill_gbuf != old.fill_gbuf { - if let Ok(ref shader) = asset_hub.shaders[self.shaders.fill_gbuf].raw { - self.fill_pipeline = ShaderPipelines::create_gbuf_fill(shader, gpu); - } - } if self.shaders.ray_trace != old.ray_trace { if let Ok(ref shader) = asset_hub.shaders[self.shaders.ray_trace].raw { assert_eq!( @@ -790,11 +737,9 @@ impl Renderer { self.main_pipeline = ShaderPipelines::create_ray_trace(shader, gpu); } } - if self.shaders.blur != old.blur { - if let Ok(ref shader) = asset_hub.shaders[self.shaders.blur].raw { - self.blur.temporal_accum_pipeline = - ShaderPipelines::create_temporal_accum(shader, gpu); - self.blur.atrous_pipeline = ShaderPipelines::create_atrous(shader, gpu); + if self.shaders.a_trous != old.a_trous { + if let Ok(ref shader) = asset_hub.shaders[self.shaders.a_trous].raw { + self.blur.atrous_pipeline = ShaderPipelines::create_a_trous(shader, gpu); } } if self.shaders.post_proc != old.post_proc { @@ -1089,20 +1034,17 @@ impl Renderer { self.debug.reset_lines(&mut transfer); } let total_reservoirs = self.surface_size.width as u64 * self.surface_size.height as u64; - for reservoir_buf in self.targets.reservoir_buf.iter() { - transfer.fill_buffer( - reservoir_buf.at(0), - total_reservoirs * self.reservoir_size as u64, - 0, - ); - } + transfer.fill_buffer( + self.targets.reservoir_buf.at(0), + total_reservoirs * self.reservoir_size as u64, + 0, + ); } if !config.frozen { self.frame_index += 1; } self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); - self.post_proc_input_index = self.frame_index % 2; } /// Ray trace the scene. @@ -1114,40 +1056,33 @@ impl Renderer { command_encoder: &mut blade_graphics::CommandEncoder, debug_config: DebugConfig, ray_config: RayConfig, - ) { + denoiser_config: DenoiserConfig, + ) -> FrameKey { let debug = self.make_debug_params(&debug_config); let (cur, prev) = self.work_indices(); + let mut post_proc_input_index = 0; if let mut pass = command_encoder.compute() { - let mut pc = pass.with(&self.fill_pipeline); - let groups = self.fill_pipeline.get_dispatch_for(self.surface_size); - pc.bind( - 0, - &FillData { - camera: self.targets.camera_params[cur], - prev_camera: self.targets.camera_params[prev], - debug, - acc_struct: self.acceleration_structure, - hit_entries: self.hit_buffer.into(), - index_buffers: &self.index_buffers, - vertex_buffers: &self.vertex_buffers, - textures: &self.textures, - sampler_linear: self.samplers.linear, - debug_buf: self.debug.buffer_resource(), - out_depth: self.targets.depth.views[cur], - out_basis: self.targets.basis.views[cur], - out_flat_normal: self.targets.flat_normal.views[cur], - out_albedo: self.targets.albedo.views[0], - out_motion: self.targets.motion.views[0], - out_debug: self.targets.debug.views[0], - }, - ); - pc.dispatch(groups); - } - - if let mut pass = command_encoder.compute() { + let grid_scale = { + let limit = ray_config.group_mixer; + let r = self.frame_index as u32 ^ 0x5A; + [r % limit + 1, (r / limit) % limit + 1] + }; + let groups = { + let wg_size = self.main_pipeline.get_workgroup_size(); + let cluster_size = [ + wg_size[0] * grid_scale[0], + wg_size[1] * grid_scale[1], + wg_size[2], + ]; + let clusters = self.surface_size.group_by(cluster_size); + [ + clusters[0] * grid_scale[0], + clusters[1] * grid_scale[1], + clusters[2], + ] + }; let mut pc = pass.with(&self.main_pipeline); - let groups = self.main_pipeline.get_dispatch_for(self.surface_size); pc.bind( 0, &MainData { @@ -1160,12 +1095,21 @@ impl Renderer { environment_importance_sampling: ray_config.environment_importance_sampling as u32, temporal_tap: ray_config.temporal_tap as u32, - temporal_history: ray_config.temporal_history, + temporal_confidence: ray_config.temporal_confidence, spatial_taps: ray_config.spatial_taps, - spatial_tap_history: ray_config.spatial_tap_history, - spatial_radius: ray_config.spatial_radius, + spatial_confidence: ray_config.spatial_confidence, + spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, + use_pairwise_mis: ray_config.pairwise_mis as u32, + defensive_mis: ray_config.defensive_mis, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, + temporal_accumulation_weight: if denoiser_config.enabled { + denoiser_config.temporal_weight + } else { + 1.0 + }, + pad: 0, + grid_scale, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index @@ -1176,95 +1120,61 @@ impl Renderer { } else { self.prev_acceleration_structure }, + hit_entries: self.hit_buffer.into(), + index_buffers: &self.index_buffers, + vertex_buffers: &self.vertex_buffers, + textures: &self.textures, sampler_linear: self.samplers.linear, sampler_nearest: self.samplers.nearest, env_map: self.env_map.main_view, env_weights: self.env_map.weight_view, - t_depth: self.targets.depth.views[cur], - t_prev_depth: self.targets.depth.views[prev], - t_basis: self.targets.basis.views[cur], - t_prev_basis: self.targets.basis.views[prev], - t_flat_normal: self.targets.flat_normal.views[cur], - t_prev_flat_normal: self.targets.flat_normal.views[prev], - t_motion: self.targets.motion.views[0], debug_buf: self.debug.buffer_resource(), - reservoirs: self.targets.reservoir_buf[cur].into(), - prev_reservoirs: self.targets.reservoir_buf[prev].into(), - out_diffuse: self.targets.light_diffuse.views[cur], + reservoirs: self.targets.reservoir_buf.into(), + inout_depth: self.targets.depth.views[0], + inout_basis: self.targets.basis.views[0], + inout_flat_normal: self.targets.flat_normal.views[0], + out_albedo: self.targets.albedo.views[0], + out_motion: self.targets.motion.views[0], + inout_diffuse: self.targets.light_diffuse.views[post_proc_input_index], out_debug: self.targets.debug.views[0], }, ); pc.dispatch(groups); } - } - /// Perform noise reduction using SVGF. - #[profiling::function] - pub fn denoise( - &mut self, //TODO: borrow immutably - command_encoder: &mut blade_graphics::CommandEncoder, - denoiser_config: DenoiserConfig, - ) { - let mut params = BlurParams { - extent: [self.surface_size.width, self.surface_size.height], - temporal_weight: denoiser_config.temporal_weight, - iteration: 0, - use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - pad: 0, - }; - let (cur, prev) = self.work_indices(); - let temp = 2; - - if denoiser_config.temporal_weight < 1.0 { - let mut pass = command_encoder.compute(); - let mut pc = pass.with(&self.blur.temporal_accum_pipeline); - let groups = self - .blur - .atrous_pipeline - .get_dispatch_for(self.surface_size); - pc.bind( - 0, - &TemporalAccumData { - camera: self.targets.camera_params[cur], - prev_camera: self.targets.camera_params[prev], - params, - input: self.targets.light_diffuse.views[cur], - prev_input: self.targets.light_diffuse.views[prev], - t_depth: self.targets.depth.views[cur], - t_prev_depth: self.targets.depth.views[prev], - t_flat_normal: self.targets.flat_normal.views[cur], - t_prev_flat_normal: self.targets.flat_normal.views[prev], - t_motion: self.targets.motion.views[0], - output: self.targets.light_diffuse.views[temp], - }, - ); - pc.dispatch(groups); - //Note: making `cur` contain the latest reprojection output - self.targets.light_diffuse.views.swap(cur, temp); + if denoiser_config.enabled { + let mut params = BlurParams { + extent: [self.surface_size.width, self.surface_size.height], + iteration: 0, + pad: 0, + }; + let mut ping_pong = [1, 2]; + for _ in 0..denoiser_config.num_passes { + let mut pass = command_encoder.compute(); + let mut pc = pass.with(&self.blur.atrous_pipeline); + let groups = self + .blur + .atrous_pipeline + .get_dispatch_for(self.surface_size); + pc.bind( + 0, + &AtrousData { + params, + input: self.targets.light_diffuse.views[post_proc_input_index], + t_depth: self.targets.depth.views[0], + t_flat_normal: self.targets.flat_normal.views[0], + output: self.targets.light_diffuse.views[ping_pong[0]], + }, + ); + pc.dispatch(groups); + post_proc_input_index = ping_pong[0]; + ping_pong.swap(0, 1); + params.iteration += 1; + } } - let mut ping_pong = [temp, prev]; - for _ in 0..denoiser_config.num_passes { - let mut pass = command_encoder.compute(); - let mut pc = pass.with(&self.blur.atrous_pipeline); - let groups = self - .blur - .atrous_pipeline - .get_dispatch_for(self.surface_size); - pc.bind( - 0, - &AtrousData { - params, - input: self.targets.light_diffuse.views[self.post_proc_input_index], - t_depth: self.targets.depth.views[cur], - t_flat_normal: self.targets.flat_normal.views[cur], - output: self.targets.light_diffuse.views[ping_pong[0]], - }, - ); - pc.dispatch(groups); - self.post_proc_input_index = ping_pong[0]; - ping_pong.swap(0, 1); - params.iteration += 1; + FrameKey { + post_proc_input_index, } } @@ -1273,6 +1183,7 @@ impl Renderer { pub fn post_proc( &self, pass: &mut blade_graphics::RenderCommandEncoder, + key: FrameKey, debug_config: DebugConfig, pp_config: PostProcConfig, debug_lines: &[DebugLine], @@ -1285,7 +1196,7 @@ impl Renderer { 0, &PostProcData { t_albedo: self.targets.albedo.views[0], - light_diffuse: self.targets.light_diffuse.views[self.post_proc_input_index], + light_diffuse: self.targets.light_diffuse.views[key.post_proc_input_index], t_debug: self.targets.debug.views[0], tone_map_params: ToneMapParams { enabled: 1, @@ -1302,7 +1213,7 @@ impl Renderer { self.debug.render_lines( debug_lines, self.targets.camera_params[cur], - self.targets.depth.views[cur], + self.targets.depth.views[0], pass, ); self.debug diff --git a/examples/init/env-sample.wgsl b/examples/init/env-sample.wgsl index 2c4d379d..7891133e 100644 --- a/examples/init/env-sample.wgsl +++ b/examples/init/env-sample.wgsl @@ -5,12 +5,13 @@ const PI: f32 = 3.1415926; const BUMP: f32 = 0.025; var env_main: texture_2d; +var p_rng: RandomState; @vertex fn vs_accum(@builtin(vertex_index) vi: u32) -> @builtin(position) vec4 { - var rng = random_init(vi, 0u); let dim = textureDimensions(env_main); - let es = generate_environment_sample(&rng, dim); + p_rng = random_init(vi, 0u); + let es = generate_environment_sample(&p_rng, dim); let extent = textureDimensions(env_weights, 0); let relative = (vec2(es.pixel) + vec2(0.5)) / vec2(extent); return vec4(relative.x - 1.0, 1.0 - relative.y, 0.0, 1.0); diff --git a/examples/scene/main.rs b/examples/scene/main.rs index f09de24a..c58c6a48 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -159,7 +159,6 @@ struct Example { last_render_time: time::Instant, render_times: VecDeque, ray_config: blade_render::RayConfig, - denoiser_enabled: bool, denoiser_config: blade_render::DenoiserConfig, post_proc_config: blade_render::PostProcConfig, debug_blit: Option, @@ -259,16 +258,19 @@ impl Example { render_times: VecDeque::with_capacity(FRAME_TIME_HISTORY), ray_config: blade_render::RayConfig { num_environment_samples: 1, - environment_importance_sampling: false, + environment_importance_sampling: true, temporal_tap: true, - temporal_history: 10, + temporal_confidence: 10.0, spatial_taps: 1, - spatial_tap_history: 5, - spatial_radius: 10, + spatial_confidence: 5.0, + spatial_min_distance: 2, + group_mixer: 10, t_start: 0.1, + pairwise_mis: true, + defensive_mis: 0.0, }, - denoiser_enabled: true, denoiser_config: blade_render::DenoiserConfig { + enabled: true, num_passes: 3, temporal_weight: 0.1, }, @@ -458,6 +460,7 @@ impl Example { // even while it's still being loaded. let do_render = self.scene_load_task.is_none() || (RENDER_WHILE_LOADING && self.scene_revision != 0); + let mut frame_key = blade_render::FrameKey::default(); if do_render { self.renderer.prepare( command_encoder, @@ -474,11 +477,12 @@ impl Example { //TODO: figure out why the main RT pipeline // causes a GPU crash when there are no objects if !self.objects.is_empty() { - self.renderer - .ray_trace(command_encoder, self.debug, self.ray_config); - if self.denoiser_enabled { - self.renderer.denoise(command_encoder, self.denoiser_config); - } + frame_key = self.renderer.ray_trace( + command_encoder, + self.debug, + self.ray_config, + self.denoiser_config, + ); } } @@ -508,6 +512,7 @@ impl Example { }; self.renderer.post_proc( &mut pass, + frame_key, self.debug, self.post_proc_config, &[], @@ -582,6 +587,8 @@ impl Example { return; } + ui.checkbox(&mut self.track_hot_reloads, "Hot reloading"); + let mut selection = blade_render::SelectionInfo::default(); if self.debug.mouse_pos.is_some() { selection = self.renderer.read_debug_selection_info(); @@ -669,7 +676,6 @@ impl Example { egui::CollapsingHeader::new("Denoise") .default_open(false) .show(ui, |ui| { - ui.checkbox(&mut self.denoiser_enabled, "Enable"); self.denoiser_config.populate_hud(ui); }); @@ -945,6 +951,9 @@ fn main() { } last_mouse_pos = [position.x as i32, position.y as i32]; } + winit::event::WindowEvent::MouseWheel { delta, .. } => { + example.camera.on_wheel(delta); + } winit::event::WindowEvent::HoveredFile(_) => { example.is_file_hovered = true; example diff --git a/src/lib.rs b/src/lib.rs index 5c8320b6..72e0a2c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -378,7 +378,6 @@ pub struct Engine { debug: blade_render::DebugConfig, pub frame_config: blade_render::FrameConfig, pub ray_config: blade_render::RayConfig, - pub denoiser_enabled: bool, pub denoiser_config: blade_render::DenoiserConfig, pub post_proc_config: blade_render::PostProcConfig, track_hot_reloads: bool, @@ -484,16 +483,19 @@ impl Engine { }, ray_config: blade_render::RayConfig { num_environment_samples: 1, - environment_importance_sampling: false, + environment_importance_sampling: true, temporal_tap: true, - temporal_history: 10, + temporal_confidence: 10.0, spatial_taps: 1, - spatial_tap_history: 5, - spatial_radius: 10, + spatial_confidence: 10.0, + spatial_min_distance: 2, + group_mixer: 10, t_start: 0.01, + pairwise_mis: true, + defensive_mis: 0.1, }, - denoiser_enabled: true, denoiser_config: blade_render::DenoiserConfig { + enabled: true, num_passes: 4, temporal_weight: 0.1, }, @@ -572,6 +574,7 @@ impl Engine { // We should be able to update TLAS and render content // even while it's still being loaded. + let mut frame_key = blade_render::FrameKey::default(); if self.load_tasks.is_empty() { self.render_objects.clear(); for (_, object) in self.objects.iter_mut() { @@ -627,11 +630,12 @@ impl Engine { self.frame_config.reset_reservoirs = false; if !self.render_objects.is_empty() { - self.renderer - .ray_trace(command_encoder, self.debug, self.ray_config); - if self.denoiser_enabled { - self.renderer.denoise(command_encoder, self.denoiser_config); - } + frame_key = self.renderer.ray_trace( + command_encoder, + self.debug, + self.ray_config, + self.denoiser_config, + ); } } @@ -701,6 +705,7 @@ impl Engine { if self.load_tasks.is_empty() { self.renderer.post_proc( &mut pass, + frame_key, self.debug, self.post_proc_config, &debug_lines, @@ -734,8 +739,7 @@ impl Engine { .default_open(false) .show(ui, |ui| { self.ray_config.populate_hud(ui); - self.frame_config.reset_reservoirs |= ui.button("Reset Accumulation").clicked(); - ui.checkbox(&mut self.denoiser_enabled, "Enable Denoiser"); + self.frame_config.populate_hud(ui); self.denoiser_config.populate_hud(ui); self.post_proc_config.populate_hud(ui); });