From a7eea0d329e85c66a5ace7622b865d08d70269df Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 18 Aug 2024 22:48:11 -0700 Subject: [PATCH 01/26] WIP local storage for reservoirs --- blade-render/code/random.inc.wgsl | 4 + blade-render/code/ray-trace.wgsl | 238 +++++++++++++++++------------- 2 files changed, 138 insertions(+), 104 deletions(-) diff --git a/blade-render/code/random.inc.wgsl b/blade-render/code/random.inc.wgsl index 3f68478c..da144dbf 100644 --- a/blade-render/code/random.inc.wgsl +++ b/blade-render/code/random.inc.wgsl @@ -56,6 +56,10 @@ fn murmur3(rng: ptr) -> u32 { return hash; } +fn random_u32(rng: ptr) -> u32 { + return murmur3(rng); +} + fn random_gen(rng: ptr) -> f32 { let v = murmur3(rng); let one = bitcast(1.0); diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index b27b1065..1a5b1736 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -7,11 +7,11 @@ #include "surface.inc.wgsl" #include "gbuf.inc.wgsl" -//TODO: use proper WGSL +//TODO: https://github.com/gfx-rs/wgpu/pull/5429 const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; const PI: f32 = 3.1415926; -const MAX_RESERVOIRS: u32 = 2u; +const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; @@ -23,6 +23,9 @@ const BASE_CANONICAL_MIS: f32 = 0.05; // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; +const GROUP_SIZE: vec2 = vec2(8, 8); +const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; + struct MainParams { frame_index: u32, num_environment_samples: u32, @@ -34,7 +37,7 @@ struct MainParams { spatial_radius: i32, t_start: f32, use_motion_vectors: u32, -}; +} var camera: CameraParams; var prev_camera: CameraParams; @@ -56,6 +59,14 @@ struct StoredReservoir { var reservoirs: array; var prev_reservoirs: array; +struct PixelCache { + surface: Surface, + reservoir: StoredReservoir, + //Note: we could store direction XY in local camera space instead + world_pos: vec3, +} +var pixel_cache: array; + struct LightSample { radiance: vec3, pdf: f32, @@ -333,14 +344,77 @@ fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors { return hf; } +struct ResampleBase { + surface: Surface, + canonical: LiveReservoir, + world_pos: vec3, + accepted_count: f32, +} +struct ResampleState { + reservoir: LiveReservoir, + mis_canonical: f32, + color_and_weight: vec4, +} + +/*fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { + var live: LiveReservoir; + let neighbor = other.reservoir; + if (PAIRWISE_MIS) { + let debug_len = select(0.0, other.surface.depth * 0.2, enable_debug); + let canonical = base.canonical; + let neighbor_history = min(neighbor.confidence, f32(max_history)); + { // scoping this to hint the register allocation + let t_canonical_at_neighbor = estimate_target_score_with_occlusion( + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); + let mis_sub_canonical = balance_heuristic( + t_canonical_at_neighbor.score, canonical.selected_target_score, + neighbor_history * base.accepted_count, canonical.history); + (*state).mis_canonical += 1.0 - mis_sub_canonical.weight; + } + + // Notes about t_neighbor_at_neighbor: + // 1. we assume lights aren't moving. Technically we should check if the + // target light has moved, and re-evaluate the occlusion. + // 2. we can use the cached target score, and there is no use of the target color + //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); + let t_neighbor_at_canonical = estimate_target_score_with_occlusion( + base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); + let mis_neighbor = balance_heuristic( + neighbor.target_score, t_neighbor_at_canonical.score, + neighbor_history * base.accepted_count, canonical.history); + + live.history = neighbor_history; + live.selected_light_index = neighbor.light_index; + live.selected_uv = neighbor.light_uv; + live.selected_target_score = t_neighbor_at_canonical.score; + live.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; + //Note: should be needed according to the paper + // live.history *= min(mis_neighbor.history, mis_sub_canonical.history); + live.radiance = t_neighbor_at_canonical.color; + } else { + live = unpack_reservoir(neighbor, max_history); + live.radiance = evaluate_reflected_light(base.surface, live.selected_light_index, live.selected_uv); + } + + if (DECOUPLED_SHADING) { + (*state).color_and_weight += live.weight_sum * vec4(neighbor.contribution_weight * live.radiance, 1.0); + } + if (live.weight_sum <= 0.0) { + bump_reservoir(&(*state).reservoir, live.history); + } else { + merge_reservoir(&(*state).reservoir, live, random_gen(rng)); + } +}*/ + struct RestirOutput { radiance: vec3, } -fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, enable_debug: bool) -> RestirOutput { +fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, local_index: u32, enable_debug: bool) -> RestirOutput { if (debug.view_mode == DebugMode_Depth) { textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } + pixel_cache[local_index] = PixelCache(); let ray_dir = get_ray_direction(camera, pixel); let pixel_index = get_reservoir_index(pixel, camera); if (surface.depth == 0.0) { @@ -356,6 +430,7 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(normal, 0.0)); } + // 1: build the canonical sample var canonical = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; @@ -376,132 +451,87 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(get_prev_pixel(pixel, position)); - - // First, gather the list of reservoirs to merge with - var accepted_reservoir_indices = array(); var accepted_count = 0u; - var temporal_index = ~0u; - for (var tap = 0u; tap <= parameters.spatial_taps; tap += 1u) { - var other_pixel = prev_pixel; - if (tap != 0u) { - let r0 = max(prev_pixel - vec2(parameters.spatial_radius), vec2(0)); - let r1 = min(prev_pixel + vec2(parameters.spatial_radius + 1), vec2(prev_camera.target_size)); - other_pixel = vec2(mix(vec2(r0), vec2(r1), vec2(random_gen(rng), random_gen(rng)))); - } else if (parameters.temporal_tap == 0u) - { - continue; - } - - let other_index = get_reservoir_index(other_pixel, prev_camera); - if (other_index < 0) { - continue; - } - if (prev_reservoirs[other_index].confidence == 0.0) { - continue; - } - - let other_surface = read_prev_surface(other_pixel); - let compatibility = compare_surfaces(surface, other_surface); - if (compatibility < 0.1) { + var accepted_local_indices = array(); + + // 2: read the temporal sample. + let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); + if (prev_reservoir_index >= 0) { + let prev_reservoir = prev_reservoirs[prev_reservoir_index]; + let prev_surface = read_prev_surface(prev_pixel); + let prev_dir = get_ray_direction(prev_camera, prev_pixel); + let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; + pixel_cache[local_index] = PixelCache(prev_surface, prev_reservoir, prev_world_pos); + + if (parameters.temporal_tap != 0u && prev_reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample - continue; - } - - if (tap == 0u) { - temporal_index = accepted_count; - } - accepted_reservoir_indices[accepted_count] = other_index; - if (accepted_count < MAX_RESERVOIRS) { - accepted_count += 1u; + if (compare_surfaces(surface, prev_surface) > 0.1) { + accepted_local_indices[0] = local_index; + accepted_count = 1u; + } } } - // Next, evaluate the MIS of each of the samples versus the canonical one. - var reservoir = LiveReservoir(); - var shaded_color = vec3(0.0); - var mis_canonical = BASE_CANONICAL_MIS; - var color_and_weight = vec4(0.0); - for (var rid = 0u; rid < accepted_count; rid += 1u) { - let neighbor_index = accepted_reservoir_indices[rid]; - let neighbor = prev_reservoirs[neighbor_index]; - - let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, rid == temporal_index); - var other: LiveReservoir; - if (PAIRWISE_MIS) { - let neighbor_pixel = get_pixel_from_reservoir_index(neighbor_index, prev_camera); - let neighbor_history = min(neighbor.confidence, f32(max_history)); - { // scoping this to hint the register allocation - let neighbor_surface = read_prev_surface(neighbor_pixel); - let neighbor_dir = get_ray_direction(prev_camera, neighbor_pixel); - let neighbor_position = prev_camera.position + neighbor_surface.depth * neighbor_dir; - - let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - neighbor_surface, neighbor_position, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); - let mis_sub_canonical = balance_heuristic( - t_canonical_at_neighbor.score, canonical.selected_target_score, - neighbor_history * f32(accepted_count), canonical.history); - mis_canonical += 1.0 - mis_sub_canonical.weight; - } + // 3: sync with the workgroup to ensure all reservoirs are available. + workgroupBarrier(); - // Notes about t_neighbor_at_neighbor: - // 1. we assume lights aren't moving. Technically we should check if the - // target light has moved, and re-evaluate the occlusion. - // 2. we can use the cached target score, and there is no use of the target color - //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); - let t_neighbor_at_canonical = estimate_target_score_with_occlusion( - surface, position, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let mis_neighbor = balance_heuristic( - neighbor.target_score, t_neighbor_at_canonical.score, - neighbor_history * f32(accepted_count), canonical.history); - - other.history = neighbor_history; - other.selected_light_index = neighbor.light_index; - other.selected_uv = neighbor.light_uv; - other.selected_target_score = t_neighbor_at_canonical.score; - other.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; - //Note: should be needed according to the paper - // other.history *= min(mis_neighbor.history, mis_sub_canonical.history); - other.radiance = t_neighbor_at_canonical.color; - } else { - other = unpack_reservoir(neighbor, max_history); - other.radiance = evaluate_reflected_light(surface, other.selected_light_index, other.selected_uv); + // 4: gather the list of neighbors (withing the workgroup) to resample. + let max_accepted = min(MAX_RESAMPLE, accepted_count + parameters.spatial_taps); + let num_candidates = parameters.spatial_taps * 2u; + for (var candidates = num_candidates; candidates > 0u && accepted_count < max_accepted; candidates -= 1u) { + let other_cache_index = random_u32(rng) % (GROUP_SIZE.x * GROUP_SIZE.y); + let other = pixel_cache[other_cache_index]; + if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { + // if the surfaces are too different, there is no trust in this sample + //if (compare_surfaces(surface, other.surface) > 0.1) { + //accepted_local_indices[accepted_count] = other_cache_index; + accepted_count += 1u; + //} } + } - if (DECOUPLED_SHADING) { - color_and_weight += other.weight_sum * vec4(neighbor.contribution_weight * other.radiance, 1.0); - } - if (other.weight_sum <= 0.0) { - bump_reservoir(&reservoir, other.history); - } else { - merge_reservoir(&reservoir, other, random_gen(rng)); - } + // 5: evaluate the MIS of each of the samples versus the canonical one. + let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + var state = ResampleState(); + state.mis_canonical = BASE_CANONICAL_MIS; + for (var lid = 0u; lid < accepted_count; lid += 1u) { + let other_local_index = accepted_local_indices[lid]; + //let other = pixel_cache[other_local_index]; + //let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, other_local_index == local_index); + //resample(base, &state, other, max_history, rng, enable_debug); } - // Finally, merge in the canonical sample + // 6: merge in the canonical sample. if (PAIRWISE_MIS) { - canonical.weight_sum *= mis_canonical / canonical.history; + canonical.weight_sum *= state.mis_canonical / canonical.history; } if (DECOUPLED_SHADING) { //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + let cw = canonical.weight_sum / max(canonical.selected_target_score * state.mis_canonical, 0.1); + state.color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); } + //TODO: https://github.com/gfx-rs/wgpu/issues/6131 + var reservoir = state.reservoir; merge_reservoir(&reservoir, canonical, random_gen(rng)); + // 7: finish let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS); let stored = pack_reservoir_detail(reservoir, effective_history); reservoirs[pixel_index] = stored; var ro = RestirOutput(); if (DECOUPLED_SHADING) { - ro.radiance = color_and_weight.xyz / max(color_and_weight.w, 0.001); + ro.radiance = state.color_and_weight.xyz / max(state.color_and_weight.w, 0.001); } else { - ro.radiance = stored.contribution_weight * reservoir.radiance; + ro.radiance = stored.contribution_weight * state.reservoir.radiance; } return ro; } -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { +@compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_index) local_index: u32, +) { if (any(global_id.xy >= camera.target_size)) { return; } @@ -512,7 +542,7 @@ fn main(@builtin(global_invocation_id) global_id: vec3) { let surface = read_surface(vec2(global_id.xy)); let enable_debug = all(global_id.xy == debug.mouse_pos); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(global_id.xy), &rng, enable_restir_debug); + let ro = compute_restir(surface, vec2(global_id.xy), &rng, local_index, enable_restir_debug); let color = ro.radiance; if (enable_debug) { debug_buf.variance.color_sum += color; From aa31733d255f717856b54a3e7ffc32ab28cd0955 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 19 Aug 2024 23:57:26 -0700 Subject: [PATCH 02/26] Work around workgroup memory addressing GPU hang --- blade-render/code/ray-trace.wgsl | 22 ++++++++++++---------- blade-render/src/render/mod.rs | 10 ++++++++-- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 1a5b1736..56ebcd90 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -23,7 +23,8 @@ const BASE_CANONICAL_MIS: f32 = 0.05; // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; -const GROUP_SIZE: vec2 = vec2(8, 8); +//TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 +const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; struct MainParams { @@ -356,7 +357,7 @@ struct ResampleState { color_and_weight: vec4, } -/*fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { +fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { var live: LiveReservoir; let neighbor = other.reservoir; if (PAIRWISE_MIS) { @@ -399,12 +400,13 @@ struct ResampleState { if (DECOUPLED_SHADING) { (*state).color_and_weight += live.weight_sum * vec4(neighbor.contribution_weight * live.radiance, 1.0); } + /* if (live.weight_sum <= 0.0) { bump_reservoir(&(*state).reservoir, live.history); } else { merge_reservoir(&(*state).reservoir, live, random_gen(rng)); - } -}*/ + }*/ +} struct RestirOutput { radiance: vec3, @@ -483,10 +485,10 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0.0) { // if the surfaces are too different, there is no trust in this sample - //if (compare_surfaces(surface, other.surface) > 0.1) { - //accepted_local_indices[accepted_count] = other_cache_index; + if (compare_surfaces(surface, other.surface) > 0.1) { + accepted_local_indices[accepted_count] = other_cache_index; accepted_count += 1u; - //} + } } } @@ -496,9 +498,9 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(); shader.check_struct_size::(); let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { + let pipeline = gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { name: "ray-trace", data_layouts: &[&layout], compute: shader.at("main"), - }) + }); + + let pl_struct_size = shader.get_struct_size("PixelCache"); + let group_size = pipeline.get_workgroup_size(); + let wg_required = pl_struct_size * group_size[0] * group_size[1]; + log::info!("Using {} workgroup memory for RT", wg_required); + pipeline } fn create_temporal_accum( From 32fd30cc172393e356f85f3be2641a22ad04e9a7 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 21 Aug 2024 21:47:48 -0700 Subject: [PATCH 03/26] Hook up the resampling --- blade-render/code/ray-trace.wgsl | 69 +++++++++++++++++--------------- examples/scene/main.rs | 2 + 2 files changed, 39 insertions(+), 32 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 56ebcd90..fbf65a58 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -351,15 +351,16 @@ struct ResampleBase { world_pos: vec3, accepted_count: f32, } -struct ResampleState { - reservoir: LiveReservoir, +struct ResampleResult { + selected: bool, mis_canonical: f32, color_and_weight: vec4, } -fn resample(base: ResampleBase, state: ptr, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) { - var live: LiveReservoir; +fn resample(dst: ptr, base: ResampleBase, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) -> ResampleResult { + var src: LiveReservoir; let neighbor = other.reservoir; + var rr = ResampleResult(); if (PAIRWISE_MIS) { let debug_len = select(0.0, other.surface.depth * 0.2, enable_debug); let canonical = base.canonical; @@ -370,7 +371,7 @@ fn resample(base: ResampleBase, state: ptr, other: Pixe let mis_sub_canonical = balance_heuristic( t_canonical_at_neighbor.score, canonical.selected_target_score, neighbor_history * base.accepted_count, canonical.history); - (*state).mis_canonical += 1.0 - mis_sub_canonical.weight; + rr.mis_canonical = 1.0 - mis_sub_canonical.weight; } // Notes about t_neighbor_at_neighbor: @@ -384,28 +385,29 @@ fn resample(base: ResampleBase, state: ptr, other: Pixe neighbor.target_score, t_neighbor_at_canonical.score, neighbor_history * base.accepted_count, canonical.history); - live.history = neighbor_history; - live.selected_light_index = neighbor.light_index; - live.selected_uv = neighbor.light_uv; - live.selected_target_score = t_neighbor_at_canonical.score; - live.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; + src.history = neighbor_history; + src.selected_light_index = neighbor.light_index; + src.selected_uv = neighbor.light_uv; + src.selected_target_score = t_neighbor_at_canonical.score; + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; //Note: should be needed according to the paper - // live.history *= min(mis_neighbor.history, mis_sub_canonical.history); - live.radiance = t_neighbor_at_canonical.color; + // src.history *= min(mis_neighbor.history, mis_sub_canonical.history); + src.radiance = t_neighbor_at_canonical.color; } else { - live = unpack_reservoir(neighbor, max_history); - live.radiance = evaluate_reflected_light(base.surface, live.selected_light_index, live.selected_uv); + src = unpack_reservoir(neighbor, max_history); + src.radiance = evaluate_reflected_light(base.surface, src.selected_light_index, src.selected_uv); } if (DECOUPLED_SHADING) { - (*state).color_and_weight += live.weight_sum * vec4(neighbor.contribution_weight * live.radiance, 1.0); + rr.color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } - /* - if (live.weight_sum <= 0.0) { - bump_reservoir(&(*state).reservoir, live.history); + if (src.weight_sum <= 0.0) { + bump_reservoir(dst, src.history); } else { - merge_reservoir(&(*state).reservoir, live, random_gen(rng)); - }*/ + merge_reservoir(dst, src, random_gen(rng)); + rr.selected = true; + } + return rr; } struct RestirOutput { @@ -477,11 +479,11 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { - let other_cache_index = random_u32(rng) % (GROUP_SIZE.x * GROUP_SIZE.y); + let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; let other = pixel_cache[other_cache_index]; if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample @@ -494,26 +496,29 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(0.0); for (var lid = 0u; lid < accepted_count; lid += 1u) { let other_local_index = accepted_local_indices[lid]; let other = pixel_cache[other_local_index]; let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, other_local_index == local_index); - resample(base, &state, other, max_history, rng, enable_debug); + let rr = resample(&reservoir, base, other, max_history, rng, enable_debug); + mis_canonical += rr.mis_canonical; + if (DECOUPLED_SHADING) { + color_and_weight += rr.color_and_weight; + } } // 6: merge in the canonical sample. if (PAIRWISE_MIS) { - canonical.weight_sum *= state.mis_canonical / canonical.history; + canonical.weight_sum *= mis_canonical / canonical.history; } if (DECOUPLED_SHADING) { //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * state.mis_canonical, 0.1); - state.color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); + color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); } - //TODO: https://github.com/gfx-rs/wgpu/issues/6131 - var reservoir = state.reservoir; merge_reservoir(&reservoir, canonical, random_gen(rng)); // 7: finish @@ -522,9 +527,9 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr Date: Wed, 21 Aug 2024 22:17:49 -0700 Subject: [PATCH 04/26] Reject spatial samples that are too close --- blade-render/code/ray-trace.wgsl | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index fbf65a58..3413ea63 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -12,6 +12,7 @@ const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; +const MIN_RESAMPLE_DISTANCE: i32 = 3; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; @@ -229,6 +230,10 @@ fn read_prev_surface(pixel: vec2) -> Surface { return surface; } +fn index_to_coord(index: u32) -> vec2 { + return vec2(vec2(index % GROUP_SIZE.x, index / GROUP_SIZE.x)); +} + fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { let lambert_brdf = 1.0 / PI; let lambert_term = qrot(qinv(surface.basis), dir).z; @@ -418,7 +423,6 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr(surface.depth / camera.depth)); } - pixel_cache[local_index] = PixelCache(); let ray_dir = get_ray_direction(camera, pixel); let pixel_index = get_reservoir_index(pixel, camera); if (surface.depth == 0.0) { @@ -474,16 +478,24 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; + let diff = index_to_coord(other_cache_index) - local_xy; + if (dot(diff, diff) < MIN_RESAMPLE_DISTANCE * MIN_RESAMPLE_DISTANCE) { + continue; + } let other = pixel_cache[other_cache_index]; if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample From 6eb0bbe8b4965c6d1cd3a9917a2aacef450120ca Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 21 Aug 2024 23:52:20 -0700 Subject: [PATCH 05/26] Jitter the ray compute grid --- Cargo.toml | 4 ++-- blade-helpers/src/hud.rs | 5 +++-- blade-render/Cargo.toml | 1 + blade-render/code/ray-trace.wgsl | 26 ++++++++++++------------ blade-render/src/render/mod.rs | 35 ++++++++++++++++++++++++++++---- examples/scene/main.rs | 3 ++- src/lib.rs | 3 ++- 7 files changed, 54 insertions(+), 23 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 03c104ca..681d4619 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,6 +22,7 @@ gltf = { version = "1.1", default-features = false } log = "0.4" mint = "0.5" naga = { version = "22", features = ["wgsl-in"] } +nanorand = { version = "0.7", default-features = false } profiling = "1" slab = "0.4" strum = { version = "0.25", features = ["derive"] } @@ -79,7 +80,7 @@ glam = { workspace = true } log = { workspace = true } mint = { workspace = true, features = ["serde"] } naga = { workspace = true } -nanorand = { version = "0.7", default-features = false, features = ["wyrand"] } +nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } ron = "0.8" serde = { version = "1", features = ["serde_derive"] } @@ -95,7 +96,6 @@ egui-winit = "0.28" console_error_panic_hook = "0.1.7" console_log = "1" web-sys = { workspace = true, features = ["Window"] } -getrandom = { version = "0.2", features = ["js"] } [target.'cfg(any(target_os = "windows", target_os = "linux"))'.dev-dependencies] renderdoc = "0.12" diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index a135a337..e8d0fea8 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -22,9 +22,10 @@ impl ExposeHud for blade_render::RayConfig { egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) .text("Spatial tap history"), ); + ui.checkbox(&mut self.spatial_jitter, "Spatial jittering"); ui.add( - egui::widgets::Slider::new(&mut self.spatial_radius, 1..=50) - .text("Spatial radius (px)"), + egui::widgets::Slider::new(&mut self.spatial_min_distance, 1..=10) + .text("Spatial minimum distance (px)"), ); ui.add( egui::widgets::Slider::new(&mut self.t_start, 0.001..=0.5) diff --git a/blade-render/Cargo.toml b/blade-render/Cargo.toml index 85a67a8f..002166de 100644 --- a/blade-render/Cargo.toml +++ b/blade-render/Cargo.toml @@ -39,6 +39,7 @@ glam = { workspace = true } log = { workspace = true } mikktspace = { package = "bevy_mikktspace", version = "0.12", optional = true } mint = { workspace = true } +nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } slab = { workspace = true, optional = true } strum = { workspace = true } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 3413ea63..83acd7ab 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -12,7 +12,6 @@ const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; -const MIN_RESAMPLE_DISTANCE: i32 = 3; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; @@ -36,9 +35,10 @@ struct MainParams { temporal_history: u32, spatial_taps: u32, spatial_tap_history: u32, - spatial_radius: i32, + spatial_min_distance: i32, t_start: f32, use_motion_vectors: u32, + grid_offset: vec2, } var camera: CameraParams; @@ -231,7 +231,7 @@ fn read_prev_surface(pixel: vec2) -> Surface { } fn index_to_coord(index: u32) -> vec2 { - return vec2(vec2(index % GROUP_SIZE.x, index / GROUP_SIZE.x)); + return vec2((vec2(index, index / GROUP_SIZE.x) + GROUP_SIZE - parameters.grid_offset) % GROUP_SIZE); } fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { @@ -478,8 +478,6 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; let diff = index_to_coord(other_cache_index) - local_xy; - if (dot(diff, diff) < MIN_RESAMPLE_DISTANCE * MIN_RESAMPLE_DISTANCE) { + if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; } let other = pixel_cache[other_cache_index]; - if (other_cache_index != local_index && other.reservoir.confidence > 0.0) { + if (other.reservoir.confidence > 0.0) { // if the surfaces are too different, there is no trust in this sample if (compare_surfaces(surface, other.surface) > 0.1) { accepted_local_indices[accepted_count] = other_cache_index; @@ -551,22 +549,24 @@ fn main( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_index) local_index: u32, ) { - if (any(global_id.xy >= camera.target_size)) { + pixel_cache[local_index].reservoir.confidence = 0.0; + let pixel_coord = global_id.xy - parameters.grid_offset; + if (any(pixel_coord >= camera.target_size)) { return; } - let global_index = global_id.y * camera.target_size.x + global_id.x; + let global_index = pixel_coord.y * camera.target_size.x + pixel_coord.x; var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(vec2(global_id.xy)); - let enable_debug = all(global_id.xy == debug.mouse_pos); + let surface = read_surface(vec2(pixel_coord)); + let enable_debug = all(pixel_coord == debug.mouse_pos); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(global_id.xy), &rng, local_index, enable_restir_debug); + let ro = compute_restir(surface, vec2(pixel_coord), &rng, local_index, enable_restir_debug); let color = ro.radiance; if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; debug_buf.variance.count += 1u; } - textureStore(out_diffuse, global_id.xy, vec4(color, 1.0)); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); } diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index dc89c622..22347104 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -93,7 +93,11 @@ pub struct RayConfig { pub temporal_history: u32, pub spatial_taps: u32, pub spatial_tap_history: u32, - pub spatial_radius: u32, + /// Minimal distance to a spatially reused pixel (in the current frame). + pub spatial_min_distance: u32, + /// Enable jittering of the compute grid, to allow spatial samples to mix + /// outside of the original workgroup pixel bounds. + pub spatial_jitter: bool, pub t_start: f32, } @@ -322,6 +326,7 @@ pub struct Renderer { textures: blade_graphics::TextureArray, samplers: Samplers, reservoir_size: u32, + grid_jitter: [u32; 2], debug: DebugRender, surface_size: blade_graphics::Extent, surface_info: blade_graphics::SurfaceInfo, @@ -331,6 +336,7 @@ pub struct Renderer { // This way we can embed user info into the allocator. texture_resource_lookup: HashMap>, + random: nanorand::WyRand, } #[repr(C)] @@ -363,9 +369,10 @@ struct MainParams { temporal_history: u32, spatial_taps: u32, spatial_tap_history: u32, - spatial_radius: u32, + spatial_min_distance: u32, t_start: f32, use_motion_vectors: u32, + grid_offset: [u32; 2], } #[derive(blade_macros::ShaderData)] @@ -720,12 +727,14 @@ impl Renderer { textures: blade_graphics::TextureArray::new(), samplers, reservoir_size: sp.reservoir_size, + grid_jitter: [0; 2], debug, surface_size: config.surface_size, surface_info: config.surface_info, frame_index: 0, frame_scene_built: 0, texture_resource_lookup: HashMap::default(), + random: nanorand::WyRand::new(), } } @@ -1109,6 +1118,12 @@ impl Renderer { } self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); self.post_proc_input_index = self.frame_index % 2; + + self.grid_jitter = { + let wg_size = self.main_pipeline.get_workgroup_size(); + let random = nanorand::Rng::generate::(&mut self.random) as u32; + [random % wg_size[0], (random / wg_size[0]) % wg_size[1]] + }; } /// Ray trace the scene. @@ -1152,8 +1167,19 @@ impl Renderer { } if let mut pass = command_encoder.compute() { + let grid_offset = if ray_config.spatial_jitter { + self.grid_jitter + } else { + [0; 2] + }; + let groups = { + let mut grid_size = self.surface_size; + grid_size.width += grid_offset[0]; + grid_size.height += grid_offset[1]; + self.main_pipeline.get_dispatch_for(grid_size) + }; + let mut pc = pass.with(&self.main_pipeline); - let groups = self.main_pipeline.get_dispatch_for(self.surface_size); pc.bind( 0, &MainData { @@ -1169,9 +1195,10 @@ impl Renderer { temporal_history: ray_config.temporal_history, spatial_taps: ray_config.spatial_taps, spatial_tap_history: ray_config.spatial_tap_history, - spatial_radius: ray_config.spatial_radius, + spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, + grid_offset, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index diff --git a/examples/scene/main.rs b/examples/scene/main.rs index b31545c0..632e9f95 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -264,7 +264,8 @@ impl Example { temporal_history: 10, spatial_taps: 1, spatial_tap_history: 5, - spatial_radius: 10, + spatial_min_distance: 4, + spatial_jitter: true, t_start: 0.1, }, denoiser_enabled: true, diff --git a/src/lib.rs b/src/lib.rs index 5c8320b6..4c471825 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -489,7 +489,8 @@ impl Engine { temporal_history: 10, spatial_taps: 1, spatial_tap_history: 5, - spatial_radius: 10, + spatial_min_distance: 4, + spatial_jitter: true, t_start: 0.01, }, denoiser_enabled: true, From 41e9b6fa52b24c2d6356cf08a79f317be7c42c23 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 22 Aug 2024 22:58:12 -0700 Subject: [PATCH 06/26] Scaling jittering --- blade-render/code/ray-trace.wgsl | 41 +++++++++++++------------ blade-render/src/render/mod.rs | 52 ++++++++++++++++++++++---------- 2 files changed, 58 insertions(+), 35 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 83acd7ab..db06b5e4 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -39,6 +39,7 @@ struct MainParams { t_start: f32, use_motion_vectors: u32, grid_offset: vec2, + grid_scale: vec2, } var camera: CameraParams; @@ -96,12 +97,6 @@ fn get_reservoir_index(pixel: vec2, camera: CameraParams) -> i32 { } } -fn get_pixel_from_reservoir_index(index: i32, camera: CameraParams) -> vec2 { - let y = index / i32(camera.target_size.x); - let x = index - y * i32(camera.target_size.x); - return vec2(x, y); -} - fn bump_reservoir(r: ptr, history: f32) { (*r).history += history; } @@ -156,7 +151,8 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { var t_depth: texture_2d; var t_prev_depth: texture_2d; var t_basis: texture_2d; -var t_prev_basis: texture_2d; +var t_prev_basis: texture_2d +; var t_flat_normal: texture_2d; var t_prev_flat_normal: texture_2d; var t_motion: texture_2d; @@ -230,8 +226,13 @@ fn read_prev_surface(pixel: vec2) -> Surface { return surface; } -fn index_to_coord(index: u32) -> vec2 { - return vec2((vec2(index, index / GROUP_SIZE.x) + GROUP_SIZE - parameters.grid_offset) % GROUP_SIZE); +fn thread_index_to_coord(thread_index: u32, group_id: vec3) -> vec2 { + let cluster_id = group_id.xy / parameters.grid_scale; + let cluster_offset = group_id.xy - cluster_id * parameters.grid_scale; + let local_id = vec2(thread_index % GROUP_SIZE.x, thread_index / GROUP_SIZE.x); + let global_id = (cluster_id * GROUP_SIZE + local_id) * parameters.grid_scale + cluster_offset; + //TODO: also use the offset + return vec2(global_id); } fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { @@ -419,7 +420,10 @@ struct RestirOutput { radiance: vec3, } -fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, local_index: u32, enable_debug: bool) -> RestirOutput { +fn compute_restir( + surface: Surface, pixel: vec2, rng: ptr, + local_index: u32, group_id: vec3, enable_debug: bool, +) -> RestirOutput { if (debug.view_mode == DebugMode_Depth) { textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } @@ -487,10 +491,9 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr 0u && accepted_count < max_accepted; candidates -= 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; - let diff = index_to_coord(other_cache_index) - local_xy; + let diff = thread_index_to_coord(other_cache_index, group_id) - pixel; if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; } @@ -546,22 +549,22 @@ fn compute_restir(surface: Surface, pixel: vec2, rng: ptr, + @builtin(workgroup_id) group_id: vec3, @builtin(local_invocation_index) local_index: u32, ) { pixel_cache[local_index].reservoir.confidence = 0.0; - let pixel_coord = global_id.xy - parameters.grid_offset; - if (any(pixel_coord >= camera.target_size)) { + let pixel_coord = thread_index_to_coord(local_index, group_id); + if (any(vec2(pixel_coord) >= camera.target_size)) { return; } - let global_index = pixel_coord.y * camera.target_size.x + pixel_coord.x; + let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(vec2(pixel_coord)); - let enable_debug = all(pixel_coord == debug.mouse_pos); + let surface = read_surface(pixel_coord); + let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, vec2(pixel_coord), &rng, local_index, enable_restir_debug); + let ro = compute_restir(surface, pixel_coord, &rng, local_index, group_id, enable_restir_debug); let color = ro.radiance; if (enable_debug) { debug_buf.variance.color_sum += color; diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 22347104..d5996c87 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -11,6 +11,7 @@ pub use env_map::EnvironmentMap; use std::{collections::HashMap, mem, num::NonZeroU32, path::Path, ptr}; const MAX_RESOURCES: u32 = 8192; +const GRID_SCALES: u32 = 4; const RADIANCE_FORMAT: blade_graphics::TextureFormat = blade_graphics::TextureFormat::Rgba16Float; fn mat4_transform(t: &blade_graphics::Transform) -> glam::Mat4 { @@ -299,6 +300,20 @@ struct Blur { atrous_pipeline: blade_graphics::ComputePipeline, } +#[derive(Clone, Copy)] +struct GridMapping { + offset: mint::Vector2, + scale: mint::Vector2, +} +impl Default for GridMapping { + fn default() -> Self { + Self { + offset: [0; 2].into(), + scale: [1; 2].into(), + } + } +} + /// Blade Renderer is a comprehensive rendering solution for /// end user applications. /// @@ -326,7 +341,7 @@ pub struct Renderer { textures: blade_graphics::TextureArray, samplers: Samplers, reservoir_size: u32, - grid_jitter: [u32; 2], + grid_mapping: GridMapping, debug: DebugRender, surface_size: blade_graphics::Extent, surface_info: blade_graphics::SurfaceInfo, @@ -373,6 +388,7 @@ struct MainParams { t_start: f32, use_motion_vectors: u32, grid_offset: [u32; 2], + grid_scale: [u32; 2], } #[derive(blade_macros::ShaderData)] @@ -727,7 +743,7 @@ impl Renderer { textures: blade_graphics::TextureArray::new(), samplers, reservoir_size: sp.reservoir_size, - grid_jitter: [0; 2], + grid_mapping: GridMapping::default(), debug, surface_size: config.surface_size, surface_info: config.surface_info, @@ -1119,10 +1135,19 @@ impl Renderer { self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); self.post_proc_input_index = self.frame_index % 2; - self.grid_jitter = { + self.grid_mapping = { let wg_size = self.main_pipeline.get_workgroup_size(); - let random = nanorand::Rng::generate::(&mut self.random) as u32; - [random % wg_size[0], (random / wg_size[0]) % wg_size[1]] + let random = nanorand::Rng::generate::(&mut self.random); + let r_offset = random as u32; + let r_scale = (random >> 32) as u32; + GridMapping { + offset: [r_offset % wg_size[0], (r_offset / wg_size[0]) % wg_size[1]].into(), + scale: [ + 1 << (r_scale % GRID_SCALES), + 1 << ((r_scale / GRID_SCALES) % GRID_SCALES), + ] + .into(), + } }; } @@ -1167,18 +1192,12 @@ impl Renderer { } if let mut pass = command_encoder.compute() { - let grid_offset = if ray_config.spatial_jitter { - self.grid_jitter + let grid_mapping = if ray_config.spatial_jitter { + self.grid_mapping } else { - [0; 2] + GridMapping::default() }; - let groups = { - let mut grid_size = self.surface_size; - grid_size.width += grid_offset[0]; - grid_size.height += grid_offset[1]; - self.main_pipeline.get_dispatch_for(grid_size) - }; - + let groups = self.main_pipeline.get_dispatch_for(self.surface_size); let mut pc = pass.with(&self.main_pipeline); pc.bind( 0, @@ -1198,7 +1217,8 @@ impl Renderer { spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - grid_offset, + grid_offset: grid_mapping.offset.into(), + grid_scale: grid_mapping.scale.into(), }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index From 8fd67a626a816cc57bbc2b53dc76f4b550dd2cd4 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 22 Aug 2024 23:33:57 -0700 Subject: [PATCH 07/26] Refine group mixing and visualization --- blade-helpers/src/hud.rs | 2 +- blade-render/Cargo.toml | 1 - blade-render/code/color.inc.wgsl | 19 ++++++++++++ blade-render/code/ray-trace.wgsl | 12 ++++++-- blade-render/src/render/mod.rs | 51 +++++--------------------------- examples/scene/main.rs | 2 +- src/lib.rs | 2 +- 7 files changed, 40 insertions(+), 49 deletions(-) create mode 100644 blade-render/code/color.inc.wgsl diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index e8d0fea8..ff4cf5a5 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -22,7 +22,7 @@ impl ExposeHud for blade_render::RayConfig { egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) .text("Spatial tap history"), ); - ui.checkbox(&mut self.spatial_jitter, "Spatial jittering"); + ui.add(egui::widgets::Slider::new(&mut self.group_mixer, 1..=10).text("Group mixer")); ui.add( egui::widgets::Slider::new(&mut self.spatial_min_distance, 1..=10) .text("Spatial minimum distance (px)"), diff --git a/blade-render/Cargo.toml b/blade-render/Cargo.toml index 002166de..85a67a8f 100644 --- a/blade-render/Cargo.toml +++ b/blade-render/Cargo.toml @@ -39,7 +39,6 @@ glam = { workspace = true } log = { workspace = true } mikktspace = { package = "bevy_mikktspace", version = "0.12", optional = true } mint = { workspace = true } -nanorand = { workspace = true, features = ["wyrand"] } profiling = { workspace = true } slab = { workspace = true, optional = true } strum = { workspace = true } diff --git a/blade-render/code/color.inc.wgsl b/blade-render/code/color.inc.wgsl new file mode 100644 index 00000000..8e84cfda --- /dev/null +++ b/blade-render/code/color.inc.wgsl @@ -0,0 +1,19 @@ +fn hsv_to_rgb(h: f32, s: f32, v: f32) -> vec3 { + let c = v * s; + let x = c * (1.0 - abs((h / 60.0) % 2.0 - 1.0)); + var q = vec3(v - c); + if (h < 60.0) { + q.r += c; q.g += x; + } else if (h < 120.0) { + q.g += c; q.r += x; + } else if (h < 180.0) { + q.g += c; q.b += x; + } else if (h < 240.0) { + q.b += c; q.g += x; + } else if (h < 300.0) { + q.b += c; q.r += x; + } else { + q.r += c; q.b += x; + } + return q; +} diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index db06b5e4..416a1acb 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -1,3 +1,4 @@ +#include "color.inc.wgsl" #include "quaternion.inc.wgsl" #include "random.inc.wgsl" #include "env-importance.inc.wgsl" @@ -26,6 +27,7 @@ const DECOUPLED_SHADING: bool = false; //TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; +const GROUP_VISUALIZE: bool = false; struct MainParams { frame_index: u32, @@ -38,7 +40,6 @@ struct MainParams { spatial_min_distance: i32, t_start: f32, use_motion_vectors: u32, - grid_offset: vec2, grid_scale: vec2, } @@ -231,7 +232,6 @@ fn thread_index_to_coord(thread_index: u32, group_id: vec3) -> vec2 { let cluster_offset = group_id.xy - cluster_id * parameters.grid_scale; let local_id = vec2(thread_index % GROUP_SIZE.x, thread_index / GROUP_SIZE.x); let global_id = (cluster_id * GROUP_SIZE + local_id) * parameters.grid_scale + cluster_offset; - //TODO: also use the offset return vec2(global_id); } @@ -557,6 +557,14 @@ fn main( if (any(vec2(pixel_coord) >= camera.target_size)) { return; } + if (GROUP_VISUALIZE) + { + var rng = random_init(group_id.y * 1000u + group_id.x, 0u); + let h = random_gen(&rng) * 360.0; + let color = hsv_to_rgb(h, 0.5, 1.0); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); + return; + } let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index d5996c87..1eec948f 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -11,7 +11,6 @@ pub use env_map::EnvironmentMap; use std::{collections::HashMap, mem, num::NonZeroU32, path::Path, ptr}; const MAX_RESOURCES: u32 = 8192; -const GRID_SCALES: u32 = 4; const RADIANCE_FORMAT: blade_graphics::TextureFormat = blade_graphics::TextureFormat::Rgba16Float; fn mat4_transform(t: &blade_graphics::Transform) -> glam::Mat4 { @@ -96,9 +95,9 @@ pub struct RayConfig { pub spatial_tap_history: u32, /// Minimal distance to a spatially reused pixel (in the current frame). pub spatial_min_distance: u32, - /// Enable jittering of the compute grid, to allow spatial samples to mix + /// Scale and mix the groups into clusters, to allow spatial samples to mix /// outside of the original workgroup pixel bounds. - pub spatial_jitter: bool, + pub group_mixer: u32, pub t_start: f32, } @@ -300,20 +299,6 @@ struct Blur { atrous_pipeline: blade_graphics::ComputePipeline, } -#[derive(Clone, Copy)] -struct GridMapping { - offset: mint::Vector2, - scale: mint::Vector2, -} -impl Default for GridMapping { - fn default() -> Self { - Self { - offset: [0; 2].into(), - scale: [1; 2].into(), - } - } -} - /// Blade Renderer is a comprehensive rendering solution for /// end user applications. /// @@ -341,7 +326,6 @@ pub struct Renderer { textures: blade_graphics::TextureArray, samplers: Samplers, reservoir_size: u32, - grid_mapping: GridMapping, debug: DebugRender, surface_size: blade_graphics::Extent, surface_info: blade_graphics::SurfaceInfo, @@ -351,7 +335,6 @@ pub struct Renderer { // This way we can embed user info into the allocator. texture_resource_lookup: HashMap>, - random: nanorand::WyRand, } #[repr(C)] @@ -387,7 +370,6 @@ struct MainParams { spatial_min_distance: u32, t_start: f32, use_motion_vectors: u32, - grid_offset: [u32; 2], grid_scale: [u32; 2], } @@ -743,14 +725,12 @@ impl Renderer { textures: blade_graphics::TextureArray::new(), samplers, reservoir_size: sp.reservoir_size, - grid_mapping: GridMapping::default(), debug, surface_size: config.surface_size, surface_info: config.surface_info, frame_index: 0, frame_scene_built: 0, texture_resource_lookup: HashMap::default(), - random: nanorand::WyRand::new(), } } @@ -1134,21 +1114,6 @@ impl Renderer { } self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); self.post_proc_input_index = self.frame_index % 2; - - self.grid_mapping = { - let wg_size = self.main_pipeline.get_workgroup_size(); - let random = nanorand::Rng::generate::(&mut self.random); - let r_offset = random as u32; - let r_scale = (random >> 32) as u32; - GridMapping { - offset: [r_offset % wg_size[0], (r_offset / wg_size[0]) % wg_size[1]].into(), - scale: [ - 1 << (r_scale % GRID_SCALES), - 1 << ((r_scale / GRID_SCALES) % GRID_SCALES), - ] - .into(), - } - }; } /// Ray trace the scene. @@ -1192,11 +1157,12 @@ impl Renderer { } if let mut pass = command_encoder.compute() { - let grid_mapping = if ray_config.spatial_jitter { - self.grid_mapping - } else { - GridMapping::default() + let grid_scale = { + let limit = ray_config.group_mixer; + let r = self.frame_index as u32 ^ 0x5A; + [r % limit + 1, (r / limit) % limit + 1] }; + let groups = self.main_pipeline.get_dispatch_for(self.surface_size); let mut pc = pass.with(&self.main_pipeline); pc.bind( @@ -1217,8 +1183,7 @@ impl Renderer { spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - grid_offset: grid_mapping.offset.into(), - grid_scale: grid_mapping.scale.into(), + grid_scale, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 632e9f95..903a30ca 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -265,7 +265,7 @@ impl Example { spatial_taps: 1, spatial_tap_history: 5, spatial_min_distance: 4, - spatial_jitter: true, + group_mixer: 10, t_start: 0.1, }, denoiser_enabled: true, diff --git a/src/lib.rs b/src/lib.rs index 4c471825..b14b8ebe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -490,7 +490,7 @@ impl Engine { spatial_taps: 1, spatial_tap_history: 5, spatial_min_distance: 4, - spatial_jitter: true, + group_mixer: 10, t_start: 0.01, }, denoiser_enabled: true, From e625b50bd8d509fa090fcd0041b2c48902d0c227 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 24 Aug 2024 00:52:38 -0700 Subject: [PATCH 08/26] True separation between temporal and spatial resampling --- blade-render/code/ray-trace.wgsl | 209 ++++++++++++++++++------------- 1 file changed, 123 insertions(+), 86 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 416a1acb..d614a167 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -360,10 +360,13 @@ struct ResampleBase { struct ResampleResult { selected: bool, mis_canonical: f32, - color_and_weight: vec4, } -fn resample(dst: ptr, base: ResampleBase, other: PixelCache, max_history: u32, rng: ptr, enable_debug: bool) -> ResampleResult { +fn resample( + dst: ptr, color_and_weight: ptr>, + base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, + rng: ptr, enable_debug: bool, +) -> ResampleResult { var src: LiveReservoir; let neighbor = other.reservoir; var rr = ResampleResult(); @@ -373,7 +376,7 @@ fn resample(dst: ptr, base: ResampleBase, other: PixelC let neighbor_history = min(neighbor.confidence, f32(max_history)); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, prev_acc_struct, debug_len); + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); let mis_sub_canonical = balance_heuristic( t_canonical_at_neighbor.score, canonical.selected_target_score, neighbor_history * base.accepted_count, canonical.history); @@ -405,7 +408,7 @@ fn resample(dst: ptr, base: ResampleBase, other: PixelC } if (DECOUPLED_SHADING) { - rr.color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); + *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } if (src.weight_sum <= 0.0) { bump_reservoir(dst, src.history); @@ -416,33 +419,51 @@ fn resample(dst: ptr, base: ResampleBase, other: PixelC return rr; } -struct RestirOutput { - radiance: vec3, +struct ResampleOutput { + reservoir: StoredReservoir, + color: vec3, } -fn compute_restir( - surface: Surface, pixel: vec2, rng: ptr, - local_index: u32, group_id: vec3, enable_debug: bool, -) -> RestirOutput { - if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); +fn finalize_resampling( + reservoir: ptr, color_and_weight: ptr>, + base: ResampleBase, mis_canonical: f32, rng: ptr, +) -> ResampleOutput { + var ro = ResampleOutput(); + var canonical = base.canonical; + if (PAIRWISE_MIS && canonical.history > 0.0) { + //TODO: fix the case of `mis_canonical` being too low + canonical.weight_sum *= mis_canonical / canonical.history; } - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - if (surface.depth == 0.0) { - reservoirs[pixel_index] = StoredReservoir(); - let env = evaluate_environment(ray_dir); - return RestirOutput(env); + merge_reservoir(reservoir, canonical, random_gen(rng)); + + if (base.accepted_count > 0.0) { + let effective_history = select((*reservoir).history, BASE_CANONICAL_MIS + base.accepted_count, PAIRWISE_MIS); + ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); + } else { + ro.reservoir = pack_reservoir(canonical); } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - let position = camera.position + surface.depth * ray_dir; - let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); - if (debug.view_mode == DebugMode_Normal) { - textureStore(out_debug, pixel, vec4(normal, 0.0)); + if (DECOUPLED_SHADING) { + //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? + let contribution_weight = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); + *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + ro.color = (*color_and_weight).xyz / max((*color_and_weight).w, 0.001); + } else { + ro.color = ro.reservoir.contribution_weight * (*reservoir).radiance; } + return ro; +} - // 1: build the canonical sample +fn resample_temporal( + surface: Surface, cur_pixel: vec2, position: vec3, + rng: ptr, enable_debug: bool +) -> ResampleOutput { + if (surface.depth == 0.0) { + return ResampleOutput(); + } + let debug_len = select(0.0, surface.depth * 0.2, enable_debug); + + // build the canonical sample var canonical = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; @@ -462,89 +483,107 @@ fn compute_restir( } //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(pixel, position)); - var accepted_count = 0u; - var accepted_local_indices = array(); + let prev_pixel = vec2(get_prev_pixel(cur_pixel, position)); - // 2: read the temporal sample. let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); - if (prev_reservoir_index >= 0) { - let prev_reservoir = prev_reservoirs[prev_reservoir_index]; - let prev_surface = read_prev_surface(prev_pixel); - let prev_dir = get_ray_direction(prev_camera, prev_pixel); - let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; - pixel_cache[local_index] = PixelCache(prev_surface, prev_reservoir, prev_world_pos); - - if (parameters.temporal_tap != 0u && prev_reservoir.confidence > 0.0) { - // if the surfaces are too different, there is no trust in this sample - if (compare_surfaces(surface, prev_surface) > 0.1) { - accepted_local_indices[0] = local_index; - accepted_count = 1u; - } - } + if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { + return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); } - //TODO: store the reservoir from this iteration, not the previous one - // 3: sync with the workgroup to ensure all reservoirs are available. - workgroupBarrier(); + let prev_reservoir = prev_reservoirs[prev_reservoir_index]; + let prev_surface = read_prev_surface(prev_pixel); + // if the surfaces are too different, there is no trust in this sample + if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { + return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); + } + + var reservoir = LiveReservoir(); + var color_and_weight = vec4(0.0); + let base = ResampleBase(surface, canonical, position, 1.0); + + let prev_dir = get_ray_direction(prev_camera, prev_pixel); + let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; + let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, enable_debug); + let mis_canonical = BASE_CANONICAL_MIS + rr.mis_canonical; - // 4: gather the list of neighbors (within the workgroup) to resample. - let max_accepted = min(MAX_RESAMPLE, accepted_count + parameters.spatial_taps); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); +} + +fn resample_spatial( + surface: Surface, cur_pixel: vec2, position: vec3, + group_id: vec3, canonical_stored: StoredReservoir, + rng: ptr, enable_debug: bool +) -> ResampleOutput { + if (surface.depth == 0.0) { + let dir = normalize(position - camera.position); + var ro = ResampleOutput(); + ro.color = evaluate_environment(dir); + return ro; + } + let debug_len = select(0.0, surface.depth * 0.2, enable_debug); + + // gather the list of neighbors (within the workgroup) to resample. + var accepted_count = 0u; + var accepted_local_indices = array(); + let max_accepted = min(MAX_RESAMPLE, parameters.spatial_taps); let num_candidates = parameters.spatial_taps * 3u; - for (var candidates = num_candidates; candidates > 0u && accepted_count < max_accepted; candidates -= 1u) { + for (var i = 0u; i < num_candidates && accepted_count < max_accepted; i += 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; - let diff = thread_index_to_coord(other_cache_index, group_id) - pixel; + let diff = thread_index_to_coord(other_cache_index, group_id) - cur_pixel; if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; } let other = pixel_cache[other_cache_index]; - if (other.reservoir.confidence > 0.0) { - // if the surfaces are too different, there is no trust in this sample - if (compare_surfaces(surface, other.surface) > 0.1) { - accepted_local_indices[accepted_count] = other_cache_index; - accepted_count += 1u; - } + // if the surfaces are too different, there is no trust in this sample + if (other.reservoir.confidence > 0.0 && compare_surfaces(surface, other.surface) > 0.1) { + accepted_local_indices[accepted_count] = other_cache_index; + accepted_count += 1u; } } - // 5: evaluate the MIS of each of the samples versus the canonical one. - let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + let canonical = unpack_reservoir(canonical_stored, ~0u); var reservoir = LiveReservoir(); var mis_canonical = BASE_CANONICAL_MIS; var color_and_weight = vec4(0.0); + let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + + // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { - let other_local_index = accepted_local_indices[lid]; - let other = pixel_cache[other_local_index]; - let max_history = select(parameters.spatial_tap_history, parameters.temporal_history, other_local_index == local_index); - let rr = resample(&reservoir, base, other, max_history, rng, enable_debug); + let other = pixel_cache[accepted_local_indices[lid]]; + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, enable_debug); mis_canonical += rr.mis_canonical; - if (DECOUPLED_SHADING) { - color_and_weight += rr.color_and_weight; - } } - // 6: merge in the canonical sample. - if (PAIRWISE_MIS) { - canonical.weight_sum *= mis_canonical / canonical.history; - } - if (DECOUPLED_SHADING) { - //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let cw = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - color_and_weight += canonical.weight_sum * vec4(cw * canonical.radiance, 1.0); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); +} + +fn compute_restir( + pixel: vec2, local_index: u32, group_id: vec3, + rng: ptr, enable_debug: bool, +) -> vec3 { + let surface = read_surface(pixel); + if (debug.view_mode == DebugMode_Depth) { + textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } - merge_reservoir(&reservoir, canonical, random_gen(rng)); + let ray_dir = get_ray_direction(camera, pixel); + let pixel_index = get_reservoir_index(pixel, camera); - // 7: finish - let effective_history = select(reservoir.history, BASE_CANONICAL_MIS + f32(accepted_count), PAIRWISE_MIS); - let stored = pack_reservoir_detail(reservoir, effective_history); - reservoirs[pixel_index] = stored; - var ro = RestirOutput(); - if (DECOUPLED_SHADING) { - ro.radiance = color_and_weight.xyz / max(color_and_weight.w, 0.001); - } else { - ro.radiance = stored.contribution_weight * reservoir.radiance; + let position = camera.position + surface.depth * ray_dir; + if (debug.view_mode == DebugMode_Normal) { + let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); + textureStore(out_debug, pixel, vec4(normal, 0.0)); } - return ro; + + let temporal = resample_temporal(surface, pixel, position, rng, enable_debug); + pixel_cache[local_index] = PixelCache(surface, temporal.reservoir, position); + + // sync with the workgroup to ensure all reservoirs are available. + workgroupBarrier(); + + let spatial = resample_spatial(surface, pixel, position, group_id, temporal.reservoir, rng, enable_debug); + reservoirs[pixel_index] = spatial.reservoir; + return spatial.color; } @compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) @@ -569,11 +608,9 @@ fn main( let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let surface = read_surface(pixel_coord); let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let ro = compute_restir(surface, pixel_coord, &rng, local_index, group_id, enable_restir_debug); - let color = ro.radiance; + let color = compute_restir(pixel_coord, local_index, group_id, &rng, enable_restir_debug); if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; From f872720f7c74143054036e10f4fdcd59768f7556 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 24 Aug 2024 23:40:06 -0700 Subject: [PATCH 09/26] Refactor MIS according to GRIS paper --- blade-render/code/ray-trace.wgsl | 47 ++++++++++++-------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index d614a167..054f6fc3 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -16,10 +16,7 @@ const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; -// Base MIS for canonical samples. The constant isolates a critical difference between -// Bitterli's pseudocode (where it's 1) and NVidia's RTXDI implementation (where it's 0). -// With Bitterli's 1 we have MIS not respecting the prior history enough. -const BASE_CANONICAL_MIS: f32 = 0.05; +const DEFENSIVE_MIS: bool = false; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; @@ -338,19 +335,6 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return brdf; } -struct HeuristicFactors { - weight: f32, - //history: f32, -} - -fn balance_heuristic(w0: f32, w1: f32, h0: f32, h1: f32) -> HeuristicFactors { - var hf: HeuristicFactors; - let balance_denom = h0 * w0 + h1 * w1; - hf.weight = select(h0 * w0 / balance_denom, 0.0, balance_denom <= 0.0); - //hf.history = select(pow(clamp(w1 / w0, 0.0, 1.0), 8.0), 1.0, w0 <= 0.0); - return hf; -} - struct ResampleBase { surface: Surface, canonical: LiveReservoir, @@ -362,6 +346,8 @@ struct ResampleResult { mis_canonical: f32, } +const canonical_count: f32 = 1.0; + fn resample( dst: ptr, color_and_weight: ptr>, base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, @@ -377,10 +363,10 @@ fn resample( { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); - let mis_sub_canonical = balance_heuristic( - t_canonical_at_neighbor.score, canonical.selected_target_score, - neighbor_history * base.accepted_count, canonical.history); - rr.mis_canonical = 1.0 - mis_sub_canonical.weight; + let nom = canonical.selected_target_score * canonical.history; + let denom = canonical_count * nom + t_canonical_at_neighbor.score * neighbor_history * base.accepted_count; + let kf = 1.0 / select(base.accepted_count, canonical_count + base.accepted_count, DEFENSIVE_MIS); + rr.mis_canonical = kf * nom / max(0.01, denom); } // Notes about t_neighbor_at_neighbor: @@ -390,17 +376,16 @@ fn resample( //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); let t_neighbor_at_canonical = estimate_target_score_with_occlusion( base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let mis_neighbor = balance_heuristic( - neighbor.target_score, t_neighbor_at_canonical.score, - neighbor_history * base.accepted_count, canonical.history); + let nom = t_neighbor_at_canonical.score * canonical.history; + let denom = canonical_count * neighbor.target_score * neighbor_history + base.accepted_count * nom; + let kf = select(1.0, base.accepted_count / (canonical_count + base.accepted_count), DEFENSIVE_MIS); + let mis_neighbor = kf * nom / max(0.01, denom); src.history = neighbor_history; src.selected_light_index = neighbor.light_index; src.selected_uv = neighbor.light_uv; src.selected_target_score = t_neighbor_at_canonical.score; - src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor.weight; - //Note: should be needed according to the paper - // src.history *= min(mis_neighbor.history, mis_sub_canonical.history); + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor; src.radiance = t_neighbor_at_canonical.color; } else { src = unpack_reservoir(neighbor, max_history); @@ -437,7 +422,7 @@ fn finalize_resampling( merge_reservoir(reservoir, canonical, random_gen(rng)); if (base.accepted_count > 0.0) { - let effective_history = select((*reservoir).history, BASE_CANONICAL_MIS + base.accepted_count, PAIRWISE_MIS); + let effective_history = select((*reservoir).history, base.accepted_count, PAIRWISE_MIS); ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); } else { ro.reservoir = pack_reservoir(canonical); @@ -505,7 +490,8 @@ fn resample_temporal( let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, enable_debug); - let mis_canonical = BASE_CANONICAL_MIS + rr.mis_canonical; + let total_samples = 2.0; + let mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS) + rr.mis_canonical; return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } @@ -544,7 +530,8 @@ fn resample_spatial( let canonical = unpack_reservoir(canonical_stored, ~0u); var reservoir = LiveReservoir(); - var mis_canonical = BASE_CANONICAL_MIS; + let total_samples = 1.0 + f32(accepted_count); + var mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, f32(accepted_count)); From 5573b8eaa22891d44556374d0285fe7f64b45c45 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sun, 25 Aug 2024 23:01:31 -0700 Subject: [PATCH 10/26] Fix groups, canonical sampling, refactor MIS --- blade-graphics/src/util.rs | 17 +-- blade-render/code/ray-trace.wgsl | 178 +++++++++++++++++++------------ blade-render/src/render/mod.rs | 24 ++++- 3 files changed, 142 insertions(+), 77 deletions(-) diff --git a/blade-graphics/src/util.rs b/blade-graphics/src/util.rs index 23e876e0..f0d2013e 100644 --- a/blade-graphics/src/util.rs +++ b/blade-graphics/src/util.rs @@ -94,14 +94,19 @@ impl super::TextureFormat { } } +impl super::Extent { + pub fn group_by(&self, size: [u32; 3]) -> [u32; 3] { + [ + (self.width + size[0] - 1) / size[0], + (self.height + size[1] - 1) / size[1], + (self.depth + size[2] - 1) / size[2], + ] + } +} + impl super::ComputePipeline { /// Return the dispatch group counts sufficient to cover the given extent. pub fn get_dispatch_for(&self, extent: super::Extent) -> [u32; 3] { - let wg_size = self.get_workgroup_size(); - [ - (extent.width + wg_size[0] - 1) / wg_size[0], - (extent.height + wg_size[1] - 1) / wg_size[1], - (extent.depth + wg_size[2] - 1) / wg_size[2], - ] + extent.group_by(self.get_workgroup_size()) } } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 054f6fc3..90cfdfef 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -16,7 +16,6 @@ const MAX_RESAMPLE: u32 = 4u; // See "9.1 pairwise mis for robust reservoir reuse" // "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" const PAIRWISE_MIS: bool = true; -const DEFENSIVE_MIS: bool = false; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; @@ -121,12 +120,12 @@ fn merge_reservoir(r: ptr, other: LiveReservoir, random return false; } } -fn unpack_reservoir(f: StoredReservoir, max_history: u32) -> LiveReservoir { +fn unpack_reservoir(f: StoredReservoir, max_history: u32, radiance: vec3) -> LiveReservoir { var r: LiveReservoir; r.selected_light_index = f.light_index; r.selected_uv = f.light_uv; r.selected_target_score = f.target_score; - r.radiance = vec3(0.0); // to be continued... + r.radiance = radiance; let history = min(f.confidence, f32(max_history)); r.weight_sum = f.contribution_weight * f.target_score * history; r.history = history; @@ -305,11 +304,11 @@ fn estimate_target_score_with_occlusion( if (check_ray_occluded(acs, position, direction, debug_len)) { return TargetScore(); - } else { - //Note: same as `evaluate_reflected_light` - let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; - return make_target_score(brdf * radiance); } + + //Note: same as `evaluate_reflected_light` + let radiance = textureSampleLevel(env_map, sampler_nearest, light_uv, 0.0).xyz; + return make_target_score(brdf * radiance); } fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debug_len: f32) -> f32 { @@ -335,6 +334,30 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return brdf; } +fn produce_canonical( + surface: Surface, position: vec3, + rng: ptr, debug_len: f32, +) -> LiveReservoir { + var reservoir = LiveReservoir(); + for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { + var ls: LightSample; + if (parameters.environment_importance_sampling != 0u) { + ls = sample_light_from_environment(rng); + } else { + ls = sample_light_from_sphere(rng); + } + + let brdf = evaluate_sample(ls, surface, position, debug_len); + if (brdf > 0.0) { + let other = make_reservoir(ls, 0u, vec3(brdf)); + merge_reservoir(&reservoir, other, random_gen(rng)); + } else { + bump_reservoir(&reservoir, 1.0); + } + } + return reservoir; +} + struct ResampleBase { surface: Surface, canonical: LiveReservoir, @@ -344,29 +367,27 @@ struct ResampleBase { struct ResampleResult { selected: bool, mis_canonical: f32, + mis_sample: f32, } -const canonical_count: f32 = 1.0; - +// Resample following Algorithm 8 in section 9.1 of Bitterli thesis fn resample( dst: ptr, color_and_weight: ptr>, base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, - rng: ptr, enable_debug: bool, + rng: ptr, debug_len: f32, ) -> ResampleResult { var src: LiveReservoir; let neighbor = other.reservoir; var rr = ResampleResult(); if (PAIRWISE_MIS) { - let debug_len = select(0.0, other.surface.depth * 0.2, enable_debug); let canonical = base.canonical; let neighbor_history = min(neighbor.confidence, f32(max_history)); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); - let nom = canonical.selected_target_score * canonical.history; - let denom = canonical_count * nom + t_canonical_at_neighbor.score * neighbor_history * base.accepted_count; - let kf = 1.0 / select(base.accepted_count, canonical_count + base.accepted_count, DEFENSIVE_MIS); - rr.mis_canonical = kf * nom / max(0.01, denom); + let nom = canonical.selected_target_score * canonical.history / base.accepted_count; + let denom = t_canonical_at_neighbor.score * neighbor_history + nom; + rr.mis_canonical = select(0.0, nom / denom, denom > 0.0); } // Notes about t_neighbor_at_neighbor: @@ -376,10 +397,10 @@ fn resample( //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); let t_neighbor_at_canonical = estimate_target_score_with_occlusion( base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); - let nom = t_neighbor_at_canonical.score * canonical.history; - let denom = canonical_count * neighbor.target_score * neighbor_history + base.accepted_count * nom; - let kf = select(1.0, base.accepted_count / (canonical_count + base.accepted_count), DEFENSIVE_MIS); - let mis_neighbor = kf * nom / max(0.01, denom); + let nom = neighbor.target_score * neighbor_history; + let denom = nom + t_neighbor_at_canonical.score * canonical.history / base.accepted_count; + let mis_neighbor = select(0.0, nom / denom, denom > 0.0); + rr.mis_sample = mis_neighbor; src.history = neighbor_history; src.selected_light_index = neighbor.light_index; @@ -388,11 +409,14 @@ fn resample( src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor; src.radiance = t_neighbor_at_canonical.color; } else { - src = unpack_reservoir(neighbor, max_history); - src.radiance = evaluate_reflected_light(base.surface, src.selected_light_index, src.selected_uv); + rr.mis_canonical = 0.0; + rr.mis_sample = 1.0; + let radiance = evaluate_reflected_light(base.surface, neighbor.light_index, neighbor.light_uv); + src = unpack_reservoir(neighbor, max_history, radiance); } if (DECOUPLED_SHADING) { + //TODO: use `mis_neighbor`O *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } if (src.weight_sum <= 0.0) { @@ -409,29 +433,39 @@ struct ResampleOutput { color: vec3, } +fn revive_canonical(ro: ResampleOutput) -> LiveReservoir { + let radiance = select(vec3(0.0), ro.color / ro.reservoir.contribution_weight, ro.reservoir.contribution_weight > 0.0); + return unpack_reservoir(ro.reservoir, ~0u, radiance); +} + +fn finalize_canonical(reservoir: LiveReservoir) -> ResampleOutput { + var ro = ResampleOutput(); + ro.reservoir = pack_reservoir(reservoir); + ro.color = ro.reservoir.contribution_weight * reservoir.radiance; + return ro; +} + fn finalize_resampling( reservoir: ptr, color_and_weight: ptr>, base: ResampleBase, mis_canonical: f32, rng: ptr, ) -> ResampleOutput { var ro = ResampleOutput(); var canonical = base.canonical; - if (PAIRWISE_MIS && canonical.history > 0.0) { - //TODO: fix the case of `mis_canonical` being too low - canonical.weight_sum *= mis_canonical / canonical.history; - } + canonical.weight_sum *= mis_canonical / canonical.history; merge_reservoir(reservoir, canonical, random_gen(rng)); if (base.accepted_count > 0.0) { - let effective_history = select((*reservoir).history, base.accepted_count, PAIRWISE_MIS); + let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, PAIRWISE_MIS); ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); } else { ro.reservoir = pack_reservoir(canonical); } if (DECOUPLED_SHADING) { - //FIXME: issue with near zero denominator. Do we need do use BASE_CANONICAL_MIS? - let contribution_weight = canonical.weight_sum / max(canonical.selected_target_score * mis_canonical, 0.1); - *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + if (canonical.selected_target_score > 0.0) { + let contribution_weight = canonical.weight_sum / canonical.selected_target_score; + *color_and_weight += canonical.weight_sum * vec4(contribution_weight * canonical.radiance, 1.0); + } ro.color = (*color_and_weight).xyz / max((*color_and_weight).w, 0.001); } else { ro.color = ro.reservoir.contribution_weight * (*reservoir).radiance; @@ -441,45 +475,29 @@ fn finalize_resampling( fn resample_temporal( surface: Surface, cur_pixel: vec2, position: vec3, - rng: ptr, enable_debug: bool + rng: ptr, debug_len: f32, ) -> ResampleOutput { + if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { + textureStore(out_debug, cur_pixel, vec4(0.0)); + } if (surface.depth == 0.0) { return ResampleOutput(); } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - - // build the canonical sample - var canonical = LiveReservoir(); - for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { - var ls: LightSample; - if (parameters.environment_importance_sampling != 0u) { - ls = sample_light_from_environment(rng); - } else { - ls = sample_light_from_sphere(rng); - } - - let brdf = evaluate_sample(ls, surface, position, debug_len); - if (brdf > 0.0) { - let other = make_reservoir(ls, 0u, vec3(brdf)); - merge_reservoir(&canonical, other, random_gen(rng)); - } else { - bump_reservoir(&canonical, 1.0); - } - } + let canonical = produce_canonical(surface, position, rng, debug_len); //TODO: find best match in a 2x2 grid let prev_pixel = vec2(get_prev_pixel(cur_pixel, position)); let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { - return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); + return finalize_canonical(canonical); } let prev_reservoir = prev_reservoirs[prev_reservoir_index]; let prev_surface = read_prev_surface(prev_pixel); // if the surfaces are too different, there is no trust in this sample if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { - return ResampleOutput(pack_reservoir(canonical), vec3(0.0)); + return finalize_canonical(canonical); } var reservoir = LiveReservoir(); @@ -489,25 +507,36 @@ fn resample_temporal( let prev_dir = get_ray_direction(prev_camera, prev_pixel); let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); - let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, enable_debug); - let total_samples = 2.0; - let mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS) + rr.mis_canonical; + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, debug_len); + let mis_canonical = 1.0 + rr.mis_canonical; + if (debug.view_mode == DebugMode_TemporalMatch) { + textureStore(out_debug, cur_pixel, vec4(1.0)); + } + if (debug.view_mode == DebugMode_TemporalMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); + } + if (debug.view_mode == DebugMode_TemporalMisError) { + let total = mis_canonical + rr.mis_sample; + textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } fn resample_spatial( surface: Surface, cur_pixel: vec2, position: vec3, - group_id: vec3, canonical_stored: StoredReservoir, - rng: ptr, enable_debug: bool + group_id: vec3, canonical: LiveReservoir, + rng: ptr, debug_len: f32, ) -> ResampleOutput { if (surface.depth == 0.0) { + if (debug.view_mode == DebugMode_SpatialMatch || debug.view_mode == DebugMode_SpatialMisCanonical || debug.view_mode == DebugMode_SpatialMisError) { + textureStore(out_debug, cur_pixel, vec4(0.0)); + } let dir = normalize(position - camera.position); var ro = ResampleOutput(); ro.color = evaluate_environment(dir); return ro; } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); // gather the list of neighbors (within the workgroup) to resample. var accepted_count = 0u; @@ -528,20 +557,31 @@ fn resample_spatial( } } - let canonical = unpack_reservoir(canonical_stored, ~0u); var reservoir = LiveReservoir(); - let total_samples = 1.0 + f32(accepted_count); - var mis_canonical = select(0.0, 1.0 / total_samples, DEFENSIVE_MIS); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, f32(accepted_count)); + var mis_canonical = 1.0; + var mis_sample_sum = 0.0; // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { let other = pixel_cache[accepted_local_indices[lid]]; - let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, enable_debug); + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, debug_len); mis_canonical += rr.mis_canonical; + mis_sample_sum += rr.mis_sample; } + if (debug.view_mode == DebugMode_SpatialMatch) { + let value = f32(accepted_count) / max(1.0, f32(parameters.spatial_taps)); + textureStore(out_debug, cur_pixel, vec4(value)); + } + if (debug.view_mode == DebugMode_SpatialMisCanonical) { + textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); + } + if (debug.view_mode == DebugMode_SpatialMisError) { + let total = mis_canonical + mis_sample_sum; + textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } @@ -553,22 +593,23 @@ fn compute_restir( if (debug.view_mode == DebugMode_Depth) { textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); } - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - - let position = camera.position + surface.depth * ray_dir; if (debug.view_mode == DebugMode_Normal) { let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); textureStore(out_debug, pixel, vec4(normal, 0.0)); } + let debug_len = select(0.0, surface.depth * 0.2, enable_debug); + let ray_dir = get_ray_direction(camera, pixel); + let pixel_index = get_reservoir_index(pixel, camera); + let position = camera.position + surface.depth * ray_dir; - let temporal = resample_temporal(surface, pixel, position, rng, enable_debug); + let temporal = resample_temporal(surface, pixel, position, rng, debug_len); pixel_cache[local_index] = PixelCache(surface, temporal.reservoir, position); // sync with the workgroup to ensure all reservoirs are available. workgroupBarrier(); - let spatial = resample_spatial(surface, pixel, position, group_id, temporal.reservoir, rng, enable_debug); + let temporal_live = revive_canonical(temporal); + let spatial = resample_spatial(surface, pixel, position, group_id, temporal_live, rng, debug_len); reservoirs[pixel_index] = spatial.reservoir; return spatial.color; } @@ -598,6 +639,7 @@ fn main( let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; let color = compute_restir(pixel_coord, local_index, group_id, &rng, enable_restir_debug); + if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 1eec948f..14d8ad93 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -51,7 +51,13 @@ pub enum DebugMode { Normal = 2, Motion = 3, HitConsistency = 4, - Variance = 5, + TemporalMatch = 5, + TemporalMisCanonical = 6, + TemporalMisError = 7, + SpatialMatch = 8, + SpatialMisCanonical = 9, + SpatialMisError = 10, + Variance = 100, } impl Default for DebugMode { @@ -1162,8 +1168,20 @@ impl Renderer { let r = self.frame_index as u32 ^ 0x5A; [r % limit + 1, (r / limit) % limit + 1] }; - - let groups = self.main_pipeline.get_dispatch_for(self.surface_size); + let groups = { + let wg_size = self.main_pipeline.get_workgroup_size(); + let cluster_size = [ + wg_size[0] * grid_scale[0], + wg_size[1] * grid_scale[1], + wg_size[2], + ]; + let clusters = self.surface_size.group_by(cluster_size); + [ + clusters[0] * grid_scale[0], + clusters[1] * grid_scale[1], + clusters[2], + ] + }; let mut pc = pass.with(&self.main_pipeline); pc.bind( 0, From 7e37196c80f61462fecca587bb2030e1606729b5 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 2 Sep 2024 13:57:38 -0700 Subject: [PATCH 11/26] Remove extra reservoirs buffer --- blade-render/code/ray-trace.wgsl | 3 +-- blade-render/src/render/mod.rs | 35 ++++++++++++-------------------- 2 files changed, 14 insertions(+), 24 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 90cfdfef..b9d40617 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -57,7 +57,6 @@ struct StoredReservoir { confidence: f32, } var reservoirs: array; -var prev_reservoirs: array; struct PixelCache { surface: Surface, @@ -493,7 +492,7 @@ fn resample_temporal( return finalize_canonical(canonical); } - let prev_reservoir = prev_reservoirs[prev_reservoir_index]; + let prev_reservoir = reservoirs[prev_reservoir_index]; let prev_surface = read_prev_surface(prev_pixel); // if the surfaces are too different, there is no trust in this sample if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 14d8ad93..74e3dd9c 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -209,7 +209,7 @@ impl RenderTarget { } struct RestirTargets { - reservoir_buf: [blade_graphics::Buffer; 2], + reservoir_buf: blade_graphics::Buffer, debug: RenderTarget<1>, depth: RenderTarget<2>, basis: RenderTarget<2>, @@ -228,14 +228,11 @@ impl RestirTargets { gpu: &blade_graphics::Context, ) -> Self { let total_reservoirs = size.width as usize * size.height as usize; - let mut reservoir_buf = [blade_graphics::Buffer::default(); 2]; - for (i, rb) in reservoir_buf.iter_mut().enumerate() { - *rb = gpu.create_buffer(blade_graphics::BufferDesc { - name: &format!("reservoirs{i}"), - size: reservoir_size as u64 * total_reservoirs as u64, - memory: blade_graphics::Memory::Device, - }); - } + let reservoir_buf = gpu.create_buffer(blade_graphics::BufferDesc { + name: "reservoirs", + size: reservoir_size as u64 * total_reservoirs as u64, + memory: blade_graphics::Memory::Device, + }); Self { reservoir_buf, @@ -287,9 +284,7 @@ impl RestirTargets { } fn destroy(&self, gpu: &blade_graphics::Context) { - for rb in self.reservoir_buf.iter() { - gpu.destroy_buffer(*rb); - } + gpu.destroy_buffer(self.reservoir_buf); self.debug.destroy(gpu); self.depth.destroy(gpu); self.basis.destroy(gpu); @@ -420,7 +415,6 @@ struct MainData { t_motion: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, - prev_reservoirs: blade_graphics::BufferPiece, out_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -1106,13 +1100,11 @@ impl Renderer { self.debug.reset_lines(&mut transfer); } let total_reservoirs = self.surface_size.width as u64 * self.surface_size.height as u64; - for reservoir_buf in self.targets.reservoir_buf.iter() { - transfer.fill_buffer( - reservoir_buf.at(0), - total_reservoirs * self.reservoir_size as u64, - 0, - ); - } + transfer.fill_buffer( + self.targets.reservoir_buf.at(0), + total_reservoirs * self.reservoir_size as u64, + 0, + ); } if !config.frozen { @@ -1224,8 +1216,7 @@ impl Renderer { t_prev_flat_normal: self.targets.flat_normal.views[prev], t_motion: self.targets.motion.views[0], debug_buf: self.debug.buffer_resource(), - reservoirs: self.targets.reservoir_buf[cur].into(), - prev_reservoirs: self.targets.reservoir_buf[prev].into(), + reservoirs: self.targets.reservoir_buf.into(), out_diffuse: self.targets.light_diffuse.views[cur], out_debug: self.targets.debug.views[0], }, From 53b94118f45f4f8f0a8d7b0fe2691376eea39fa1 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 2 Sep 2024 21:54:28 -0700 Subject: [PATCH 12/26] Merge G-Buffer pass into the Main --- blade-render/code/blur.wgsl | 2 +- blade-render/code/fill-gbuf.wgsl | 204 ---------------------------- blade-render/code/gbuf.inc.wgsl | 2 - blade-render/code/geometry.inc.wgsl | 182 +++++++++++++++++++++++++ blade-render/code/motion.inc.wgsl | 2 + blade-render/code/ray-trace.wgsl | 81 +++++------ blade-render/src/render/mod.rs | 110 ++++----------- 7 files changed, 245 insertions(+), 338 deletions(-) delete mode 100644 blade-render/code/fill-gbuf.wgsl delete mode 100644 blade-render/code/gbuf.inc.wgsl create mode 100644 blade-render/code/geometry.inc.wgsl create mode 100644 blade-render/code/motion.inc.wgsl diff --git a/blade-render/code/blur.wgsl b/blade-render/code/blur.wgsl index 3207ef60..3aec665b 100644 --- a/blade-render/code/blur.wgsl +++ b/blade-render/code/blur.wgsl @@ -1,5 +1,5 @@ #include "camera.inc.wgsl" -#include "gbuf.inc.wgsl" +#include "motion.inc.wgsl" #include "quaternion.inc.wgsl" #include "surface.inc.wgsl" diff --git a/blade-render/code/fill-gbuf.wgsl b/blade-render/code/fill-gbuf.wgsl deleted file mode 100644 index 346edf51..00000000 --- a/blade-render/code/fill-gbuf.wgsl +++ /dev/null @@ -1,204 +0,0 @@ -#include "quaternion.inc.wgsl" -#include "camera.inc.wgsl" -#include "debug.inc.wgsl" -#include "debug-param.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: use proper WGSL -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; - -// Has to match the host! -struct Vertex { - pos: vec3, - bitangent_sign: f32, - tex_coords: vec2, - normal: u32, - tangent: u32, -} -struct VertexBuffer { - data: array, -} -struct IndexBuffer { - data: array, -} -var vertex_buffers: binding_array; -var index_buffers: binding_array; -var textures: binding_array>; -var sampler_linear: sampler; -var sampler_nearest: sampler; - -struct HitEntry { - index_buf: u32, - vertex_buf: u32, - winding: f32, - // packed quaternion - geometry_to_world_rotation: u32, - geometry_to_object: mat4x3, - prev_object_to_world: mat4x3, - base_color_texture: u32, - // packed color factor - base_color_factor: u32, - normal_texture: u32, -} -var hit_entries: array; - -var camera: CameraParams; -var prev_camera: CameraParams; -var debug: DebugParams; -var acc_struct: acceleration_structure; - -var out_depth: texture_storage_2d; -var out_flat_normal: texture_storage_2d; -var out_basis: texture_storage_2d; -var out_albedo: texture_storage_2d; -var out_motion: texture_storage_2d; -var out_debug: texture_storage_2d; - -fn decode_normal(raw: u32) -> vec3 { - return unpack4x8snorm(raw).xyz; -} - -fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { - let nw = normalize(qrot(rotation, decode_normal(normal_raw))); - debug_line(pos, pos + debug_len * nw, color); -} - -@compute @workgroup_size(8, 4) -fn main(@builtin(global_invocation_id) global_id: vec3) { - if (any(global_id.xy >= camera.target_size)) { - return; - } - - var rq: ray_query; - let ray_dir = get_ray_direction(camera, vec2(global_id.xy)); - rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); - rayQueryProceed(&rq); - let intersection = rayQueryGetCommittedIntersection(&rq); - - var depth = 0.0; - var basis = vec4(0.0); - var flat_normal = vec3(0.0); - var albedo = vec3(1.0); - var motion = vec2(0.0); - let enable_debug = all(global_id.xy == debug.mouse_pos); - - if (intersection.kind != RAY_QUERY_INTERSECTION_NONE) { - let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; - depth = intersection.t; - - var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); - if (entry.index_buf != ~0u) { - let iptr = &index_buffers[entry.index_buf].data; - indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); - } - - let vptr = &vertex_buffers[entry.vertex_buf].data; - let vertices = array( - (*vptr)[indices.x], - (*vptr)[indices.y], - (*vptr)[indices.z], - ); - - let positions_object = entry.geometry_to_object * mat3x4( - vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) - ); - let positions = intersection.object_to_world * mat3x4( - vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) - ); - flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); - - let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); - let position_object = vec4(positions_object * barycentrics, 1.0); - let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; - let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); - let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); - let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; - - let lod = 0.0; //TODO: this is actually complicated - - let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); - let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); - var normal_local: vec3; - if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { - normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map - } else { - let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; - normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); - } - var normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); - basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); - - let hit_position = camera.position + intersection.t * ray_dir; - if (enable_debug) { - debug_buf.entry.custom_index = intersection.instance_custom_index; - debug_buf.entry.depth = intersection.t; - debug_buf.entry.tex_coords = tex_coords; - debug_buf.entry.base_color_texture = entry.base_color_texture; - debug_buf.entry.normal_texture = entry.normal_texture; - debug_buf.entry.position = hit_position; - debug_buf.entry.flat_normal = flat_normal; - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { - let normal_len = 0.15 * intersection.t; - let side = 0.05 * intersection.t; - debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); - debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); - debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); - } - if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { - let debug_len = intersection.t * 0.2; - debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); - debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); - debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); - let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; - debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); - // note: dynamic indexing into positions isn't allowed by WGSL yet - debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); - // draw tangent space - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); - debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); - } - - let base_color_factor = unpack4x8unorm(entry.base_color_factor); - if ((debug.texture_flags & DebugTextureFlags_ALBEDO) != 0u) { - albedo = base_color_factor.xyz; - } else { - let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); - albedo = (base_color_factor * base_color_sample).xyz; - } - - if (debug.view_mode == DebugMode_HitConsistency) { - let reprojected = get_projected_pixel(camera, hit_position); - let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; - let camera_projection_diff = vec2(global_id.xy) - vec2(reprojected); - let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); - textureStore(out_debug, global_id.xy, consistency); - } - - let prev_position = (entry.prev_object_to_world * position_object).xyz; - let prev_screen = get_projected_pixel_float(prev_camera, prev_position); - //TODO: consider just storing integers here? - //TODO: technically this "0.5" is just a waste compute on both packing and unpacking - motion = prev_screen - vec2(global_id.xy) - 0.5; - if (debug.view_mode == DebugMode_Motion) { - textureStore(out_debug, global_id.xy, vec4(motion * MOTION_SCALE + vec2(0.5), 0.0, 1.0)); - } - } else { - if (enable_debug) { - debug_buf.entry = DebugEntry(); - } - if (debug.view_mode != DebugMode_Final) { - textureStore(out_debug, global_id.xy, vec4(0.0)); - } - } - - // TODO: option to avoid writing data for the sky - textureStore(out_depth, global_id.xy, vec4(depth, 0.0, 0.0, 0.0)); - textureStore(out_basis, global_id.xy, basis); - textureStore(out_flat_normal, global_id.xy, vec4(flat_normal, 0.0)); - textureStore(out_albedo, global_id.xy, vec4(albedo, 0.0)); - textureStore(out_motion, global_id.xy, vec4(motion * MOTION_SCALE, 0.0, 0.0)); -} diff --git a/blade-render/code/gbuf.inc.wgsl b/blade-render/code/gbuf.inc.wgsl deleted file mode 100644 index ecb4642d..00000000 --- a/blade-render/code/gbuf.inc.wgsl +++ /dev/null @@ -1,2 +0,0 @@ -const MOTION_SCALE: f32 = 0.02; -const USE_MOTION_VECTORS: bool = true; \ No newline at end of file diff --git a/blade-render/code/geometry.inc.wgsl b/blade-render/code/geometry.inc.wgsl new file mode 100644 index 00000000..99b771b1 --- /dev/null +++ b/blade-render/code/geometry.inc.wgsl @@ -0,0 +1,182 @@ +//TODO: https://github.com/gfx-rs/wgpu/pull/5429 +const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; + +// Has to match the host! +struct Vertex { + pos: vec3, + bitangent_sign: f32, + tex_coords: vec2, + normal: u32, + tangent: u32, +} +struct VertexBuffer { + data: array, +} +struct IndexBuffer { + data: array, +} +var vertex_buffers: binding_array; +var index_buffers: binding_array; +var textures: binding_array>; + +struct HitEntry { + index_buf: u32, + vertex_buf: u32, + winding: f32, + // packed quaternion + geometry_to_world_rotation: u32, + geometry_to_object: mat4x3, + prev_object_to_world: mat4x3, + base_color_texture: u32, + // packed color factor + base_color_factor: u32, + normal_texture: u32, +} +var hit_entries: array; + +fn decode_normal(raw: u32) -> vec3 { + return unpack4x8snorm(raw).xyz; +} + +fn debug_raw_normal(pos: vec3, normal_raw: u32, rotation: vec4, debug_len: f32, color: u32) { + let nw = normalize(qrot(rotation, decode_normal(normal_raw))); + debug_line(pos, pos + debug_len * nw, color); +} + +struct RichSurface { + inner: Surface, + position: vec3, + albedo: vec3, + motion: vec2, +} + +fn fetch_geometry(pixel_coord: vec2, enable_debug: bool, is_primary: bool) -> RichSurface { + var rq: ray_query; + let ray_dir = get_ray_direction(camera, pixel_coord); + rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); + rayQueryProceed(&rq); + let intersection = rayQueryGetCommittedIntersection(&rq); + + var rs = RichSurface(); + rs.albedo = vec3(1.0); + + if (intersection.kind == RAY_QUERY_INTERSECTION_NONE) { + if (enable_debug) { + debug_buf.entry = DebugEntry(); + } + return rs; + } + + let entry = hit_entries[intersection.instance_custom_index + intersection.geometry_index]; + + var indices = intersection.primitive_index * 3u + vec3(0u, 1u, 2u); + if (entry.index_buf != ~0u) { + let iptr = &index_buffers[entry.index_buf].data; + indices = vec3((*iptr)[indices.x], (*iptr)[indices.y], (*iptr)[indices.z]); + } + + let vptr = &vertex_buffers[entry.vertex_buf].data; + let vertices = array( + (*vptr)[indices.x], + (*vptr)[indices.y], + (*vptr)[indices.z], + ); + + let positions_object = entry.geometry_to_object * mat3x4( + vec4(vertices[0].pos, 1.0), vec4(vertices[1].pos, 1.0), vec4(vertices[2].pos, 1.0) + ); + let positions = intersection.object_to_world * mat3x4( + vec4(positions_object[0], 1.0), vec4(positions_object[1], 1.0), vec4(positions_object[2], 1.0) + ); + let flat_normal = entry.winding * normalize(cross(positions[1].xyz - positions[0].xyz, positions[2].xyz - positions[0].xyz)); + + let barycentrics = vec3(1.0 - intersection.barycentrics.x - intersection.barycentrics.y, intersection.barycentrics); + let position_object = vec4(positions_object * barycentrics, 1.0); + let tex_coords = mat3x2(vertices[0].tex_coords, vertices[1].tex_coords, vertices[2].tex_coords) * barycentrics; + let normal_geo = normalize(mat3x3(decode_normal(vertices[0].normal), decode_normal(vertices[1].normal), decode_normal(vertices[2].normal)) * barycentrics); + let tangent_geo = normalize(mat3x3(decode_normal(vertices[0].tangent), decode_normal(vertices[1].tangent), decode_normal(vertices[2].tangent)) * barycentrics); + let bitangent_geo = normalize(cross(normal_geo, tangent_geo)) * vertices[0].bitangent_sign; + + let lod = 0.0; //TODO: this is actually complicated + + let geo_to_world_rot = normalize(unpack4x8snorm(entry.geometry_to_world_rotation)); + let tangent_space_geo = mat3x3(tangent_geo, bitangent_geo, normal_geo); + var normal_local: vec3; + if ((debug.texture_flags & DebugTextureFlags_NORMAL) != 0u) { + normal_local = vec3(0.0, 0.0, 1.0); // ignore normal map + } else { + let n_xy = textureSampleLevel(textures[entry.normal_texture], sampler_linear, tex_coords, lod).xy; + normal_local = vec3(n_xy, sqrt(max(0.0, 1.0 - dot(n_xy.xy, n_xy.xy)))); + } + let normal = qrot(geo_to_world_rot, tangent_space_geo * normal_local); + let basis = shortest_arc_quat(vec3(0.0, 0.0, 1.0), normalize(normal)); + + let hit_position = camera.position + intersection.t * ray_dir; + if (enable_debug && is_primary) { + debug_buf.entry.custom_index = intersection.instance_custom_index; + debug_buf.entry.depth = intersection.t; + debug_buf.entry.tex_coords = tex_coords; + debug_buf.entry.base_color_texture = entry.base_color_texture; + debug_buf.entry.normal_texture = entry.normal_texture; + debug_buf.entry.position = hit_position; + debug_buf.entry.flat_normal = flat_normal; + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_SPACE) != 0u) { + let normal_len = 0.15 * intersection.t; + let side = 0.05 * intersection.t; + debug_line(hit_position, hit_position + normal_len * qrot(geo_to_world_rot, normal_geo), 0xFFFFFFu); + debug_line(hit_position - side * tangent_geo, hit_position + side * tangent_geo, 0x808080u); + debug_line(hit_position - side * bitangent_geo, hit_position + side * bitangent_geo, 0x808080u); + } + if (enable_debug && (debug.draw_flags & DebugDrawFlags_GEOMETRY) != 0u) { + let debug_len = intersection.t * 0.2; + debug_line(positions[0].xyz, positions[1].xyz, 0x00FFFFu); + debug_line(positions[1].xyz, positions[2].xyz, 0x00FFFFu); + debug_line(positions[2].xyz, positions[0].xyz, 0x00FFFFu); + let poly_center = (positions[0].xyz + positions[1].xyz + positions[2].xyz) / 3.0; + debug_line(poly_center, poly_center + 0.2 * debug_len * flat_normal, 0xFF00FFu); + // note: dynamic indexing into positions isn't allowed by WGSL yet + debug_raw_normal(positions[0].xyz, vertices[0].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[1].xyz, vertices[1].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + debug_raw_normal(positions[2].xyz, vertices[2].normal, geo_to_world_rot, 0.5*debug_len, 0xFFFF00u); + // draw tangent space + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(1.0, 0.0, 0.0)), 0x0000FFu); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 1.0, 0.0)), 0x00FF00u); + debug_line(hit_position, hit_position + debug_len * qrot(basis, vec3(0.0, 0.0, 1.0)), 0xFF0000u); + } + + rs.albedo = unpack4x8unorm(entry.base_color_factor).xyz; + if ((debug.texture_flags & DebugTextureFlags_ALBEDO) == 0u) { + let base_color_sample = textureSampleLevel(textures[entry.base_color_texture], sampler_linear, tex_coords, lod); + rs.albedo *= base_color_sample.xyz; + } + + if (is_primary) { + if (debug.view_mode == DebugMode_Depth) { + textureStore(out_debug, pixel_coord, vec4(intersection.t / camera.depth)); + } + if (debug.view_mode == DebugMode_Normal) { + textureStore(out_debug, pixel_coord, vec4(normal, 0.0)); + } + if (debug.view_mode == DebugMode_HitConsistency) { + let reprojected = get_projected_pixel(camera, hit_position); + let barycentrics_pos_diff = (intersection.object_to_world * position_object).xyz - hit_position; + let camera_projection_diff = vec2(pixel_coord - reprojected); + let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); + textureStore(out_debug, pixel_coord, consistency); + } + } + + let prev_position = (entry.prev_object_to_world * position_object).xyz; + let prev_screen = get_projected_pixel_float(prev_camera, prev_position); + //TODO: consider just storing integers here? + //TODO: technically this "0.5" is just a waste compute on both packing and unpacking + rs.motion = prev_screen - vec2(pixel_coord) - 0.5; + rs.position = hit_position; + + // Write down the Surface + rs.inner.basis = basis; + rs.inner.flat_normal = flat_normal; + rs.inner.depth = intersection.t; + return rs; +} diff --git a/blade-render/code/motion.inc.wgsl b/blade-render/code/motion.inc.wgsl new file mode 100644 index 00000000..a9a9f48a --- /dev/null +++ b/blade-render/code/motion.inc.wgsl @@ -0,0 +1,2 @@ +const MOTION_SCALE: f32 = 0.02; +const USE_MOTION_VECTORS: bool = true; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index b9d40617..db350f3f 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -6,10 +6,8 @@ #include "debug-param.inc.wgsl" #include "camera.inc.wgsl" #include "surface.inc.wgsl" -#include "gbuf.inc.wgsl" - -//TODO: https://github.com/gfx-rs/wgpu/pull/5429 -const RAY_FLAG_CULL_NO_OPAQUE: u32 = 0x80u; +#include "geometry.inc.wgsl" +#include "motion.inc.wgsl" const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; @@ -23,7 +21,6 @@ const DECOUPLED_SHADING: bool = false; //TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; -const GROUP_VISUALIZE: bool = false; struct MainParams { frame_index: u32, @@ -106,6 +103,7 @@ fn make_reservoir(ls: LightSample, light_index: u32, brdf: vec3) -> LiveRes r.history = 1.0; return r; } + fn merge_reservoir(r: ptr, other: LiveReservoir, random: f32) -> bool { (*r).weight_sum += other.weight_sum; (*r).history += other.history; @@ -144,14 +142,9 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { return pack_reservoir_detail(r, r.history); } -var t_depth: texture_2d; var t_prev_depth: texture_2d; -var t_basis: texture_2d; -var t_prev_basis: texture_2d -; -var t_flat_normal: texture_2d; +var t_prev_basis: texture_2d; var t_prev_flat_normal: texture_2d; -var t_motion: texture_2d; var out_diffuse: texture_storage_2d; var out_debug: texture_storage_2d; @@ -206,14 +199,6 @@ fn sample_light_from_environment(rng: ptr) -> LightSample return ls; } -fn read_surface(pixel: vec2) -> Surface { - var surface: Surface; - surface.basis = normalize(textureLoad(t_basis, pixel, 0)); - surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_depth, pixel, 0).x; - return surface; -} - fn read_prev_surface(pixel: vec2) -> Surface { var surface: Surface; surface.basis = normalize(textureLoad(t_prev_basis, pixel, 0)); @@ -268,9 +253,8 @@ fn evaluate_reflected_light(surface: Surface, light_index: u32, light_uv: vec2, pos_world: vec3) -> vec2 { +fn get_prev_pixel(pixel: vec2, pos_world: vec3, motion: vec2) -> vec2 { if (USE_MOTION_VECTORS && parameters.use_motion_vectors != 0u) { - let motion = textureLoad(t_motion, pixel, 0).xy / MOTION_SCALE; return vec2(pixel) + 0.5 + motion; } else { return get_projected_pixel_float(prev_camera, pos_world); @@ -473,7 +457,7 @@ fn finalize_resampling( } fn resample_temporal( - surface: Surface, cur_pixel: vec2, position: vec3, + surface: Surface, motion: vec2, cur_pixel: vec2, position: vec3, rng: ptr, debug_len: f32, ) -> ResampleOutput { if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { @@ -485,7 +469,7 @@ fn resample_temporal( let canonical = produce_canonical(surface, position, rng, debug_len); //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(cur_pixel, position)); + let prev_pixel = vec2(get_prev_pixel(cur_pixel, position, motion)); let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { @@ -585,64 +569,71 @@ fn resample_spatial( } fn compute_restir( + rs: RichSurface, pixel: vec2, local_index: u32, group_id: vec3, rng: ptr, enable_debug: bool, ) -> vec3 { - let surface = read_surface(pixel); - if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel, vec4(surface.depth / camera.depth)); - } - if (debug.view_mode == DebugMode_Normal) { - let normal = qrot(surface.basis, vec3(0.0, 0.0, 1.0)); - textureStore(out_debug, pixel, vec4(normal, 0.0)); - } - let debug_len = select(0.0, surface.depth * 0.2, enable_debug); - let ray_dir = get_ray_direction(camera, pixel); - let pixel_index = get_reservoir_index(pixel, camera); - let position = camera.position + surface.depth * ray_dir; + let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); - let temporal = resample_temporal(surface, pixel, position, rng, debug_len); - pixel_cache[local_index] = PixelCache(surface, temporal.reservoir, position); + let temporal = resample_temporal(rs.inner, rs.motion, pixel, rs.position, rng, debug_len); + pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); // sync with the workgroup to ensure all reservoirs are available. workgroupBarrier(); let temporal_live = revive_canonical(temporal); - let spatial = resample_spatial(surface, pixel, position, group_id, temporal_live, rng, debug_len); + let spatial = resample_spatial(rs.inner, pixel, rs.position, group_id, temporal_live, rng, debug_len); + + let pixel_index = get_reservoir_index(pixel, camera); reservoirs[pixel_index] = spatial.reservoir; return spatial.color; } +var out_depth: texture_storage_2d; +var out_basis: texture_storage_2d; +var out_flat_normal: texture_storage_2d; +var out_albedo: texture_storage_2d; +var out_motion: texture_storage_2d; + @compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) fn main( @builtin(workgroup_id) group_id: vec3, @builtin(local_invocation_index) local_index: u32, ) { - pixel_cache[local_index].reservoir.confidence = 0.0; + pixel_cache[local_index] = PixelCache(); let pixel_coord = thread_index_to_coord(local_index, group_id); if (any(vec2(pixel_coord) >= camera.target_size)) { return; } - if (GROUP_VISUALIZE) - { + + if (debug.view_mode == DebugMode_Grouping) { var rng = random_init(group_id.y * 1000u + group_id.x, 0u); let h = random_gen(&rng) * 360.0; let color = hsv_to_rgb(h, 0.5, 1.0); - textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); + textureStore(out_debug, pixel_coord, vec4(color, 1.0)); return; } + let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); + let rs = fetch_geometry(pixel_coord, true, enable_debug); + + // TODO: option to avoid writing data for the sky + textureStore(out_depth, pixel_coord, vec4(rs.inner.depth, 0.0, 0.0, 0.0)); + textureStore(out_basis, pixel_coord, rs.inner.basis); + textureStore(out_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); + textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); + textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); + let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); - let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let color = compute_restir(pixel_coord, local_index, group_id, &rng, enable_restir_debug); + let color = compute_restir(rs, pixel_coord, local_index, group_id, &rng, enable_restir_debug); + textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); if (enable_debug) { debug_buf.variance.color_sum += color; debug_buf.variance.color2_sum += color * color; debug_buf.variance.count += 1u; } - textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); } diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 74e3dd9c..e0e724c5 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -51,12 +51,13 @@ pub enum DebugMode { Normal = 2, Motion = 3, HitConsistency = 4, - TemporalMatch = 5, - TemporalMisCanonical = 6, - TemporalMisError = 7, - SpatialMatch = 8, - SpatialMisCanonical = 9, - SpatialMisError = 10, + Grouping = 5, + TemporalMatch = 10, + TemporalMisCanonical = 11, + TemporalMisError = 12, + SpatialMatch = 13, + SpatialMisCanonical = 14, + SpatialMisError = 15, Variance = 100, } @@ -313,7 +314,6 @@ pub struct Renderer { shaders: Shaders, targets: RestirTargets, post_proc_input_index: usize, - fill_pipeline: blade_graphics::ComputePipeline, main_pipeline: blade_graphics::ComputePipeline, post_proc_pipeline: blade_graphics::RenderPipeline, blur: Blur, @@ -375,46 +375,31 @@ struct MainParams { } #[derive(blade_macros::ShaderData)] -struct FillData<'a> { +struct MainData<'a> { camera: CameraParams, prev_camera: CameraParams, debug: DebugParams, + parameters: MainParams, acc_struct: blade_graphics::AccelerationStructure, + prev_acc_struct: blade_graphics::AccelerationStructure, hit_entries: blade_graphics::BufferPiece, index_buffers: &'a blade_graphics::BufferArray, vertex_buffers: &'a blade_graphics::BufferArray, textures: &'a blade_graphics::TextureArray, sampler_linear: blade_graphics::Sampler, - debug_buf: blade_graphics::BufferPiece, - out_depth: blade_graphics::TextureView, - out_basis: blade_graphics::TextureView, - out_flat_normal: blade_graphics::TextureView, - out_albedo: blade_graphics::TextureView, - out_motion: blade_graphics::TextureView, - out_debug: blade_graphics::TextureView, -} - -#[derive(blade_macros::ShaderData)] -struct MainData { - camera: CameraParams, - prev_camera: CameraParams, - debug: DebugParams, - parameters: MainParams, - acc_struct: blade_graphics::AccelerationStructure, - prev_acc_struct: blade_graphics::AccelerationStructure, - sampler_linear: blade_graphics::Sampler, sampler_nearest: blade_graphics::Sampler, env_map: blade_graphics::TextureView, env_weights: blade_graphics::TextureView, - t_depth: blade_graphics::TextureView, t_prev_depth: blade_graphics::TextureView, - t_basis: blade_graphics::TextureView, t_prev_basis: blade_graphics::TextureView, - t_flat_normal: blade_graphics::TextureView, t_prev_flat_normal: blade_graphics::TextureView, - t_motion: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, + out_depth: blade_graphics::TextureView, + out_basis: blade_graphics::TextureView, + out_flat_normal: blade_graphics::TextureView, + out_albedo: blade_graphics::TextureView, + out_motion: blade_graphics::TextureView, out_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -492,7 +477,6 @@ struct HitEntry { #[derive(Clone, PartialEq)] pub struct Shaders { env_prepare: blade_asset::Handle, - fill_gbuf: blade_asset::Handle, ray_trace: blade_asset::Handle, blur: blade_asset::Handle, post_proc: blade_asset::Handle, @@ -505,7 +489,6 @@ impl Shaders { let mut ctx = asset_hub.open_context(path, "shader finish"); let shaders = Self { env_prepare: ctx.load_shader("env-prepare.wgsl"), - fill_gbuf: ctx.load_shader("fill-gbuf.wgsl"), ray_trace: ctx.load_shader("ray-trace.wgsl"), blur: ctx.load_shader("blur.wgsl"), post_proc: ctx.load_shader("post-proc.wgsl"), @@ -517,7 +500,6 @@ impl Shaders { } struct ShaderPipelines { - fill: blade_graphics::ComputePipeline, main: blade_graphics::ComputePipeline, temporal_accum: blade_graphics::ComputePipeline, atrous: blade_graphics::ComputePipeline, @@ -527,19 +509,6 @@ struct ShaderPipelines { } impl ShaderPipelines { - fn create_gbuf_fill( - shader: &blade_graphics::Shader, - gpu: &blade_graphics::Context, - ) -> blade_graphics::ComputePipeline { - shader.check_struct_size::(); - shader.check_struct_size::(); - let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "fill-gbuf", - data_layouts: &[&layout], - compute: shader.at("main"), - }) - } fn create_ray_trace( shader: &blade_graphics::Shader, gpu: &blade_graphics::Context, @@ -617,7 +586,6 @@ impl ShaderPipelines { let sh_main = shader_man[shaders.ray_trace].raw.as_ref().unwrap(); let sh_blur = shader_man[shaders.blur].raw.as_ref().unwrap(); Ok(Self { - fill: Self::create_gbuf_fill(shader_man[shaders.fill_gbuf].raw.as_ref().unwrap(), gpu), main: Self::create_ray_trace(sh_main, gpu), temporal_accum: Self::create_temporal_accum(sh_blur, gpu), atrous: Self::create_atrous(sh_blur, gpu), @@ -708,7 +676,6 @@ impl Renderer { shaders, targets, post_proc_input_index: 0, - fill_pipeline: sp.fill, main_pipeline: sp.main, post_proc_pipeline: sp.post_proc, blur: Blur { @@ -755,7 +722,6 @@ impl Renderer { // pipelines gpu.destroy_compute_pipeline(&mut self.blur.temporal_accum_pipeline); gpu.destroy_compute_pipeline(&mut self.blur.atrous_pipeline); - gpu.destroy_compute_pipeline(&mut self.fill_pipeline); gpu.destroy_compute_pipeline(&mut self.main_pipeline); gpu.destroy_render_pipeline(&mut self.post_proc_pipeline); } @@ -770,7 +736,6 @@ impl Renderer { let mut tasks = Vec::new(); let old = self.shaders.clone(); - tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.fill_gbuf)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.ray_trace)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.blur)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.post_proc)); @@ -787,11 +752,6 @@ impl Renderer { let _ = task.join(); } - if self.shaders.fill_gbuf != old.fill_gbuf { - if let Ok(ref shader) = asset_hub.shaders[self.shaders.fill_gbuf].raw { - self.fill_pipeline = ShaderPipelines::create_gbuf_fill(shader, gpu); - } - } if self.shaders.ray_trace != old.ray_trace { if let Ok(ref shader) = asset_hub.shaders[self.shaders.ray_trace].raw { assert_eq!( @@ -1127,33 +1087,6 @@ impl Renderer { let debug = self.make_debug_params(&debug_config); let (cur, prev) = self.work_indices(); - if let mut pass = command_encoder.compute() { - let mut pc = pass.with(&self.fill_pipeline); - let groups = self.fill_pipeline.get_dispatch_for(self.surface_size); - pc.bind( - 0, - &FillData { - camera: self.targets.camera_params[cur], - prev_camera: self.targets.camera_params[prev], - debug, - acc_struct: self.acceleration_structure, - hit_entries: self.hit_buffer.into(), - index_buffers: &self.index_buffers, - vertex_buffers: &self.vertex_buffers, - textures: &self.textures, - sampler_linear: self.samplers.linear, - debug_buf: self.debug.buffer_resource(), - out_depth: self.targets.depth.views[cur], - out_basis: self.targets.basis.views[cur], - out_flat_normal: self.targets.flat_normal.views[cur], - out_albedo: self.targets.albedo.views[0], - out_motion: self.targets.motion.views[0], - out_debug: self.targets.debug.views[0], - }, - ); - pc.dispatch(groups); - } - if let mut pass = command_encoder.compute() { let grid_scale = { let limit = ray_config.group_mixer; @@ -1204,19 +1137,24 @@ impl Renderer { } else { self.prev_acceleration_structure }, + hit_entries: self.hit_buffer.into(), + index_buffers: &self.index_buffers, + vertex_buffers: &self.vertex_buffers, + textures: &self.textures, sampler_linear: self.samplers.linear, sampler_nearest: self.samplers.nearest, env_map: self.env_map.main_view, env_weights: self.env_map.weight_view, - t_depth: self.targets.depth.views[cur], t_prev_depth: self.targets.depth.views[prev], - t_basis: self.targets.basis.views[cur], t_prev_basis: self.targets.basis.views[prev], - t_flat_normal: self.targets.flat_normal.views[cur], t_prev_flat_normal: self.targets.flat_normal.views[prev], - t_motion: self.targets.motion.views[0], debug_buf: self.debug.buffer_resource(), reservoirs: self.targets.reservoir_buf.into(), + out_depth: self.targets.depth.views[cur], + out_basis: self.targets.basis.views[cur], + out_flat_normal: self.targets.flat_normal.views[cur], + out_albedo: self.targets.albedo.views[0], + out_motion: self.targets.motion.views[0], out_diffuse: self.targets.light_diffuse.views[cur], out_debug: self.targets.debug.views[0], }, From de0ebd50703f3bc1c5d818f7926d2b000a06ec36 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 3 Sep 2024 00:14:27 -0700 Subject: [PATCH 13/26] Merge temporal accumulation into the Main pass --- blade-helpers/src/hud.rs | 1 + blade-render/code/a-trous.wgsl | 83 ++++++++++++ blade-render/code/accum.inc.wgsl | 19 +++ blade-render/code/blur.wgsl | 162 ----------------------- blade-render/code/ray-trace.wgsl | 79 +++++++----- blade-render/src/render/mod.rs | 215 +++++++++++-------------------- examples/scene/main.rs | 17 +-- src/lib.rs | 17 +-- 8 files changed, 247 insertions(+), 346 deletions(-) create mode 100644 blade-render/code/a-trous.wgsl create mode 100644 blade-render/code/accum.inc.wgsl delete mode 100644 blade-render/code/blur.wgsl diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index ff4cf5a5..d8b9b279 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -37,6 +37,7 @@ impl ExposeHud for blade_render::RayConfig { impl ExposeHud for blade_render::DenoiserConfig { fn populate_hud(&mut self, ui: &mut egui::Ui) { + ui.checkbox(&mut self.enabled, "Enable denoiser"); ui.add(egui::Slider::new(&mut self.temporal_weight, 0.0..=1.0f32).text("Temporal weight")); ui.add(egui::Slider::new(&mut self.num_passes, 0..=5u32).text("A-trous passes")); } diff --git a/blade-render/code/a-trous.wgsl b/blade-render/code/a-trous.wgsl new file mode 100644 index 00000000..19277d20 --- /dev/null +++ b/blade-render/code/a-trous.wgsl @@ -0,0 +1,83 @@ +#include "camera.inc.wgsl" +#include "quaternion.inc.wgsl" +#include "surface.inc.wgsl" + +// Spatio-temporal variance-guided filtering +// https://research.nvidia.com/sites/default/files/pubs/2017-07_Spatiotemporal-Variance-Guided-Filtering%3A//svgf_preprint.pdf + +// Note: using "ilm" in place of "illumination and the 2nd moment of its luminance" + +struct Params { + extent: vec2, + iteration: u32, +} + +var camera: CameraParams; +var prev_camera: CameraParams; +var params: Params; +var t_depth: texture_2d; +var t_flat_normal: texture_2d; +var t_motion: texture_2d; +var input: texture_2d; +var output: texture_storage_2d; + +const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); +const MIN_WEIGHT: f32 = 0.01; + +fn read_surface(pixel: vec2) -> Surface { + var surface = Surface(); + surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); + surface.depth = textureLoad(t_depth, pixel, 0).x; + return surface; +} + +const GAUSSIAN_WEIGHTS = vec2(0.44198, 0.27901); +const SIGMA_L: f32 = 4.0; +const EPSILON: f32 = 0.001; + +fn compare_luminance(a_lum: f32, b_lum: f32, variance: f32) -> f32 { + return exp(-abs(a_lum - b_lum) / (SIGMA_L * variance + EPSILON)); +} + +fn w4(w: f32) -> vec4 { + return vec4(vec3(w), w * w); +} + +@compute @workgroup_size(8, 8) +fn atrous3x3(@builtin(global_invocation_id) global_id: vec3) { + let center = vec2(global_id.xy); + if (any(center >= params.extent)) { + return; + } + + let center_ilm = textureLoad(input, center, 0); + let center_luma = dot(center_ilm.xyz, LUMA); + let variance = sqrt(center_ilm.w); + let center_suf = read_surface(center); + var sum_weight = GAUSSIAN_WEIGHTS[0] * GAUSSIAN_WEIGHTS[0]; + var sum_ilm = w4(sum_weight) * center_ilm; + + for (var yy=-1; yy<=1; yy+=1) { + for (var xx=-1; xx<=1; xx+=1) { + let p = center + vec2(xx, yy) * (1i << params.iteration); + if (all(p == center) || any(p < vec2(0)) || any(p >= params.extent)) { + continue; + } + + //TODO: store in group-shared memory + let surface = read_surface(p); + var weight = GAUSSIAN_WEIGHTS[abs(xx)] * GAUSSIAN_WEIGHTS[abs(yy)]; + //TODO: make it stricter on higher iterations + weight *= compare_flat_normals(surface.flat_normal, center_suf.flat_normal); + //Note: should we use a projected depth instead of the surface one? + weight *= compare_depths(surface.depth, center_suf.depth); + let other_ilm = textureLoad(input, p, 0); + weight *= compare_luminance(center_luma, dot(other_ilm.xyz, LUMA), variance); + sum_ilm += w4(weight) * other_ilm; + sum_weight += weight; + } + } + + let filtered_ilm = select(center_ilm, sum_ilm / w4(sum_weight), sum_weight > MIN_WEIGHT); + textureStore(output, global_id.xy, filtered_ilm); +} diff --git a/blade-render/code/accum.inc.wgsl b/blade-render/code/accum.inc.wgsl new file mode 100644 index 00000000..cc599ee3 --- /dev/null +++ b/blade-render/code/accum.inc.wgsl @@ -0,0 +1,19 @@ +const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); + +var inout_diffuse: texture_storage_2d; + +fn accumulate_temporal( + surface: Surface, position: vec3, pixel_coord: vec2, + cur_illumination: vec3, temporal_weight: f32, + prev_surface: Surface, prev_pixel: vec2, prev_valid: bool, +) { + let cur_luminocity = dot(cur_illumination, LUMA); + var ilm = vec4(cur_illumination, cur_luminocity * cur_luminocity); + if (prev_valid && temporal_weight < 1.0) { + let illumination = textureLoad(inout_diffuse, prev_pixel).xyz; + let luminocity = dot(illumination, LUMA); + let prev_ilm = vec4(illumination, luminocity * luminocity); + ilm = mix(prev_ilm, ilm, temporal_weight); + } + textureStore(inout_diffuse, pixel_coord, ilm); +} diff --git a/blade-render/code/blur.wgsl b/blade-render/code/blur.wgsl deleted file mode 100644 index 3aec665b..00000000 --- a/blade-render/code/blur.wgsl +++ /dev/null @@ -1,162 +0,0 @@ -#include "camera.inc.wgsl" -#include "motion.inc.wgsl" -#include "quaternion.inc.wgsl" -#include "surface.inc.wgsl" - -// Spatio-temporal variance-guided filtering -// https://research.nvidia.com/sites/default/files/pubs/2017-07_Spatiotemporal-Variance-Guided-Filtering%3A//svgf_preprint.pdf - -// Note: using "ilm" in place of "illumination and the 2nd moment of its luminance" - -struct Params { - extent: vec2, - temporal_weight: f32, - iteration: u32, - use_motion_vectors: u32, -} - -var camera: CameraParams; -var prev_camera: CameraParams; -var params: Params; -var t_depth: texture_2d; -var t_prev_depth: texture_2d; -var t_flat_normal: texture_2d; -var t_prev_flat_normal: texture_2d; -var t_motion: texture_2d; -var input: texture_2d; -var prev_input: texture_2d; -var output: texture_storage_2d; - -const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); -const MIN_WEIGHT: f32 = 0.01; - -fn read_surface(pixel: vec2) -> Surface { - var surface = Surface(); - surface.flat_normal = normalize(textureLoad(t_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_depth, pixel, 0).x; - return surface; -} -fn read_prev_surface(pixel: vec2) -> Surface { - var surface = Surface(); - surface.flat_normal = normalize(textureLoad(t_prev_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_prev_depth, pixel, 0).x; - return surface; -} - -fn get_prev_pixel(pixel: vec2, pos_world: vec3) -> vec2 { - if (USE_MOTION_VECTORS && params.use_motion_vectors != 0u) { - let motion = textureLoad(t_motion, pixel, 0).xy / MOTION_SCALE; - return vec2(pixel) + 0.5 + motion; - } else { - return get_projected_pixel_float(prev_camera, pos_world); - } -} - -@compute @workgroup_size(8, 8) -fn temporal_accum(@builtin(global_invocation_id) global_id: vec3) { - let pixel = vec2(global_id.xy); - if (any(pixel >= params.extent)) { - return; - } - - let surface = read_surface(pixel); - let pos_world = camera.position + surface.depth * get_ray_direction(camera, pixel); - // considering all samples in 2x2 quad, to help with edges - var center_pixel = get_prev_pixel(pixel, pos_world); - var prev_pixels = array, 4>( - vec2(vec2(center_pixel.x - 0.5, center_pixel.y - 0.5)), - vec2(vec2(center_pixel.x + 0.5, center_pixel.y - 0.5)), - vec2(vec2(center_pixel.x + 0.5, center_pixel.y + 0.5)), - vec2(vec2(center_pixel.x - 0.5, center_pixel.y + 0.5)), - ); - //Note: careful about the pixel center when there is a perfect match - let w_bot_right = fract(center_pixel + vec2(0.5)); - var prev_weights = vec4( - (1.0 - w_bot_right.x) * (1.0 - w_bot_right.y), - w_bot_right.x * (1.0 - w_bot_right.y), - w_bot_right.x * w_bot_right.y, - (1.0 - w_bot_right.x) * w_bot_right.y, - ); - - var sum_weight = 0.0; - var sum_ilm = vec4(0.0); - //TODO: optimize depth load with a gather operation - for (var i = 0; i < 4; i += 1) { - let prev_pixel = prev_pixels[i]; - if (all(prev_pixel >= vec2(0)) && all(prev_pixel < params.extent)) { - let prev_surface = read_prev_surface(prev_pixel); - if (compare_flat_normals(surface.flat_normal, prev_surface.flat_normal) < 0.5) { - continue; - } - let projected_distance = length(pos_world - prev_camera.position); - if (compare_depths(prev_surface.depth, projected_distance) < 0.5) { - continue; - } - let w = prev_weights[i]; - sum_weight += w; - let illumination = w * textureLoad(prev_input, prev_pixel, 0).xyz; - let luminocity = dot(illumination, LUMA); - sum_ilm += vec4(illumination, luminocity * luminocity); - } - } - - let cur_illumination = textureLoad(input, pixel, 0).xyz; - let cur_luminocity = dot(cur_illumination, LUMA); - var mixed_ilm = vec4(cur_illumination, cur_luminocity * cur_luminocity); - if (sum_weight > MIN_WEIGHT) { - let prev_ilm = sum_ilm / vec4(vec3(sum_weight), max(0.001, sum_weight*sum_weight)); - mixed_ilm = mix(mixed_ilm, prev_ilm, sum_weight * (1.0 - params.temporal_weight)); - } - textureStore(output, global_id.xy, mixed_ilm); -} - -const GAUSSIAN_WEIGHTS = vec2(0.44198, 0.27901); -const SIGMA_L: f32 = 4.0; -const EPSILON: f32 = 0.001; - -fn compare_luminance(a_lum: f32, b_lum: f32, variance: f32) -> f32 { - return exp(-abs(a_lum - b_lum) / (SIGMA_L * variance + EPSILON)); -} - -fn w4(w: f32) -> vec4 { - return vec4(vec3(w), w * w); -} - -@compute @workgroup_size(8, 8) -fn atrous3x3(@builtin(global_invocation_id) global_id: vec3) { - let center = vec2(global_id.xy); - if (any(center >= params.extent)) { - return; - } - - let center_ilm = textureLoad(input, center, 0); - let center_luma = dot(center_ilm.xyz, LUMA); - let variance = sqrt(center_ilm.w); - let center_suf = read_surface(center); - var sum_weight = GAUSSIAN_WEIGHTS[0] * GAUSSIAN_WEIGHTS[0]; - var sum_ilm = w4(sum_weight) * center_ilm; - - for (var yy=-1; yy<=1; yy+=1) { - for (var xx=-1; xx<=1; xx+=1) { - let p = center + vec2(xx, yy) * (1i << params.iteration); - if (all(p == center) || any(p < vec2(0)) || any(p >= params.extent)) { - continue; - } - - //TODO: store in group-shared memory - let surface = read_surface(p); - var weight = GAUSSIAN_WEIGHTS[abs(xx)] * GAUSSIAN_WEIGHTS[abs(yy)]; - //TODO: make it stricter on higher iterations - weight *= compare_flat_normals(surface.flat_normal, center_suf.flat_normal); - //Note: should we use a projected depth instead of the surface one? - weight *= compare_depths(surface.depth, center_suf.depth); - let other_ilm = textureLoad(input, p, 0); - weight *= compare_luminance(center_luma, dot(other_ilm.xyz, LUMA), variance); - sum_ilm += w4(weight) * other_ilm; - sum_weight += weight; - } - } - - let filtered_ilm = select(center_ilm, sum_ilm / w4(sum_weight), sum_weight > MIN_WEIGHT); - textureStore(output, global_id.xy, filtered_ilm); -} diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index db350f3f..8ba0facd 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -8,6 +8,7 @@ #include "surface.inc.wgsl" #include "geometry.inc.wgsl" #include "motion.inc.wgsl" +#include "accum.inc.wgsl" const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; @@ -34,6 +35,7 @@ struct MainParams { t_start: f32, use_motion_vectors: u32, grid_scale: vec2, + temporal_accumulation_weight: f32, } var camera: CameraParams; @@ -62,6 +64,12 @@ struct PixelCache { world_pos: vec3, } var pixel_cache: array; +struct ReprojectionCache { + surface: Surface, + pixel_coord: vec2, + is_valid: bool, +} +var reprojection_cache: array; struct LightSample { radiance: vec3, @@ -142,10 +150,11 @@ fn pack_reservoir(r: LiveReservoir) -> StoredReservoir { return pack_reservoir_detail(r, r.history); } -var t_prev_depth: texture_2d; -var t_prev_basis: texture_2d; -var t_prev_flat_normal: texture_2d; -var out_diffuse: texture_storage_2d; +var inout_depth: texture_storage_2d; +var inout_basis: texture_storage_2d; +var inout_flat_normal: texture_storage_2d; +var out_albedo: texture_storage_2d; +var out_motion: texture_storage_2d; var out_debug: texture_storage_2d; fn sample_circle(random: f32) -> vec2 { @@ -201,9 +210,9 @@ fn sample_light_from_environment(rng: ptr) -> LightSample fn read_prev_surface(pixel: vec2) -> Surface { var surface: Surface; - surface.basis = normalize(textureLoad(t_prev_basis, pixel, 0)); - surface.flat_normal = normalize(textureLoad(t_prev_flat_normal, pixel, 0).xyz); - surface.depth = textureLoad(t_prev_depth, pixel, 0).x; + surface.basis = normalize(textureLoad(inout_basis, pixel)); + surface.flat_normal = normalize(textureLoad(inout_flat_normal, pixel).xyz); + surface.depth = textureLoad(inout_depth, pixel).x; return surface; } @@ -458,8 +467,9 @@ fn finalize_resampling( fn resample_temporal( surface: Surface, motion: vec2, cur_pixel: vec2, position: vec3, - rng: ptr, debug_len: f32, + local_index: u32, rng: ptr, debug_len: f32, ) -> ResampleOutput { + reprojection_cache[local_index].is_valid = false; if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { textureStore(out_debug, cur_pixel, vec4(0.0)); } @@ -469,27 +479,32 @@ fn resample_temporal( let canonical = produce_canonical(surface, position, rng, debug_len); //TODO: find best match in a 2x2 grid - let prev_pixel = vec2(get_prev_pixel(cur_pixel, position, motion)); + var prev = ReprojectionCache(); + prev.pixel_coord = vec2(get_prev_pixel(cur_pixel, position, motion)); - let prev_reservoir_index = get_reservoir_index(prev_pixel, prev_camera); + let prev_reservoir_index = get_reservoir_index(prev.pixel_coord, prev_camera); if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { return finalize_canonical(canonical); } let prev_reservoir = reservoirs[prev_reservoir_index]; - let prev_surface = read_prev_surface(prev_pixel); + prev.surface = read_prev_surface(prev.pixel_coord); + prev.is_valid = compare_surfaces(surface, prev.surface) > 0.1; // if the surfaces are too different, there is no trust in this sample - if (prev_reservoir.confidence == 0.0 || compare_surfaces(surface, prev_surface) < 0.1) { + if (prev_reservoir.confidence == 0.0 || !prev.is_valid) { return finalize_canonical(canonical); } + // Write down the reprojection cache, no need to carry this around + reprojection_cache[local_index] = prev; + var reservoir = LiveReservoir(); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, 1.0); - let prev_dir = get_ray_direction(prev_camera, prev_pixel); - let prev_world_pos = prev_camera.position + prev_surface.depth * prev_dir; - let other = PixelCache(prev_surface, prev_reservoir, prev_world_pos); + let prev_dir = get_ray_direction(prev_camera, prev.pixel_coord); + let prev_world_pos = prev_camera.position + prev.surface.depth * prev_dir; + let other = PixelCache(prev.surface, prev_reservoir, prev_world_pos); let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, debug_len); let mis_canonical = 1.0 + rr.mis_canonical; @@ -503,6 +518,7 @@ fn resample_temporal( let total = mis_canonical + rr.mis_sample; textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); } + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } @@ -575,7 +591,7 @@ fn compute_restir( ) -> vec3 { let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); - let temporal = resample_temporal(rs.inner, rs.motion, pixel, rs.position, rng, debug_len); + let temporal = resample_temporal(rs.inner, rs.motion, pixel, rs.position, local_index, rng, debug_len); pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); // sync with the workgroup to ensure all reservoirs are available. @@ -586,15 +602,18 @@ fn compute_restir( let pixel_index = get_reservoir_index(pixel, camera); reservoirs[pixel_index] = spatial.reservoir; + + //Note: restoring it from the LDS allows to lower the register pressure during spatial re-use + let rc = reprojection_cache[local_index]; + accumulate_temporal( + rs.inner, rs.position, pixel, + spatial.color, parameters.temporal_accumulation_weight, + rc.surface, rc.pixel_coord, rc.is_valid, + ); + return spatial.color; } -var out_depth: texture_storage_2d; -var out_basis: texture_storage_2d; -var out_flat_normal: texture_storage_2d; -var out_albedo: texture_storage_2d; -var out_motion: texture_storage_2d; - @compute @workgroup_size(GROUP_SIZE.x, GROUP_SIZE.y) fn main( @builtin(workgroup_id) group_id: vec3, @@ -617,19 +636,19 @@ fn main( let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); let rs = fetch_geometry(pixel_coord, true, enable_debug); - // TODO: option to avoid writing data for the sky - textureStore(out_depth, pixel_coord, vec4(rs.inner.depth, 0.0, 0.0, 0.0)); - textureStore(out_basis, pixel_coord, rs.inner.basis); - textureStore(out_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); - textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); - textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); - let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); var rng = random_init(global_index, parameters.frame_index); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; let color = compute_restir(rs, pixel_coord, local_index, group_id, &rng, enable_restir_debug); - textureStore(out_diffuse, pixel_coord, vec4(color, 1.0)); + + //Note: important to do this after the temporal pass specifically + // TODO: option to avoid writing data for the sky + textureStore(inout_depth, pixel_coord, vec4(rs.inner.depth, 0.0, 0.0, 0.0)); + textureStore(inout_basis, pixel_coord, rs.inner.basis); + textureStore(inout_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); + textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); + textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); if (enable_debug) { debug_buf.variance.color_sum += color; diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index e0e724c5..51d3f635 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -110,6 +110,7 @@ pub struct RayConfig { #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] pub struct DenoiserConfig { + pub enabled: bool, pub num_passes: u32, pub temporal_weight: f32, } @@ -212,11 +213,13 @@ impl RenderTarget { struct RestirTargets { reservoir_buf: blade_graphics::Buffer, debug: RenderTarget<1>, - depth: RenderTarget<2>, - basis: RenderTarget<2>, - flat_normal: RenderTarget<2>, + depth: RenderTarget<1>, + basis: RenderTarget<1>, + flat_normal: RenderTarget<1>, albedo: RenderTarget<1>, motion: RenderTarget<1>, + // One stores the ReSTIR output color, + // another 2 are used for a-trous ping-pong. light_diffuse: RenderTarget<3>, camera_params: [CameraParams; 2], } @@ -238,7 +241,7 @@ impl RestirTargets { Self { reservoir_buf, debug: RenderTarget::new( - "deubg", + "debug", blade_graphics::TextureFormat::Rgba8Unorm, size, encoder, @@ -297,7 +300,6 @@ impl RestirTargets { } struct Blur { - temporal_accum_pipeline: blade_graphics::ComputePipeline, atrous_pipeline: blade_graphics::ComputePipeline, } @@ -313,7 +315,6 @@ struct Blur { pub struct Renderer { shaders: Shaders, targets: RestirTargets, - post_proc_input_index: usize, main_pipeline: blade_graphics::ComputePipeline, post_proc_pipeline: blade_graphics::RenderPipeline, blur: Blur, @@ -372,6 +373,8 @@ struct MainParams { t_start: f32, use_motion_vectors: u32, grid_scale: [u32; 2], + temporal_accumulation_weight: f32, + pad: f32, } #[derive(blade_macros::ShaderData)] @@ -390,17 +393,14 @@ struct MainData<'a> { sampler_nearest: blade_graphics::Sampler, env_map: blade_graphics::TextureView, env_weights: blade_graphics::TextureView, - t_prev_depth: blade_graphics::TextureView, - t_prev_basis: blade_graphics::TextureView, - t_prev_flat_normal: blade_graphics::TextureView, debug_buf: blade_graphics::BufferPiece, reservoirs: blade_graphics::BufferPiece, - out_depth: blade_graphics::TextureView, - out_basis: blade_graphics::TextureView, - out_flat_normal: blade_graphics::TextureView, + inout_depth: blade_graphics::TextureView, + inout_basis: blade_graphics::TextureView, + inout_flat_normal: blade_graphics::TextureView, out_albedo: blade_graphics::TextureView, out_motion: blade_graphics::TextureView, - out_diffuse: blade_graphics::TextureView, + inout_diffuse: blade_graphics::TextureView, out_debug: blade_graphics::TextureView, } @@ -408,27 +408,10 @@ struct MainData<'a> { #[derive(Clone, Copy, bytemuck::Zeroable, bytemuck::Pod)] struct BlurParams { extent: [u32; 2], - temporal_weight: f32, iteration: i32, - use_motion_vectors: u32, pad: u32, } -#[derive(blade_macros::ShaderData)] -struct TemporalAccumData { - camera: CameraParams, - prev_camera: CameraParams, - params: BlurParams, - input: blade_graphics::TextureView, - prev_input: blade_graphics::TextureView, - t_depth: blade_graphics::TextureView, - t_prev_depth: blade_graphics::TextureView, - t_flat_normal: blade_graphics::TextureView, - t_prev_flat_normal: blade_graphics::TextureView, - t_motion: blade_graphics::TextureView, - output: blade_graphics::TextureView, -} - #[derive(blade_macros::ShaderData)] struct AtrousData { params: BlurParams, @@ -478,7 +461,7 @@ struct HitEntry { pub struct Shaders { env_prepare: blade_asset::Handle, ray_trace: blade_asset::Handle, - blur: blade_asset::Handle, + a_trous: blade_asset::Handle, post_proc: blade_asset::Handle, debug_draw: blade_asset::Handle, debug_blit: blade_asset::Handle, @@ -490,7 +473,7 @@ impl Shaders { let shaders = Self { env_prepare: ctx.load_shader("env-prepare.wgsl"), ray_trace: ctx.load_shader("ray-trace.wgsl"), - blur: ctx.load_shader("blur.wgsl"), + a_trous: ctx.load_shader("a-trous.wgsl"), post_proc: ctx.load_shader("post-proc.wgsl"), debug_draw: ctx.load_shader("debug-draw.wgsl"), debug_blit: ctx.load_shader("debug-blit.wgsl"), @@ -501,8 +484,7 @@ impl Shaders { struct ShaderPipelines { main: blade_graphics::ComputePipeline, - temporal_accum: blade_graphics::ComputePipeline, - atrous: blade_graphics::ComputePipeline, + a_trous: blade_graphics::ComputePipeline, post_proc: blade_graphics::RenderPipeline, env_prepare: blade_graphics::ComputePipeline, reservoir_size: u32, @@ -532,25 +514,13 @@ impl ShaderPipelines { pipeline } - fn create_temporal_accum( - shader: &blade_graphics::Shader, - gpu: &blade_graphics::Context, - ) -> blade_graphics::ComputePipeline { - let layout = ::layout(); - gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "temporal-accum", - data_layouts: &[&layout], - compute: shader.at("temporal_accum"), - }) - } - - fn create_atrous( + fn create_a_trous( shader: &blade_graphics::Shader, gpu: &blade_graphics::Context, ) -> blade_graphics::ComputePipeline { let layout = ::layout(); gpu.create_compute_pipeline(blade_graphics::ComputePipelineDesc { - name: "atrous", + name: "a-trous", data_layouts: &[&layout], compute: shader.at("atrous3x3"), }) @@ -584,11 +554,10 @@ impl ShaderPipelines { shader_man: &blade_asset::AssetManager, ) -> Result { let sh_main = shader_man[shaders.ray_trace].raw.as_ref().unwrap(); - let sh_blur = shader_man[shaders.blur].raw.as_ref().unwrap(); + let sh_atrous = shader_man[shaders.a_trous].raw.as_ref().unwrap(); Ok(Self { main: Self::create_ray_trace(sh_main, gpu), - temporal_accum: Self::create_temporal_accum(sh_blur, gpu), - atrous: Self::create_atrous(sh_blur, gpu), + a_trous: Self::create_a_trous(sh_atrous, gpu), post_proc: Self::create_post_proc( shader_man[shaders.post_proc].raw.as_ref().unwrap(), config.surface_info, @@ -618,6 +587,11 @@ pub struct FrameResources { pub acceleration_structures: Vec, } +#[derive(Debug, Default)] +pub struct FrameKey { + post_proc_input_index: usize, +} + impl Renderer { /// Create a new renderer with a given configuration. /// @@ -675,12 +649,10 @@ impl Renderer { Self { shaders, targets, - post_proc_input_index: 0, main_pipeline: sp.main, post_proc_pipeline: sp.post_proc, blur: Blur { - temporal_accum_pipeline: sp.temporal_accum, - atrous_pipeline: sp.atrous, + atrous_pipeline: sp.a_trous, }, acceleration_structure: blade_graphics::AccelerationStructure::default(), prev_acceleration_structure: blade_graphics::AccelerationStructure::default(), @@ -720,7 +692,6 @@ impl Renderer { gpu.destroy_sampler(self.samplers.nearest); gpu.destroy_sampler(self.samplers.linear); // pipelines - gpu.destroy_compute_pipeline(&mut self.blur.temporal_accum_pipeline); gpu.destroy_compute_pipeline(&mut self.blur.atrous_pipeline); gpu.destroy_compute_pipeline(&mut self.main_pipeline); gpu.destroy_render_pipeline(&mut self.post_proc_pipeline); @@ -737,7 +708,7 @@ impl Renderer { let old = self.shaders.clone(); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.ray_trace)); - tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.blur)); + tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.a_trous)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.post_proc)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.debug_draw)); tasks.extend(asset_hub.shaders.hot_reload(&mut self.shaders.debug_blit)); @@ -761,11 +732,9 @@ impl Renderer { self.main_pipeline = ShaderPipelines::create_ray_trace(shader, gpu); } } - if self.shaders.blur != old.blur { - if let Ok(ref shader) = asset_hub.shaders[self.shaders.blur].raw { - self.blur.temporal_accum_pipeline = - ShaderPipelines::create_temporal_accum(shader, gpu); - self.blur.atrous_pipeline = ShaderPipelines::create_atrous(shader, gpu); + if self.shaders.a_trous != old.a_trous { + if let Ok(ref shader) = asset_hub.shaders[self.shaders.a_trous].raw { + self.blur.atrous_pipeline = ShaderPipelines::create_a_trous(shader, gpu); } } if self.shaders.post_proc != old.post_proc { @@ -1071,7 +1040,6 @@ impl Renderer { self.frame_index += 1; } self.targets.camera_params[self.frame_index % 2] = self.make_camera_params(camera); - self.post_proc_input_index = self.frame_index % 2; } /// Ray trace the scene. @@ -1083,9 +1051,11 @@ impl Renderer { command_encoder: &mut blade_graphics::CommandEncoder, debug_config: DebugConfig, ray_config: RayConfig, - ) { + denoiser_config: DenoiserConfig, + ) -> FrameKey { let debug = self.make_debug_params(&debug_config); let (cur, prev) = self.work_indices(); + let mut post_proc_input_index = 0; if let mut pass = command_encoder.compute() { let grid_scale = { @@ -1127,6 +1097,12 @@ impl Renderer { t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, grid_scale, + temporal_accumulation_weight: if denoiser_config.enabled { + denoiser_config.temporal_weight + } else { + 1.0 + }, + pad: 0.0, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index @@ -1145,91 +1121,53 @@ impl Renderer { sampler_nearest: self.samplers.nearest, env_map: self.env_map.main_view, env_weights: self.env_map.weight_view, - t_prev_depth: self.targets.depth.views[prev], - t_prev_basis: self.targets.basis.views[prev], - t_prev_flat_normal: self.targets.flat_normal.views[prev], debug_buf: self.debug.buffer_resource(), reservoirs: self.targets.reservoir_buf.into(), - out_depth: self.targets.depth.views[cur], - out_basis: self.targets.basis.views[cur], - out_flat_normal: self.targets.flat_normal.views[cur], + inout_depth: self.targets.depth.views[0], + inout_basis: self.targets.basis.views[0], + inout_flat_normal: self.targets.flat_normal.views[0], out_albedo: self.targets.albedo.views[0], out_motion: self.targets.motion.views[0], - out_diffuse: self.targets.light_diffuse.views[cur], + inout_diffuse: self.targets.light_diffuse.views[post_proc_input_index], out_debug: self.targets.debug.views[0], }, ); pc.dispatch(groups); } - } - /// Perform noise reduction using SVGF. - #[profiling::function] - pub fn denoise( - &mut self, //TODO: borrow immutably - command_encoder: &mut blade_graphics::CommandEncoder, - denoiser_config: DenoiserConfig, - ) { - let mut params = BlurParams { - extent: [self.surface_size.width, self.surface_size.height], - temporal_weight: denoiser_config.temporal_weight, - iteration: 0, - use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - pad: 0, - }; - let (cur, prev) = self.work_indices(); - let temp = 2; - - if denoiser_config.temporal_weight < 1.0 { - let mut pass = command_encoder.compute(); - let mut pc = pass.with(&self.blur.temporal_accum_pipeline); - let groups = self - .blur - .atrous_pipeline - .get_dispatch_for(self.surface_size); - pc.bind( - 0, - &TemporalAccumData { - camera: self.targets.camera_params[cur], - prev_camera: self.targets.camera_params[prev], - params, - input: self.targets.light_diffuse.views[cur], - prev_input: self.targets.light_diffuse.views[prev], - t_depth: self.targets.depth.views[cur], - t_prev_depth: self.targets.depth.views[prev], - t_flat_normal: self.targets.flat_normal.views[cur], - t_prev_flat_normal: self.targets.flat_normal.views[prev], - t_motion: self.targets.motion.views[0], - output: self.targets.light_diffuse.views[temp], - }, - ); - pc.dispatch(groups); - //Note: making `cur` contain the latest reprojection output - self.targets.light_diffuse.views.swap(cur, temp); + if denoiser_config.enabled { + let mut params = BlurParams { + extent: [self.surface_size.width, self.surface_size.height], + iteration: 0, + pad: 0, + }; + let mut ping_pong = [1, 2]; + for _ in 0..denoiser_config.num_passes { + let mut pass = command_encoder.compute(); + let mut pc = pass.with(&self.blur.atrous_pipeline); + let groups = self + .blur + .atrous_pipeline + .get_dispatch_for(self.surface_size); + pc.bind( + 0, + &AtrousData { + params, + input: self.targets.light_diffuse.views[post_proc_input_index], + t_depth: self.targets.depth.views[0], + t_flat_normal: self.targets.flat_normal.views[0], + output: self.targets.light_diffuse.views[ping_pong[0]], + }, + ); + pc.dispatch(groups); + post_proc_input_index = ping_pong[0]; + ping_pong.swap(0, 1); + params.iteration += 1; + } } - let mut ping_pong = [temp, prev]; - for _ in 0..denoiser_config.num_passes { - let mut pass = command_encoder.compute(); - let mut pc = pass.with(&self.blur.atrous_pipeline); - let groups = self - .blur - .atrous_pipeline - .get_dispatch_for(self.surface_size); - pc.bind( - 0, - &AtrousData { - params, - input: self.targets.light_diffuse.views[self.post_proc_input_index], - t_depth: self.targets.depth.views[cur], - t_flat_normal: self.targets.flat_normal.views[cur], - output: self.targets.light_diffuse.views[ping_pong[0]], - }, - ); - pc.dispatch(groups); - self.post_proc_input_index = ping_pong[0]; - ping_pong.swap(0, 1); - params.iteration += 1; + FrameKey { + post_proc_input_index, } } @@ -1238,6 +1176,7 @@ impl Renderer { pub fn post_proc( &self, pass: &mut blade_graphics::RenderCommandEncoder, + key: FrameKey, debug_config: DebugConfig, pp_config: PostProcConfig, debug_lines: &[DebugLine], @@ -1250,7 +1189,7 @@ impl Renderer { 0, &PostProcData { t_albedo: self.targets.albedo.views[0], - light_diffuse: self.targets.light_diffuse.views[self.post_proc_input_index], + light_diffuse: self.targets.light_diffuse.views[key.post_proc_input_index], t_debug: self.targets.debug.views[0], tone_map_params: ToneMapParams { enabled: 1, @@ -1267,7 +1206,7 @@ impl Renderer { self.debug.render_lines( debug_lines, self.targets.camera_params[cur], - self.targets.depth.views[cur], + self.targets.depth.views[0], pass, ); self.debug diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 903a30ca..1e9c6e57 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -159,7 +159,6 @@ struct Example { last_render_time: time::Instant, render_times: VecDeque, ray_config: blade_render::RayConfig, - denoiser_enabled: bool, denoiser_config: blade_render::DenoiserConfig, post_proc_config: blade_render::PostProcConfig, debug_blit: Option, @@ -268,8 +267,8 @@ impl Example { group_mixer: 10, t_start: 0.1, }, - denoiser_enabled: true, denoiser_config: blade_render::DenoiserConfig { + enabled: true, num_passes: 3, temporal_weight: 0.1, }, @@ -459,6 +458,7 @@ impl Example { // even while it's still being loaded. let do_render = self.scene_load_task.is_none() || (RENDER_WHILE_LOADING && self.scene_revision != 0); + let mut frame_key = blade_render::FrameKey::default(); if do_render { self.renderer.prepare( command_encoder, @@ -475,11 +475,12 @@ impl Example { //TODO: figure out why the main RT pipeline // causes a GPU crash when there are no objects if !self.objects.is_empty() { - self.renderer - .ray_trace(command_encoder, self.debug, self.ray_config); - if self.denoiser_enabled { - self.renderer.denoise(command_encoder, self.denoiser_config); - } + frame_key = self.renderer.ray_trace( + command_encoder, + self.debug, + self.ray_config, + self.denoiser_config, + ); } } @@ -509,6 +510,7 @@ impl Example { }; self.renderer.post_proc( &mut pass, + frame_key, self.debug, self.post_proc_config, &[], @@ -672,7 +674,6 @@ impl Example { egui::CollapsingHeader::new("Denoise") .default_open(false) .show(ui, |ui| { - ui.checkbox(&mut self.denoiser_enabled, "Enable"); self.denoiser_config.populate_hud(ui); }); diff --git a/src/lib.rs b/src/lib.rs index b14b8ebe..32f6e738 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -378,7 +378,6 @@ pub struct Engine { debug: blade_render::DebugConfig, pub frame_config: blade_render::FrameConfig, pub ray_config: blade_render::RayConfig, - pub denoiser_enabled: bool, pub denoiser_config: blade_render::DenoiserConfig, pub post_proc_config: blade_render::PostProcConfig, track_hot_reloads: bool, @@ -493,8 +492,8 @@ impl Engine { group_mixer: 10, t_start: 0.01, }, - denoiser_enabled: true, denoiser_config: blade_render::DenoiserConfig { + enabled: true, num_passes: 4, temporal_weight: 0.1, }, @@ -573,6 +572,7 @@ impl Engine { // We should be able to update TLAS and render content // even while it's still being loaded. + let mut frame_key = blade_render::FrameKey::default(); if self.load_tasks.is_empty() { self.render_objects.clear(); for (_, object) in self.objects.iter_mut() { @@ -628,11 +628,12 @@ impl Engine { self.frame_config.reset_reservoirs = false; if !self.render_objects.is_empty() { - self.renderer - .ray_trace(command_encoder, self.debug, self.ray_config); - if self.denoiser_enabled { - self.renderer.denoise(command_encoder, self.denoiser_config); - } + frame_key = self.renderer.ray_trace( + command_encoder, + self.debug, + self.ray_config, + self.denoiser_config, + ); } } @@ -702,6 +703,7 @@ impl Engine { if self.load_tasks.is_empty() { self.renderer.post_proc( &mut pass, + frame_key, self.debug, self.post_proc_config, &debug_lines, @@ -736,7 +738,6 @@ impl Engine { .show(ui, |ui| { self.ray_config.populate_hud(ui); self.frame_config.reset_reservoirs |= ui.button("Reset Accumulation").clicked(); - ui.checkbox(&mut self.denoiser_enabled, "Enable Denoiser"); self.denoiser_config.populate_hud(ui); self.post_proc_config.populate_hud(ui); }); From ddf0f17c27172662df826baf88ed10d78f1dac1f Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 3 Sep 2024 00:50:19 -0700 Subject: [PATCH 14/26] Fix debug modes rendering --- blade-render/code/geometry.inc.wgsl | 23 ++++++++++++++--------- blade-render/code/ray-trace.wgsl | 25 +++++++------------------ blade-render/src/render/mod.rs | 6 ++---- 3 files changed, 23 insertions(+), 31 deletions(-) diff --git a/blade-render/code/geometry.inc.wgsl b/blade-render/code/geometry.inc.wgsl index 99b771b1..2ba8994c 100644 --- a/blade-render/code/geometry.inc.wgsl +++ b/blade-render/code/geometry.inc.wgsl @@ -50,7 +50,7 @@ struct RichSurface { motion: vec2, } -fn fetch_geometry(pixel_coord: vec2, enable_debug: bool, is_primary: bool) -> RichSurface { +fn fetch_geometry(pixel_coord: vec2, is_primary: bool, enable_debug: bool) -> RichSurface { var rq: ray_query; let ray_dir = get_ray_direction(camera, pixel_coord); rayQueryInitialize(&rq, acc_struct, RayDesc(RAY_FLAG_CULL_NO_OPAQUE, 0xFFu, 0.0, camera.depth, camera.position, ray_dir)); @@ -151,9 +151,17 @@ fn fetch_geometry(pixel_coord: vec2, enable_debug: bool, is_primary: bool) rs.albedo *= base_color_sample.xyz; } + let prev_position = (entry.prev_object_to_world * position_object).xyz; + let prev_screen = get_projected_pixel_float(prev_camera, prev_position); + //TODO: consider just storing integers here? + //TODO: technically this "0.5" is just a waste compute on both packing and unpacking + rs.motion = prev_screen - vec2(pixel_coord) - 0.5; + rs.position = hit_position; + if (is_primary) { if (debug.view_mode == DebugMode_Depth) { - textureStore(out_debug, pixel_coord, vec4(intersection.t / camera.depth)); + let value = 1.0 / intersection.t; + textureStore(out_debug, pixel_coord, vec4(value)); } if (debug.view_mode == DebugMode_Normal) { textureStore(out_debug, pixel_coord, vec4(normal, 0.0)); @@ -165,15 +173,12 @@ fn fetch_geometry(pixel_coord: vec2, enable_debug: bool, is_primary: bool) let consistency = vec4(length(barycentrics_pos_diff), length(camera_projection_diff), 0.0, 0.0); textureStore(out_debug, pixel_coord, consistency); } + if (debug.view_mode == DebugMode_Motion) { + let motion = rs.motion * MOTION_SCALE; + textureStore(out_debug, pixel_coord, vec4(motion, 0.0, 0.0)); + } } - let prev_position = (entry.prev_object_to_world * position_object).xyz; - let prev_screen = get_projected_pixel_float(prev_camera, prev_position); - //TODO: consider just storing integers here? - //TODO: technically this "0.5" is just a waste compute on both packing and unpacking - rs.motion = prev_screen - vec2(pixel_coord) - 0.5; - rs.position = hit_position; - // Write down the Surface rs.inner.basis = basis; rs.inner.flat_normal = flat_normal; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 8ba0facd..97f7c6d2 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -470,9 +470,6 @@ fn resample_temporal( local_index: u32, rng: ptr, debug_len: f32, ) -> ResampleOutput { reprojection_cache[local_index].is_valid = false; - if (debug.view_mode == DebugMode_TemporalMatch || debug.view_mode == DebugMode_TemporalMisCanonical || debug.view_mode == DebugMode_TemporalMisError) { - textureStore(out_debug, cur_pixel, vec4(0.0)); - } if (surface.depth == 0.0) { return ResampleOutput(); } @@ -512,11 +509,8 @@ fn resample_temporal( textureStore(out_debug, cur_pixel, vec4(1.0)); } if (debug.view_mode == DebugMode_TemporalMisCanonical) { - textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); - } - if (debug.view_mode == DebugMode_TemporalMisError) { - let total = mis_canonical + rr.mis_sample; - textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + let mis = mis_canonical / (1.0 + base.accepted_count); + textureStore(out_debug, cur_pixel, vec4(mis)); } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); @@ -528,9 +522,6 @@ fn resample_spatial( rng: ptr, debug_len: f32, ) -> ResampleOutput { if (surface.depth == 0.0) { - if (debug.view_mode == DebugMode_SpatialMatch || debug.view_mode == DebugMode_SpatialMisCanonical || debug.view_mode == DebugMode_SpatialMisError) { - textureStore(out_debug, cur_pixel, vec4(0.0)); - } let dir = normalize(position - camera.position); var ro = ResampleOutput(); ro.color = evaluate_environment(dir); @@ -575,11 +566,8 @@ fn resample_spatial( textureStore(out_debug, cur_pixel, vec4(value)); } if (debug.view_mode == DebugMode_SpatialMisCanonical) { - textureStore(out_debug, cur_pixel, vec4(mis_canonical / (1.0 + base.accepted_count))); - } - if (debug.view_mode == DebugMode_SpatialMisError) { - let total = mis_canonical + mis_sample_sum; - textureStore(out_debug, cur_pixel, vec4(abs(total - 1.0 - base.accepted_count))); + let mis = mis_canonical / (1.0 + base.accepted_count); + textureStore(out_debug, cur_pixel, vec4(mis)); } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); } @@ -628,9 +616,10 @@ fn main( if (debug.view_mode == DebugMode_Grouping) { var rng = random_init(group_id.y * 1000u + group_id.x, 0u); let h = random_gen(&rng) * 360.0; - let color = hsv_to_rgb(h, 0.5, 1.0); + let color = hsv_to_rgb(h, 0.5, 1.0) + vec3(0.5); textureStore(out_debug, pixel_coord, vec4(color, 1.0)); - return; + } else if (debug.view_mode != DebugMode_Final) { + textureStore(out_debug, pixel_coord, vec4(0.0)); } let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 51d3f635..5b457314 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -54,10 +54,8 @@ pub enum DebugMode { Grouping = 5, TemporalMatch = 10, TemporalMisCanonical = 11, - TemporalMisError = 12, - SpatialMatch = 13, - SpatialMisCanonical = 14, - SpatialMisError = 15, + SpatialMatch = 12, + SpatialMisCanonical = 13, Variance = 100, } From c6a9833d45ba557e81906313e3ed5e286c34ffb8 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 3 Sep 2024 23:22:18 -0700 Subject: [PATCH 15/26] Tweak Grouping debug mode --- blade-helpers/src/hud.rs | 9 +++++++++ blade-render/code/ray-trace.wgsl | 2 +- src/lib.rs | 2 +- 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index d8b9b279..de54412a 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -59,6 +59,15 @@ impl ExposeHud for blade_render::PostProcConfig { } } +impl ExposeHud for blade_render::FrameConfig { + fn populate_hud(&mut self, ui: &mut egui::Ui) { + ui.horizontal(|ui| { + self.reset_reservoirs |= ui.button("Reset Accumulation").clicked(); + ui.toggle_value(&mut self.frozen, "Freeze"); + }); + } +} + impl ExposeHud for blade_render::DebugConfig { fn populate_hud(&mut self, ui: &mut egui::Ui) { use strum::IntoEnumIterator as _; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 97f7c6d2..120e7483 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -616,7 +616,7 @@ fn main( if (debug.view_mode == DebugMode_Grouping) { var rng = random_init(group_id.y * 1000u + group_id.x, 0u); let h = random_gen(&rng) * 360.0; - let color = hsv_to_rgb(h, 0.5, 1.0) + vec3(0.5); + let color = hsv_to_rgb(h, 1.0, 1.0); textureStore(out_debug, pixel_coord, vec4(color, 1.0)); } else if (debug.view_mode != DebugMode_Final) { textureStore(out_debug, pixel_coord, vec4(0.0)); diff --git a/src/lib.rs b/src/lib.rs index 32f6e738..f7147f80 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -737,7 +737,7 @@ impl Engine { .default_open(false) .show(ui, |ui| { self.ray_config.populate_hud(ui); - self.frame_config.reset_reservoirs |= ui.button("Reset Accumulation").clicked(); + self.frame_config.populate_hud(ui); self.denoiser_config.populate_hud(ui); self.post_proc_config.populate_hud(ui); }); From 07e573e7a3f1e6e46e3c72d06854b2e73e5997eb Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 4 Sep 2024 22:45:04 -0700 Subject: [PATCH 16/26] Search within 2x2 grid for temporal reprojection --- blade-render/code/ray-trace.wgsl | 44 +++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 120e7483..33e269c1 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -473,25 +473,45 @@ fn resample_temporal( if (surface.depth == 0.0) { return ResampleOutput(); } + let canonical = produce_canonical(surface, position, rng, debug_len); + if (parameters.temporal_tap == 0u) { + return finalize_canonical(canonical); + } - //TODO: find best match in a 2x2 grid + // Find best match in a 2x2 grid + let center_coord = get_prev_pixel(cur_pixel, position, motion); + let center_pixel = vec2(center_coord); + // Trick to start with closer pixels + let center_sum = vec2(center_coord - 0.5) + vec2(center_coord + 0.5); + var prev_pixels = array, 4>( + center_pixel.xy, + vec2(center_sum.x - center_pixel.x, center_pixel.y), + center_sum - center_pixel, + vec2(center_pixel.x, center_sum.y - center_pixel.y), + ); var prev = ReprojectionCache(); - prev.pixel_coord = vec2(get_prev_pixel(cur_pixel, position, motion)); - - let prev_reservoir_index = get_reservoir_index(prev.pixel_coord, prev_camera); - if (parameters.temporal_tap == 0u || prev_reservoir_index < 0) { - return finalize_canonical(canonical); + var prev_reservoir = StoredReservoir(); + for (var i = 0; i < 4 && !prev.is_valid; i += 1) { + prev.pixel_coord = prev_pixels[i]; + let prev_reservoir_index = get_reservoir_index(prev.pixel_coord, prev_camera); + if (prev_reservoir_index < 0) { + continue; + } + prev_reservoir = reservoirs[prev_reservoir_index]; + if (prev_reservoir.confidence == 0.0) { + continue; + } + prev.surface = read_prev_surface(prev.pixel_coord); + if (compare_surfaces(surface, prev.surface) < 0.1) { + continue; + } + prev.is_valid = true; } - let prev_reservoir = reservoirs[prev_reservoir_index]; - prev.surface = read_prev_surface(prev.pixel_coord); - prev.is_valid = compare_surfaces(surface, prev.surface) > 0.1; - // if the surfaces are too different, there is no trust in this sample - if (prev_reservoir.confidence == 0.0 || !prev.is_valid) { + if (!prev.is_valid) { return finalize_canonical(canonical); } - // Write down the reprojection cache, no need to carry this around reprojection_cache[local_index] = prev; From 0d662fa911d5250b12b608dfcd81afb5d1860f1f Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 5 Sep 2024 23:31:11 -0700 Subject: [PATCH 17/26] Refactor temporal reprojection out of the temporal pass --- blade-render/code/accum.inc.wgsl | 9 ++- blade-render/code/ray-trace.wgsl | 115 +++++++++++++++---------------- 2 files changed, 61 insertions(+), 63 deletions(-) diff --git a/blade-render/code/accum.inc.wgsl b/blade-render/code/accum.inc.wgsl index cc599ee3..7e704da7 100644 --- a/blade-render/code/accum.inc.wgsl +++ b/blade-render/code/accum.inc.wgsl @@ -3,17 +3,16 @@ const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); var inout_diffuse: texture_storage_2d; fn accumulate_temporal( - surface: Surface, position: vec3, pixel_coord: vec2, - cur_illumination: vec3, temporal_weight: f32, - prev_surface: Surface, prev_pixel: vec2, prev_valid: bool, + pixel: vec2, cur_illumination: vec3, + temporal_weight: f32, prev_pixel: vec2, ) { let cur_luminocity = dot(cur_illumination, LUMA); var ilm = vec4(cur_illumination, cur_luminocity * cur_luminocity); - if (prev_valid && temporal_weight < 1.0) { + if (prev_pixel.x >= 0 && temporal_weight < 1.0) { let illumination = textureLoad(inout_diffuse, prev_pixel).xyz; let luminocity = dot(illumination, LUMA); let prev_ilm = vec4(illumination, luminocity * luminocity); ilm = mix(prev_ilm, ilm, temporal_weight); } - textureStore(inout_diffuse, pixel_coord, ilm); + textureStore(inout_diffuse, pixel, ilm); } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 33e269c1..682a685a 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -64,12 +64,6 @@ struct PixelCache { world_pos: vec3, } var pixel_cache: array; -struct ReprojectionCache { - surface: Surface, - pixel_coord: vec2, - is_valid: bool, -} -var reprojection_cache: array; struct LightSample { radiance: vec3, @@ -350,6 +344,50 @@ fn produce_canonical( return reservoir; } +struct TemporalReprojection { + is_valid: bool, + pixel: vec2, + surface: Surface, + reservoir: StoredReservoir, +} + +fn find_temporal(surface: Surface, center_coord: vec2) -> TemporalReprojection { + var tr = TemporalReprojection(); + tr.is_valid = false; + if (surface.depth == 0.0) { + return tr; + } + + // Find best match in a 2x2 grid + let center_pixel = vec2(center_coord); + // Trick to start with closer pixels + let center_sum = vec2(center_coord - 0.5) + vec2(center_coord + 0.5); + var prev_pixels = array, 4>( + center_pixel.xy, + vec2(center_sum.x - center_pixel.x, center_pixel.y), + center_sum - center_pixel, + vec2(center_pixel.x, center_sum.y - center_pixel.y), + ); + + for (var i = 0; i < 4 && !tr.is_valid; i += 1) { + tr.pixel = prev_pixels[i]; + let prev_reservoir_index = get_reservoir_index(tr.pixel, prev_camera); + if (prev_reservoir_index < 0) { + continue; + } + tr.reservoir = reservoirs[prev_reservoir_index]; + if (tr.reservoir.confidence == 0.0) { + continue; + } + tr.surface = read_prev_surface(tr.pixel); + if (compare_surfaces(surface, tr.surface) < 0.1) { + continue; + } + tr.is_valid = true; + } + return tr; +} + struct ResampleBase { surface: Surface, canonical: LiveReservoir, @@ -466,62 +504,26 @@ fn finalize_resampling( } fn resample_temporal( - surface: Surface, motion: vec2, cur_pixel: vec2, position: vec3, - local_index: u32, rng: ptr, debug_len: f32, + surface: Surface, cur_pixel: vec2, position: vec3, + local_index: u32, tr: TemporalReprojection, + rng: ptr, debug_len: f32, ) -> ResampleOutput { - reprojection_cache[local_index].is_valid = false; if (surface.depth == 0.0) { return ResampleOutput(); } let canonical = produce_canonical(surface, position, rng, debug_len); - if (parameters.temporal_tap == 0u) { + if (parameters.temporal_tap == 0u || !tr.is_valid) { return finalize_canonical(canonical); } - // Find best match in a 2x2 grid - let center_coord = get_prev_pixel(cur_pixel, position, motion); - let center_pixel = vec2(center_coord); - // Trick to start with closer pixels - let center_sum = vec2(center_coord - 0.5) + vec2(center_coord + 0.5); - var prev_pixels = array, 4>( - center_pixel.xy, - vec2(center_sum.x - center_pixel.x, center_pixel.y), - center_sum - center_pixel, - vec2(center_pixel.x, center_sum.y - center_pixel.y), - ); - var prev = ReprojectionCache(); - var prev_reservoir = StoredReservoir(); - for (var i = 0; i < 4 && !prev.is_valid; i += 1) { - prev.pixel_coord = prev_pixels[i]; - let prev_reservoir_index = get_reservoir_index(prev.pixel_coord, prev_camera); - if (prev_reservoir_index < 0) { - continue; - } - prev_reservoir = reservoirs[prev_reservoir_index]; - if (prev_reservoir.confidence == 0.0) { - continue; - } - prev.surface = read_prev_surface(prev.pixel_coord); - if (compare_surfaces(surface, prev.surface) < 0.1) { - continue; - } - prev.is_valid = true; - } - - if (!prev.is_valid) { - return finalize_canonical(canonical); - } - // Write down the reprojection cache, no need to carry this around - reprojection_cache[local_index] = prev; - var reservoir = LiveReservoir(); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, 1.0); - let prev_dir = get_ray_direction(prev_camera, prev.pixel_coord); - let prev_world_pos = prev_camera.position + prev.surface.depth * prev_dir; - let other = PixelCache(prev.surface, prev_reservoir, prev_world_pos); + let prev_dir = get_ray_direction(prev_camera, tr.pixel); + let prev_world_pos = prev_camera.position + tr.surface.depth * prev_dir; + let other = PixelCache(tr.surface, tr.reservoir, prev_world_pos); let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, debug_len); let mis_canonical = 1.0 + rr.mis_canonical; @@ -599,8 +601,12 @@ fn compute_restir( ) -> vec3 { let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); - let temporal = resample_temporal(rs.inner, rs.motion, pixel, rs.position, local_index, rng, debug_len); + let center_coord = get_prev_pixel(pixel, rs.position, rs.motion); + let tr = find_temporal(rs.inner, center_coord); + + let temporal = resample_temporal(rs.inner, pixel, rs.position, local_index, tr, rng, debug_len); pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); + var prev_pixel = select(vec2(-1), tr.pixel, tr.is_valid); // sync with the workgroup to ensure all reservoirs are available. workgroupBarrier(); @@ -611,14 +617,7 @@ fn compute_restir( let pixel_index = get_reservoir_index(pixel, camera); reservoirs[pixel_index] = spatial.reservoir; - //Note: restoring it from the LDS allows to lower the register pressure during spatial re-use - let rc = reprojection_cache[local_index]; - accumulate_temporal( - rs.inner, rs.position, pixel, - spatial.color, parameters.temporal_accumulation_weight, - rc.surface, rc.pixel_coord, rc.is_valid, - ); - + accumulate_temporal(pixel, spatial.color, parameters.temporal_accumulation_weight, prev_pixel); return spatial.color; } From ab1bf33cd78a608eff77aee1f86996bdc05d579b Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Fri, 6 Sep 2024 00:26:34 -0700 Subject: [PATCH 18/26] Use confidence term more widely, include in the RayConfig --- blade-helpers/src/hud.rs | 7 ++-- blade-render/code/ray-trace.wgsl | 56 ++++++++++++++++++-------------- blade-render/src/render/mod.rs | 13 ++++---- examples/scene/main.rs | 4 +-- src/lib.rs | 4 +-- 5 files changed, 47 insertions(+), 37 deletions(-) diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index de54412a..efcba8c6 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -15,12 +15,13 @@ impl ExposeHud for blade_render::RayConfig { ); ui.checkbox(&mut self.temporal_tap, "Temporal tap"); ui.add( - egui::widgets::Slider::new(&mut self.temporal_history, 0..=50).text("Temporal history"), + egui::widgets::Slider::new(&mut self.temporal_confidence, 0.0..=50.0) + .text("Temporal confidence"), ); ui.add(egui::widgets::Slider::new(&mut self.spatial_taps, 0..=10).text("Spatial taps")); ui.add( - egui::widgets::Slider::new(&mut self.spatial_tap_history, 0..=50) - .text("Spatial tap history"), + egui::widgets::Slider::new(&mut self.spatial_confidence, 0.0..=50.0) + .text("Spatial confidence"), ); ui.add(egui::widgets::Slider::new(&mut self.group_mixer, 1..=10).text("Group mixer")); ui.add( diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 682a685a..f5cd7485 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -28,9 +28,9 @@ struct MainParams { num_environment_samples: u32, environment_importance_sampling: u32, temporal_tap: u32, - temporal_history: u32, + temporal_tap_confidence: f32, spatial_taps: u32, - spatial_tap_history: u32, + spatial_tap_confidence: f32, spatial_min_distance: i32, t_start: f32, use_motion_vectors: u32, @@ -119,13 +119,13 @@ fn merge_reservoir(r: ptr, other: LiveReservoir, random return false; } } -fn unpack_reservoir(f: StoredReservoir, max_history: u32, radiance: vec3) -> LiveReservoir { +fn unpack_reservoir(f: StoredReservoir, max_confidence: f32, radiance: vec3) -> LiveReservoir { var r: LiveReservoir; r.selected_light_index = f.light_index; r.selected_uv = f.light_uv; r.selected_target_score = f.target_score; r.radiance = radiance; - let history = min(f.confidence, f32(max_history)); + let history = min(f.confidence, max_confidence); r.weight_sum = f.contribution_weight * f.target_score * history; r.history = history; return r; @@ -351,7 +351,7 @@ struct TemporalReprojection { reservoir: StoredReservoir, } -fn find_temporal(surface: Surface, center_coord: vec2) -> TemporalReprojection { +fn find_temporal(surface: Surface, pixel: vec2, center_coord: vec2) -> TemporalReprojection { var tr = TemporalReprojection(); tr.is_valid = false; if (surface.depth == 0.0) { @@ -384,6 +384,16 @@ fn find_temporal(surface: Surface, center_coord: vec2) -> TemporalReproject continue; } tr.is_valid = true; + + if (debug.view_mode == DebugMode_Reprojection) { + var colors = array, 4>( + vec3(1.0, 1.0, 1.0), + vec3(1.0, 0.0, 0.0), + vec3(0.0, 1.0, 0.0), + vec3(0.0, 0.0, 1.0), + ); + textureStore(out_debug, pixel, vec4(colors[i], 1.0)); + } } return tr; } @@ -403,15 +413,15 @@ struct ResampleResult { // Resample following Algorithm 8 in section 9.1 of Bitterli thesis fn resample( dst: ptr, color_and_weight: ptr>, - base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, max_history: u32, - rng: ptr, debug_len: f32, + base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, + max_confidence: f32, rng: ptr, debug_len: f32, ) -> ResampleResult { var src: LiveReservoir; let neighbor = other.reservoir; var rr = ResampleResult(); if (PAIRWISE_MIS) { let canonical = base.canonical; - let neighbor_history = min(neighbor.confidence, f32(max_history)); + let neighbor_history = min(neighbor.confidence, max_confidence); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); @@ -442,7 +452,7 @@ fn resample( rr.mis_canonical = 0.0; rr.mis_sample = 1.0; let radiance = evaluate_reflected_light(base.surface, neighbor.light_index, neighbor.light_uv); - src = unpack_reservoir(neighbor, max_history, radiance); + src = unpack_reservoir(neighbor, max_confidence, radiance); } if (DECOUPLED_SHADING) { @@ -465,7 +475,7 @@ struct ResampleOutput { fn revive_canonical(ro: ResampleOutput) -> LiveReservoir { let radiance = select(vec3(0.0), ro.color / ro.reservoir.contribution_weight, ro.reservoir.contribution_weight > 0.0); - return unpack_reservoir(ro.reservoir, ~0u, radiance); + return unpack_reservoir(ro.reservoir, 100.0, radiance); } fn finalize_canonical(reservoir: LiveReservoir) -> ResampleOutput { @@ -479,17 +489,17 @@ fn finalize_resampling( reservoir: ptr, color_and_weight: ptr>, base: ResampleBase, mis_canonical: f32, rng: ptr, ) -> ResampleOutput { - var ro = ResampleOutput(); var canonical = base.canonical; - canonical.weight_sum *= mis_canonical / canonical.history; + var effective_history = canonical.history; + if (PAIRWISE_MIS) + { + canonical.weight_sum *= mis_canonical / canonical.history; + effective_history = 1.0 + base.accepted_count; + } merge_reservoir(reservoir, canonical, random_gen(rng)); - if (base.accepted_count > 0.0) { - let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, PAIRWISE_MIS); - ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); - } else { - ro.reservoir = pack_reservoir(canonical); - } + var ro = ResampleOutput(); + ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); if (DECOUPLED_SHADING) { if (canonical.selected_target_score > 0.0) { @@ -524,7 +534,7 @@ fn resample_temporal( let prev_dir = get_ray_direction(prev_camera, tr.pixel); let prev_world_pos = prev_camera.position + tr.surface.depth * prev_dir; let other = PixelCache(tr.surface, tr.reservoir, prev_world_pos); - let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_history, rng, debug_len); + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence, rng, debug_len); let mis_canonical = 1.0 + rr.mis_canonical; if (debug.view_mode == DebugMode_TemporalMatch) { @@ -573,18 +583,16 @@ fn resample_spatial( var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, f32(accepted_count)); var mis_canonical = 1.0; - var mis_sample_sum = 0.0; // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { let other = pixel_cache[accepted_local_indices[lid]]; - let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_history, rng, debug_len); + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_confidence, rng, debug_len); mis_canonical += rr.mis_canonical; - mis_sample_sum += rr.mis_sample; } if (debug.view_mode == DebugMode_SpatialMatch) { - let value = f32(accepted_count) / max(1.0, f32(parameters.spatial_taps)); + let value = base.accepted_count / max(1.0, f32(parameters.spatial_taps)); textureStore(out_debug, cur_pixel, vec4(value)); } if (debug.view_mode == DebugMode_SpatialMisCanonical) { @@ -602,7 +610,7 @@ fn compute_restir( let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); let center_coord = get_prev_pixel(pixel, rs.position, rs.motion); - let tr = find_temporal(rs.inner, center_coord); + let tr = find_temporal(rs.inner, pixel, center_coord); let temporal = resample_temporal(rs.inner, pixel, rs.position, local_index, tr, rng, debug_len); pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 5b457314..9e692e17 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -52,6 +52,7 @@ pub enum DebugMode { Motion = 3, HitConsistency = 4, Grouping = 5, + Reprojection = 6, TemporalMatch = 10, TemporalMisCanonical = 11, SpatialMatch = 12, @@ -95,9 +96,9 @@ pub struct RayConfig { pub num_environment_samples: u32, pub environment_importance_sampling: bool, pub temporal_tap: bool, - pub temporal_history: u32, + pub temporal_confidence: f32, pub spatial_taps: u32, - pub spatial_tap_history: u32, + pub spatial_confidence: f32, /// Minimal distance to a spatially reused pixel (in the current frame). pub spatial_min_distance: u32, /// Scale and mix the groups into clusters, to allow spatial samples to mix @@ -364,9 +365,9 @@ struct MainParams { num_environment_samples: u32, environment_importance_sampling: u32, temporal_tap: u32, - temporal_history: u32, + temporal_confidence: f32, spatial_taps: u32, - spatial_tap_history: u32, + spatial_confidence: f32, spatial_min_distance: u32, t_start: f32, use_motion_vectors: u32, @@ -1088,9 +1089,9 @@ impl Renderer { environment_importance_sampling: ray_config.environment_importance_sampling as u32, temporal_tap: ray_config.temporal_tap as u32, - temporal_history: ray_config.temporal_history, + temporal_confidence: ray_config.temporal_confidence, spatial_taps: ray_config.spatial_taps, - spatial_tap_history: ray_config.spatial_tap_history, + spatial_confidence: ray_config.spatial_confidence, spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 1e9c6e57..2d9bc6be 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -260,9 +260,9 @@ impl Example { num_environment_samples: 1, environment_importance_sampling: false, temporal_tap: true, - temporal_history: 10, + temporal_confidence: 10.0, spatial_taps: 1, - spatial_tap_history: 5, + spatial_confidence: 5.0, spatial_min_distance: 4, group_mixer: 10, t_start: 0.1, diff --git a/src/lib.rs b/src/lib.rs index f7147f80..7a2c7a54 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -485,9 +485,9 @@ impl Engine { num_environment_samples: 1, environment_importance_sampling: false, temporal_tap: true, - temporal_history: 10, + temporal_confidence: 10.0, spatial_taps: 1, - spatial_tap_history: 5, + spatial_confidence: 5.0, spatial_min_distance: 4, group_mixer: 10, t_start: 0.01, From 1490f057835b666a4fb549acfd549af550e125f7 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 7 Sep 2024 00:31:46 -0700 Subject: [PATCH 19/26] Use mouse wheel for fly speed control --- blade-helpers/src/camera.rs | 12 +++++++++++- examples/scene/main.rs | 3 +++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/blade-helpers/src/camera.rs b/blade-helpers/src/camera.rs index b866ca7f..246d014b 100644 --- a/blade-helpers/src/camera.rs +++ b/blade-helpers/src/camera.rs @@ -1,5 +1,7 @@ use super::ExposeHud; +const MAX_FLY_SPEED: f32 = 1000000.0; + pub struct ControlledCamera { pub inner: blade_render::Camera, pub fly_speed: f32, @@ -86,6 +88,14 @@ impl ControlledCamera { true } + + pub fn on_wheel(&mut self, delta: winit::event::MouseScrollDelta) { + let shift = match delta { + winit::event::MouseScrollDelta::LineDelta(_, lines) => lines, + winit::event::MouseScrollDelta::PixelDelta(position) => position.y as f32, + }; + self.fly_speed = (self.fly_speed * shift.exp()).clamp(1.0, MAX_FLY_SPEED); + } } impl ExposeHud for ControlledCamera { @@ -105,7 +115,7 @@ impl ExposeHud for ControlledCamera { }); ui.add(egui::Slider::new(&mut self.inner.fov_y, 0.5f32..=2.0f32).text("FOV")); ui.add( - egui::Slider::new(&mut self.fly_speed, 1f32..=100000f32) + egui::Slider::new(&mut self.fly_speed, 1f32..=MAX_FLY_SPEED) .text("Fly speed") .logarithmic(true), ); diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 2d9bc6be..10320e18 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -949,6 +949,9 @@ fn main() { } last_mouse_pos = [position.x as i32, position.y as i32]; } + winit::event::WindowEvent::MouseWheel { delta, .. } => { + example.camera.on_wheel(delta); + } winit::event::WindowEvent::HoveredFile(_) => { example.is_file_hovered = true; example From 8643a83927019cc6932d7b007c36240b79c1b0b4 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 7 Sep 2024 00:32:25 -0700 Subject: [PATCH 20/26] Refactor temporal accumulation, add WRITE_DEBUG_IMAGE flag --- blade-render/code/a-trous.wgsl | 4 --- blade-render/code/accum.inc.wgsl | 15 ++++++----- blade-render/code/motion.inc.wgsl | 1 - blade-render/code/ray-trace.wgsl | 45 +++++++++++++++---------------- 4 files changed, 31 insertions(+), 34 deletions(-) diff --git a/blade-render/code/a-trous.wgsl b/blade-render/code/a-trous.wgsl index 19277d20..d4eda9e6 100644 --- a/blade-render/code/a-trous.wgsl +++ b/blade-render/code/a-trous.wgsl @@ -1,4 +1,3 @@ -#include "camera.inc.wgsl" #include "quaternion.inc.wgsl" #include "surface.inc.wgsl" @@ -12,12 +11,9 @@ struct Params { iteration: u32, } -var camera: CameraParams; -var prev_camera: CameraParams; var params: Params; var t_depth: texture_2d; var t_flat_normal: texture_2d; -var t_motion: texture_2d; var input: texture_2d; var output: texture_storage_2d; diff --git a/blade-render/code/accum.inc.wgsl b/blade-render/code/accum.inc.wgsl index 7e704da7..5b30c0a2 100644 --- a/blade-render/code/accum.inc.wgsl +++ b/blade-render/code/accum.inc.wgsl @@ -1,18 +1,21 @@ const LUMA: vec3 = vec3(0.2126, 0.7152, 0.0722); +const MOTION_FACTOR: f32 = 0.1; var inout_diffuse: texture_storage_2d; fn accumulate_temporal( pixel: vec2, cur_illumination: vec3, temporal_weight: f32, prev_pixel: vec2, + motion_sqr: f32, ) { - let cur_luminocity = dot(cur_illumination, LUMA); - var ilm = vec4(cur_illumination, cur_luminocity * cur_luminocity); + var illumination = cur_illumination; if (prev_pixel.x >= 0 && temporal_weight < 1.0) { - let illumination = textureLoad(inout_diffuse, prev_pixel).xyz; - let luminocity = dot(illumination, LUMA); - let prev_ilm = vec4(illumination, luminocity * luminocity); - ilm = mix(prev_ilm, ilm, temporal_weight); + let factor = mix(temporal_weight, 1.0, min(pow(motion_sqr, 0.25) * MOTION_FACTOR, 1.0)); + let prev_illumination = textureLoad(inout_diffuse, prev_pixel).xyz; + illumination = mix(prev_illumination, illumination, factor); } + + let luminocity = dot(illumination, LUMA); + let ilm = vec4(illumination, luminocity * luminocity); textureStore(inout_diffuse, pixel, ilm); } diff --git a/blade-render/code/motion.inc.wgsl b/blade-render/code/motion.inc.wgsl index a9a9f48a..3e721716 100644 --- a/blade-render/code/motion.inc.wgsl +++ b/blade-render/code/motion.inc.wgsl @@ -1,2 +1 @@ const MOTION_SCALE: f32 = 0.02; -const USE_MOTION_VECTORS: bool = true; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index f5cd7485..7271c89f 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -18,6 +18,9 @@ const PAIRWISE_MIS: bool = true; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; +const WRITE_DEBUG_IMAGE: bool = false; +//TODO: currently unused +const WRITE_MOTION_VECTORS: bool = false; //TODO: crashes on AMD 6850U if `GROUP_SIZE_TOTAL` > 32 const GROUP_SIZE: vec2 = vec2(8, 4); @@ -256,14 +259,6 @@ fn evaluate_reflected_light(surface: Surface, light_index: u32, light_uv: vec2, pos_world: vec3, motion: vec2) -> vec2 { - if (USE_MOTION_VECTORS && parameters.use_motion_vectors != 0u) { - return vec2(pixel) + 0.5 + motion; - } else { - return get_projected_pixel_float(prev_camera, pos_world); - } -} - struct TargetScore { color: vec3, score: f32, @@ -385,7 +380,7 @@ fn find_temporal(surface: Surface, pixel: vec2, center_coord: vec2) -> } tr.is_valid = true; - if (debug.view_mode == DebugMode_Reprojection) { + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_Reprojection) { var colors = array, 4>( vec3(1.0, 1.0, 1.0), vec3(1.0, 0.0, 0.0), @@ -537,10 +532,10 @@ fn resample_temporal( let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence, rng, debug_len); let mis_canonical = 1.0 + rr.mis_canonical; - if (debug.view_mode == DebugMode_TemporalMatch) { + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMatch) { textureStore(out_debug, cur_pixel, vec4(1.0)); } - if (debug.view_mode == DebugMode_TemporalMisCanonical) { + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMisCanonical) { let mis = mis_canonical / (1.0 + base.accepted_count); textureStore(out_debug, cur_pixel, vec4(mis)); } @@ -591,11 +586,11 @@ fn resample_spatial( mis_canonical += rr.mis_canonical; } - if (debug.view_mode == DebugMode_SpatialMatch) { + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SpatialMatch) { let value = base.accepted_count / max(1.0, f32(parameters.spatial_taps)); textureStore(out_debug, cur_pixel, vec4(value)); } - if (debug.view_mode == DebugMode_SpatialMisCanonical) { + if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SpatialMisCanonical) { let mis = mis_canonical / (1.0 + base.accepted_count); textureStore(out_debug, cur_pixel, vec4(mis)); } @@ -609,8 +604,9 @@ fn compute_restir( ) -> vec3 { let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); - let center_coord = get_prev_pixel(pixel, rs.position, rs.motion); + let center_coord = vec2(pixel) + 0.5 + select(vec2(0.0), rs.motion, parameters.use_motion_vectors != 0u); let tr = find_temporal(rs.inner, pixel, center_coord); + let motion_sqr = dot(rs.motion, rs.motion); let temporal = resample_temporal(rs.inner, pixel, rs.position, local_index, tr, rng, debug_len); pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); @@ -625,7 +621,7 @@ fn compute_restir( let pixel_index = get_reservoir_index(pixel, camera); reservoirs[pixel_index] = spatial.reservoir; - accumulate_temporal(pixel, spatial.color, parameters.temporal_accumulation_weight, prev_pixel); + accumulate_temporal(pixel, spatial.color, parameters.temporal_accumulation_weight, prev_pixel, motion_sqr); return spatial.color; } @@ -640,13 +636,14 @@ fn main( return; } - if (debug.view_mode == DebugMode_Grouping) { - var rng = random_init(group_id.y * 1000u + group_id.x, 0u); - let h = random_gen(&rng) * 360.0; - let color = hsv_to_rgb(h, 1.0, 1.0); - textureStore(out_debug, pixel_coord, vec4(color, 1.0)); - } else if (debug.view_mode != DebugMode_Final) { - textureStore(out_debug, pixel_coord, vec4(0.0)); + if (WRITE_DEBUG_IMAGE) { + var default_color = vec3(0.0); + if (debug.view_mode == DebugMode_Grouping) { + var rng = random_init(group_id.y * 1000u + group_id.x, 0u); + let h = random_gen(&rng) * 360.0; + default_color = hsv_to_rgb(h, 1.0, 1.0); + } + textureStore(out_debug, pixel_coord, vec4(default_color, 0.0)); } let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); @@ -664,7 +661,9 @@ fn main( textureStore(inout_basis, pixel_coord, rs.inner.basis); textureStore(inout_flat_normal, pixel_coord, vec4(rs.inner.flat_normal, 0.0)); textureStore(out_albedo, pixel_coord, vec4(rs.albedo, 0.0)); - textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); + if (WRITE_MOTION_VECTORS) { + textureStore(out_motion, pixel_coord, vec4(rs.motion * MOTION_SCALE, 0.0, 0.0)); + } if (enable_debug) { debug_buf.variance.color_sum += color; From d326d6a36b2d1937956615a69c3dd6079be1e4d2 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 7 Sep 2024 15:17:34 -0700 Subject: [PATCH 21/26] Expose pairwise MIS in settings --- blade-helpers/src/hud.rs | 1 + blade-render/code/ray-trace.wgsl | 14 +++++--------- blade-render/src/render/mod.rs | 11 +++++++---- examples/scene/main.rs | 3 ++- src/lib.rs | 3 ++- 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index efcba8c6..4d0579a0 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -33,6 +33,7 @@ impl ExposeHud for blade_render::RayConfig { .text("T min") .logarithmic(true), ); + ui.checkbox(&mut self.pairwise_mis, "Pairwise MIS"); } } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 7271c89f..9e0726e6 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -12,9 +12,6 @@ const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; -// See "9.1 pairwise mis for robust reservoir reuse" -// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" -const PAIRWISE_MIS: bool = true; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; @@ -36,9 +33,10 @@ struct MainParams { spatial_tap_confidence: f32, spatial_min_distance: i32, t_start: f32, + use_pairwise_mis: u32, use_motion_vectors: u32, - grid_scale: vec2, temporal_accumulation_weight: f32, + grid_scale: vec2, } var camera: CameraParams; @@ -414,7 +412,7 @@ fn resample( var src: LiveReservoir; let neighbor = other.reservoir; var rr = ResampleResult(); - if (PAIRWISE_MIS) { + if (parameters.use_pairwise_mis != 0u) { let canonical = base.canonical; let neighbor_history = min(neighbor.confidence, max_confidence); { // scoping this to hint the register allocation @@ -485,14 +483,12 @@ fn finalize_resampling( base: ResampleBase, mis_canonical: f32, rng: ptr, ) -> ResampleOutput { var canonical = base.canonical; - var effective_history = canonical.history; - if (PAIRWISE_MIS) - { + if (parameters.use_pairwise_mis != 0u) { canonical.weight_sum *= mis_canonical / canonical.history; - effective_history = 1.0 + base.accepted_count; } merge_reservoir(reservoir, canonical, random_gen(rng)); + let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, parameters.use_pairwise_mis != 0u); var ro = ResampleOutput(); ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 9e692e17..4b18eecd 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -105,6 +105,9 @@ pub struct RayConfig { /// outside of the original workgroup pixel bounds. pub group_mixer: u32, pub t_start: f32, + /// See "9.1 pairwise mis for robust reservoir reuse" + /// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" + pub pairwise_mis: bool, } #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] @@ -370,10 +373,10 @@ struct MainParams { spatial_confidence: f32, spatial_min_distance: u32, t_start: f32, + use_pairwise_mis: u32, use_motion_vectors: u32, - grid_scale: [u32; 2], temporal_accumulation_weight: f32, - pad: f32, + grid_scale: [u32; 2], } #[derive(blade_macros::ShaderData)] @@ -1094,14 +1097,14 @@ impl Renderer { spatial_confidence: ray_config.spatial_confidence, spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, + use_pairwise_mis: ray_config.pairwise_mis as u32, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, - grid_scale, temporal_accumulation_weight: if denoiser_config.enabled { denoiser_config.temporal_weight } else { 1.0 }, - pad: 0.0, + grid_scale, }, acc_struct: self.acceleration_structure, prev_acc_struct: if self.frame_scene_built < self.frame_index diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 10320e18..a6be3e9d 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -258,7 +258,7 @@ impl Example { render_times: VecDeque::with_capacity(FRAME_TIME_HISTORY), ray_config: blade_render::RayConfig { num_environment_samples: 1, - environment_importance_sampling: false, + environment_importance_sampling: true, temporal_tap: true, temporal_confidence: 10.0, spatial_taps: 1, @@ -266,6 +266,7 @@ impl Example { spatial_min_distance: 4, group_mixer: 10, t_start: 0.1, + pairwise_mis: true, }, denoiser_config: blade_render::DenoiserConfig { enabled: true, diff --git a/src/lib.rs b/src/lib.rs index 7a2c7a54..a91c33b2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -483,7 +483,7 @@ impl Engine { }, ray_config: blade_render::RayConfig { num_environment_samples: 1, - environment_importance_sampling: false, + environment_importance_sampling: true, temporal_tap: true, temporal_confidence: 10.0, spatial_taps: 1, @@ -491,6 +491,7 @@ impl Engine { spatial_min_distance: 4, group_mixer: 10, t_start: 0.01, + pairwise_mis: true, }, denoiser_config: blade_render::DenoiserConfig { enabled: true, From 28b81232243c986130d91017bf558a2e9bf4e6b6 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 7 Sep 2024 20:00:42 -0700 Subject: [PATCH 22/26] Tweak spatial re-use settings to avoid misses --- blade-render/code/ray-trace.wgsl | 2 +- examples/scene/main.rs | 2 +- src/lib.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 9e0726e6..0b70f69e 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -555,7 +555,7 @@ fn resample_spatial( var accepted_count = 0u; var accepted_local_indices = array(); let max_accepted = min(MAX_RESAMPLE, parameters.spatial_taps); - let num_candidates = parameters.spatial_taps * 3u; + let num_candidates = parameters.spatial_taps * 4u; for (var i = 0u; i < num_candidates && accepted_count < max_accepted; i += 1u) { let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; let diff = thread_index_to_coord(other_cache_index, group_id) - cur_pixel; diff --git a/examples/scene/main.rs b/examples/scene/main.rs index a6be3e9d..5ee2beca 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -263,7 +263,7 @@ impl Example { temporal_confidence: 10.0, spatial_taps: 1, spatial_confidence: 5.0, - spatial_min_distance: 4, + spatial_min_distance: 2, group_mixer: 10, t_start: 0.1, pairwise_mis: true, diff --git a/src/lib.rs b/src/lib.rs index a91c33b2..774e5cea 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -488,7 +488,7 @@ impl Engine { temporal_confidence: 10.0, spatial_taps: 1, spatial_confidence: 5.0, - spatial_min_distance: 4, + spatial_min_distance: 2, group_mixer: 10, t_start: 0.01, pairwise_mis: true, From 5ec439686718838594b461684d0deb0d328ad301 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Thu, 12 Sep 2024 00:38:03 -0700 Subject: [PATCH 23/26] make debug rendering optional --- blade-render/code/ray-trace.wgsl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 0b70f69e..66220932 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -12,6 +12,8 @@ const PI: f32 = 3.1415926; const MAX_RESAMPLE: u32 = 4u; + +const DRAW_DEBUG: bool = false; // See "DECOUPLING SHADING AND REUSE" in // "Rearchitecting Spatiotemporal Resampling for Production" const DECOUPLED_SHADING: bool = false; @@ -236,7 +238,7 @@ fn check_ray_occluded(acs: acceleration_structure, position: vec3, directio let intersection = rayQueryGetCommittedIntersection(&rq); let occluded = intersection.kind != RAY_QUERY_INTERSECTION_NONE; - if (debug_len != 0.0) { + if (DRAW_DEBUG && debug_len != 0.0) { let color = select(0xFFFFFFu, 0x0000FFu, occluded); debug_line(position, position + debug_len * direction, color); } @@ -642,7 +644,7 @@ fn main( textureStore(out_debug, pixel_coord, vec4(default_color, 0.0)); } - let enable_debug = all(pixel_coord == vec2(debug.mouse_pos)); + let enable_debug = DRAW_DEBUG && all(pixel_coord == vec2(debug.mouse_pos)); let rs = fetch_geometry(pixel_coord, true, enable_debug); let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); From 239e9cae99cfe70f1b4b08fd044b12469e694f8d Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Sat, 14 Sep 2024 11:12:49 -0700 Subject: [PATCH 24/26] Move the debug flag and the random gen into private vars --- blade-render/code/env-importance.inc.wgsl | 2 +- blade-render/code/random.inc.wgsl | 6 +- blade-render/code/ray-trace.wgsl | 92 +++++++++++------------ examples/init/env-sample.wgsl | 5 +- 4 files changed, 49 insertions(+), 56 deletions(-) diff --git a/blade-render/code/env-importance.inc.wgsl b/blade-render/code/env-importance.inc.wgsl index 43421fce..f5a77914 100644 --- a/blade-render/code/env-importance.inc.wgsl +++ b/blade-render/code/env-importance.inc.wgsl @@ -11,7 +11,7 @@ fn compute_texel_solid_angle(itc: vec2, dim: vec2) -> f32 { return meridian_solid_angle * meridian_part; } -fn generate_environment_sample(rng: ptr, dim: vec2) -> EnvImportantSample { +fn generate_environment_sample(rng: ptr, dim: vec2) -> EnvImportantSample { var es = EnvImportantSample(); es.pdf = 1.0; var mip = i32(textureNumLevels(env_weights)); diff --git a/blade-render/code/random.inc.wgsl b/blade-render/code/random.inc.wgsl index da144dbf..1bce9317 100644 --- a/blade-render/code/random.inc.wgsl +++ b/blade-render/code/random.inc.wgsl @@ -28,7 +28,7 @@ fn rot32(x: u32, bits: u32) -> u32 { } // https://en.wikipedia.org/wiki/MurmurHash -fn murmur3(rng: ptr) -> u32 { +fn murmur3(rng: ptr) -> u32 { let c1 = 0xcc9e2d51u; let c2 = 0x1b873593u; let r1 = 15u; @@ -56,11 +56,11 @@ fn murmur3(rng: ptr) -> u32 { return hash; } -fn random_u32(rng: ptr) -> u32 { +fn random_u32(rng: ptr) -> u32 { return murmur3(rng); } -fn random_gen(rng: ptr) -> f32 { +fn random_gen(rng: ptr) -> f32 { let v = murmur3(rng); let one = bitcast(1.0); let mask = (1u << 23u) - 1u; diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 66220932..1d1c6dca 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -25,6 +25,9 @@ const WRITE_MOTION_VECTORS: bool = false; const GROUP_SIZE: vec2 = vec2(8, 4); const GROUP_SIZE_TOTAL: u32 = GROUP_SIZE.x * GROUP_SIZE.y; +var p_debug_len: f32; +var p_rng: RandomState; + struct MainParams { frame_index: u32, num_environment_samples: u32, @@ -109,9 +112,10 @@ fn make_reservoir(ls: LightSample, light_index: u32, brdf: vec3) -> LiveRes return r; } -fn merge_reservoir(r: ptr, other: LiveReservoir, random: f32) -> bool { +fn merge_reservoir(r: ptr, other: LiveReservoir) -> bool { (*r).weight_sum += other.weight_sum; (*r).history += other.history; + let random = random_gen(&p_rng); if ((*r).weight_sum * random < other.weight_sum) { (*r).selected_light_index = other.selected_light_index; (*r).selected_uv = other.selected_uv; @@ -180,9 +184,9 @@ fn evaluate_environment(dir: vec3) -> vec3 { return textureSampleLevel(env_map, sampler_linear, uv, 0.0).xyz; } -fn sample_light_from_sphere(rng: ptr) -> LightSample { - let a = random_gen(rng); - let h = 1.0 - 2.0 * random_gen(rng); // make sure to allow h==1 +fn sample_light_from_sphere() -> LightSample { + let a = random_gen(&p_rng); + let h = 1.0 - 2.0 * random_gen(&p_rng); // make sure to allow h==1 let tangential = sqrt(1.0 - square(h)) * sample_circle(a); let dir = vec3(tangential.x, h, tangential.y); var ls = LightSample(); @@ -192,16 +196,16 @@ fn sample_light_from_sphere(rng: ptr) -> LightSample { return ls; } -fn sample_light_from_environment(rng: ptr) -> LightSample { +fn sample_light_from_environment() -> LightSample { let dim = textureDimensions(env_map, 0); - let es = generate_environment_sample(rng, dim); + let es = generate_environment_sample(&p_rng, dim); var ls = LightSample(); ls.pdf = es.pdf; // sample the incoming radiance ls.radiance = textureLoad(env_map, es.pixel, 0).xyz; // for determining direction - offset randomly within the texel // Note: this only works if the texels are sufficiently small - ls.uv = (vec2(es.pixel) + vec2(random_gen(rng), random_gen(rng))) / vec2(dim); + ls.uv = (vec2(es.pixel) + vec2(random_gen(&p_rng), random_gen(&p_rng))) / vec2(dim); return ls; } @@ -228,7 +232,7 @@ fn evaluate_brdf(surface: Surface, dir: vec3) -> f32 { return lambert_brdf * max(0.0, lambert_term); } -fn check_ray_occluded(acs: acceleration_structure, position: vec3, direction: vec3, debug_len: f32) -> bool { +fn check_ray_occluded(acs: acceleration_structure, position: vec3, direction: vec3) -> bool { var rq: ray_query; let flags = RAY_FLAG_TERMINATE_ON_FIRST_HIT | RAY_FLAG_CULL_NO_OPAQUE; rayQueryInitialize(&rq, acs, @@ -238,9 +242,9 @@ fn check_ray_occluded(acs: acceleration_structure, position: vec3, directio let intersection = rayQueryGetCommittedIntersection(&rq); let occluded = intersection.kind != RAY_QUERY_INTERSECTION_NONE; - if (DRAW_DEBUG && debug_len != 0.0) { + if (DRAW_DEBUG && p_debug_len != 0.0) { let color = select(0xFFFFFFu, 0x0000FFu, occluded); - debug_line(position, position + debug_len * direction, color); + debug_line(position, position + p_debug_len * direction, color); } return occluded; } @@ -269,7 +273,7 @@ fn make_target_score(color: vec3) -> TargetScore { } fn estimate_target_score_with_occlusion( - surface: Surface, position: vec3, light_index: u32, light_uv: vec2, acs: acceleration_structure, debug_len: f32 + surface: Surface, position: vec3, light_index: u32, light_uv: vec2, acs: acceleration_structure, ) -> TargetScore { if (light_index != 0u) { return TargetScore(); @@ -283,7 +287,7 @@ fn estimate_target_score_with_occlusion( return TargetScore(); } - if (check_ray_occluded(acs, position, direction, debug_len)) { + if (check_ray_occluded(acs, position, direction)) { return TargetScore(); } @@ -292,7 +296,7 @@ fn estimate_target_score_with_occlusion( return make_target_score(brdf * radiance); } -fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debug_len: f32) -> f32 { +fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3) -> f32 { let dir = map_equirect_uv_to_dir(ls.uv); if (dot(dir, surface.flat_normal) <= 0.0) { return 0.0; @@ -308,7 +312,7 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu return 0.0; } - if (check_ray_occluded(acc_struct, start_pos, dir, debug_len)) { + if (check_ray_occluded(acc_struct, start_pos, dir)) { return 0.0; } @@ -317,21 +321,20 @@ fn evaluate_sample(ls: LightSample, surface: Surface, start_pos: vec3, debu fn produce_canonical( surface: Surface, position: vec3, - rng: ptr, debug_len: f32, ) -> LiveReservoir { var reservoir = LiveReservoir(); for (var i = 0u; i < parameters.num_environment_samples; i += 1u) { var ls: LightSample; if (parameters.environment_importance_sampling != 0u) { - ls = sample_light_from_environment(rng); + ls = sample_light_from_environment(); } else { - ls = sample_light_from_sphere(rng); + ls = sample_light_from_sphere(); } - let brdf = evaluate_sample(ls, surface, position, debug_len); + let brdf = evaluate_sample(ls, surface, position); if (brdf > 0.0) { let other = make_reservoir(ls, 0u, vec3(brdf)); - merge_reservoir(&reservoir, other, random_gen(rng)); + merge_reservoir(&reservoir, other); } else { bump_reservoir(&reservoir, 1.0); } @@ -409,7 +412,7 @@ struct ResampleResult { fn resample( dst: ptr, color_and_weight: ptr>, base: ResampleBase, other: PixelCache, other_acs: acceleration_structure, - max_confidence: f32, rng: ptr, debug_len: f32, + max_confidence: f32, ) -> ResampleResult { var src: LiveReservoir; let neighbor = other.reservoir; @@ -419,19 +422,14 @@ fn resample( let neighbor_history = min(neighbor.confidence, max_confidence); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( - other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs, debug_len); + other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs); let nom = canonical.selected_target_score * canonical.history / base.accepted_count; let denom = t_canonical_at_neighbor.score * neighbor_history + nom; rr.mis_canonical = select(0.0, nom / denom, denom > 0.0); } - // Notes about t_neighbor_at_neighbor: - // 1. we assume lights aren't moving. Technically we should check if the - // target light has moved, and re-evaluate the occlusion. - // 2. we can use the cached target score, and there is no use of the target color - //let t_neighbor_at_neighbor = estimate_target_pdf(neighbor_surface, neighbor_position, neighbor.selected_dir); let t_neighbor_at_canonical = estimate_target_score_with_occlusion( - base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct, debug_len); + base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct); let nom = neighbor.target_score * neighbor_history; let denom = nom + t_neighbor_at_canonical.score * canonical.history / base.accepted_count; let mis_neighbor = select(0.0, nom / denom, denom > 0.0); @@ -451,13 +449,12 @@ fn resample( } if (DECOUPLED_SHADING) { - //TODO: use `mis_neighbor`O *color_and_weight += src.weight_sum * vec4(neighbor.contribution_weight * src.radiance, 1.0); } if (src.weight_sum <= 0.0) { bump_reservoir(dst, src.history); } else { - merge_reservoir(dst, src, random_gen(rng)); + merge_reservoir(dst, src); rr.selected = true; } return rr; @@ -482,13 +479,13 @@ fn finalize_canonical(reservoir: LiveReservoir) -> ResampleOutput { fn finalize_resampling( reservoir: ptr, color_and_weight: ptr>, - base: ResampleBase, mis_canonical: f32, rng: ptr, + base: ResampleBase, mis_canonical: f32, ) -> ResampleOutput { var canonical = base.canonical; if (parameters.use_pairwise_mis != 0u) { canonical.weight_sum *= mis_canonical / canonical.history; } - merge_reservoir(reservoir, canonical, random_gen(rng)); + merge_reservoir(reservoir, canonical); let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, parameters.use_pairwise_mis != 0u); var ro = ResampleOutput(); @@ -509,13 +506,12 @@ fn finalize_resampling( fn resample_temporal( surface: Surface, cur_pixel: vec2, position: vec3, local_index: u32, tr: TemporalReprojection, - rng: ptr, debug_len: f32, ) -> ResampleOutput { if (surface.depth == 0.0) { return ResampleOutput(); } - let canonical = produce_canonical(surface, position, rng, debug_len); + let canonical = produce_canonical(surface, position); if (parameters.temporal_tap == 0u || !tr.is_valid) { return finalize_canonical(canonical); } @@ -527,7 +523,7 @@ fn resample_temporal( let prev_dir = get_ray_direction(prev_camera, tr.pixel); let prev_world_pos = prev_camera.position + tr.surface.depth * prev_dir; let other = PixelCache(tr.surface, tr.reservoir, prev_world_pos); - let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence, rng, debug_len); + let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence); let mis_canonical = 1.0 + rr.mis_canonical; if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMatch) { @@ -538,13 +534,12 @@ fn resample_temporal( textureStore(out_debug, cur_pixel, vec4(mis)); } - return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); } fn resample_spatial( surface: Surface, cur_pixel: vec2, position: vec3, group_id: vec3, canonical: LiveReservoir, - rng: ptr, debug_len: f32, ) -> ResampleOutput { if (surface.depth == 0.0) { let dir = normalize(position - camera.position); @@ -559,7 +554,7 @@ fn resample_spatial( let max_accepted = min(MAX_RESAMPLE, parameters.spatial_taps); let num_candidates = parameters.spatial_taps * 4u; for (var i = 0u; i < num_candidates && accepted_count < max_accepted; i += 1u) { - let other_cache_index = random_u32(rng) % GROUP_SIZE_TOTAL; + let other_cache_index = random_u32(&p_rng) % GROUP_SIZE_TOTAL; let diff = thread_index_to_coord(other_cache_index, group_id) - cur_pixel; if (dot(diff, diff) < parameters.spatial_min_distance * parameters.spatial_min_distance) { continue; @@ -580,7 +575,7 @@ fn resample_spatial( // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { let other = pixel_cache[accepted_local_indices[lid]]; - let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_confidence, rng, debug_len); + let rr = resample(&reservoir, &color_and_weight, base, other, acc_struct, parameters.spatial_tap_confidence); mis_canonical += rr.mis_canonical; } @@ -592,21 +587,17 @@ fn resample_spatial( let mis = mis_canonical / (1.0 + base.accepted_count); textureStore(out_debug, cur_pixel, vec4(mis)); } - return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical, rng); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); } fn compute_restir( - rs: RichSurface, - pixel: vec2, local_index: u32, group_id: vec3, - rng: ptr, enable_debug: bool, + rs: RichSurface, pixel: vec2, local_index: u32, group_id: vec3, ) -> vec3 { - let debug_len = select(0.0, rs.inner.depth * 0.2, enable_debug); - let center_coord = vec2(pixel) + 0.5 + select(vec2(0.0), rs.motion, parameters.use_motion_vectors != 0u); let tr = find_temporal(rs.inner, pixel, center_coord); let motion_sqr = dot(rs.motion, rs.motion); - let temporal = resample_temporal(rs.inner, pixel, rs.position, local_index, tr, rng, debug_len); + let temporal = resample_temporal(rs.inner, pixel, rs.position, local_index, tr); pixel_cache[local_index] = PixelCache(rs.inner, temporal.reservoir, rs.position); var prev_pixel = select(vec2(-1), tr.pixel, tr.is_valid); @@ -614,7 +605,7 @@ fn compute_restir( workgroupBarrier(); let temporal_live = revive_canonical(temporal); - let spatial = resample_spatial(rs.inner, pixel, rs.position, group_id, temporal_live, rng, debug_len); + let spatial = resample_spatial(rs.inner, pixel, rs.position, group_id, temporal_live); let pixel_index = get_reservoir_index(pixel, camera); reservoirs[pixel_index] = spatial.reservoir; @@ -637,8 +628,8 @@ fn main( if (WRITE_DEBUG_IMAGE) { var default_color = vec3(0.0); if (debug.view_mode == DebugMode_Grouping) { - var rng = random_init(group_id.y * 1000u + group_id.x, 0u); - let h = random_gen(&rng) * 360.0; + p_rng = random_init(group_id.y * 1000u + group_id.x, 0u); + let h = random_gen(&p_rng) * 360.0; default_color = hsv_to_rgb(h, 1.0, 1.0); } textureStore(out_debug, pixel_coord, vec4(default_color, 0.0)); @@ -648,10 +639,11 @@ fn main( let rs = fetch_geometry(pixel_coord, true, enable_debug); let global_index = u32(pixel_coord.y) * camera.target_size.x + u32(pixel_coord.x); - var rng = random_init(global_index, parameters.frame_index); + p_rng = random_init(global_index, parameters.frame_index); let enable_restir_debug = (debug.draw_flags & DebugDrawFlags_RESTIR) != 0u && enable_debug; - let color = compute_restir(rs, pixel_coord, local_index, group_id, &rng, enable_restir_debug); + p_debug_len = select(0.0, rs.inner.depth * 0.2, enable_restir_debug); + let color = compute_restir(rs, pixel_coord, local_index, group_id); //Note: important to do this after the temporal pass specifically // TODO: option to avoid writing data for the sky diff --git a/examples/init/env-sample.wgsl b/examples/init/env-sample.wgsl index 2c4d379d..7891133e 100644 --- a/examples/init/env-sample.wgsl +++ b/examples/init/env-sample.wgsl @@ -5,12 +5,13 @@ const PI: f32 = 3.1415926; const BUMP: f32 = 0.025; var env_main: texture_2d; +var p_rng: RandomState; @vertex fn vs_accum(@builtin(vertex_index) vi: u32) -> @builtin(position) vec4 { - var rng = random_init(vi, 0u); let dim = textureDimensions(env_main); - let es = generate_environment_sample(&rng, dim); + p_rng = random_init(vi, 0u); + let es = generate_environment_sample(&p_rng, dim); let extent = textureDimensions(env_weights, 0); let relative = (vec2(es.pixel) + vec2(0.5)) / vec2(extent); return vec4(relative.x - 1.0, 1.0 - relative.y, 0.0, 1.0); From ceb233cc9d42a48c428f29db859d8fa42b683ac5 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Tue, 17 Sep 2024 23:47:09 -0700 Subject: [PATCH 25/26] Refactor MIS to include 1/M and follow a simple balanced heuristic --- blade-render/code/ray-trace.wgsl | 34 ++++++++++++++------------------ 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index 1d1c6dca..dccf8d47 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -137,13 +137,13 @@ fn unpack_reservoir(f: StoredReservoir, max_confidence: f32, radiance: vec3 r.history = history; return r; } -fn pack_reservoir_detail(r: LiveReservoir, denom_factor: f32) -> StoredReservoir { +fn pack_reservoir_detail(r: LiveReservoir, effective_history: f32) -> StoredReservoir { var f: StoredReservoir; f.light_index = r.selected_light_index; f.light_uv = r.selected_uv; f.target_score = r.selected_target_score; f.confidence = r.history; - let denom = f.target_score * denom_factor; + let denom = f.target_score * effective_history; f.contribution_weight = select(0.0, r.weight_sum / denom, denom > 0.0); return f; } @@ -408,6 +408,10 @@ struct ResampleResult { mis_sample: f32, } +fn ratio(a: f32, b: f32) -> f32 { + return select(0.0, a / (a+b), a+b > 0.0); +} + // Resample following Algorithm 8 in section 9.1 of Bitterli thesis fn resample( dst: ptr, color_and_weight: ptr>, @@ -423,26 +427,21 @@ fn resample( { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs); - let nom = canonical.selected_target_score * canonical.history / base.accepted_count; - let denom = t_canonical_at_neighbor.score * neighbor_history + nom; - rr.mis_canonical = select(0.0, nom / denom, denom > 0.0); + rr.mis_canonical = ratio(canonical.selected_target_score, t_canonical_at_neighbor.score) / base.accepted_count; } let t_neighbor_at_canonical = estimate_target_score_with_occlusion( base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct); - let nom = neighbor.target_score * neighbor_history; - let denom = nom + t_neighbor_at_canonical.score * canonical.history / base.accepted_count; - let mis_neighbor = select(0.0, nom / denom, denom > 0.0); - rr.mis_sample = mis_neighbor; + rr.mis_sample = ratio(neighbor.target_score, t_neighbor_at_canonical.score) / base.accepted_count; src.history = neighbor_history; src.selected_light_index = neighbor.light_index; src.selected_uv = neighbor.light_uv; src.selected_target_score = t_neighbor_at_canonical.score; - src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * mis_neighbor; + src.weight_sum = t_neighbor_at_canonical.score * neighbor.contribution_weight * rr.mis_sample; src.radiance = t_neighbor_at_canonical.color; } else { - rr.mis_canonical = 0.0; + rr.mis_canonical = 1.0; rr.mis_sample = 1.0; let radiance = evaluate_reflected_light(base.surface, neighbor.light_index, neighbor.light_uv); src = unpack_reservoir(neighbor, max_confidence, radiance); @@ -487,7 +486,7 @@ fn finalize_resampling( } merge_reservoir(reservoir, canonical); - let effective_history = select((*reservoir).history, 1.0 + base.accepted_count, parameters.use_pairwise_mis != 0u); + let effective_history = select((*reservoir).history, 1.0, parameters.use_pairwise_mis != 0u); var ro = ResampleOutput(); ro.reservoir = pack_reservoir_detail(*reservoir, effective_history); @@ -524,17 +523,15 @@ fn resample_temporal( let prev_world_pos = prev_camera.position + tr.surface.depth * prev_dir; let other = PixelCache(tr.surface, tr.reservoir, prev_world_pos); let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence); - let mis_canonical = 1.0 + rr.mis_canonical; if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMatch) { textureStore(out_debug, cur_pixel, vec4(1.0)); } if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMisCanonical) { - let mis = mis_canonical / (1.0 + base.accepted_count); - textureStore(out_debug, cur_pixel, vec4(mis)); + textureStore(out_debug, cur_pixel, vec4(rr.mis_canonical)); } - return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); + return finalize_resampling(&reservoir, &color_and_weight, base, rr.mis_canonical); } fn resample_spatial( @@ -570,7 +567,7 @@ fn resample_spatial( var reservoir = LiveReservoir(); var color_and_weight = vec4(0.0); let base = ResampleBase(surface, canonical, position, f32(accepted_count)); - var mis_canonical = 1.0; + var mis_canonical = f32(accepted_count == 0u); // evaluate the MIS of each of the samples versus the canonical one. for (var lid = 0u; lid < accepted_count; lid += 1u) { @@ -584,8 +581,7 @@ fn resample_spatial( textureStore(out_debug, cur_pixel, vec4(value)); } if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_SpatialMisCanonical) { - let mis = mis_canonical / (1.0 + base.accepted_count); - textureStore(out_debug, cur_pixel, vec4(mis)); + textureStore(out_debug, cur_pixel, vec4(mis_canonical)); } return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); } From 8f153f20cbb22ac603cc19beabfbde3d2bb0af62 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Wed, 18 Sep 2024 00:31:11 -0700 Subject: [PATCH 26/26] Proper defensive MIS --- blade-helpers/src/hud.rs | 3 +++ blade-render/code/ray-trace.wgsl | 14 ++++++++++---- blade-render/src/render/mod.rs | 5 +++++ examples/scene/main.rs | 1 + src/lib.rs | 3 ++- 5 files changed, 21 insertions(+), 5 deletions(-) diff --git a/blade-helpers/src/hud.rs b/blade-helpers/src/hud.rs index 4d0579a0..8940825f 100644 --- a/blade-helpers/src/hud.rs +++ b/blade-helpers/src/hud.rs @@ -34,6 +34,9 @@ impl ExposeHud for blade_render::RayConfig { .logarithmic(true), ); ui.checkbox(&mut self.pairwise_mis, "Pairwise MIS"); + ui.add( + egui::widgets::Slider::new(&mut self.defensive_mis, 0.0..=1.0).text("Defensive MIS"), + ); } } diff --git a/blade-render/code/ray-trace.wgsl b/blade-render/code/ray-trace.wgsl index dccf8d47..93cacd15 100644 --- a/blade-render/code/ray-trace.wgsl +++ b/blade-render/code/ray-trace.wgsl @@ -39,8 +39,10 @@ struct MainParams { spatial_min_distance: i32, t_start: f32, use_pairwise_mis: u32, + defensive_mis: f32, use_motion_vectors: u32, temporal_accumulation_weight: f32, + pad: f32, grid_scale: vec2, } @@ -424,15 +426,18 @@ fn resample( if (parameters.use_pairwise_mis != 0u) { let canonical = base.canonical; let neighbor_history = min(neighbor.confidence, max_confidence); + let mis_scale = 1.0 / (base.accepted_count + parameters.defensive_mis); { // scoping this to hint the register allocation let t_canonical_at_neighbor = estimate_target_score_with_occlusion( other.surface, other.world_pos, canonical.selected_light_index, canonical.selected_uv, other_acs); - rr.mis_canonical = ratio(canonical.selected_target_score, t_canonical_at_neighbor.score) / base.accepted_count; + let r_canonical = ratio(canonical.history * canonical.selected_target_score / base.accepted_count, neighbor_history * t_canonical_at_neighbor.score); + rr.mis_canonical = mis_scale * (parameters.defensive_mis / base.accepted_count + r_canonical); } let t_neighbor_at_canonical = estimate_target_score_with_occlusion( base.surface, base.world_pos, neighbor.light_index, neighbor.light_uv, acc_struct); - rr.mis_sample = ratio(neighbor.target_score, t_neighbor_at_canonical.score) / base.accepted_count; + let r_neighbor = ratio(neighbor_history * neighbor.target_score, canonical.history * t_neighbor_at_canonical.score / base.accepted_count); + rr.mis_sample = mis_scale * r_neighbor; src.history = neighbor_history; src.selected_light_index = neighbor.light_index; @@ -523,15 +528,16 @@ fn resample_temporal( let prev_world_pos = prev_camera.position + tr.surface.depth * prev_dir; let other = PixelCache(tr.surface, tr.reservoir, prev_world_pos); let rr = resample(&reservoir, &color_and_weight, base, other, prev_acc_struct, parameters.temporal_tap_confidence); + let mis_canonical = rr.mis_canonical; if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMatch) { textureStore(out_debug, cur_pixel, vec4(1.0)); } if (WRITE_DEBUG_IMAGE && debug.view_mode == DebugMode_TemporalMisCanonical) { - textureStore(out_debug, cur_pixel, vec4(rr.mis_canonical)); + textureStore(out_debug, cur_pixel, vec4(mis_canonical)); } - return finalize_resampling(&reservoir, &color_and_weight, base, rr.mis_canonical); + return finalize_resampling(&reservoir, &color_and_weight, base, mis_canonical); } fn resample_spatial( diff --git a/blade-render/src/render/mod.rs b/blade-render/src/render/mod.rs index 4b18eecd..4dd56365 100644 --- a/blade-render/src/render/mod.rs +++ b/blade-render/src/render/mod.rs @@ -108,6 +108,7 @@ pub struct RayConfig { /// See "9.1 pairwise mis for robust reservoir reuse" /// "Correlations and Reuse for Fast and Accurate Physically Based Light Transport" pub pairwise_mis: bool, + pub defensive_mis: f32, } #[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] @@ -374,8 +375,10 @@ struct MainParams { spatial_min_distance: u32, t_start: f32, use_pairwise_mis: u32, + defensive_mis: f32, use_motion_vectors: u32, temporal_accumulation_weight: f32, + pad: u32, grid_scale: [u32; 2], } @@ -1098,12 +1101,14 @@ impl Renderer { spatial_min_distance: ray_config.spatial_min_distance, t_start: ray_config.t_start, use_pairwise_mis: ray_config.pairwise_mis as u32, + defensive_mis: ray_config.defensive_mis, use_motion_vectors: (self.frame_scene_built == self.frame_index) as u32, temporal_accumulation_weight: if denoiser_config.enabled { denoiser_config.temporal_weight } else { 1.0 }, + pad: 0, grid_scale, }, acc_struct: self.acceleration_structure, diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 5ee2beca..c58c6a48 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -267,6 +267,7 @@ impl Example { group_mixer: 10, t_start: 0.1, pairwise_mis: true, + defensive_mis: 0.0, }, denoiser_config: blade_render::DenoiserConfig { enabled: true, diff --git a/src/lib.rs b/src/lib.rs index 774e5cea..72e0a2c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -487,11 +487,12 @@ impl Engine { temporal_tap: true, temporal_confidence: 10.0, spatial_taps: 1, - spatial_confidence: 5.0, + spatial_confidence: 10.0, spatial_min_distance: 2, group_mixer: 10, t_start: 0.01, pairwise_mis: true, + defensive_mis: 0.1, }, denoiser_config: blade_render::DenoiserConfig { enabled: true,