From c70d4a75e8819a050b7cf146b2c1f733a681e641 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 2 Sep 2025 20:24:50 +0300 Subject: [PATCH 01/16] remove unused mesh instance visibility mask --- Lorr/Engine/Scene/SceneRenderer.cc | 15 --------------- Lorr/Engine/Scene/SceneRenderer.hh | 2 -- Lorr/Runtime/main.cc | 2 +- 3 files changed, 1 insertion(+), 18 deletions(-) diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 83f2ac6d..c0341487 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -339,13 +339,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eNone); prepared_frame.mesh_instances_buffer = transfer_man.upload(info.gpu_mesh_instances, std::move(prepared_frame.mesh_instances_buffer)); - auto mesh_instance_visibility_mask_size_bytes = (info.mesh_instance_count + 31) / 32 * sizeof(u32); - self.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.resize(device, mesh_instance_visibility_mask_size_bytes).value(); - prepared_frame.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eNone); - prepared_frame.mesh_instance_visibility_mask_buffer = zero_fill_pass(std::move(prepared_frame.mesh_instance_visibility_mask_buffer)); - auto meshlet_instance_visibility_mask_size_bytes = (info.max_meshlet_instance_count + 31) / 32 * sizeof(u32); self.meshlet_instance_visibility_mask_buffer = self.meshlet_instance_visibility_mask_buffer.resize(device, meshlet_instance_visibility_mask_size_bytes).value(); @@ -354,8 +347,6 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in prepared_frame.meshlet_instance_visibility_mask_buffer = 
zero_fill_pass(std::move(prepared_frame.meshlet_instance_visibility_mask_buffer)); } else if (self.mesh_instances_buffer) { prepared_frame.mesh_instances_buffer = self.mesh_instances_buffer.acquire(device, "mesh instances", vuk::eMemoryRead); - prepared_frame.mesh_instance_visibility_mask_buffer = - self.mesh_instance_visibility_mask_buffer.acquire(device, "mesh instance visibility mask", vuk::eMemoryRead); prepared_frame.meshlet_instance_visibility_mask_buffer = self.meshlet_instance_visibility_mask_buffer.acquire(device, "meshlet instances visibility mask", vuk::eMemoryRead); } @@ -1190,7 +1181,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value void { self.meshes_buffer = {}; } - if (self.mesh_instance_visibility_mask_buffer) { - device.destroy(self.mesh_instance_visibility_mask_buffer.id()); - self.mesh_instance_visibility_mask_buffer = {}; - } - if (self.meshlet_instance_visibility_mask_buffer) { device.destroy(self.meshlet_instance_visibility_mask_buffer.id()); self.meshlet_instance_visibility_mask_buffer = {}; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 4adbfd75..6953b7d3 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -30,7 +30,6 @@ struct PreparedFrame { vuk::Value transforms_buffer = {}; vuk::Value meshes_buffer = {}; vuk::Value mesh_instances_buffer = {}; - vuk::Value mesh_instance_visibility_mask_buffer = {}; vuk::Value meshlet_instance_visibility_mask_buffer = {}; vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; @@ -56,7 +55,6 @@ struct SceneRenderer { Buffer mesh_instances_buffer = {}; Buffer meshes_buffer = {}; - Buffer mesh_instance_visibility_mask_buffer = {}; Buffer meshlet_instance_visibility_mask_buffer = {}; Buffer materials_buffer = {}; diff --git a/Lorr/Runtime/main.cc b/Lorr/Runtime/main.cc index 32abe938..cfc19ab1 100755 --- a/Lorr/Runtime/main.cc +++ b/Lorr/Runtime/main.cc @@ -27,7 +27,7 @@ i32 main(i32 argc, c8 
**argv) { }; lr::AppBuilder() // - .module(1) + .module(3) .module(window_info) .module() .module() From dece711c70c07e92f92d558a92085066fbd96957 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 5 Sep 2025 01:10:25 +0300 Subject: [PATCH 02/16] add VBGTAO --- Lorr/Engine/Graphics/Slang/Compiler.cc | 1 + Lorr/Engine/Graphics/Vulkan/Device.cc | 1 + .../Resources/shaders/passes/brdf.slang | 11 +- .../shaders/passes/vbgtao_denoise.slang | 80 +++++ .../shaders/passes/vbgtao_generate.slang | 209 ++++++++++++++ .../shaders/passes/vbgtao_prefilter.slang | 100 +++++++ Lorr/Engine/Resources/shaders/scene.slang | 4 +- Lorr/Engine/Resources/shaders/std/math.slang | 1 + Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 1 - Lorr/Engine/Scene/EditorCamera.cc | 5 +- Lorr/Engine/Scene/GPUScene.hh | 4 +- Lorr/Engine/Scene/Scene.cc | 7 +- Lorr/Engine/Scene/SceneRenderer.cc | 273 +++++++++++++++++- Lorr/Engine/Scene/SceneRenderer.hh | 4 + 14 files changed, 679 insertions(+), 22 deletions(-) create mode 100644 Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang create mode 100644 Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang create mode 100644 Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang diff --git a/Lorr/Engine/Graphics/Slang/Compiler.cc b/Lorr/Engine/Graphics/Slang/Compiler.cc index fce9b645..f856cdac 100644 --- a/Lorr/Engine/Graphics/Slang/Compiler.cc +++ b/Lorr/Engine/Graphics/Slang/Compiler.cc @@ -343,6 +343,7 @@ auto SlangCompiler::new_session(const SlangSessionInfo &info) -> ls::option bool { vk10_features.features.shaderInt64 = true; vk10_features.features.multiDrawIndirect = true; vk10_features.features.samplerAnisotropy = true; + vk10_features.features.shaderImageGatherExtended = true; VkPhysicalDeviceMaintenance8FeaturesKHR maintenance_8_features = {}; maintenance_8_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_8_FEATURES_KHR; diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang 
b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 3bd6a67f..603f3054 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -14,6 +14,7 @@ struct ShaderParameters { Image2D sky_transmittance_lut; Image2D sky_multiscattering_lut; Image2D depth_image; + Image2D ambient_occlusion; Image2D albedo_image; Image2D normal_image; Image2D emissive_image; @@ -43,7 +44,9 @@ func fs_main(VertexOutput input) -> f32x4 { let metallic_roughness_occlusion = params.metallic_roughness_occlusion_image.Load(pixel_pos); let metallic = metallic_roughness_occlusion.x; let roughness = metallic_roughness_occlusion.y; - let occlusion = metallic_roughness_occlusion.z; + let baked_occlusion = metallic_roughness_occlusion.z; + let screen_space_occlusion = params.ambient_occlusion.Load(pixel_pos).r; + let occlusion = min(baked_occlusion, screen_space_occlusion); let NDC = f32x3(input.tex_coord * 2.0 - 1.0, depth); let world_position_h = mul(params.camera.inv_projection_view_mat, f32x4(NDC, 1.0)); @@ -89,7 +92,7 @@ func fs_main(VertexOutput input) -> f32x4 { sky_luminance = std::rec709_oetf(sky_result.luminance) * eye_gradient; } - let ambient_contribution = sky_luminance * albedo_color * occlusion; + let indirect_illuminance = sky_luminance * albedo_color * occlusion; // MATERIAL COLOR ─────────────────────────────────────────────────── // https://marmosetco.tumblr.com/post/81245981087 @@ -102,11 +105,11 @@ func fs_main(VertexOutput input) -> f32x4 { let NoL = max(dot(N, L), 0.0); if (NoL > 0.0) { let brdf = BRDF(V, N, L, albedo_color, roughness, metallic); - material_surface_color = brdf * horizon * sun_illuminance * NoL; + material_surface_color = brdf * horizon * sun_illuminance * NoL * occlusion; } // FINAL ──────────────────────────────────────────────────────────── - let final_color = material_surface_color + ambient_contribution + emission; + let final_color = material_surface_color + indirect_illuminance + emission; 
return f32x4(final_color, 1.0); } diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang new file mode 100644 index 00000000..2050aa51 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang @@ -0,0 +1,80 @@ +import std; +import gpu; +import scene; + +struct ShaderParameters { + Texture2D occlusion_noisy; + Texture2D depth_differences; + RWTexture2D ambient_occlusion; + Sampler point_clamp_sampler; +}; + +[[shader("compute")]] +[[numthreads(16, 16, 1)]] +func cs_main( + u32x2 thread_id : SV_DispatchThreadID, + uniform ParameterBlock params, + uniform i32x3 occlusion_noisy_extent +) -> void { + let pixel_coordinates = i32x2(thread_id.xy); + let uv = f32x2(pixel_coordinates) / f32x2(occlusion_noisy_extent.xy); + + let edges0 = params.depth_differences.GatherRed(params.point_clamp_sampler, uv); + let edges1 = params.depth_differences.GatherRed(params.point_clamp_sampler, uv, i32x2(2, 0)); + let edges2 = params.depth_differences.GatherRed(params.point_clamp_sampler, uv, i32x2(1, 2)); + let visibility0 = params.occlusion_noisy.GatherRed(params.point_clamp_sampler, uv); + let visibility1 = params.occlusion_noisy.GatherRed(params.point_clamp_sampler, uv, i32x2(2, 0)); + let visibility2 = params.occlusion_noisy.GatherRed(params.point_clamp_sampler, uv, i32x2(0, 2)); + let visibility3 = params.occlusion_noisy.GatherRed(params.point_clamp_sampler, uv, i32x2(2, 2)); + + let left_edges = unpackUnorm4x8ToFloat(edges0.x); + let right_edges = unpackUnorm4x8ToFloat(edges1.x); + let top_edges = unpackUnorm4x8ToFloat(edges0.z); + let bottom_edges = unpackUnorm4x8ToFloat(edges2.w); + var center_edges = unpackUnorm4x8ToFloat(edges0.y); + center_edges *= f32x4(left_edges.y, right_edges.x, top_edges.w, bottom_edges.z); + + let center_weight = 1.2; + let left_weight = center_edges.x; + let right_weight = center_edges.y; + let top_weight = center_edges.z; + let bottom_weight = center_edges.w; 
+ let top_left_weight = 0.425 * (top_weight * top_edges.x + left_weight * left_edges.z); + let top_right_weight = 0.425 * (top_weight * top_edges.y + right_weight * right_edges.z); + let bottom_left_weight = 0.425 * (bottom_weight * bottom_edges.x + left_weight * left_edges.w); + let bottom_right_weight = 0.425 * (bottom_weight * bottom_edges.y + right_weight * right_edges.w); + + let center_visibility = visibility0.y; + let left_visibility = visibility0.x; + let right_visibility = visibility0.z; + let top_visibility = visibility1.x; + let bottom_visibility = visibility2.z; + let top_left_visibility = visibility0.w; + let top_right_visibility = visibility1.w; + let bottom_left_visibility = visibility2.w; + let bottom_right_visibility = visibility3.w; + + var sum = center_visibility; + sum += left_visibility * left_weight; + sum += right_visibility * right_weight; + sum += top_visibility * top_weight; + sum += bottom_visibility * bottom_weight; + sum += top_left_visibility * top_left_weight; + sum += top_right_visibility * top_right_weight; + sum += bottom_left_visibility * bottom_left_weight; + sum += bottom_right_visibility * bottom_right_weight; + + var sum_weight = center_weight; + sum_weight += left_weight; + sum_weight += right_weight; + sum_weight += top_weight; + sum_weight += bottom_weight; + sum_weight += top_left_weight; + sum_weight += top_right_weight; + sum_weight += bottom_left_weight; + sum_weight += bottom_right_weight; + + let denoised_visibility = sum / sum_weight; + + params.ambient_occlusion.Store(pixel_coordinates, denoised_visibility); +} diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang new file mode 100644 index 00000000..e663282f --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang @@ -0,0 +1,209 @@ +// Visibility Bitmask Ground-Truth Ambient Occlusion (VBGTAO) +// Credits: +// - https://ar5iv.labs.arxiv.org/html/2301.11376 +// - 
https://github.com/bevyengine/bevy/blob/main/crates/bevy_pbr/src/ssao/ssao.wgsl + +import std; +import gpu; +import scene; + +struct ShaderParameters { + ConstantBuffer camera; + Texture2D prefiltered_depth; + Texture2D normals; + Texture2D hilbert_noise; + RWTexture2D ambient_occlusion; + RWTexture2D depth_differences; + SamplerState point_clamp_sampler; + SamplerState linear_clamp_sampler; +}; + +f32 fast_sqrt(f32 x) { + return (asfloat( 0x1fbd1df5 + ( asint( x ) >> 1 ) )); +} +// input [-1, 1] and output [0, PI], from https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/ +f32 fast_acos(f32 inX) { + f32 x = abs(inX); + f32 res = -0.156583 * x + HALF_PI; + res *= fast_sqrt(1.0 - x); + return (inX >= 0) ? res : PI - res; +} + +func load_noise(u32x2 coord, in Texture2D hilbert_noise) -> f32x2{ + var index = hilbert_noise.Load(i32x3(coord % 64, 0)).r; + // TODO: Temporal jitter (or if you have consts.frameIndex, etc...) + //index += 288*(temporalIndex%64); + + // R2 sequence - see http://extremelearning.com.au/unreasonable-effectiveness-of-quasirandom-sequences/ + return f32x2(frac(0.5 + index * float2(0.75487766624669276005, 0.5698402909980532659114))); +} + +func load_normal_view_space( + in f32x2 uv, + in Camera camera, + in Texture2D normals, + in SamplerState sampler_point_clamp +) -> f32x3 { + let packed_normals = normals.SampleLevel(sampler_point_clamp, uv, 0.0); + var world_normal = std::oct_to_vec3(packed_normals.zw); + world_normal = mul(camera.view_mat, f32x4(world_normal, 0.0)).xyz; + return normalize(world_normal); +} + +func reconstruct_view_space_position( + f32x2 uv, + f32 depth, + in Camera camera +) -> f32x3 { + let clip_xy = f32x2(uv.x * 2.0 - 1.0, 1.0 - 2.0 * uv.y); + let t = mul(camera.inv_projection_mat, f32x4(clip_xy, depth, 1.0)); + let view_xyz = t.xyz / t.w; + return view_xyz; +} + +func load_and_reconstruct_view_space_position( + f32x2 uv, + f32 sample_mip_level, + in Camera 
camera, + in Texture2D src_depth, + in SamplerState sampler_linear_clamp +) -> f32x3 { + let depth = src_depth.SampleLevel(sampler_linear_clamp, uv, sample_mip_level).r; + return reconstruct_view_space_position(uv, depth, camera); +} + +func calculate_edges( + i32x2 pixel_coord, + f32x2 uv, + in Texture2D src_depth, + in SamplerState sampler_point_clamp, + in RWTexture2D edges +) -> f32 { + // TODO: Replace gather + let depths_upper_left = src_depth.GatherRed(sampler_point_clamp, uv); + let depths_bottom_right = src_depth.GatherRed(sampler_point_clamp, uv, i32x2(1, 1)); + let depth_center = depths_upper_left.y; + let depth_left = depths_upper_left.x; + let depth_top = depths_upper_left.z; + let depth_bottom = depths_bottom_right.x; + let depth_right = depths_bottom_right.z; + + var edge_info = f32x4(depth_left, depth_right, depth_top, depth_bottom) - depth_center; + let slope_left_right = (edge_info.y - edge_info.x) * 0.5; + let slope_top_bottom = (edge_info.w - edge_info.z) * 0.5; + let edge_info_slope_adjusted = edge_info + f32x4(slope_left_right, -slope_left_right, slope_top_bottom, -slope_top_bottom); + edge_info = min(abs(edge_info), abs(edge_info_slope_adjusted)); + let bias = 0.25; // Using the bias and then saturating nudges the values a bit + let scale = depth_center * 0.011; // Weight the edges by their distance from the camera + edge_info = saturate((1.0 + bias) - edge_info / scale); // Apply the bias and scale, and invert edge_info so that small values become large, and vice versa + + // Pack the edge info into the texture + let edge_info_packed = packUnorm4x8(edge_info); + edges[pixel_coord] = edge_info_packed; + + return depth_center; +} + +func updateSectors( + f32 min_horizon, + f32 max_horizon, + f32 samples_per_slice, + u32 bitmask, +) -> u32 { + let start_horizon = u32(min_horizon * samples_per_slice); + let angle_horizon = u32(ceil((max_horizon - min_horizon) * samples_per_slice)); + + return bitfieldInsert(bitmask, 0xFFFFFFFFu, start_horizon, 
angle_horizon); +} + +func processSample( + f32 thickness, + f32x3 delta_position, + f32x3 view_vec, + f32 sampling_direction, + f32x2 n, + f32 samples_per_slice, + inout u32 bitmask, +) -> void { + let delta_position_back_face = delta_position - view_vec * thickness; + + var front_back_horizon = f32x2( + fast_acos(dot(normalize(delta_position), view_vec)), + fast_acos(dot(normalize(delta_position_back_face), view_vec)), + ); + + front_back_horizon = saturate(fma(f32x2(sampling_direction), -front_back_horizon, n)); + front_back_horizon = select(sampling_direction >= 0.0, front_back_horizon.yx, front_back_horizon.xy); + + bitmask = updateSectors(front_back_horizon.x, front_back_horizon.y, samples_per_slice, bitmask); +} + +[[numthreads(16, 16, 1)]] +func cs_main( + const uint2 pixel_coordinates : SV_DispatchThreadID, + uniform ParameterBlock params, +) -> void { + let thickness = 0.25; + let slice_count = 3.0; + let samples_per_slice_side = 3.0; + let effect_radius = 0.5 * 1.457; + let falloff_range = 0.615 * effect_radius; + let falloff_from = effect_radius * (1.0 - 0.615); + let falloff_mul = -1.0 / falloff_range; + let falloff_add = falloff_from / falloff_range + 1.0; + + let uv = (f32x2(pixel_coordinates) + 0.5) / params.camera.resolution; + var pixel_depth = calculate_edges(pixel_coordinates, uv, params.prefiltered_depth, params.point_clamp_sampler, params.depth_differences); + pixel_depth += 0.00001; + let pixel_position = reconstruct_view_space_position(uv, pixel_depth, params.camera); + let pixel_normal = load_normal_view_space(uv, params.camera, params.normals, params.point_clamp_sampler); + let view_vec = normalize(-pixel_position); + let noise = load_noise(pixel_coordinates, params.hilbert_noise); + let sample_scale = (-0.5 * effect_radius * params.camera.projection_mat[0][0]) / pixel_position.z; + var visibility = 0.0; + var occluded_sample_count = 0u; + for (var slice_t = 0.0; slice_t < slice_count; slice_t += 1.0) { + let slice = slice_t + noise.x; + 
let phi = (PI / slice_count) * slice; + let omega = f32x2(cos(phi), sin(phi)); + + let direction = f32x3(omega.xy, 0.0); + let orthographic_direction = direction - (dot(direction, view_vec) * view_vec); + let axis = cross(direction, view_vec); + let projected_normal = pixel_normal - axis * dot(pixel_normal, axis); + let projected_normal_length = max(length(projected_normal), 1e-6); + + let sign_norm = sign(dot(orthographic_direction, projected_normal)); + let cos_norm = saturate(dot(projected_normal, view_vec) / projected_normal_length); + let n = f32x2(((PI * 0.5) - sign_norm * fast_acos(cos_norm)) * (1.0 / PI)); + + var bitmask = 0u; + + let sample_mul = f32x2(omega.x, -omega.y) * sample_scale; + for (var sample_t = 0.0; sample_t < samples_per_slice_side; sample_t += 1.0) { + var sample_noise = (slice_t + sample_t * samples_per_slice_side) * 0.6180339887498948482; + sample_noise = fract(noise.y + sample_noise); + + var s = (sample_t + sample_noise) / samples_per_slice_side; + s *= s; // https://github.com/GameTechDev/XeGTAO#sample-distribution + let sample = s * sample_mul; + + let sample_mip_level = clamp(log2(length(sample * params.camera.resolution)) - 3.3, 0.0, 5.0); // https://github.com/GameTechDev/XeGTAO#memory-bandwidth-bottleneck + let sample_position_1 = load_and_reconstruct_view_space_position(uv + sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); + let sample_position_2 = load_and_reconstruct_view_space_position(uv - sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); + + let sample_difference_1 = sample_position_1 - pixel_position; + let sample_difference_2 = sample_position_2 - pixel_position; + + processSample(thickness, sample_difference_1, view_vec, -1.0, n, samples_per_slice_side * 2.0, bitmask); + processSample(thickness, sample_difference_2, view_vec, 1.0, n, samples_per_slice_side * 2.0, bitmask); + } + + let bit_count = countbits(bitmask); + 
occluded_sample_count += bit_count; + } + + visibility = 1.0 - f32(occluded_sample_count) / (slice_count * 2.0 * samples_per_slice_side); + visibility = clamp(visibility, 0.03, 1.0); + params.ambient_occlusion[pixel_coordinates] = visibility; +} \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang new file mode 100644 index 00000000..f38e7b8c --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang @@ -0,0 +1,100 @@ +import std; +import gpu; +import scene; + +struct ShaderParameters { + Texture2D src_depth; + RWTexture2D dst_depth_mip0; + RWTexture2D dst_depth_mip1; + RWTexture2D dst_depth_mip2; + RWTexture2D dst_depth_mip3; + RWTexture2D dst_depth_mip4; + Sampler point_clamp_sampler; +}; + +groupshared float previous_mip_depth[8][8]; + +float weighted_average(float depth0, float depth1, float depth2, float depth3) { + float depth_range_scale_factor = 0.75; + float effect_radius = depth_range_scale_factor * 0.5 * 1.457; + float falloff_range = 0.615 * effect_radius; + float falloff_from = effect_radius * (1.0 - 0.615); + float falloff_mul = -1.0 / falloff_range; + float falloff_add = falloff_from / falloff_range + 1.0; + + float min_depth = min(min(depth0, depth1), min(depth2, depth3)); + float weight0 = saturate((depth0 - min_depth) * falloff_mul + falloff_add); + float weight1 = saturate((depth1 - min_depth) * falloff_mul + falloff_add); + float weight2 = saturate((depth2 - min_depth) * falloff_mul + falloff_add); + float weight3 = saturate((depth3 - min_depth) * falloff_mul + falloff_add); + float weight_total = weight0 + weight1 + weight2 + weight3; + + return ((weight0 * depth0) + (weight1 * depth1) + (weight2 * depth2) + (weight3 * depth3)) / weight_total; +} + +[[numthreads(8, 8, 1)]] +func cs_main( + uint3 global_id : SV_DispatchThreadID, + uint3 local_id : SV_GroupThreadID, + uniform ParameterBlock params, + uniform i32x3 
depth_extent +) -> void { + int2 base_coordinates = int2(global_id.xy); + + // MIP 0 + int2 pixel_coordinates0 = base_coordinates * 2; + int2 pixel_coordinates1 = pixel_coordinates0 + int2(1, 0); + int2 pixel_coordinates2 = pixel_coordinates0 + int2(0, 1); + int2 pixel_coordinates3 = pixel_coordinates0 + int2(1, 1); + + float2 depths_uv = float2(pixel_coordinates0) / f32x2(depth_extent.xy); + float4 depths = params.src_depth.Gather(params.point_clamp_sampler, depths_uv, int2(1,1)); + + params.dst_depth_mip0[pixel_coordinates0] = depths.w; + params.dst_depth_mip0[pixel_coordinates1] = depths.z; + params.dst_depth_mip0[pixel_coordinates2] = depths.x; + params.dst_depth_mip0[pixel_coordinates3] = depths.y; + + // MIP 1 + float depth_mip1 = weighted_average(depths.w, depths.z, depths.x, depths.y); + params.dst_depth_mip1[base_coordinates] = depth_mip1; + previous_mip_depth[local_id.x][local_id.y] = depth_mip1; + + GroupMemoryBarrierWithGroupSync(); + + // MIP 2 + if ((local_id.x % 2 == 0) && (local_id.y % 2 == 0)) { + float d0 = previous_mip_depth[local_id.x + 0][local_id.y + 0]; + float d1 = previous_mip_depth[local_id.x + 1][local_id.y + 0]; + float d2 = previous_mip_depth[local_id.x + 0][local_id.y + 1]; + float d3 = previous_mip_depth[local_id.x + 1][local_id.y + 1]; + float depth_mip2 = weighted_average(d0, d1, d2, d3); + params.dst_depth_mip2[base_coordinates / 2] = depth_mip2; + previous_mip_depth[local_id.x][local_id.y] = depth_mip2; + } + + GroupMemoryBarrierWithGroupSync(); + + // MIP 3 + if ((local_id.x % 4 == 0) && (local_id.y % 4 == 0)) { + float d0 = previous_mip_depth[local_id.x + 0][local_id.y + 0]; + float d1 = previous_mip_depth[local_id.x + 2][local_id.y + 0]; + float d2 = previous_mip_depth[local_id.x + 0][local_id.y + 2]; + float d3 = previous_mip_depth[local_id.x + 2][local_id.y + 2]; + float depth_mip3 = weighted_average(d0, d1, d2, d3); + params.dst_depth_mip3[base_coordinates / 4] = depth_mip3; + previous_mip_depth[local_id.x][local_id.y] = 
depth_mip3; + } + + GroupMemoryBarrierWithGroupSync(); + + // MIP 4 + if ((local_id.x % 8 == 0) && (local_id.y % 8 == 0)) { + float d0 = previous_mip_depth[local_id.x + 0][local_id.y + 0]; + float d1 = previous_mip_depth[local_id.x + 4][local_id.y + 0]; + float d2 = previous_mip_depth[local_id.x + 0][local_id.y + 4]; + float d3 = previous_mip_depth[local_id.x + 4][local_id.y + 4]; + float depth_mip4 = weighted_average(d0, d1, d2, d3); + params.dst_depth_mip4[base_coordinates / 8] = depth_mip4; + } +} diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 120838ed..a6d73e1f 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -73,11 +73,11 @@ public struct Environment { public struct Camera { public mat4 projection_mat; + public mat4 inv_projection_mat; public mat4 view_mat; - public mat4 projection_view_mat; public mat4 inv_view_mat; + public mat4 projection_view_mat; public mat4 inv_projection_view_mat; - public mat4 prev_projection_view_mat; public f32x3 position; public f32 near_clip; public f32 far_clip; diff --git a/Lorr/Engine/Resources/shaders/std/math.slang b/Lorr/Engine/Resources/shaders/std/math.slang index 7ce28fb6..0b6d774f 100644 --- a/Lorr/Engine/Resources/shaders/std/math.slang +++ b/Lorr/Engine/Resources/shaders/std/math.slang @@ -1,5 +1,6 @@ implementing std; +public static constexpr f32 HALF_PI = 1.57079632679; public static constexpr f32 PI = 3.1415926535897932384626433832795; public static constexpr f32 TAU = 6.283185307179586476925286766559; diff --git a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index 94adee66..f50aca96 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -30,7 +30,6 @@ ECS_COMPONENT_BEGIN(Camera) ECS_COMPONENT_MEMBER(max_velocity, f32, 1.0f) ECS_COMPONENT_MEMBER(accel_speed, f32, 1.0f) 
ECS_COMPONENT_MEMBER(decel_speed, f32, 1.0f) - ECS_COMPONENT_MEMBER(frustum_projection_view_mat, glm::mat4, glm::mat4(1.0)) ECS_COMPONENT_MEMBER(acceptable_lod_error, f32, 2.0f) ECS_COMPONENT_END(); diff --git a/Lorr/Engine/Scene/EditorCamera.cc b/Lorr/Engine/Scene/EditorCamera.cc index 80a7a2d6..41c88ef6 100644 --- a/Lorr/Engine/Scene/EditorCamera.cc +++ b/Lorr/Engine/Scene/EditorCamera.cc @@ -19,12 +19,11 @@ auto EditorCamera::update(this EditorCamera &self, f32 delta_time, const glm::ve auto view_mat = glm::lookAt(self.position, self.position + direction, up); auto projection_view_mat = projection_mat * view_mat; - - self.frustum_projection_view_mat = self.projection_view_mat; self.projection_mat = projection_mat; + self.inv_projection_mat = glm::inverse(projection_mat); self.view_mat = view_mat; - self.projection_view_mat = projection_mat * view_mat; self.inv_view_mat = glm::inverse(view_mat); + self.projection_view_mat = projection_mat * view_mat; self.inv_projection_view_mat = glm::inverse(projection_view_mat); self.acceptable_lod_error = 2.0f; } diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 1c62ad10..d1d5bfc2 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -109,11 +109,11 @@ constexpr static f32 PLANET_RADIUS_OFFSET = 0.001; struct Camera { alignas(4) glm::mat4 projection_mat = {}; + alignas(4) glm::mat4 inv_projection_mat = {}; alignas(4) glm::mat4 view_mat = {}; - alignas(4) glm::mat4 projection_view_mat = {}; alignas(4) glm::mat4 inv_view_mat = {}; + alignas(4) glm::mat4 projection_view_mat = {}; alignas(4) glm::mat4 inv_projection_view_mat = {}; - alignas(4) glm::mat4 frustum_projection_view_mat = {}; alignas(4) glm::vec3 position = {}; alignas(4) f32 near_clip = 0.01f; alignas(4) f32 far_clip = 1000.0f; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 7328d69f..959628e4 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -430,7 +430,7 
@@ auto Scene::create_model_entity(this Scene &self, UUID &importing_model_uuid) -> auto visit_nodes = [&](this auto &visitor, flecs::entity &root, std::vector &node_indices) -> void { for (const auto node_index : node_indices) { auto &cur_node = imported_model->nodes[node_index]; - auto node_entity = self.create_entity(self.find_entity(cur_node.name) ? std::string{} : cur_node.name); + auto node_entity = self.create_entity(); const auto T = glm::translate(glm::mat4(1.0f), cur_node.translation); const auto R = glm::mat4_cast(cur_node.rotation); @@ -606,17 +606,16 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< auto &camera_data = active_camera_data.emplace(GPU::Camera{}); camera_data.projection_mat = projection_mat; + camera_data.inv_projection_mat = glm::inverse(projection_mat); camera_data.view_mat = view_mat; - camera_data.projection_view_mat = camera_data.projection_mat * camera_data.view_mat; camera_data.inv_view_mat = glm::inverse(camera_data.view_mat); + camera_data.projection_view_mat = camera_data.projection_mat * camera_data.view_mat; camera_data.inv_projection_view_mat = glm::inverse(camera_data.projection_view_mat); camera_data.position = t.position; camera_data.near_clip = c.near_clip; camera_data.far_clip = c.far_clip; camera_data.resolution = c.resolution; camera_data.acceptable_lod_error = c.acceptable_lod_error; - camera_data.frustum_projection_view_mat = c.frustum_projection_view_mat; - c.frustum_projection_view_mat = camera_data.projection_view_mat; }); } diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index c0341487..7d7bbfd0 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -208,9 +208,77 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, visualize_overdraw_pipeline_info).value(); + auto vbgtao_prefilter_pipeline_info = PipelineCompileInfo{ + .module_name = 
"passes.vbgtao_prefilter", + .entry_points = { "cs_main" }, + }; + Pipeline::create(device, default_slang_session, vbgtao_prefilter_pipeline_info).value(); + + auto vbgtao_generate_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.vbgtao_generate", + .entry_points = { "cs_main" }, + }; + Pipeline::create(device, default_slang_session, vbgtao_generate_pipeline_info).value(); + + auto vbgtao_denoise_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.vbgtao_denoise", + .entry_points = { "cs_main" }, + }; + Pipeline::create(device, default_slang_session, vbgtao_denoise_pipeline_info).value(); + self.histogram_luminance_buffer = Buffer::create(device, sizeof(GPU::HistogramLuminance)).value(); vuk::fill(vuk::acquire_buf("histogram luminance", *device.buffer(self.histogram_luminance_buffer.id()), vuk::eNone), 0); + // Hilbert Noise LUT + constexpr auto HILBERT_NOISE_LUT_WIDTH = 64_u32; + auto hilbert_noise_lut_info = ImageInfo{ + .format = vuk::Format::eR16Uint, + .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eTransferDst, + .type = vuk::ImageType::e2D, + .extent = { .width = HILBERT_NOISE_LUT_WIDTH, .height = HILBERT_NOISE_LUT_WIDTH, .depth = 1 }, + .name = "Hilbert Noise LUT", + }; + std::tie(self.hilbert_noise_lut, self.hilbert_noise_lut_view) = Image::create_with_view(device, hilbert_noise_lut_info).value(); + + auto hilbert_index = [](u32 pos_x, u32 pos_y) -> u16 { + auto index = 0_u32; + for (auto cur_level = HILBERT_NOISE_LUT_WIDTH / 2; cur_level > 0_u32; cur_level /= 2_u32) { + auto region_x = (pos_x & cur_level) > 0_u32; + auto region_y = (pos_y & cur_level) > 0_u32; + index += cur_level * cur_level * ((3_u32 * region_x) ^ region_y); + if (region_y == 0_u32) { + if (region_x == 1_u32) { + pos_x = (HILBERT_NOISE_LUT_WIDTH - 1_u32) - pos_x; + pos_y = (HILBERT_NOISE_LUT_WIDTH - 1_u32) - pos_y; + } + + auto temp_pos_x = pos_x; + pos_x = pos_y; + pos_y = temp_pos_x; + } + } + + return index; + }; + + u16 
hilbert_noise[HILBERT_NOISE_LUT_WIDTH * HILBERT_NOISE_LUT_WIDTH] = {}; + for (auto y = 0_u32; y < HILBERT_NOISE_LUT_WIDTH; y++) { + for (auto x = 0_u32; x < HILBERT_NOISE_LUT_WIDTH; x++) { + hilbert_noise[y * HILBERT_NOISE_LUT_WIDTH + x] = hilbert_index(x, y); + } + } + + auto &transfer_man = device.transfer_man(); + + auto hilbert_noise_size_bytes = HILBERT_NOISE_LUT_WIDTH * HILBERT_NOISE_LUT_WIDTH * sizeof(u16); + auto hilbert_noise_buffer = transfer_man.alloc_image_buffer(hilbert_noise_lut_info.format, hilbert_noise_lut_info.extent); + std::memcpy(hilbert_noise_buffer->mapped_ptr, hilbert_noise, hilbert_noise_size_bytes); + + auto hilbert_noise_lut_attachment = self.hilbert_noise_lut_view.discard(device, "hilbert noise", vuk::ImageUsageFlagBits::eTransferDst); + hilbert_noise_lut_attachment = transfer_man.upload(std::move(hilbert_noise_buffer), std::move(hilbert_noise_lut_attachment)); + hilbert_noise_lut_attachment = hilbert_noise_lut_attachment.as_released(vuk::eComputeSampled, vuk::DomainFlagBits::eGraphicsQueue); + transfer_man.wait_on(std::move(hilbert_noise_lut_attachment)); + return true; } @@ -356,6 +424,43 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in info.environment.multiscattering_lut_size = self.sky_multiscatter_lut_view.extent(); info.environment.aerial_perspective_lut_size = self.sky_aerial_perspective_lut_extent; prepared_frame.environment_buffer = transfer_man.scratch_buffer(info.environment); + + // glm::vec3 corners[8] = {}; + // glm::vec3 ndc_corners[8] = { + // glm::vec3(-1.0f, 1.0f, 0.0f), glm::vec3(1.0f, 1.0f, 0.0f), glm::vec3(1.0f, -1.0f, 0.0f), glm::vec3(-1.0f, -1.0f, 0.0f), + // glm::vec3(-1.0f, 1.0f, 1.0f), glm::vec3(1.0f, 1.0f, 1.0f), glm::vec3(1.0f, -1.0f, 1.0f), glm::vec3(-1.0f, -1.0f, 1.0f), + // }; + // + // for (int i = 0; i < 8; ++i) { + // auto world_corner = info.camera.inv_projection_view_mat * glm::vec4(ndc_corners[i], 1.0f); + // corners[i] = glm::vec3(world_corner) / world_corner.w; + 
// } + // + // auto center = glm::vec3(0.0f); + // for (const auto &c : corners) { + // center += c; + // } + // center /= static_cast(ls::count_of(corners)); + // + // auto shadow_map_size = 512.0f; + // auto light_pos = center - info.environment.sun_direction * (shadow_map_size * 0.5f); + // auto light_target = center; + // auto up = glm::vec3(0, 1, 0); + // if (1.0f - glm::abs(glm::dot(info.environment.sun_direction, up)) < 1e-4f) { + // up = glm::vec3(0, 0, 1); + // } + // + // auto view_mat = glm::lookAt(light_pos, glm::vec3(0), up); + // auto projection_mat = glm::orthoRH_ZO( + // -shadow_map_size * 0.5f, + // shadow_map_size * 0.5f, + // -shadow_map_size * 0.5f, + // shadow_map_size * 0.5f, + // -shadow_map_size * 0.5f, + // shadow_map_size * 0.5f + // ); + // projection_mat[1][1] *= -1.0; + prepared_frame.camera_buffer = transfer_man.scratch_buffer(info.camera); prepared_frame.mesh_instance_count = info.mesh_instance_count; @@ -1456,6 +1561,159 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valuemip(0)) + .bind_image(0, 2, dst_image->mip(1)) + .bind_image(0, 3, dst_image->mip(2)) + .bind_image(0, 4, dst_image->mip(3)) + .bind_image(0, 5, dst_image->mip(4)) + .bind_sampler(0, 6, nearest_clamp_sampler) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, depth_input->extent) + .dispatch((depth_input->extent.width + 16 - 1) / 16, (depth_input->extent.height + 16 - 1) / 16); + + return std::make_tuple(depth_input, dst_image); + } + ); + + auto vbgtao_depth_attachment = vuk::declare_ia( + "vbgtao depth", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .format = vuk::Format::eR32Sfloat, + .sample_count = vuk::Samples::e1, + .level_count = 5, + .layer_count = 1 } + ); + vbgtao_depth_attachment.same_extent_as(depth_attachment); + vbgtao_depth_attachment = vuk::clear_image(std::move(vbgtao_depth_attachment), vuk::Black); + + std::tie(depth_attachment, vbgtao_depth_attachment) = 
vbgtao_prefilter_pass(std::move(depth_attachment), std::move(vbgtao_depth_attachment)); + + auto vbgtao_generate_pass = vuk::make_pass( + "vbgtao generate", + [](vuk::CommandBuffer &command_buffer, // + VUK_BA(vuk::eComputeUniformRead) camera, + VUK_IA(vuk::eComputeSampled) prefiltered_depth, + VUK_IA(vuk::eComputeSampled) normals, + VUK_IA(vuk::eComputeSampled) hilbert_noise, + VUK_IA(vuk::eComputeRW) ambient_occlusion, + VUK_IA(vuk::eComputeRW) depth_differences) { + auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eNearest, + .minFilter = vuk::Filter::eNearest, + .mipmapMode = vuk::SamplerMipmapMode::eNearest, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + auto linear_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .mipmapMode = vuk::SamplerMipmapMode::eLinear, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + command_buffer.bind_compute_pipeline("passes.vbgtao_generate") + .bind_buffer(0, 0, camera) + .bind_image(0, 1, prefiltered_depth) + .bind_image(0, 2, normals) + .bind_image(0, 3, hilbert_noise) + .bind_image(0, 4, ambient_occlusion) + .bind_image(0, 5, depth_differences) + .bind_sampler(0, 6, nearest_clamp_sampler) + .bind_sampler(0, 7, linear_clamp_sampler) + .dispatch_invocations_per_pixel(ambient_occlusion); + + return std::make_tuple(camera, normals, ambient_occlusion, depth_differences); + } + ); + + auto vbgtao_noisy_occlusion_attachment = vuk::declare_ia( + "vbgtao noisy occlusion", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .format = vuk::Format::eR16Sfloat, + .sample_count = vuk::Samples::e1 } + ); + 
vbgtao_noisy_occlusion_attachment.same_shape_as(final_attachment); + vbgtao_noisy_occlusion_attachment = vuk::clear_image(std::move(vbgtao_noisy_occlusion_attachment), vuk::White); + + auto vbgtao_depth_differences_attachment = vuk::declare_ia( + "vbgtao depth differences", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .format = vuk::Format::eR32Uint, + .sample_count = vuk::Samples::e1 } + ); + vbgtao_depth_differences_attachment.same_shape_as(final_attachment); + vbgtao_depth_differences_attachment = vuk::clear_image(std::move(vbgtao_depth_differences_attachment), vuk::Black); + + auto hilbert_noise_lut_attachment = + self.hilbert_noise_lut_view.acquire(device, "hilbert noise", vuk::ImageUsageFlagBits::eSampled, vuk::eComputeSampled); + + std::tie(camera_buffer, normal_attachment, vbgtao_noisy_occlusion_attachment, vbgtao_depth_differences_attachment) = vbgtao_generate_pass( + std::move(camera_buffer), + std::move(vbgtao_depth_attachment), + std::move(normal_attachment), + std::move(hilbert_noise_lut_attachment), + std::move(vbgtao_noisy_occlusion_attachment), + std::move(vbgtao_depth_differences_attachment) + ); + + auto vbgtao_denoise_pass = vuk::make_pass( + "vbgtao denoise", + [](vuk::CommandBuffer &command_buffer, // + VUK_IA(vuk::eComputeSampled) noisy_occlusion, + VUK_IA(vuk::eComputeSampled) depth_differences, + VUK_IA(vuk::eComputeRW) ambient_occlusion) { + auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eNearest, + .minFilter = vuk::Filter::eNearest, + .mipmapMode = vuk::SamplerMipmapMode::eNearest, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + command_buffer.bind_compute_pipeline("passes.vbgtao_denoise") + .bind_image(0, 0, noisy_occlusion) + .bind_image(0, 1, depth_differences) + .bind_image(0, 2, ambient_occlusion) + .bind_sampler(0, 3, 
nearest_clamp_sampler) + .dispatch_invocations_per_pixel(ambient_occlusion); + + return ambient_occlusion; + } + ); + + auto vbgtao_occlusion_attachment = vuk::declare_ia( + "vbgtao occlusion", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, .sample_count = vuk::Samples::e1 } + ); + vbgtao_occlusion_attachment.same_format_as(vbgtao_noisy_occlusion_attachment); + vbgtao_occlusion_attachment.same_shape_as(vbgtao_noisy_occlusion_attachment); + vbgtao_occlusion_attachment = vuk::clear_image(std::move(vbgtao_occlusion_attachment), vuk::White); + vbgtao_occlusion_attachment = vbgtao_denoise_pass( + std::move(vbgtao_noisy_occlusion_attachment), + std::move(vbgtao_depth_differences_attachment), + std::move(vbgtao_occlusion_attachment) + ); + // ── BRDF ──────────────────────────────────────────────────────────── auto brdf_pass = vuk::make_pass( "brdf", @@ -1466,6 +1724,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value materials_buffer = {}; vuk::Value environment_buffer = {}; vuk::Value camera_buffer = {}; + vuk::Value directional_camera_buffer = {}; vuk::Value sky_transmittance_lut = {}; vuk::Value sky_multiscatter_lut = {}; }; @@ -69,6 +70,9 @@ struct SceneRenderer { Image hiz = {}; ImageView hiz_view = {}; + Image hilbert_noise_lut = {}; + ImageView hilbert_noise_lut_view = {}; + bool debug_lines = false; f32 overdraw_heatmap_scale = 0.0f; From e86ce51b9710c9a84223053832ab0b3f04d0f5ad Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 5 Sep 2025 01:34:26 +0300 Subject: [PATCH 03/16] amplify VBGTAO --- Lorr/Engine/Resources/shaders/passes/brdf.slang | 2 +- Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 603f3054..93d02861 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ 
b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -46,7 +46,7 @@ func fs_main(VertexOutput input) -> f32x4 { let roughness = metallic_roughness_occlusion.y; let baked_occlusion = metallic_roughness_occlusion.z; let screen_space_occlusion = params.ambient_occlusion.Load(pixel_pos).r; - let occlusion = min(baked_occlusion, screen_space_occlusion); + let occlusion = baked_occlusion * screen_space_occlusion; let NDC = f32x3(input.tex_coord * 2.0 - 1.0, depth); let world_position_h = mul(params.camera.inv_projection_view_mat, f32x4(NDC, 1.0)); diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang index 2050aa51..d15da907 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang @@ -74,7 +74,9 @@ func cs_main( sum_weight += bottom_left_weight; sum_weight += bottom_right_weight; - let denoised_visibility = sum / sum_weight; + var denoised_visibility = sum / sum_weight; + let power = 2.0; + denoised_visibility = pow(max(denoised_visibility, 0.0f), power); params.ambient_occlusion.Store(pixel_coordinates, denoised_visibility); } From f83ea4e633886afaf6c4327083a55e7c519ffcde Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 5 Sep 2025 12:29:43 +0300 Subject: [PATCH 04/16] fix VBGTAO halo around objects --- .../shaders/passes/vbgtao_generate.slang | 13 +- Lorr/Engine/Resources/shaders/scene.slang | 60 ++-- Lorr/Engine/Scene/GPUScene.hh | 12 +- Lorr/Engine/Scene/SceneRenderer.cc | 284 +++++++++--------- 4 files changed, 196 insertions(+), 173 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang index e663282f..60524b19 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang @@ -74,12 +74,12 @@ func 
load_and_reconstruct_view_space_position( func calculate_edges( i32x2 pixel_coord, - f32x2 uv, + in f32x2 resolution, in Texture2D src_depth, in SamplerState sampler_point_clamp, in RWTexture2D edges ) -> f32 { - // TODO: Replace gather + let uv = f32x2(pixel_coord) / resolution; let depths_upper_left = src_depth.GatherRed(sampler_point_clamp, uv); let depths_bottom_right = src_depth.GatherRed(sampler_point_clamp, uv, i32x2(1, 1)); let depth_center = depths_upper_left.y; @@ -153,7 +153,8 @@ func cs_main( let falloff_add = falloff_from / falloff_range + 1.0; let uv = (f32x2(pixel_coordinates) + 0.5) / params.camera.resolution; - var pixel_depth = calculate_edges(pixel_coordinates, uv, params.prefiltered_depth, params.point_clamp_sampler, params.depth_differences); + var pixel_depth = calculate_edges( + pixel_coordinates, params.camera.resolution, params.prefiltered_depth, params.point_clamp_sampler, params.depth_differences); pixel_depth += 0.00001; let pixel_position = reconstruct_view_space_position(uv, pixel_depth, params.camera); let pixel_normal = load_normal_view_space(uv, params.camera, params.normals, params.point_clamp_sampler); @@ -189,8 +190,10 @@ func cs_main( let sample = s * sample_mul; let sample_mip_level = clamp(log2(length(sample * params.camera.resolution)) - 3.3, 0.0, 5.0); // https://github.com/GameTechDev/XeGTAO#memory-bandwidth-bottleneck - let sample_position_1 = load_and_reconstruct_view_space_position(uv + sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); - let sample_position_2 = load_and_reconstruct_view_space_position(uv - sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); + let sample_position_1 = load_and_reconstruct_view_space_position( + uv + sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); + let sample_position_2 = load_and_reconstruct_view_space_position( + uv - sample, sample_mip_level, 
params.camera, params.prefiltered_depth, params.linear_clamp_sampler); let sample_difference_1 = sample_position_1 - pixel_position; let sample_difference_2 = sample_position_2 - pixel_position; diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index a6d73e1f..d66b697a 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -32,43 +32,43 @@ public enum CullFlags : u32 { MicroTriangles, }; -[[Flags]] public enum EnvironmentFlags : u32 { None = 0, HasSun = 1 << 0, HasAtmosphere = 1 << 1, HasEyeAdaptation = 1 << 2, + HasVBGTAO = 1 << 3, }; public struct Environment { - public EnvironmentFlags flags = EnvironmentFlags::None; + public EnvironmentFlags flags; // Sun - public f32x3 sun_direction = {}; - public f32 sun_intensity = 10.0f; + public f32x3 sun_direction; + public f32 sun_intensity; // Atmosphere - public f32x3 atmos_rayleigh_scatter = { 0.005802f, 0.013558f, 0.033100f }; - public f32 atmos_rayleigh_density = 8.0f; - public f32x3 atmos_mie_scatter = { 0.003996f, 0.003996f, 0.003996f }; - public f32 atmos_mie_density = 1.2f; - public f32 atmos_mie_extinction = 0.004440f; - public f32 atmos_mie_asymmetry = 3.6f; - public f32x3 atmos_ozone_absorption = { 0.000650f, 0.001881f, 0.000085f }; - public f32 atmos_ozone_height = 25.0f; - public f32 atmos_ozone_thickness = 15.0f; - public f32x3 atmos_terrain_albedo = { 0.3f, 0.3f, 0.3f }; - public f32 atmos_planet_radius = 6360.0f; - public f32 atmos_atmos_radius = 6460.0f; - public f32 atmos_aerial_perspective_start_km = 8.0f; + public f32x3 atmos_rayleigh_scatter; + public f32 atmos_rayleigh_density; + public f32x3 atmos_mie_scatter; + public f32 atmos_mie_density; + public f32 atmos_mie_extinction; + public f32 atmos_mie_asymmetry; + public f32x3 atmos_ozone_absorption; + public f32 atmos_ozone_height; + public f32 atmos_ozone_thickness; + public f32x3 atmos_terrain_albedo; + public f32 atmos_planet_radius; + public f32 
atmos_atmos_radius; + public f32 atmos_aerial_perspective_start_km; // Eye adaptation - public f32 eye_min_exposure = -6.0f; - public f32 eye_max_exposure = 18.0f; - public f32 eye_adaptation_speed = 1.1f; - public f32 eye_ISO_K = 100.0f / 12.5f; - - public i32x3 transmittance_lut_size = {}; - public i32x3 sky_view_lut_size = {}; - public i32x3 multiscattering_lut_size = {}; - public i32x3 aerial_perspective_lut_size = {}; + public f32 eye_min_exposure; + public f32 eye_max_exposure; + public f32 eye_adaptation_speed; + public f32 eye_ISO_K; + + public i32x3 transmittance_lut_size; + public i32x3 sky_view_lut_size; + public i32x3 multiscattering_lut_size; + public i32x3 aerial_perspective_lut_size; }; public struct Camera { @@ -315,3 +315,11 @@ public struct Light { public f32x3 position = {}; public f32x3 color = {}; }; + +public struct VBGTAO { + public f32 thickness; + public f32 depth_range_scale_factor; + public f32 default_radius; + public f32 default_radius_multiplier; + public f32 falloff_range; +}; diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index d1d5bfc2..add68f9c 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -221,8 +221,16 @@ constexpr static u32 HISTOGRAM_THREADS_Y = 16; constexpr static u32 HISTOGRAM_BIN_COUNT = HISTOGRAM_THREADS_X * HISTOGRAM_THREADS_Y; struct HistogramLuminance { - f32 adapted_luminance = 0.0f; - f32 exposure = 0.0f; + alignas(4) f32 adapted_luminance = 0.0f; + alignas(4) f32 exposure = 0.0f; +}; + +struct VBGTAO { + alignas(4) f32 thickness = {}; + alignas(4) f32 depth_range_scale_factor = {}; + alignas(4) f32 default_radius = {}; + alignas(4) f32 default_radius_multiplier = {}; + alignas(4) f32 falloff_range = {}; }; } // namespace lr::GPU diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 7d7bbfd0..6a1132a8 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -1562,157 +1562,161 @@ auto 
SceneRenderer::render(this SceneRenderer &self, vuk::Valuemip(0)) - .bind_image(0, 2, dst_image->mip(1)) - .bind_image(0, 3, dst_image->mip(2)) - .bind_image(0, 4, dst_image->mip(3)) - .bind_image(0, 5, dst_image->mip(4)) - .bind_sampler(0, 6, nearest_clamp_sampler) - .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, depth_input->extent) - .dispatch((depth_input->extent.width + 16 - 1) / 16, (depth_input->extent.height + 16 - 1) / 16); - - return std::make_tuple(depth_input, dst_image); - } - ); - - auto vbgtao_depth_attachment = vuk::declare_ia( - "vbgtao depth", - { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, - .format = vuk::Format::eR32Sfloat, - .sample_count = vuk::Samples::e1, - .level_count = 5, - .layer_count = 1 } - ); - vbgtao_depth_attachment.same_extent_as(depth_attachment); - vbgtao_depth_attachment = vuk::clear_image(std::move(vbgtao_depth_attachment), vuk::Black); - - std::tie(depth_attachment, vbgtao_depth_attachment) = vbgtao_prefilter_pass(std::move(depth_attachment), std::move(vbgtao_depth_attachment)); - - auto vbgtao_generate_pass = vuk::make_pass( - "vbgtao generate", - [](vuk::CommandBuffer &command_buffer, // - VUK_BA(vuk::eComputeUniformRead) camera, - VUK_IA(vuk::eComputeSampled) prefiltered_depth, - VUK_IA(vuk::eComputeSampled) normals, - VUK_IA(vuk::eComputeSampled) hilbert_noise, - VUK_IA(vuk::eComputeRW) ambient_occlusion, - VUK_IA(vuk::eComputeRW) depth_differences) { - auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ - .magFilter = vuk::Filter::eNearest, - .minFilter = vuk::Filter::eNearest, - .mipmapMode = vuk::SamplerMipmapMode::eNearest, - .addressModeU = vuk::SamplerAddressMode::eClampToEdge, - .addressModeV = vuk::SamplerAddressMode::eClampToEdge, - .addressModeW = vuk::SamplerAddressMode::eClampToEdge, - }; - - auto linear_clamp_sampler = vuk::SamplerCreateInfo{ - .magFilter = vuk::Filter::eLinear, - .minFilter = vuk::Filter::eLinear, - .mipmapMode = vuk::SamplerMipmapMode::eLinear, 
- .addressModeU = vuk::SamplerAddressMode::eClampToEdge, - .addressModeV = vuk::SamplerAddressMode::eClampToEdge, - .addressModeW = vuk::SamplerAddressMode::eClampToEdge, - }; - - command_buffer.bind_compute_pipeline("passes.vbgtao_generate") - .bind_buffer(0, 0, camera) - .bind_image(0, 1, prefiltered_depth) - .bind_image(0, 2, normals) - .bind_image(0, 3, hilbert_noise) - .bind_image(0, 4, ambient_occlusion) - .bind_image(0, 5, depth_differences) - .bind_sampler(0, 6, nearest_clamp_sampler) - .bind_sampler(0, 7, linear_clamp_sampler) - .dispatch_invocations_per_pixel(ambient_occlusion); - - return std::make_tuple(camera, normals, ambient_occlusion, depth_differences); - } - ); - - auto vbgtao_noisy_occlusion_attachment = vuk::declare_ia( - "vbgtao noisy occlusion", + auto vbgtao_occlusion_attachment = vuk::declare_ia( + "vbgtao occlusion", { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, .format = vuk::Format::eR16Sfloat, .sample_count = vuk::Samples::e1 } ); - vbgtao_noisy_occlusion_attachment.same_shape_as(final_attachment); - vbgtao_noisy_occlusion_attachment = vuk::clear_image(std::move(vbgtao_noisy_occlusion_attachment), vuk::White); + vbgtao_occlusion_attachment.same_shape_as(final_attachment); + vbgtao_occlusion_attachment = vuk::clear_image(std::move(vbgtao_occlusion_attachment), vuk::White); - auto vbgtao_depth_differences_attachment = vuk::declare_ia( - "vbgtao depth differences", - { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, - .format = vuk::Format::eR32Uint, - .sample_count = vuk::Samples::e1 } - ); - vbgtao_depth_differences_attachment.same_shape_as(final_attachment); - vbgtao_depth_differences_attachment = vuk::clear_image(std::move(vbgtao_depth_differences_attachment), vuk::Black); + if (true) { + auto vbgtao_prefilter_pass = vuk::make_pass( + "vbgtao prefilter", + [](vuk::CommandBuffer &command_buffer, // + VUK_IA(vuk::eComputeSampled) depth_input, + VUK_IA(vuk::eComputeRW) 
dst_image) { + auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eNearest, + .minFilter = vuk::Filter::eNearest, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + command_buffer.bind_compute_pipeline("passes.vbgtao_prefilter") + .bind_image(0, 0, depth_input) + .bind_image(0, 1, dst_image->mip(0)) + .bind_image(0, 2, dst_image->mip(1)) + .bind_image(0, 3, dst_image->mip(2)) + .bind_image(0, 4, dst_image->mip(3)) + .bind_image(0, 5, dst_image->mip(4)) + .bind_sampler(0, 6, nearest_clamp_sampler) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, depth_input->extent) + .dispatch((depth_input->extent.width + 16 - 1) / 16, (depth_input->extent.height + 16 - 1) / 16); + + return std::make_tuple(depth_input, dst_image); + } + ); - auto hilbert_noise_lut_attachment = - self.hilbert_noise_lut_view.acquire(device, "hilbert noise", vuk::ImageUsageFlagBits::eSampled, vuk::eComputeSampled); + auto vbgtao_depth_attachment = vuk::declare_ia( + "vbgtao depth", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .format = vuk::Format::eR32Sfloat, + .sample_count = vuk::Samples::e1, + .level_count = 5, + .layer_count = 1 } + ); + vbgtao_depth_attachment.same_extent_as(depth_attachment); + vbgtao_depth_attachment = vuk::clear_image(std::move(vbgtao_depth_attachment), vuk::Black); + + std::tie(depth_attachment, vbgtao_depth_attachment) = + vbgtao_prefilter_pass(std::move(depth_attachment), std::move(vbgtao_depth_attachment)); + + auto vbgtao_generate_pass = vuk::make_pass( + "vbgtao generate", + [](vuk::CommandBuffer &command_buffer, // + VUK_BA(vuk::eComputeUniformRead) camera, + VUK_IA(vuk::eComputeSampled) prefiltered_depth, + VUK_IA(vuk::eComputeSampled) normals, + VUK_IA(vuk::eComputeSampled) hilbert_noise, + VUK_IA(vuk::eComputeRW) ambient_occlusion, + VUK_IA(vuk::eComputeRW) 
depth_differences) { + auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eNearest, + .minFilter = vuk::Filter::eNearest, + .mipmapMode = vuk::SamplerMipmapMode::eNearest, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + auto linear_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .mipmapMode = vuk::SamplerMipmapMode::eLinear, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + command_buffer.bind_compute_pipeline("passes.vbgtao_generate") + .bind_buffer(0, 0, camera) + .bind_image(0, 1, prefiltered_depth) + .bind_image(0, 2, normals) + .bind_image(0, 3, hilbert_noise) + .bind_image(0, 4, ambient_occlusion) + .bind_image(0, 5, depth_differences) + .bind_sampler(0, 6, nearest_clamp_sampler) + .bind_sampler(0, 7, linear_clamp_sampler) + .dispatch_invocations_per_pixel(ambient_occlusion); + + return std::make_tuple(camera, normals, ambient_occlusion, depth_differences); + } + ); - std::tie(camera_buffer, normal_attachment, vbgtao_noisy_occlusion_attachment, vbgtao_depth_differences_attachment) = vbgtao_generate_pass( - std::move(camera_buffer), - std::move(vbgtao_depth_attachment), - std::move(normal_attachment), - std::move(hilbert_noise_lut_attachment), - std::move(vbgtao_noisy_occlusion_attachment), - std::move(vbgtao_depth_differences_attachment) - ); + auto vbgtao_noisy_occlusion_attachment = vuk::declare_ia( + "vbgtao noisy occlusion", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, .sample_count = vuk::Samples::e1 } + ); + vbgtao_noisy_occlusion_attachment.same_format_as(vbgtao_occlusion_attachment); + 
vbgtao_noisy_occlusion_attachment.same_shape_as(vbgtao_occlusion_attachment); + vbgtao_noisy_occlusion_attachment = vuk::clear_image(std::move(vbgtao_noisy_occlusion_attachment), vuk::White); + + auto vbgtao_depth_differences_attachment = vuk::declare_ia( + "vbgtao depth differences", + { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .format = vuk::Format::eR32Uint, + .sample_count = vuk::Samples::e1 } + ); + vbgtao_depth_differences_attachment.same_shape_as(final_attachment); + vbgtao_depth_differences_attachment = vuk::clear_image(std::move(vbgtao_depth_differences_attachment), vuk::Black); - auto vbgtao_denoise_pass = vuk::make_pass( - "vbgtao denoise", - [](vuk::CommandBuffer &command_buffer, // - VUK_IA(vuk::eComputeSampled) noisy_occlusion, - VUK_IA(vuk::eComputeSampled) depth_differences, - VUK_IA(vuk::eComputeRW) ambient_occlusion) { - auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ - .magFilter = vuk::Filter::eNearest, - .minFilter = vuk::Filter::eNearest, - .mipmapMode = vuk::SamplerMipmapMode::eNearest, - .addressModeU = vuk::SamplerAddressMode::eClampToEdge, - .addressModeV = vuk::SamplerAddressMode::eClampToEdge, - .addressModeW = vuk::SamplerAddressMode::eClampToEdge, - }; + auto hilbert_noise_lut_attachment = + self.hilbert_noise_lut_view.acquire(device, "hilbert noise", vuk::ImageUsageFlagBits::eSampled, vuk::eComputeSampled); - command_buffer.bind_compute_pipeline("passes.vbgtao_denoise") - .bind_image(0, 0, noisy_occlusion) - .bind_image(0, 1, depth_differences) - .bind_image(0, 2, ambient_occlusion) - .bind_sampler(0, 3, nearest_clamp_sampler) - .dispatch_invocations_per_pixel(ambient_occlusion); + std::tie(camera_buffer, normal_attachment, vbgtao_noisy_occlusion_attachment, vbgtao_depth_differences_attachment) = vbgtao_generate_pass( + std::move(camera_buffer), + std::move(vbgtao_depth_attachment), + std::move(normal_attachment), + std::move(hilbert_noise_lut_attachment), + 
std::move(vbgtao_noisy_occlusion_attachment), + std::move(vbgtao_depth_differences_attachment) + ); - return ambient_occlusion; - } - ); + auto vbgtao_denoise_pass = vuk::make_pass( + "vbgtao denoise", + [](vuk::CommandBuffer &command_buffer, // + VUK_IA(vuk::eComputeSampled) noisy_occlusion, + VUK_IA(vuk::eComputeSampled) depth_differences, + VUK_IA(vuk::eComputeRW) ambient_occlusion) { + auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eNearest, + .minFilter = vuk::Filter::eNearest, + .mipmapMode = vuk::SamplerMipmapMode::eNearest, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + command_buffer.bind_compute_pipeline("passes.vbgtao_denoise") + .bind_image(0, 0, noisy_occlusion) + .bind_image(0, 1, depth_differences) + .bind_image(0, 2, ambient_occlusion) + .bind_sampler(0, 3, nearest_clamp_sampler) + .dispatch_invocations_per_pixel(ambient_occlusion); + + return ambient_occlusion; + } + ); - auto vbgtao_occlusion_attachment = vuk::declare_ia( - "vbgtao occlusion", - { .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, .sample_count = vuk::Samples::e1 } - ); - vbgtao_occlusion_attachment.same_format_as(vbgtao_noisy_occlusion_attachment); - vbgtao_occlusion_attachment.same_shape_as(vbgtao_noisy_occlusion_attachment); - vbgtao_occlusion_attachment = vuk::clear_image(std::move(vbgtao_occlusion_attachment), vuk::White); - vbgtao_occlusion_attachment = vbgtao_denoise_pass( - std::move(vbgtao_noisy_occlusion_attachment), - std::move(vbgtao_depth_differences_attachment), - std::move(vbgtao_occlusion_attachment) - ); + vbgtao_occlusion_attachment = vbgtao_denoise_pass( + std::move(vbgtao_noisy_occlusion_attachment), + std::move(vbgtao_depth_differences_attachment), + std::move(vbgtao_occlusion_attachment) + ); + } // ── BRDF ──────────────────────────────────────────────────────────── 
auto brdf_pass = vuk::make_pass( From 4951f938d26def5e0df29b7a4513860bf00d2823 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sat, 6 Sep 2025 23:52:10 +0300 Subject: [PATCH 05/16] fix VBGTAO horizon calculation --- Lorr/Engine/Asset/Asset.cc | 6 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 2 +- Lorr/Engine/Resources/shaders/cull.slang | 23 +-- .../Resources/shaders/passes/brdf.slang | 8 +- .../shaders/passes/cull_meshlets.slang | 2 +- .../shaders/passes/vbgtao_denoise.slang | 2 +- .../shaders/passes/vbgtao_generate.slang | 159 ++++++++-------- .../shaders/passes/vbgtao_prefilter.slang | 2 +- .../shaders/passes/visbuffer_decode.slang | 176 +++++++++++------- .../shaders/passes/visbuffer_encode.slang | 9 +- Lorr/Engine/Resources/shaders/scene.slang | 26 +-- .../Resources/shaders/std/encoding.slang | 26 +-- Lorr/Engine/Scene/GPUScene.hh | 2 +- Lorr/Engine/Scene/SceneRenderer.cc | 5 +- 14 files changed, 244 insertions(+), 204 deletions(-) diff --git a/Lorr/Engine/Asset/Asset.cc b/Lorr/Engine/Asset/Asset.cc index b2191a9a..5feb6b52 100755 --- a/Lorr/Engine/Asset/Asset.cc +++ b/Lorr/Engine/Asset/Asset.cc @@ -1109,11 +1109,11 @@ auto AssetManager::load_texture(this AssetManager &self, const UUID &uuid, const .addr_v = vuk::SamplerAddressMode::eRepeat, .addr_w = vuk::SamplerAddressMode::eRepeat, .compare_op = vuk::CompareOp::eNever, - .max_anisotropy = 8.0f, + .max_anisotropy = 1.0f, .mip_lod_bias = 0.0f, .min_lod = 0.0f, - .max_lod = static_cast(mip_level_count - 1), - .use_anisotropy = true, + .max_lod = 1000.0f, + .use_anisotropy = false, }; auto sampler = Sampler::create(device, sampler_info).value(); diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index b7cd02c2..6d22f658 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -57,7 +57,7 @@ auto Device::init(this Device &self) -> bool { instance_extensions.push_back(VK_KHR_SURFACE_EXTENSION_NAME); 
instance_extensions.push_back("VK_KHR_xcb_surface"); instance_extensions.push_back("VK_KHR_xlib_surface"); - // instance_extensions.push_back("VK_KHR_wayland_surface"); + instance_extensions.push_back("VK_KHR_wayland_surface"); #endif #if LS_DEBUG instance_extensions.push_back(VK_EXT_DEBUG_UTILS_EXTENSION_NAME); diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index 0badc40a..d4b97ad1 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -88,23 +88,24 @@ public func test_frustum(in f32x4x4 mvp, in f32x3 aabb_center, in f32x3 aabb_ext public func test_occlusion( in ScreenAabb screen_aabb, in Image2D hiz_image, - in Sampler hiz_sampler, - in constexpr bool ceiling + in Sampler hiz_sampler ) -> bool { - var hiz_size = u32x2(0.0); + var width: u32; + var height: u32; var hiz_levels = 0; - hiz_image.GetDimensions(0, hiz_size.x, hiz_size.y, hiz_levels); + hiz_image.GetDimensions(0, width, height, hiz_levels); + let hiz_size = f32x2(width, height); - let min_uv = screen_aabb.min.xy; - let max_uv = screen_aabb.max.xy; - let min_texel = u32x2(max(min_uv * f32x2(hiz_size), 0.0)); - let max_texel = u32x2(min(max_uv * f32x2(hiz_size), hiz_size - 1.0)); + let min_uv = screen_aabb.min.xy * hiz_size; + let max_uv = screen_aabb.max.xy * hiz_size; + let min_texel = u32x2(max(min_uv, 0.0)); + let max_texel = u32x2(min(max_uv, hiz_size - 1.0)); let size = max_texel - min_texel; let max_size = max(size.x, size.y); - let mip = max(floor(log2(max_size)) - 1.0, 0.0); + var mip = max(floor(log2(max_size)), 0); - var uv = (min_uv + max_uv) * 0.5; + let uv = ((f32x2(min_texel) + f32x2(max_texel)) * 0.5f) / hiz_size; let d = hiz_image.SampleLevel(hiz_sampler, uv, mip); - return screen_aabb.max.z < d; + return screen_aabb.max.z <= d; } diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 93d02861..5c01b9c8 100644 --- 
a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -36,8 +36,8 @@ func fs_main(VertexOutput input) -> f32x4 { let albedo_color = params.albedo_image.SampleLevel(params.linear_repeat_sampler, input.tex_coord, 0).rgb; let mapped_smooth_normal = params.normal_image.Load(pixel_pos); - let mapped_normal = std::oct_to_vec3(mapped_smooth_normal.xy); - let smooth_normal = std::oct_to_vec3(mapped_smooth_normal.zw); + let mapped_normal = std::octahedral_decode(mapped_smooth_normal.xy); + let smooth_normal = std::octahedral_decode(mapped_smooth_normal.zw); let emission = params.emissive_image.Load(pixel_pos); @@ -55,7 +55,7 @@ func fs_main(VertexOutput input) -> f32x4 { // PBR constants let V = normalize(params.camera.position - world_position); let L = normalize(params.environment.sun_direction); // temp - let N = normalize(mapped_normal); + let N = mapped_normal; var sun_illuminance = f32x3(1.0); var sky_luminance = f32x3(0.1); @@ -102,7 +102,7 @@ func fs_main(VertexOutput input) -> f32x4 { horizon *= horizon; var material_surface_color = f32x3(0.0); - let NoL = max(dot(N, L), 0.0); + let NoL = max(dot(N, L), 0.0001); if (NoL > 0.0) { let brdf = BRDF(V, N, L, albedo_color, roughness, metallic); material_surface_color = brdf * horizon * sun_illuminance * NoL * occlusion; diff --git a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index 35c9f6cc..c04cf883 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -61,7 +61,7 @@ func cs_main( if (LATE && visible && cull_occlusion) { if (let screen_aabb = project_aabb(mvp, camera.near_clip, bounds.aabb_center, bounds.aabb_extent)) { - visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler, false); + visible = !test_occlusion(screen_aabb, hiz_image, hiz_sampler); #ifdef DEBUG_DRAW if (visible) { let ndc_aabb_max = 
screen_aabb.max.xy * 2.0 - 1.0; diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang index d15da907..76a42b04 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang @@ -75,7 +75,7 @@ func cs_main( sum_weight += bottom_right_weight; var denoised_visibility = sum / sum_weight; - let power = 2.0; + let power = 1.5; denoised_visibility = pow(max(denoised_visibility, 0.0f), power); params.ambient_occlusion.Store(pixel_coordinates, denoised_visibility); diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang index 60524b19..3c8b5b3c 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang @@ -7,6 +7,8 @@ import std; import gpu; import scene; +constexpr static let SECTOR_COUNT = 32u; + struct ShaderParameters { ConstantBuffer camera; Texture2D prefiltered_depth; @@ -21,6 +23,7 @@ struct ShaderParameters { f32 fast_sqrt(f32 x) { return (asfloat( 0x1fbd1df5 + ( asint( x ) >> 1 ) )); } + // input [-1, 1] and output [0, PI], from https://seblagarde.wordpress.com/2014/12/01/inverse-trigonometric-functions-gpu-optimization-for-amd-gcn-architecture/ f32 fast_acos(f32 inX) { f32 x = abs(inX); @@ -45,7 +48,7 @@ func load_normal_view_space( in SamplerState sampler_point_clamp ) -> f32x3 { let packed_normals = normals.SampleLevel(sampler_point_clamp, uv, 0.0); - var world_normal = std::oct_to_vec3(packed_normals.zw); + var world_normal = std::octahedral_decode(packed_normals.zw); world_normal = mul(camera.view_mat, f32x4(world_normal, 0.0)).xyz; return normalize(world_normal); } @@ -55,23 +58,12 @@ func reconstruct_view_space_position( f32 depth, in Camera camera ) -> f32x3 { - let clip_xy = f32x2(uv.x * 2.0 - 1.0, 1.0 - 2.0 * uv.y); + let clip_xy = uv * 2.0 - 1.0; let t = 
mul(camera.inv_projection_mat, f32x4(clip_xy, depth, 1.0)); let view_xyz = t.xyz / t.w; return view_xyz; } -func load_and_reconstruct_view_space_position( - f32x2 uv, - f32 sample_mip_level, - in Camera camera, - in Texture2D src_depth, - in SamplerState sampler_linear_clamp -) -> f32x3 { - let depth = src_depth.SampleLevel(sampler_linear_clamp, uv, sample_mip_level).r; - return reconstruct_view_space_position(uv, depth, camera); -} - func calculate_edges( i32x2 pixel_coord, in f32x2 resolution, @@ -107,35 +99,37 @@ func calculate_edges( func updateSectors( f32 min_horizon, f32 max_horizon, - f32 samples_per_slice, u32 bitmask, ) -> u32 { - let start_horizon = u32(min_horizon * samples_per_slice); - let angle_horizon = u32(ceil((max_horizon - min_horizon) * samples_per_slice)); - - return bitfieldInsert(bitmask, 0xFFFFFFFFu, start_horizon, angle_horizon); + let startHorizonInt = uint(min_horizon * float(SECTOR_COUNT)); + // ceil: Sample needs to at least touch a sector to activate it + // round: Sample needs to cover at least half a sector to activate it + // floor: Sample needs to cover the entire sector to activate it + let angleHorizonInt = uint(ceil(saturate(max_horizon - min_horizon) * float(SECTOR_COUNT))); + let angleHorizonBitfield = angleHorizonInt > 0u ? 
(0xFFFFFFFFu >> (SECTOR_COUNT-angleHorizonInt)) : 0u; + let currentOccludedBitfield = angleHorizonBitfield << startHorizonInt; + return bitmask | currentOccludedBitfield; } -func processSample( +func calc_visibility_mask( + u32 bitmask, f32 thickness, f32x3 delta_position, - f32x3 view_vec, + f32x3 view_dir, f32 sampling_direction, f32x2 n, - f32 samples_per_slice, - inout u32 bitmask, -) -> void { - let delta_position_back_face = delta_position - view_vec * thickness; - +) -> u32 { + let delta_position_back_face = delta_position - view_dir * thickness; + var front_back_horizon = f32x2( - fast_acos(dot(normalize(delta_position), view_vec)), - fast_acos(dot(normalize(delta_position_back_face), view_vec)), + fast_acos(dot(normalize(delta_position), view_dir)), + fast_acos(dot(normalize(delta_position_back_face), view_dir)), ); - - front_back_horizon = saturate(fma(f32x2(sampling_direction), -front_back_horizon, n)); + + front_back_horizon = saturate(((sampling_direction * -front_back_horizon) + n + HALF_PI) / PI); front_back_horizon = select(sampling_direction >= 0.0, front_back_horizon.yx, front_back_horizon.xy); - - bitmask = updateSectors(front_back_horizon.x, front_back_horizon.y, samples_per_slice, bitmask); + + return updateSectors(front_back_horizon.x, front_back_horizon.y, bitmask); } [[numthreads(16, 16, 1)]] @@ -144,69 +138,76 @@ func cs_main( uniform ParameterBlock params, ) -> void { let thickness = 0.25; - let slice_count = 3.0; + let slice_count = 9.0; let samples_per_slice_side = 3.0; - let effect_radius = 0.5 * 1.457; - let falloff_range = 0.615 * effect_radius; - let falloff_from = effect_radius * (1.0 - 0.615); - let falloff_mul = -1.0 / falloff_range; - let falloff_add = falloff_from / falloff_range + 1.0; + let depth_range_scale_factor = 0.75; + let default_radius = 0.5; + let default_radius_mul = 1.457; + let default_falloff_range = 0.615; + + let effect_radius = depth_range_scale_factor * default_radius * default_radius_mul; + let falloff_range 
= default_falloff_range * effect_radius; + let falloff_from = effect_radius * (1.0f - default_falloff_range); + let falloff_MUL = -1.0f / falloff_range; + let falloff_add = falloff_from / (falloff_range) + 1.0f; + let mip_sampling_offset = 3.30; let uv = (f32x2(pixel_coordinates) + 0.5) / params.camera.resolution; var pixel_depth = calculate_edges( pixel_coordinates, params.camera.resolution, params.prefiltered_depth, params.point_clamp_sampler, params.depth_differences); pixel_depth += 0.00001; - let pixel_position = reconstruct_view_space_position(uv, pixel_depth, params.camera); - let pixel_normal = load_normal_view_space(uv, params.camera, params.normals, params.point_clamp_sampler); - let view_vec = normalize(-pixel_position); + let origin = reconstruct_view_space_position(uv, pixel_depth, params.camera); + let view_dir = normalize(-origin); + let normal = load_normal_view_space(uv, params.camera, params.normals, params.point_clamp_sampler); + let noise = load_noise(pixel_coordinates, params.hilbert_noise); - let sample_scale = (-0.5 * effect_radius * params.camera.projection_mat[0][0]) / pixel_position.z; + let sample_scale = -(0.5 * effect_radius * params.camera.projection_mat[0][0]) / origin.z; + var visibility = 0.0; - var occluded_sample_count = 0u; for (var slice_t = 0.0; slice_t < slice_count; slice_t += 1.0) { - let slice = slice_t + noise.x; - let phi = (PI / slice_count) * slice; + let slice = (slice_t + noise.x) / slice_count; + let phi = slice * PI; let omega = f32x2(cos(phi), sin(phi)); - + let direction = f32x3(omega.xy, 0.0); - let orthographic_direction = direction - (dot(direction, view_vec) * view_vec); - let axis = cross(direction, view_vec); - let projected_normal = pixel_normal - axis * dot(pixel_normal, axis); - let projected_normal_length = max(length(projected_normal), 1e-6); - + let orthographic_direction = normalize(direction - (dot(direction, view_dir) * view_dir)); + let axis = cross(direction, view_dir); + let projected_normal = 
normal - axis * dot(normal, axis); + let projected_normal_length = length(projected_normal); + let sign_norm = sign(dot(orthographic_direction, projected_normal)); - let cos_norm = saturate(dot(projected_normal, view_vec) / projected_normal_length); - let n = f32x2(((PI * 0.5) - sign_norm * fast_acos(cos_norm)) * (1.0 / PI)); - + let cos_norm = saturate(dot(projected_normal, normal) / projected_normal_length); + let n = sign_norm * fast_acos(cos_norm); + var bitmask = 0u; - let sample_mul = f32x2(omega.x, -omega.y) * sample_scale; for (var sample_t = 0.0; sample_t < samples_per_slice_side; sample_t += 1.0) { - var sample_noise = (slice_t + sample_t * samples_per_slice_side) * 0.6180339887498948482; - sample_noise = fract(noise.y + sample_noise); - - var s = (sample_t + sample_noise) / samples_per_slice_side; - s *= s; // https://github.com/GameTechDev/XeGTAO#sample-distribution - let sample = s * sample_mul; - - let sample_mip_level = clamp(log2(length(sample * params.camera.resolution)) - 3.3, 0.0, 5.0); // https://github.com/GameTechDev/XeGTAO#memory-bandwidth-bottleneck - let sample_position_1 = load_and_reconstruct_view_space_position( - uv + sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); - let sample_position_2 = load_and_reconstruct_view_space_position( - uv - sample, sample_mip_level, params.camera, params.prefiltered_depth, params.linear_clamp_sampler); - - let sample_difference_1 = sample_position_1 - pixel_position; - let sample_difference_2 = sample_position_2 - pixel_position; - - processSample(thickness, sample_difference_1, view_vec, -1.0, n, samples_per_slice_side * 2.0, bitmask); - processSample(thickness, sample_difference_2, view_vec, 1.0, n, samples_per_slice_side * 2.0, bitmask); + var sample = (slice + sample_t * samples_per_slice_side) * 0.6180339887498948482; + sample = fract(noise.y + sample); + + var s = (sample_t + sample) / samples_per_slice_side; + s *= s; + + let sample_offset = s * 
sample_mul; + let sample_screen_pos_1 = uv + sample_offset; + let sample_screen_pos_2 = uv - sample_offset; + + let sample_mip_level = clamp(log2(length(sample_offset * params.camera.resolution)) - mip_sampling_offset, 0.0, 5.0); + let depth_1 = params.prefiltered_depth.SampleLevel(params.linear_clamp_sampler, sample_screen_pos_1, sample_mip_level).r; + let depth_2 = params.prefiltered_depth.SampleLevel(params.linear_clamp_sampler, sample_screen_pos_2, sample_mip_level).r; + + let sample_position_1 = reconstruct_view_space_position(sample_screen_pos_1, depth_1, params.camera); + let sample_position_2 = reconstruct_view_space_position(sample_screen_pos_2, depth_2, params.camera); + + let sample_delta_1 = sample_position_1 - origin; + let sample_delta_2 = sample_position_2 - origin; + bitmask = calc_visibility_mask(bitmask, thickness, sample_delta_1, view_dir, 1.0, n); + bitmask = calc_visibility_mask(bitmask, thickness, sample_delta_2, view_dir, -1.0, n); } - - let bit_count = countbits(bitmask); - occluded_sample_count += bit_count; + + visibility += 1.0 - f32(countbits(bitmask)) / f32(SECTOR_COUNT); } - - visibility = 1.0 - f32(occluded_sample_count) / (slice_count * 2.0 * samples_per_slice_side); - visibility = clamp(visibility, 0.03, 1.0); - params.ambient_occlusion[pixel_coordinates] = visibility; + + let ao = saturate(visibility / slice_count); + params.ambient_occlusion[pixel_coordinates] = ao; } \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang index f38e7b8c..5aa404db 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_prefilter.slang @@ -48,7 +48,7 @@ func cs_main( int2 pixel_coordinates3 = pixel_coordinates0 + int2(1, 1); float2 depths_uv = float2(pixel_coordinates0) / f32x2(depth_extent.xy); - float4 depths = params.src_depth.Gather(params.point_clamp_sampler, depths_uv, 
int2(1,1)); + float4 depths = params.src_depth.GatherRed(params.point_clamp_sampler, depths_uv, int2(1,1)); params.dst_depth_mip0[pixel_coordinates0] = depths.w; params.dst_depth_mip0[pixel_coordinates1] = depths.z; diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index e1453bda..ab57cfd9 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -30,15 +30,6 @@ struct PartialDeriv { f32x3 ddx; f32x3 ddy; f32x3 lambda; - - func gradient_of(f32x2x3 v) -> UVGradient { - UVGradient grad; - grad.uv = mul(this.lambda, v); - grad.ddx = mul(this.ddx, v); - grad.ddy = mul(this.ddy, v); - - return grad; - } }; func compute_partial_derivatives(in f32x4x3 world_positions, in f32x2 uv, in f32x2 resolution) -> PartialDeriv { @@ -73,11 +64,11 @@ func compute_partial_derivatives(in f32x4x3 world_positions, in f32x2 uv, in f32 interp_w * (delta_v.x * result.ddx.y + delta_v.y * result.ddy.y), interp_w * (delta_v.x * result.ddx.z + delta_v.y * result.ddy.z) ); - let two_over_resolution = 2.0 / resolution; - result.ddx *= two_over_resolution.x; - result.ddy *= -two_over_resolution.y; - ddx_sum *= two_over_resolution.x; - ddy_sum *= -two_over_resolution.y; + let inv_resolution = 1.0 / resolution; + result.ddx *= inv_resolution.x; + result.ddy *= -inv_resolution.y; + ddx_sum *= inv_resolution.x; + ddy_sum *= -inv_resolution.y; let interp_ddx_w = 1.0 / (interp_inv_w + ddx_sum); let interp_ddy_w = 1.0 / (interp_inv_w + ddy_sum); @@ -87,8 +78,69 @@ func compute_partial_derivatives(in f32x4x3 world_positions, in f32x2 uv, in f32 return result; } +func prepare_world_normal( + world_normal: f32x3, + double_sided: bool, + is_front: bool, +) -> f32x3 { + var output = world_normal; + // NOTE: When NOT using normal-mapping, if looking at the back face of a double-sided + // material, the normal needs to be inverted. 
This is a branchless version of that. + output = (f32(!double_sided || is_front) * 2.0 - 1.0) * output; + return normalize(output); +} + +func calculate_world_tangent( + world_normal: f32x3, + ddx_world_position: f32x3, + ddy_world_position: f32x3, + ddx_uv: f32x2, + ddy_uv: f32x2, +) -> f32x4 { + let ddx_world_position_s = ddx_world_position - dot(ddx_world_position, world_normal) * world_normal; + let ddy_world_position_s = ddy_world_position - dot(ddy_world_position, world_normal) * world_normal; + let jacobian_sign = sign(ddx_uv.x * ddy_uv.y - ddx_uv.y * ddy_uv.x); + var world_tangent = jacobian_sign * (ddy_uv.y * ddx_world_position_s - ddx_uv.y * ddy_world_position_s); + if (jacobian_sign != 0.0) { + world_tangent = normalize(world_tangent); + } + + let w = jacobian_sign * sign(dot(ddy_world_position, cross(world_normal, ddx_world_position))); + return f32x4(world_tangent, -w); +} + +func calculate_tbn_mikktspace(world_normal: f32x3, world_tangent: f32x4) -> f32x3x3 { + // NOTE: The mikktspace method of normal mapping explicitly requires that the world normal NOT + // be re-normalized in the fragment shader. This is primarily to match the way mikktspace + // bakes vertex tangents and normal maps so that this is the exact inverse. Blender, Unity, + // Unreal Engine, Godot, and more all use the mikktspace method. Do not change this code + // unless you really know what you are doing. + // http://www.mikktspace.com/ + var N = world_normal; + + // NOTE: The mikktspace method of normal mapping explicitly requires that these NOT be + // normalized nor any Gram-Schmidt applied to ensure the vertex normal is orthogonal to the + // vertex tangent! Do not change this code unless you really know what you are doing. 
+ // http://www.mikktspace.com/ + var T = world_tangent.xyz; + var B = world_tangent.w * cross(N, T); + +#if 1 + // https://www.jeremyong.com/graphics/2023/12/16/surface-gradient-bump-mapping/#a-note-on-mikktspace-usage + let inverse_length_n = 1.0 / length(N); + T *= inverse_length_n; + B *= inverse_length_n; + N *= inverse_length_n; +#endif + + return f32x3x3(T, B, N); +} + [[shader("fragment")]] -func fs_main(VertexOutput input) -> FragmentOutput { +func fs_main( + VertexOutput input, + bool is_front : SV_IsFrontFace +) -> FragmentOutput { let texel = params.visbuffer.Load(u32x3(u32x2(input.position.xy), 0)); if (texel == ~0u) { discard; @@ -113,84 +165,72 @@ func fs_main(VertexOutput input) -> FragmentOutput { let positions = meshlet.positions(mesh, indices); let normals = meshlet.normals(mesh, indices); - let tex_coords = meshlet.tex_coords(mesh, indices); let world_positions = transform.to_world_positions(positions); let NDC = f32x3(input.tex_coord * 2.0 - 1.0, 1.0); let deriv = compute_partial_derivatives(world_positions, NDC.xy, params.camera.resolution); - let tex_coord_grad = deriv.gradient_of(tex_coords); + + let tex_coords = meshlet.tex_coords(mesh, indices); + let uv = mul(deriv.lambda, tex_coords); + let uv_ddx = mul(deriv.ddx, tex_coords); + let uv_ddy = mul(deriv.ddy, tex_coords); // ALBEDO ─────────────────────────────────────────────────────────── - output.albedo_color = material.sample_albedo_color(tex_coord_grad); + output.albedo_color = material.sample_albedo_color(uv, uv_ddx, uv_ddy); // NORMALS ────────────────────────────────────────────────────────── - const f32x3x3 camera_relative_world_positions = { + let camera_relative_world_positions = f32x3x3( world_positions[0].xyz - params.camera.position, world_positions[1].xyz - params.camera.position, world_positions[2].xyz - params.camera.position, - }; - const f32x3 pos_ddx = mul(deriv.ddx, camera_relative_world_positions); - const f32x3 pos_ddy = mul(deriv.ddy, 
camera_relative_world_positions); - - const f32x3x3 world_normals = transform.to_world_normals(normals); - const f32x3 world_normal = mul(deriv.lambda, world_normals); - - const f32x3 pos_ddx_s = pos_ddx - dot(pos_ddx, world_normal) * world_normal; - const f32x3 pos_ddy_s = pos_ddy - dot(pos_ddy, world_normal) * world_normal; - const f32x2 uv_ddx = tex_coord_grad.ddx; - const f32x2 uv_ddy = tex_coord_grad.ddy; - - const i32 jacobian_sign = sign(uv_ddx.x * uv_ddy.y - uv_ddx.y * uv_ddy.x); - f32x3 tangent = jacobian_sign * (uv_ddy.y * pos_ddx_s - uv_ddx.y * pos_ddy_s); - if (jacobian_sign != 0.0) { - tangent = normalize(tangent); - } - const f32 w = jacobian_sign * sign(dot(pos_ddy, cross(world_normal, pos_ddx))); - f32x3 bitangent = -w * cross(world_normal, tangent); - f32x3 normal = world_normal; - -#if 1 - const f32 inv_len_N = 1.0 / length(normal); - tangent *= inv_len_N; - bitangent *= inv_len_N; - normal *= inv_len_N; -#endif - - if (material.flags & MaterialFlag::HasNormalImage) { - f32x3 sampled_normal = material.sample_normal_color(tex_coord_grad); - // NOTE: This is here to convert tangent spaces to our correct spaces. - // for example, normal map's R component is +Y for our world. @grok is this true? 
- sampled_normal = f32x3(sampled_normal.y, sampled_normal.x, sampled_normal.z); - + ); + let pos_ddx = mul(deriv.ddx, camera_relative_world_positions); + let pos_ddy = mul(deriv.ddy, camera_relative_world_positions); + + let double_sided = false; + + let world_normals = transform.to_world_normals(normals); + var world_normal = mul(deriv.lambda, world_normals); + var normal = prepare_world_normal(world_normal, double_sided, is_front); + + if ((material.flags & MaterialFlag::HasNormalImage) != 0) { + let sampled_normal = material.sample_normal_color(uv, uv_ddx, uv_ddy); + let world_tangent = calculate_world_tangent(world_normal, pos_ddx, pos_ddy, uv_ddx, uv_ddy); + let TBN = calculate_tbn_mikktspace(normal, world_tangent); + let T = TBN[0]; + let B = TBN[1]; + let N = TBN[2]; + var Nt = sampled_normal; if (material.flags & MaterialFlag::NormalTwoComponent) { - sampled_normal = f32x3(sampled_normal.yx * 2.0 - 1.0, 0.0); - sampled_normal.z = sqrt(1.0 - sampled_normal.x * sampled_normal.x - sampled_normal.y * sampled_normal.y); + Nt = f32x3(Nt.rg * 2.0 - 1.0, 0.0); + Nt.z = sqrt(1.0 - Nt.x * Nt.x - Nt.y * Nt.y); } else { - sampled_normal = sampled_normal * 2.0 - 1.0; + Nt = Nt * 2.0 - 1.0; + } + + if (true || material.flags & MaterialFlag::NormalFlipY) { + Nt.y = -Nt.y; } - if (material.flags & MaterialFlag::NormalFlipY) { - sampled_normal.y = -sampled_normal.y; + if (double_sided && !is_front) { + Nt = -Nt; } - normal = sampled_normal.x * tangent - + sampled_normal.y * bitangent - + sampled_normal.z * normal; - normal = normalize(normal); + normal = normalize(Nt.x * T + Nt.y * B + Nt.z * N); } - output.normal_color.xy = std::vec3_to_oct(normal); - output.normal_color.zw = std::vec3_to_oct(world_normal); + output.normal_color.xy = std::octahedral_encode(normal); + output.normal_color.zw = std::octahedral_encode(world_normal); // EMISSION ───────────────────────────────────────────────────────── - output.emission_color = material.sample_emissive_color(tex_coord_grad); + 
output.emission_color = material.sample_emissive_color(uv, uv_ddx, uv_ddy); // METALLIC ROUGHNESS ─────────────────────────────────────────────── - const f32x2 metallic_roughness_color = material.sample_metallic_roughness(tex_coord_grad); + const f32x2 metallic_roughness_color = material.sample_metallic_roughness(uv, uv_ddx, uv_ddy); output.metallic_roughness_occlusion_color.xy = metallic_roughness_color; // AMBIENT OCCLUSION ──────────────────────────────────────────────── - const f32 occlusion_color = material.sample_occlusion_color(tex_coord_grad); + const f32 occlusion_color = material.sample_occlusion_color(uv, uv_ddx, uv_ddy); output.metallic_roughness_occlusion_color.z = occlusion_color; return output; -} +} \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang index 29315b79..96a91abd 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_encode.slang @@ -57,11 +57,10 @@ func fs_main(VertexOutput input) -> u32 { #if 1 let material = params.materials[input.material_index]; if (material.flags & MaterialFlag::HasAlbedoImage) { - UVGradient grad; - grad.uv = input.tex_coord; - grad.ddx = ddx(input.tex_coord); - grad.ddy = ddy(input.tex_coord); - let alpha_color = material.sample_albedo_color(grad).a; + let uv = input.tex_coord; + let uv_ddx = ddx(input.tex_coord); + let uv_ddy = ddy(input.tex_coord); + let alpha_color = material.sample_albedo_color(uv, uv_ddx, uv_ddy).a; // We are doing deferred, blend alpha mode is not supported in this pass. 
if (alpha_color < clamp(material.alpha_cutoff, 0.001, 1.0)) { diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index d66b697a..7e991436 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -111,12 +111,6 @@ public struct Transform { } }; -public struct UVGradient { - public f32x2 uv; - public f32x2 ddx; - public f32x2 ddy; -}; - [[vk::binding(0, 1)]] Sampler bindless_samplers[]; [[vk::binding(1, 1)]] @@ -155,46 +149,46 @@ public struct Material { public u32 metallic_roughness_image_index = 0; public u32 occlusion_image_index = 0; - public func sample_albedo_color(in UVGradient grad) -> f32x4 { + public func sample_albedo_color(f32x2 uv, f32x2 ddx, f32x2 ddy) -> f32x4 { if (this.flags & MaterialFlag::HasAlbedoImage) { let color = bindless_images[this.albedo_image_index] - .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy); + .SampleGrad(bindless_samplers[this.sampler_index], uv, ddx, ddy); return this.albedo_color * color; } return this.albedo_color; } - public func sample_normal_color(in UVGradient grad) -> f32x3 { + public func sample_normal_color(f32x2 uv, f32x2 ddx, f32x2 ddy) -> f32x3 { return bindless_images[this.normal_image_index] - .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; + .SampleGrad(bindless_samplers[this.sampler_index], uv, ddx, ddy).rgb; } - public func sample_emissive_color(in UVGradient grad) -> f32x3 { + public func sample_emissive_color(f32x2 uv, f32x2 ddx, f32x2 ddy) -> f32x3 { if (this.flags & MaterialFlag::HasEmissiveImage) { let color = bindless_images[this.emissive_image_index] - .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).rgb; + .SampleGrad(bindless_samplers[this.sampler_index], uv, ddx, ddy).rgb; return this.emissive_color * color; } return this.emissive_color; } - public func sample_metallic_roughness(in UVGradient grad) -> f32x2 { 
+ public func sample_metallic_roughness(f32x2 uv, f32x2 ddx, f32x2 ddy) -> f32x2 { let metallic_roughness = f32x2(this.metallic_factor, this.roughness_factor); if (this.flags & MaterialFlag::HasMetallicRoughnessImage) { let color = bindless_images[this.metallic_roughness_image_index] - .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).bg; + .SampleGrad(bindless_samplers[this.sampler_index], uv, ddx, ddy).bg; return metallic_roughness * color; } return metallic_roughness; } - public func sample_occlusion_color(in UVGradient grad) -> f32 { + public func sample_occlusion_color(f32x2 uv, f32x2 ddx, f32x2 ddy) -> f32 { if (this.flags & MaterialFlag::HasOcclusionImage) { return bindless_images[this.occlusion_image_index] - .SampleGrad(bindless_samplers[this.sampler_index], grad.uv, grad.ddx, grad.ddy).r; + .SampleGrad(bindless_samplers[this.sampler_index], uv, ddx, ddy).r; } return 1.0; diff --git a/Lorr/Engine/Resources/shaders/std/encoding.slang b/Lorr/Engine/Resources/shaders/std/encoding.slang index c7a9a8b6..e471bb7f 100644 --- a/Lorr/Engine/Resources/shaders/std/encoding.slang +++ b/Lorr/Engine/Resources/shaders/std/encoding.slang @@ -4,20 +4,24 @@ public namespace std { // Unit vectors octahedral encoding // "Survey of Efficient Representations for Independent Unit Vectors" // http://jcgt.org/published/0003/02/01 -public func oct_to_vec3(f32x2 e) -> f32x3 { - f32x3 v = f32x3(e.xy, 1.0 - abs(e.x) - abs(e.y)); - const let sign_not_zero = f32x2((v.x >= 0.0) ? +1.0 : -1.0, (v.y >= 0.0) ? 
+1.0 : -1.0); - if (v.z < 0.0) { - v.xy = (1.0 - abs(v.yx)) * sign_not_zero; - } +func octahedral_decode_signed(f32x2 v) -> f32x3 { + var n = f32x3(v.xy, 1.0 - abs(v.x) - abs(v.y)); + let t = saturate(-n.z); + let w = select(n.xy >= 0.0, f32x2(-t), f32x2(t)); + n = f32x3(n.xy + w, n.z); + return normalize(n); +} - return normalize(v); +public func octahedral_decode(f32x2 v) -> f32x3 { + let f = v * 2.0 - 1.0; + return octahedral_decode_signed(f); } -public func vec3_to_oct(f32x3 v) -> f32x2 { - const let p = v.xy * (1.0 / (abs(v.x) + abs(v.y) + abs(v.z))); - const let sign_not_zero = f32x2((p.x >= 0.0) ? 1.0 : -1.0, (p.y >= 0.0) ? 1.0 : -1.0); - return (v.z <= 0.0) ? ((1.0 - abs(p.yx)) * sign_not_zero) : p; +public func octahedral_encode(f32x3 v) -> f32x2 { + var n = v / (abs(v.x) + abs(v.y) + abs(v.z)); + let octahedral_wrap = (1.0 - abs(n.yx)) * select(n.xy > f32x2(0.0), f32x2(1.0), f32x2(-1.0)); + let n_xy = select(n.z >= 0.0, n.xy, octahedral_wrap); + return n_xy * 0.5 + 0.5; } } diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index add68f9c..92475f08 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -205,7 +205,7 @@ struct MeshLOD { }; struct Mesh { - constexpr static auto MAX_LODS = 8_sz; + constexpr static auto MAX_LODS = 1_sz; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 6a1132a8..348504a0 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -1168,8 +1168,8 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent.width + 63_u32) & ~63_u32, - .height = (dst_attachment->extent.height + 63_u32) & ~63_u32, + .width = (dst_attachment->extent.width + 1) >> 1, + .height = (dst_attachment->extent.height + 1) >> 1, .depth = 1, }; @@ -1580,6 +1580,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value Date: Sun, 7 Sep 2025 
01:13:57 +0300 Subject: [PATCH 06/16] tiny bit less aggressive occlusion culling --- Lorr/Engine/Resources/shaders/cull.slang | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lorr/Engine/Resources/shaders/cull.slang b/Lorr/Engine/Resources/shaders/cull.slang index d4b97ad1..074f7f43 100644 --- a/Lorr/Engine/Resources/shaders/cull.slang +++ b/Lorr/Engine/Resources/shaders/cull.slang @@ -107,5 +107,6 @@ public func test_occlusion( let uv = ((f32x2(min_texel) + f32x2(max_texel)) * 0.5f) / hiz_size; let d = hiz_image.SampleLevel(hiz_sampler, uv, mip); - return screen_aabb.max.z <= d; + constexpr static let EPSILON = 1e-7; + return screen_aabb.max.z <= d - EPSILON; } From 14618d50268d6a4bf8f41b884239493b94f19b0f Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 7 Sep 2025 14:22:36 +0300 Subject: [PATCH 07/16] add vbgtao settings --- .../shaders/passes/vbgtao_denoise.slang | 4 +-- .../shaders/passes/vbgtao_generate.slang | 33 ++++++++----------- Lorr/Engine/Resources/shaders/scene.slang | 9 +++-- Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 11 +++++++ Lorr/Engine/Scene/GPUScene.hh | 13 +++++--- Lorr/Engine/Scene/Scene.cc | 22 ++++++++++++- Lorr/Engine/Scene/SceneRenderer.cc | 10 ++++-- Lorr/Engine/Scene/SceneRenderer.hh | 2 ++ 8 files changed, 70 insertions(+), 34 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang index 76a42b04..ab1bc591 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_denoise.slang @@ -14,7 +14,8 @@ struct ShaderParameters { func cs_main( u32x2 thread_id : SV_DispatchThreadID, uniform ParameterBlock params, - uniform i32x3 occlusion_noisy_extent + uniform i32x3 occlusion_noisy_extent, + uniform f32 power ) -> void { let pixel_coordinates = i32x2(thread_id.xy); let uv = f32x2(pixel_coordinates) / f32x2(occlusion_noisy_extent.xy); 
@@ -75,7 +76,6 @@ func cs_main( sum_weight += bottom_right_weight; var denoised_visibility = sum / sum_weight; - let power = 1.5; denoised_visibility = pow(max(denoised_visibility, 0.0f), power); params.ambient_occlusion.Store(pixel_coordinates, denoised_visibility); diff --git a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang index 3c8b5b3c..b8022ac4 100644 --- a/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang +++ b/Lorr/Engine/Resources/shaders/passes/vbgtao_generate.slang @@ -134,23 +134,13 @@ func calc_visibility_mask( [[numthreads(16, 16, 1)]] func cs_main( - const uint2 pixel_coordinates : SV_DispatchThreadID, + const uint2 pixel_coordinates : SV_DispatchThreadID, uniform ParameterBlock params, + uniform VBGTAO settings ) -> void { - let thickness = 0.25; - let slice_count = 9.0; - let samples_per_slice_side = 3.0; - let depth_range_scale_factor = 0.75; - let default_radius = 0.5; - let default_radius_mul = 1.457; - let default_falloff_range = 0.615; - - let effect_radius = depth_range_scale_factor * default_radius * default_radius_mul; - let falloff_range = default_falloff_range * effect_radius; - let falloff_from = effect_radius * (1.0f - default_falloff_range); - let falloff_MUL = -1.0f / falloff_range; - let falloff_add = falloff_from / (falloff_range) + 1.0f; + let effect_radius = settings.depth_range_scale_factor * settings.radius * settings.radius_multiplier; let mip_sampling_offset = 3.30; + let inv_far = 1.0 / params.camera.far_clip; let uv = (f32x2(pixel_coordinates) + 0.5) / params.camera.resolution; var pixel_depth = calculate_edges( @@ -160,12 +150,15 @@ func cs_main( let view_dir = normalize(-origin); let normal = load_normal_view_space(uv, params.camera, params.normals, params.point_clamp_sampler); + let linear_depth = -origin.z * inv_far; + let thickness = settings.thickness * saturate(linear_depth) * settings.linear_thickness_multiplier; + let noise = 
load_noise(pixel_coordinates, params.hilbert_noise); let sample_scale = -(0.5 * effect_radius * params.camera.projection_mat[0][0]) / origin.z; var visibility = 0.0; - for (var slice_t = 0.0; slice_t < slice_count; slice_t += 1.0) { - let slice = (slice_t + noise.x) / slice_count; + for (var slice_t = 0.0; slice_t < settings.slice_count; slice_t += 1.0) { + let slice = (slice_t + noise.x) / settings.slice_count; let phi = slice * PI; let omega = f32x2(cos(phi), sin(phi)); @@ -181,11 +174,11 @@ func cs_main( var bitmask = 0u; let sample_mul = f32x2(omega.x, -omega.y) * sample_scale; - for (var sample_t = 0.0; sample_t < samples_per_slice_side; sample_t += 1.0) { - var sample = (slice + sample_t * samples_per_slice_side) * 0.6180339887498948482; + for (var sample_t = 0.0; sample_t < settings.sample_count_per_slice; sample_t += 1.0) { + var sample = (slice + sample_t * settings.sample_count_per_slice) * 0.6180339887498948482; sample = fract(noise.y + sample); - var s = (sample_t + sample) / samples_per_slice_side; + var s = (sample_t + sample) / settings.sample_count_per_slice; s *= s; let sample_offset = s * sample_mul; @@ -208,6 +201,6 @@ func cs_main( visibility += 1.0 - f32(countbits(bitmask)) / f32(SECTOR_COUNT); } - let ao = saturate(visibility / slice_count); + let ao = saturate(visibility / settings.slice_count); params.ambient_occlusion[pixel_coordinates] = ao; } \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index 7e991436..fc151dd1 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -313,7 +313,10 @@ public struct Light { public struct VBGTAO { public f32 thickness; public f32 depth_range_scale_factor; - public f32 default_radius; - public f32 default_radius_multiplier; - public f32 falloff_range; + public f32 radius; + public f32 radius_multiplier; + public f32 slice_count; + public f32 sample_count_per_slice; + public f32 
denoise_power; + public f32 linear_thickness_multiplier; }; diff --git a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index f50aca96..20a31f61 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -65,4 +65,15 @@ ECS_COMPONENT_BEGIN(Environment) ECS_COMPONENT_MEMBER(eye_k, f32, 12.5f) ECS_COMPONENT_END(); +ECS_COMPONENT_BEGIN(VBGTAO) + ECS_COMPONENT_MEMBER(thickness, f32, 0.25f) + ECS_COMPONENT_MEMBER(depth_range_scale_factor, f32, 0.75f) + ECS_COMPONENT_MEMBER(radius, f32, 0.5f) + ECS_COMPONENT_MEMBER(radius_multiplier, f32, 1.457f) + ECS_COMPONENT_MEMBER(slice_count, f32, 3.0f) + ECS_COMPONENT_MEMBER(sample_count_per_slice, f32, 3.0f) + ECS_COMPONENT_MEMBER(denoise_power, f32, 1.1f) + ECS_COMPONENT_MEMBER(linear_thickness_multiplier, f32, 300.0f) +ECS_COMPONENT_END(); + // clang-format on diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 92475f08..b8c36a96 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -226,11 +226,14 @@ struct HistogramLuminance { }; struct VBGTAO { - alignas(4) f32 thickness = {}; - alignas(4) f32 depth_range_scale_factor = {}; - alignas(4) f32 default_radius = {}; - alignas(4) f32 default_radius_multiplier = {}; - alignas(4) f32 falloff_range = {}; + alignas(4) f32 thickness = 0.25f; + alignas(4) f32 depth_range_scale_factor = 0.75f; + alignas(4) f32 radius = 0.5f; + alignas(4) f32 radius_multiplier = 1.457f; + alignas(4) f32 slice_count = 3.0f; + alignas(4) f32 sample_count_per_slice = 3.0f; + alignas(4) f32 denoise_power = 1.1f; + alignas(4) f32 linear_thickness_multiplier = 300.0f; }; } // namespace lr::GPU diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 959628e4..3964cd20 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -587,6 +587,9 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, 
ls::option< auto environment_query = self.get_world() .query_builder() .build(); + auto vbgtao_query = self.get_world() + .query_builder() + .build(); // clang-format on ls::option active_camera_data = override_camera; @@ -663,6 +666,22 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< regenerate_sky |= self.last_environment.atmos_planet_radius != environment.atmos_planet_radius; self.last_environment = environment; + auto vbgtao = ls::option(); + vbgtao_query.each([&vbgtao](flecs::entity, ECS::VBGTAO &vbgtao_comp) { + vbgtao.emplace( + GPU::VBGTAO{ + .thickness = vbgtao_comp.thickness, + .depth_range_scale_factor = vbgtao_comp.depth_range_scale_factor, + .radius = vbgtao_comp.radius, + .radius_multiplier = vbgtao_comp.radius_multiplier, + .slice_count = glm::ceil(vbgtao_comp.slice_count), + .sample_count_per_slice = glm::ceil(vbgtao_comp.sample_count_per_slice), + .denoise_power = vbgtao_comp.denoise_power, + .linear_thickness_multiplier = vbgtao_comp.linear_thickness_multiplier, + } + ); + }); + auto meshlet_instance_visibility_offset = 0_u32; auto max_meshlet_instance_count = 0_u32; auto gpu_meshes = std::vector(); @@ -743,7 +762,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< flags |= metallic_roughness_image_index.has_value() ? GPU::MaterialFlag::HasMetallicRoughnessImage : GPU::MaterialFlag::None; flags |= occlusion_image_index.has_value() ? 
GPU::MaterialFlag::HasOcclusionImage : GPU::MaterialFlag::None; - auto gpu_material = GPU::Material { + auto gpu_material = GPU::Material{ .albedo_color = material->albedo_color, .emissive_color = material->emissive_color, .roughness_factor = material->roughness_factor, @@ -773,6 +792,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< .gpu_mesh_instances = gpu_mesh_instances, .environment = environment, .camera = active_camera_data.value_or(GPU::Camera{}), + .vbgtao = vbgtao, }; auto prepared_frame = renderer.prepare_frame(prepare_info); diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 348504a0..74b98012 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -521,6 +521,8 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in self.sky_multiscatter_lut_view.acquire(device, "sky multiscatter lut", vuk::ImageUsageFlagBits::eSampled, vuk::Access::eComputeSampled); } + prepared_frame.vbgtao = info.vbgtao; + return prepared_frame; } @@ -1571,7 +1573,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value); - if (true) { + if (frame.vbgtao.has_value()) { auto vbgtao_prefilter_pass = vuk::make_pass( "vbgtao prefilter", [](vuk::CommandBuffer &command_buffer, // @@ -1617,7 +1619,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valuedenoise_power](vuk::CommandBuffer &command_buffer, // VUK_IA(vuk::eComputeSampled) noisy_occlusion, VUK_IA(vuk::eComputeSampled) depth_differences, VUK_IA(vuk::eComputeRW) ambient_occlusion) { @@ -1706,6 +1709,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent, power)) .dispatch_invocations_per_pixel(ambient_occlusion); return ambient_occlusion; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 1eab089a..78d563eb 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -21,6 +21,7 
@@ struct FramePrepareInfo { GPU::Environment environment = {}; GPU::Camera camera = {}; + ls::option vbgtao = {}; }; struct PreparedFrame { @@ -37,6 +38,7 @@ struct PreparedFrame { vuk::Value directional_camera_buffer = {}; vuk::Value sky_transmittance_lut = {}; vuk::Value sky_multiscatter_lut = {}; + ls::option vbgtao = {}; }; struct SceneRenderInfo { From fa78c1f367b68b13efd592205758a4d4648e815a Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Tue, 9 Sep 2025 13:59:47 +0300 Subject: [PATCH 08/16] sky atmos cleanup --- Lorr/Engine/Graphics/Slang/Compiler.cc | 2 +- Lorr/Engine/Graphics/Vulkan/Device.cc | 2 + .../Resources/shaders/passes/brdf.slang | 5 +- .../shaders/passes/cull_meshlets.slang | 9 +- .../shaders/passes/cull_triangles.slang | 16 +- .../passes/sky_aerial_perspective.slang | 12 +- .../Resources/shaders/passes/sky_final.slang | 63 ++--- .../shaders/passes/sky_multiscattering.slang | 65 +++-- .../shaders/passes/sky_transmittance.slang | 2 +- .../Resources/shaders/passes/sky_view.slang | 34 ++- Lorr/Engine/Resources/shaders/sky.slang | 243 +++++++----------- Lorr/Engine/Resources/shaders/std/math.slang | 18 +- Lorr/Engine/Scene/ECSModule/CoreComponents.hh | 1 + Lorr/Engine/Scene/Scene.cc | 8 +- Lorr/Engine/Scene/SceneRenderer.cc | 4 +- Lorr/Engine/Scene/SceneRenderer.hh | 2 +- xmake/packages.lua | 2 +- xmake/repo/packages/s/shader-slang/xmake.lua | 2 + 18 files changed, 239 insertions(+), 251 deletions(-) diff --git a/Lorr/Engine/Graphics/Slang/Compiler.cc b/Lorr/Engine/Graphics/Slang/Compiler.cc index f856cdac..a81a7ad1 100644 --- a/Lorr/Engine/Graphics/Slang/Compiler.cc +++ b/Lorr/Engine/Graphics/Slang/Compiler.cc @@ -358,7 +358,7 @@ auto SlangCompiler::new_session(const SlangSessionInfo &info) -> ls::optionglobal_session->findProfile("spirv_1_5"), .flags = SLANG_TARGET_FLAG_GENERATE_SPIRV_DIRECTLY, - .floatingPointMode = SLANG_FLOATING_POINT_MODE_FAST, + //.floatingPointMode = SLANG_FLOATING_POINT_MODE_FAST, 
.lineDirectiveMode = SLANG_LINE_DIRECTIVE_MODE_STANDARD, .forceGLSLScalarBufferLayout = true, .compilerOptionEntries = entries, diff --git a/Lorr/Engine/Graphics/Vulkan/Device.cc b/Lorr/Engine/Graphics/Vulkan/Device.cc index 6d22f658..3227116e 100644 --- a/Lorr/Engine/Graphics/Vulkan/Device.cc +++ b/Lorr/Engine/Graphics/Vulkan/Device.cc @@ -138,6 +138,7 @@ auto Device::init(this Device &self) -> bool { vk12_features.vulkanMemoryModel = true; vk12_features.vulkanMemoryModelDeviceScope = true; vk12_features.storageBuffer8BitAccess = true; + vk12_features.uniformAndStorageBuffer8BitAccess = true; vk12_features.scalarBlockLayout = true; vk12_features.shaderInt8 = true; vk12_features.shaderSubgroupExtendedTypes = true; @@ -154,6 +155,7 @@ auto Device::init(this Device &self) -> bool { vk10_features.features.vertexPipelineStoresAndAtomics = true; vk10_features.features.fragmentStoresAndAtomics = true; vk10_features.features.shaderInt64 = true; + vk10_features.features.shaderInt16 = true; vk10_features.features.multiDrawIndirect = true; vk10_features.features.samplerAnisotropy = true; vk10_features.features.shaderImageGatherExtended = true; diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/brdf.slang index 5c01b9c8..8d335366 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/brdf.slang @@ -79,11 +79,8 @@ func fs_main(VertexOutput input) -> f32x4 { sky_info.eye_dir = up_vec; sky_info.sun_dir = L; sky_info.sun_intensity = params.environment.sun_intensity; - sky_info.sampling.variable_sample_count = true; - sky_info.sampling.min_sample_count = 1; - sky_info.sampling.max_sample_count = 4; + sky_info.step_count = 1; sky_info.eval_multiscattering = true; - sky_info.eval_mie_phase = false; let sky_result = integrate_single_scattered_luminance( sky_info, params.environment, params.linear_clamp_sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); diff --git 
a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang index c04cf883..873f2ac2 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_meshlets.slang @@ -27,11 +27,12 @@ import debug_drawer; [[shader("compute")]] [[numthreads(CULLING_MESHLET_COUNT, 1, 1)]] func cs_main( - uint3 thread_id : SV_DispatchThreadID, + uint group_thread_id : SV_GroupThreadID, + uint global_thread_id : SV_DispatchThreadID, uniform CullFlags cull_flags ) -> void { let meshlet_instance_count = visible_meshlet_instances_count[2]; - let meshlet_instance_index = thread_id.x; + let meshlet_instance_index = global_thread_id; if (meshlet_instance_index >= meshlet_instance_count) { return; } @@ -94,9 +95,9 @@ func cs_main( if (LATE) { if (visible) { - __atomic_or(meshlet_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::AcquireRelease); + __atomic_or(meshlet_instance_visibility_mask[mask_index], visibility_bit, MemoryOrder::Relaxed); } else { - __atomic_and(meshlet_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::AcquireRelease); + __atomic_and(meshlet_instance_visibility_mask[mask_index], ~visibility_bit, MemoryOrder::Relaxed); } } } diff --git a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang index 8c6da6aa..4d885696 100644 --- a/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang +++ b/Lorr/Engine/Resources/shaders/passes/cull_triangles.slang @@ -20,6 +20,7 @@ import passes.visbuffer; groupshared u32 base_index_shared; groupshared u32 triangles_passed_shared; groupshared mat4 model_view_proj_shared; +groupshared u32 meshlet_triangle_count_shared; // https://github.com/GPUOpen-Effects/GeometryFX/blob/master/amd_geometryfx/src/Shaders/AMD_GeometryFX_Filtering.hlsl // Parameters: vertices in UV space, viewport extent @@ -123,12 +124,15 @@ func cs_main( let 
meshlet_instance_index = visible_meshlet_instances_indices[visible_meshlet_instance_index]; let meshlet_instance = meshlet_instances[meshlet_instance_index]; let mesh_instance = mesh_instances[meshlet_instance.mesh_instance_index]; - let mesh = meshes[mesh_instance.mesh_index]; - let mesh_lod = mesh.lods[mesh_instance.lod_index]; - let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; if (local_index == 0) { triangles_passed_shared = 0; + + let mesh = meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; + meshlet_triangle_count_shared = meshlet.triangle_count; + let transform = transforms[mesh_instance.transform_index]; model_view_proj_shared = mul(camera.projection_view_mat, transform.world); } @@ -137,7 +141,11 @@ func cs_main( var triangle_passed = false; var active_triangle_index = 0; - if (local_index < meshlet.triangle_count) { + if (local_index < meshlet_triangle_count_shared) { + let mesh = meshes[mesh_instance.mesh_index]; + let mesh_lod = mesh.lods[mesh_instance.lod_index]; + let meshlet = mesh_lod.meshlets[meshlet_instance.meshlet_index]; + let indices = meshlet.indices(mesh_lod, local_index); let positions = meshlet.positions(mesh, indices); triangle_passed = test_triangle(positions, camera.resolution, cull_flags, local_index); diff --git a/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang b/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang index a6f4f080..4f5ee6b7 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_aerial_perspective.slang @@ -74,14 +74,14 @@ func cs_main( info.eye_dir = world_dir; info.sun_dir = params.environment.sun_direction; info.sun_intensity = params.environment.sun_intensity; - info.max_integration_length = t_max_max; info.eval_planet_luminance = false; info.eval_multiscattering = true; + info.step_count = 
max(1.0, (f32(thread_id.z) + 1.0) * 2.0); - info.sampling.variable_sample_count = false; - info.sampling.initial_sample_count = max(1.0, (f32(thread_id.z) + 1.0) * 2.0); + let result = integrate_single_scattered_luminance( + info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); + let inv_luminance = 1.0 / max(result.luminance, float3(1.0 / 1048576.0)); + let inv_mult = min(1048576.0, max(inv_luminance.x, max(inv_luminance.y, inv_luminance.z))); - let result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); - let transmittance = dot(result.transmittance, f32x3(1.0f / 3.0f)); - params.sky_aerial_perspective_lut.Store(thread_id, f32x4(result.luminance, transmittance)); + params.sky_aerial_perspective_lut.Store(thread_id, f32x4(result.luminance * inv_mult, 1.0 / inv_mult)); } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_final.slang b/Lorr/Engine/Resources/shaders/passes/sky_final.slang index d0df5b48..6ab0a460 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_final.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_final.slang @@ -30,17 +30,18 @@ func vs_main(u32 vertex_index : SV_VertexID) -> VertexOutput { } func draw_sun(f32x3 world_dir, f32x3 sun_dir, f32 radius) -> f32x3 { - const let min_cos_theta = cos(radius * PI / 180.0); - - const let cosTheta = dot(world_dir, sun_dir); - if (cosTheta >= min_cos_theta) { - return 1.0; - } + let min_cos_theta = cos(radius * PI / 250.0); + let cosTheta = dot(world_dir, sun_dir); + return cosTheta >= min_cos_theta ? 
1.0 : 0.0; +} - const let offset = min_cos_theta - cosTheta; - const let gaussianBloom = exp(-offset * 50000.0) * 0.5; - const let invBloom = 1.0 / (0.02 + offset * 300.0) * 0.01; - return f32x3(gaussianBloom + invBloom); +func build_orthonormal_basis(f32x3 z_basis) -> f32x3x3 { + let sign = copysign(1.0, z_basis.z); + let a = -1.0 / (sign + z_basis.z); + let b = z_basis.x * z_basis.y * a; + let x_basis = f32x3(1.0 + sign * z_basis.x * z_basis.x * a, sign * b, -sign * z_basis.x); + let y_basis = f32x3(b, sign + z_basis.y * z_basis.y * a, -z_basis.y); + return f32x3x3(x_basis, y_basis, z_basis); } [[shader("fragment")]] @@ -65,41 +66,43 @@ func fs_main( params.environment.atmos_aerial_perspective_start_km); } - let eye_dir = normalize(world_pos - params.camera.position); + let up = f32x3(0.0, 1.0, 0.0); var eye_altitude = params.camera.position.y * CAMERA_SCALE_UNIT; eye_altitude += params.environment.atmos_planet_radius + PLANET_RADIUS_OFFSET; let eye_pos = f32x3(0.0, eye_altitude, 0.0); - let up = f32x3(0.0, 1.0, 0.0); - let right = normalize(cross(up, eye_dir)); - let forward = normalize(cross(right, up)); - let sun_dir = normalize(params.environment.sun_direction); - let light_on_plane = normalize(f32x2(dot(sun_dir, forward), dot(sun_dir, right))); - f32 view_zenith_cos_angle = dot(eye_dir, up); + var sun_dir = (params.environment.sun_direction); + var eye_dir = normalize(world_pos - params.camera.position); - const let planet_intersection = std::ray_sphere_intersect_nearest(eye_pos, eye_dir, params.environment.atmos_planet_radius); - f32x2 uv = sky_view_params_to_lut_uv( + let view_zenith_cos_angle = acos(dot(eye_dir, up)); + let light_view_cos_angle = acos(clamp(dot( + normalize(f32x3(sun_dir.x, 0.0, sun_dir.z)), + normalize(f32x3(eye_dir.x, 0.0, eye_dir.z)), + ), -1.0, 1.0)); + let planet_intersection = std::ray_sphere_intersect_nearest(eye_pos, eye_dir, params.environment.atmos_planet_radius); + let uv = sky_view_params_to_lut_uv( 
params.environment.atmos_atmos_radius, params.environment.atmos_planet_radius, params.environment.sky_view_lut_size.xy, - planet_intersection.hasValue, + planet_intersection != -1.0, eye_altitude, view_zenith_cos_angle, - light_on_plane); - f32x4 result = params.sky_view_lut.SampleLevel(params.sampler, uv, 0.0); - f32x3 luminance = result.rgb; - f32 transmittance = result.a; + light_view_cos_angle); + + let result = params.sky_view_lut.SampleLevel(params.sampler, uv, 0.0); + let atmos_luminance = result.rgb * result.a; + var color = atmos_luminance * params.environment.sun_intensity; - f32 sun_cos_theta = dot(sun_dir, up); - f32x2 transmittance_uv = transmittance_params_to_lut_uv( + let sun_cos_theta = dot(sun_dir, up); + let transmittance_uv = transmittance_params_to_lut_uv( params.environment.atmos_atmos_radius, params.environment.atmos_planet_radius, f32x2(eye_altitude, sun_cos_theta)); - f32x3 sun_transmittance = params.sky_transmittance_lut.SampleLevel(params.sampler, transmittance_uv, 0.0).rgb; + let sun_transmittance = params.sky_transmittance_lut.SampleLevel(params.sampler, transmittance_uv, 0.0).rgb; - if (!planet_intersection.hasValue) { - luminance += draw_sun(eye_dir, params.environment.sun_direction, 1.0) * params.environment.sun_intensity * sun_transmittance; + if (planet_intersection == -1.0) { + color += draw_sun(eye_dir, params.environment.sun_direction, 1.0) * params.environment.sun_intensity * sun_transmittance; } - return f32x4(luminance, 1.0 - transmittance); + return f32x4(color, 1.0); } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang index 0c227c51..0ec9d2ce 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang @@ -14,49 +14,60 @@ struct ShaderParameters { StorageImage2D sky_multiscattering_lut; }; +#define GOLDEN_RATIO 1.6180339 #define SAMPLE_COUNT 64 +static 
groupshared AtmosphereLuminance luminance_shared[2]; + [[shader("compute")]] -[[numthreads(16, 16, 1)]] +[[numthreads(1, 1, SAMPLE_COUNT)]] func cs_main( u32x3 thread_id : SV_DispatchThreadID, + u32 group_index : SV_GroupIndex, uniform ParameterBlock params ) -> void { - f32x2 uv = f32x2(f32x2(thread_id.xy) + 0.5) / f32x2(params.environment.multiscattering_lut_size.xy); + var uv = f32x2(f32x2(thread_id.xy) + 0.5) / f32x2(params.environment.multiscattering_lut_size.xy); + uv = from_sub_uvs_to_unit(uv, f32x2(params.environment.multiscattering_lut_size.xy)); let atmosphere_thickness = params.environment.atmos_atmos_radius - params.environment.atmos_planet_radius; let altitude = params.environment.atmos_planet_radius + uv.y * atmosphere_thickness + PLANET_RADIUS_OFFSET; - f32 sun_cos_theta = uv.x * 2.0 - 1.0; - f32x3 sun_dir = f32x3(0.0, sun_cos_theta, std::safe_sqrt(1.0 - sun_cos_theta * sun_cos_theta)); - f32x3 ray_pos = f32x3(0.0, altitude, 0.0); + let sun_zenith_angle = uv.x * 2.0 - 1.0; + let sun_dir = f32x3(0.0, sun_zenith_angle, std::safe_sqrt(1.0 - saturate(sun_zenith_angle * sun_zenith_angle))); + let eye_pos = f32x3(0.0, altitude, 0.0); + + let theta = acos(1.0f - 2.0f * (thread_id.z + 0.5f) / f32(SAMPLE_COUNT)); + let phi = (2 * PI * thread_id.z) / GOLDEN_RATIO; + let eye_dir = f32x3( + cos(phi) * sin(theta), + cos(theta), + sin(phi) * sin(theta), + ); AtmosphereIntegrateInfo info = {}; - info.eye_pos = ray_pos; + info.eye_pos = eye_pos; + info.eye_dir = eye_dir; info.sun_dir = sun_dir; - info.eval_mie_phase = false; - info.eval_rayleigh_phase = false; - info.eval_planet_luminance = true; + info.eval_planet_luminance = false; info.eval_multiscattering = false; - info.sampling.variable_sample_count = false; - info.sampling.initial_sample_count = 32; - - f32x3 luminance = 0.0; - f32x3 multi_scattering_as_1 = 0.0; - for (int i = 0; i < SAMPLE_COUNT; i++) { - info.eye_dir = HEMISPHERE_64[i]; - let result = integrate_single_scattered_luminance(info, 
params.environment, params.sampler, params.sky_transmittance_lut, params.sky_transmittance_lut); - multi_scattering_as_1 += result.multiscattering_as_1; - luminance += result.luminance; + info.step_count = 32.0; + var result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_transmittance_lut); + result.multiscattering_as_1 = WaveActiveSum(result.multiscattering_as_1); + result.luminance = WaveActiveSum(result.luminance); + + GroupMemoryBarrierWithGroupSync(); + + if (WaveIsFirstLane()) { + let index = group_index > 0 ? 1 : 0; + luminance_shared[index] = result; } - let sphere_solid_angle = 4.0f * PI; - let isotropic_phase = 1.0f / sphere_solid_angle; - let inv_sample_count = 1.0 / f32(SAMPLE_COUNT); - luminance *= sphere_solid_angle * inv_sample_count; - multi_scattering_as_1 *= inv_sample_count; - f32x3 scattered_luminance = luminance * isotropic_phase; - f32x3 f_ms = 1.0 / (1.0 - multi_scattering_as_1); + GroupMemoryBarrierWithGroupSync(); - params.sky_multiscattering_lut.Store(thread_id.xy, f32x4(scattered_luminance * f_ms, 1.0)); + if (group_index == 0) { + let ms_sum = (luminance_shared[0].multiscattering_as_1 + luminance_shared[1].multiscattering_as_1) / f32(SAMPLE_COUNT); + let luminance_sum = (luminance_shared[0].luminance + luminance_shared[1].luminance) / f32(SAMPLE_COUNT); + let luminance = luminance_sum * (1.0 / (1.0 - ms_sum)); + params.sky_multiscattering_lut.Store(thread_id.xy, f32x4(luminance, 1.0)); + } } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang index 15ad0765..5b158c3f 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_transmittance.slang @@ -32,7 +32,7 @@ func cs_main( f32x3 ray_pos = f32x3(0.0, 0.0, lut_x); const f32 STEP_COUNT = 420.0; - f32 distance = std::ray_sphere_intersect_nearest(ray_pos, sun_dir, 
params.environment.atmos_atmos_radius).value; + f32 distance = std::ray_sphere_intersect_nearest(ray_pos, sun_dir, params.environment.atmos_atmos_radius); f32 distance_per_step = distance / STEP_COUNT; f32x3 optical_depth = 0.0; for (f32 i = 0.0; i < STEP_COUNT; i += 1.0) { diff --git a/Lorr/Engine/Resources/shaders/passes/sky_view.slang b/Lorr/Engine/Resources/shaders/passes/sky_view.slang index 87c0b097..4ebaca9a 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_view.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_view.slang @@ -17,26 +17,39 @@ struct ShaderParameters { [[shader("compute")]] [[numthreads(16, 16, 1)]] func cs_main( - u32x3 thread_id : SV_DispatchThreadID, + u32x2 thread_id : SV_DispatchThreadID, uniform ParameterBlock params ) -> void { + if (any(thread_id >= u32x2(params.environment.sky_view_lut_size.xy))) { + return; + } + let uv = f32x2(thread_id.xy) / f32x2(params.environment.sky_view_lut_size.xy); var eye_altitude = params.camera.position.y * CAMERA_SCALE_UNIT; eye_altitude += params.environment.atmos_planet_radius + PLANET_RADIUS_OFFSET; var eye_pos = f32x3(0.0, eye_altitude, 0.0); - let eye_dir = uv_to_sky_view_lut_params( + let sky_params = uv_to_sky_view_lut_params( + params.environment.atmos_atmos_radius, params.environment.atmos_planet_radius, params.environment.sky_view_lut_size.xy, uv, eye_altitude); + let view_zenith_angle = sky_params.x; + let light_view_angle = sky_params.y; + let eye_dir = f32x3( + cos(light_view_angle) * sin(view_zenith_angle), + cos(view_zenith_angle), // Y is up + sin(light_view_angle) * sin(view_zenith_angle), + ); + if (!move_to_top_atmosphere(eye_pos, eye_dir, params.environment.atmos_atmos_radius)) { params.sky_view_lut.Store(thread_id.xy, 0.0); return; } let up_vec = f32x3(0.0, 1.0, 0.0); - let sun_zenith_cos_angle = dot(normalize(params.environment.sun_direction), up_vec); + let sun_zenith_cos_angle = dot(params.environment.sun_direction, up_vec); let sun_dir = normalize(f32x3(std::safe_sqrt(1.0 
- sun_zenith_cos_angle * sun_zenith_cos_angle), sun_zenith_cos_angle, 0.0)); AtmosphereIntegrateInfo info = {}; @@ -44,14 +57,13 @@ func cs_main( info.eye_dir = eye_dir; info.sun_dir = sun_dir; info.sun_intensity = params.environment.sun_intensity; - - let sample_count = 48; - info.sampling.variable_sample_count = true; - info.sampling.min_sample_count = sample_count; - info.sampling.max_sample_count = sample_count; + info.step_count = 32.0; info.eval_multiscattering = true; - let result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); - let transmittance = dot(result.transmittance, 1.0 / 3.0); + info.eval_planet_luminance = false; + let result = integrate_single_scattered_luminance( + info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); + let inv_luminance = 1.0 / max(result.luminance, float3(1.0 / 1048576.0)); + let inv_mult = min(1048576.0, max(inv_luminance.x, max(inv_luminance.y, inv_luminance.z))); - params.sky_view_lut.Store(thread_id.xy, f32x4(result.luminance, transmittance)); + params.sky_view_lut.Store(thread_id.xy, f32x4(result.luminance * inv_mult, 1.0 / inv_mult)); } diff --git a/Lorr/Engine/Resources/shaders/sky.slang b/Lorr/Engine/Resources/shaders/sky.slang index a41a58ff..49921d48 100644 --- a/Lorr/Engine/Resources/shaders/sky.slang +++ b/Lorr/Engine/Resources/shaders/sky.slang @@ -43,72 +43,73 @@ public func sky_view_params_to_lut_uv( bool intersect_planet, f32 altitude, f32 view_zenith_cos_angle, - f32x2 light_on_plane + f32 light_view_angle ) -> f32x2 { - f32 horizon = std::safe_sqrt(altitude * altitude - planet_radius * planet_radius); - f32 beta = acos(horizon / altitude); - f32 zenith_horizon_angle = PI - beta; - f32 view_zenith_angle = acos(view_zenith_cos_angle); - - f32x2 uv = 0.0; - if (!intersect_planet) { - f32 coord = view_zenith_angle / zenith_horizon_angle; - coord = 1.0 - coord; - coord 
= std::safe_sqrt(coord); - coord = 1.0 - coord; - uv.y = coord * 0.5; + var uv: f32x2; + if (altitude < atmos_radius) { + let beta = asin(planet_radius / altitude); + let zenith_horizon_angle = PI - beta; + if (!intersect_planet) { + var coord = view_zenith_cos_angle / zenith_horizon_angle; + coord = (1.0 - std::safe_sqrt(1.0 - coord)) * 0.5; + uv.y = coord; + } else { + var coord = (view_zenith_cos_angle - zenith_horizon_angle) / beta; + coord = (std::safe_sqrt(coord) + 1.0) * 0.5; + uv.y = coord; + } } else { - f32 coord = (view_zenith_angle - zenith_horizon_angle) / beta; - coord = std::safe_sqrt(coord); - uv.y = coord * 0.5 + 0.5; + let beta = asin(atmos_radius / altitude); + let zenith_horizon_angle = PI - beta; + var coord = std::safe_sqrt((view_zenith_cos_angle - zenith_horizon_angle) / beta); + uv.y = coord; } - f32 theta = atan2(-light_on_plane.y, -light_on_plane.x); - uv.x = (theta + PI) / (2.0 * PI); - return from_unit_to_sub_uvs(uv, f32x2(sky_view_lut_size)); + uv.x = std::safe_sqrt(light_view_angle / PI); + return from_unit_to_sub_uvs(uv, f32x2(sky_view_lut_size.xy)); } public func uv_to_sky_view_lut_params( + f32 atmos_radius, f32 planet_radius, i32x2 sky_view_lut_size, f32x2 uv, f32 altitude -) -> f32x3 { +) -> f32x2 { uv = from_sub_uvs_to_unit(uv, f32x2(sky_view_lut_size)); - f32 horizon = std::safe_sqrt(altitude * altitude - planet_radius * planet_radius); - f32 beta = acos(horizon / altitude); - f32 zenith_horizon_angle = PI - beta; - - f32 view_zenith_angle = 0.0; - if (uv.y < 0.5) { - f32 coord = uv.y * 2.0; - coord = 1.0 - coord; - coord *= coord; - coord = 1.0 - coord; - view_zenith_angle = zenith_horizon_angle * coord; + var view_zenith_angle: f32; + var light_view_angle: f32; + if (altitude < atmos_radius) { + let beta = asin(planet_radius / altitude); + let zenith_horizon_angle = PI - beta; + if (uv.y < 0.5) { + let y = 1.0 - 2.0 * uv.y; + let coord = 1.0 - y * y; + view_zenith_angle = zenith_horizon_angle * coord; + } else { + let y = 2.0 
* uv.y - 1.0; + let coord = y * y; + view_zenith_angle = zenith_horizon_angle + beta * coord; + } } else { - f32 coord = uv.y * 2.0 - 1.0; - coord *= coord; + let beta = asin(atmos_radius / altitude); + let zenith_horizon_angle = PI - beta; + let coord = uv.y * uv.y; view_zenith_angle = zenith_horizon_angle + beta * coord; } - let longitude_view_cos_angle = uv.x * TAU; - let view_zenith_cos_angle = cos(view_zenith_angle); - let view_zenith_sin_angle = std::safe_sqrt(1.0 - view_zenith_cos_angle * view_zenith_cos_angle) * (view_zenith_angle > 0.0 ? 1.0 : -1.0); - let cos_longitude_view_cos_angle = cos(longitude_view_cos_angle); - let sin_longitude_view_cos_angle = - std::safe_sqrt(1.0 - cos_longitude_view_cos_angle * cos_longitude_view_cos_angle) * (longitude_view_cos_angle <= PI ? 1.0 : -1.0); - return f32x3(view_zenith_sin_angle * cos_longitude_view_cos_angle, view_zenith_cos_angle, view_zenith_sin_angle * sin_longitude_view_cos_angle); + light_view_angle = (uv.x * uv.x) * PI; + return f32x2(view_zenith_angle, light_view_angle); } public func move_to_top_atmosphere(inout f32x3 pos, f32x3 dir, f32 atmos_radius) -> bool { f32 h = length(pos); if (h > atmos_radius) { const let top_intersection = std::ray_sphere_intersect_nearest(pos, dir, atmos_radius); - if (top_intersection.hasValue) { + if (top_intersection != -1.0) { f32x3 up_vec = pos / h; f32x3 up_offset = up_vec * -PLANET_RADIUS_OFFSET; - pos = pos + dir * top_intersection.value + up_offset; + pos = pos + dir * top_intersection + up_offset; } else { return false; } @@ -117,60 +118,37 @@ public func move_to_top_atmosphere(inout f32x3 pos, f32x3 dir, f32 atmos_radius) } public struct MediumScattering { - public f32x3 rayleigh_scattering; - public f32x3 rayleigh_extinction; - public f32x3 mie_scattering; - public f32x3 mie_extinction; - - public f32x3 ozone_absorption; - public f32x3 ozone_extinction; - - public f32x3 scattering_sum; + public f32x3 rayleigh_scattering; public f32x3 extinction_sum; - [ForceInline] 
public __init(in Environment environment, f32 altitude) { - const f32 rayleigh_density = exp(-altitude / environment.atmos_rayleigh_density); - const f32 mie_density = exp(-altitude / environment.atmos_mie_density); - const f32 ozone_density = max(0.0, 1.0 - abs(altitude - environment.atmos_ozone_height) / environment.atmos_ozone_thickness); + [ForceInline] + public __init(in Environment environment, f32 altitude) { + let mie_density = exp(-altitude / environment.atmos_mie_density); + let rayleigh_density = exp(-altitude / environment.atmos_rayleigh_density); + let ozone_density = max(0.0, 1.0 - abs(altitude - environment.atmos_ozone_height) / environment.atmos_ozone_thickness); - this.rayleigh_scattering = environment.atmos_rayleigh_scatter * rayleigh_density; - this.rayleigh_extinction = this.rayleigh_scattering; // Rayleigh scattering doesn't have absorption behavior + let mie_extinction = environment.atmos_mie_extinction * mie_density; + let rayleigh_extinction = environment.atmos_rayleigh_scatter * rayleigh_density; + let ozone_extinction = environment.atmos_ozone_absorption * ozone_density; this.mie_scattering = environment.atmos_mie_scatter * mie_density; - this.mie_extinction = environment.atmos_mie_extinction * mie_density; // Mie scattering doesn't have absorption behavior - - this.ozone_absorption = environment.atmos_ozone_absorption * ozone_density; - this.ozone_extinction = this.ozone_absorption; - - this.scattering_sum = this.rayleigh_scattering + this.mie_scattering; - this.extinction_sum = this.rayleigh_extinction + this.mie_extinction + this.ozone_extinction; + this.rayleigh_scattering = environment.atmos_rayleigh_scatter * rayleigh_density; + this.extinction_sum = mie_extinction + rayleigh_extinction + ozone_extinction; } }; public struct AtmosphereLuminance { public f32x3 luminance = 0.0; public f32x3 multiscattering_as_1 = 0.0; - public f32x3 transmittance = 1.0; -}; - -public struct AtmosphereSampling { - public bool variable_sample_count = 
false; - public f32 initial_sample_count = 0.0; - public f32 min_sample_count = 0.0; - public f32 max_sample_count = 0.0; - public f32 inv_distance_to_sample_count_max = 0.01; }; public struct AtmosphereIntegrateInfo { public f32x3 eye_pos = {}; public f32x3 eye_dir = {}; public f32x3 sun_dir = {}; - public f32 sun_intensity = 1.0; - public f32 max_integration_length = 9000000.0; - public AtmosphereSampling sampling = {}; - public constexpr bool eval_mie_phase = true; - public constexpr bool eval_rayleigh_phase = true; + public f32 sun_intensity = 10.0; + public f32 step_count = 32.0; public constexpr bool eval_planet_luminance = false; public constexpr bool eval_multiscattering = false; }; @@ -188,112 +166,81 @@ public func integrate_single_scattered_luminance( return result; } - let atmos_intersection = std::ray_sphere_intersect_nearest(info.eye_pos, info.eye_dir, environment.atmos_atmos_radius); let planet_intersection = std::ray_sphere_intersect_nearest(info.eye_pos, info.eye_dir, environment.atmos_planet_radius); + let atmos_intersection = std::ray_sphere_intersect_nearest(info.eye_pos, info.eye_dir, environment.atmos_atmos_radius); var integration_length = 0.0; - if (!atmos_intersection.hasValue) { + if (atmos_intersection == -1.0) { // No intersection return result; - } else if (!planet_intersection.hasValue) { + } else if (planet_intersection == -1.0) { // Atmosphere only intersection - integration_length = atmos_intersection.value; + integration_length = atmos_intersection; } else { - integration_length = max(0.0, planet_intersection.value); - } - - integration_length = min(integration_length, info.max_integration_length); - var sample_count = info.sampling.initial_sample_count; - var sample_count_floor = info.sampling.initial_sample_count; - var max_integration_length_floor = integration_length; - if (info.sampling.variable_sample_count) { - sample_count = lerp( - info.sampling.min_sample_count, - info.sampling.max_sample_count, - 
saturate(integration_length * info.sampling.inv_distance_to_sample_count_max) - ); - sample_count_floor = floor(sample_count); - max_integration_length_floor = integration_length * sample_count_floor / sample_count; + integration_length = max(0.0, planet_intersection); } let cos_theta = dot(info.sun_dir, info.eye_dir); let rayleigh_phase = std::rayleigh_phase(cos_theta); let mie_phase = std::henyey_greenstein_draine_phase(environment.atmos_mie_asymmetry, cos_theta); - var step = 0.0; - var delta_step = integration_length / sample_count; - for (f32 i = 0; i < sample_count; i += 1.0) { - if (info.sampling.variable_sample_count) { - f32 cur_step = (i + 0.0) / sample_count_floor; - f32 next_step = (i + 1.0) / sample_count_floor; - cur_step *= cur_step; - next_step *= next_step; - - cur_step *= max_integration_length_floor; - next_step = next_step > 1.0 ? integration_length : next_step * max_integration_length_floor; - delta_step = next_step - cur_step; - step = cur_step + delta_step * 0.3; - } else { - step = integration_length * (i + 0.3) / sample_count; - } + var transmittance_sum = f32x3(1.0); + var step_length = integration_length / info.step_count; + var old_ray_shift = 0.0; + for (f32 step = 0.0; step < info.step_count; step += 1.0) { + var new_ray_shift = integration_length * (step + 0.3) / info.step_count; + step_length = new_ray_shift - old_ray_shift; + old_ray_shift = new_ray_shift; + + let step_pos = info.eye_pos + new_ray_shift * info.eye_dir; - let step_pos = info.eye_pos + step * info.eye_dir; let h = length(step_pos); let altitude = h - environment.atmos_planet_radius; let medium_info = MediumScattering(environment, altitude); + let scattering_sum = medium_info.rayleigh_scattering + medium_info.mie_scattering; - f32x3 up_vec = normalize(step_pos); - f32 earth_shadow = std::ray_sphere_intersect_nearest(step_pos, info.sun_dir, environment.atmos_planet_radius).hasValue ? 
0.0 : 1.0; - f32 sun_theta = dot(info.sun_dir, up_vec); + let up = normalize(step_pos); + let sun_theta = dot(info.sun_dir, up); - f32x2 transmittance_uv = transmittance_params_to_lut_uv( + let transmittance_uv = transmittance_params_to_lut_uv( environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); - f32x3 sun_transmittance = transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; + let sun_transmittance = transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; - f32x3 MS = 0.0; + var MS = f32x3(0.0); if (info.eval_multiscattering) { - f32x2 multiscatter_uv = multiscattering_params_to_lut_uv( + let multiscatter_uv = multiscattering_params_to_lut_uv( environment.atmos_atmos_radius, environment.atmos_planet_radius, environment.multiscattering_lut_size.xy, altitude, sun_theta); MS = multiscattering_image.SampleLevel(lut_sampler, multiscatter_uv, 0.0).rgb; } - f32x3 scattering_phase = 0.0; - if (info.eval_mie_phase && info.eval_rayleigh_phase) { - scattering_phase = medium_info.mie_scattering * mie_phase + medium_info.rayleigh_scattering * rayleigh_phase; - } else if (info.eval_mie_phase) { - scattering_phase = medium_info.mie_scattering * mie_phase + medium_info.rayleigh_scattering * std::uniform_phase(); - } else if (info.eval_rayleigh_phase) { - scattering_phase = medium_info.mie_scattering * std::uniform_phase() + medium_info.rayleigh_scattering * rayleigh_phase; - } else { - scattering_phase = medium_info.scattering_sum * std::uniform_phase(); - } - - f32x3 sun_luminance = earth_shadow * sun_transmittance * scattering_phase + (MS * medium_info.scattering_sum); - f32x3 step_transmittance = exp(-delta_step * medium_info.extinction_sum); + var scattering_phase = medium_info.mie_scattering * mie_phase + medium_info.rayleigh_scattering * rayleigh_phase; + let earth_shadow = std::ray_sphere_intersect_nearest(step_pos, info.sun_dir, environment.atmos_planet_radius) == -1.0 ? 
1.0 : 0.0; + let sun_luminance = earth_shadow * sun_transmittance * scattering_phase + (MS * scattering_sum); + let step_transmittance = exp(-step_length * medium_info.extinction_sum); - f32x3 integral = (sun_luminance - sun_luminance * step_transmittance) / medium_info.extinction_sum; - f32x3 ms_integral = (medium_info.scattering_sum - medium_info.scattering_sum * step_transmittance) / medium_info.extinction_sum; + var integral = (sun_luminance - sun_luminance * step_transmittance) / medium_info.extinction_sum; + var ms_integral = (scattering_sum - scattering_sum * step_transmittance) / medium_info.extinction_sum; let extinction_zero = medium_info.extinction_sum == f32x3(0.0); integral = select(extinction_zero, f32x3(0.0), integral); ms_integral = select(extinction_zero, f32x3(0.0), ms_integral); - result.luminance += info.sun_intensity * (integral * result.transmittance); - result.multiscattering_as_1 += ms_integral * result.transmittance; - result.transmittance *= step_transmittance; + result.luminance += integral * transmittance_sum; + result.multiscattering_as_1 += ms_integral * transmittance_sum; + transmittance_sum *= step_transmittance; } // Accumulate light bouncing off planet's ground - if (info.eval_planet_luminance && planet_intersection.hasValue && integration_length == planet_intersection.value) { - f32x3 planet_pos = info.eye_pos + integration_length * info.eye_dir; - f32 h = length(planet_pos); - f32x3 up_vec = planet_pos / h; - f32 sun_theta = dot(info.sun_dir, up_vec); - f32 NoL = saturate(dot(normalize(info.sun_dir), normalize(up_vec))); + if (info.eval_planet_luminance && integration_length != -1.0 && integration_length == planet_intersection) { + let planet_pos = info.eye_pos + integration_length * info.eye_dir; + let h = length(planet_pos); + let up = planet_pos / h; + let sun_theta = dot(info.sun_dir, up); + let NoL = saturate(dot(normalize(info.sun_dir), normalize(up))); - f32x2 transmittance_uv = 
transmittance_params_to_lut_uv(environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); - f32x3 sun_transmittance = transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; + let transmittance_uv = transmittance_params_to_lut_uv(environment.atmos_atmos_radius, environment.atmos_planet_radius, f32x2(h, sun_theta)); + let sun_transmittance = transmittance_image.SampleLevel(lut_sampler, transmittance_uv, 0.0).rgb; - result.luminance += info.sun_intensity * (sun_transmittance * result.transmittance * NoL * environment.atmos_terrain_albedo / PI); + result.luminance += sun_transmittance * transmittance_sum * NoL * environment.atmos_terrain_albedo / PI; } return result; diff --git a/Lorr/Engine/Resources/shaders/std/math.slang b/Lorr/Engine/Resources/shaders/std/math.slang index 0b6d774f..7488d671 100644 --- a/Lorr/Engine/Resources/shaders/std/math.slang +++ b/Lorr/Engine/Resources/shaders/std/math.slang @@ -69,17 +69,19 @@ public func ray_sphere_intersect(f32x3 ray_origin, f32x3 ray_direction, f32 sphe return (-b + f32x2(-1.0, 1.0) * sqrt_delta) / (2.0 * a); } -public func ray_sphere_intersect_nearest(f32x3 ray_origin, f32x3 ray_direction, f32 sphere_radius) -> Optional { - const let sol = ray_sphere_intersect(ray_origin, ray_direction, sphere_radius); - if (!sol.hasValue) { - return none; +public func ray_sphere_intersect_nearest(f32x3 ray_origin, f32x3 ray_direction, f32 sphere_radius) -> f32 { + let a = dot(ray_direction, ray_direction); + let b = 2.0 * dot(ray_direction, ray_origin); + let c = dot(ray_origin, ray_origin) - (sphere_radius * sphere_radius); + let delta = b * b - 4.0 * a * c; + if (delta < 0.0 || a == 0.0) { + return -1.0; } - const let sol0 = sol.value.x; - const let sol1 = sol.value.y; - + let sol0 = (-b - safe_sqrt(delta)) / (2.0 * a); + let sol1 = (-b + safe_sqrt(delta)) / (2.0 * a); if (sol0 < 0.0 && sol1 < 0.0) { - return none; + return -1.0; } if (sol0 < 0.0) { diff --git 
a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh index 20a31f61..0d83bb0b 100644 --- a/Lorr/Engine/Scene/ECSModule/CoreComponents.hh +++ b/Lorr/Engine/Scene/ECSModule/CoreComponents.hh @@ -56,6 +56,7 @@ ECS_COMPONENT_BEGIN(Environment) ECS_COMPONENT_MEMBER(atmos_ozone_absorption, glm::vec3, { 0.650f, 1.881f, 0.085f }) ECS_COMPONENT_MEMBER(atmos_ozone_height, f32, 25.0f) ECS_COMPONENT_MEMBER(atmos_ozone_thickness, f32, 15.0f) + ECS_COMPONENT_MEMBER(atmos_terrain_albedo, glm::vec3, { 0.3f, 0.3f, 0.3f }) ECS_COMPONENT_MEMBER(atmos_aerial_perspective_start_km, f32, 8.0f) ECS_COMPONENT_MEMBER(eye_adaptation, bool, true) ECS_COMPONENT_MEMBER(eye_min_exposure, f32, -6.0f) diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 3964cd20..8dfd20c4 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -600,9 +600,9 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< projection_mat[1][1] *= -1; auto direction = glm::vec3( - glm::cos(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)), - glm::sin(glm::radians(t.rotation.y)), - glm::sin(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)) + glm::cos(glm::radians(t.rotation.x)) * glm::sin(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.x)) * glm::sin(glm::radians(t.rotation.y)), + glm::cos(glm::radians(t.rotation.y)) ); direction = glm::normalize(direction); auto view_mat = glm::lookAt(t.position, t.position + direction, glm::vec3(0.0f, 1.0f, 0.0f)); @@ -644,6 +644,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< environment.atmos_ozone_absorption = environment_comp.atmos_ozone_absorption * 1e-3f; environment.atmos_ozone_height = environment_comp.atmos_ozone_height; environment.atmos_ozone_thickness = environment_comp.atmos_ozone_thickness; + environment.atmos_terrain_albedo = environment_comp.atmos_terrain_albedo; 
environment.atmos_aerial_perspective_start_km = environment_comp.atmos_aerial_perspective_start_km; environment.eye_min_exposure = environment_comp.eye_min_exposure; environment.eye_max_exposure = environment_comp.eye_max_exposure; @@ -664,6 +665,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< regenerate_sky |= self.last_environment.atmos_terrain_albedo != environment.atmos_terrain_albedo; regenerate_sky |= self.last_environment.atmos_atmos_radius != environment.atmos_atmos_radius; regenerate_sky |= self.last_environment.atmos_planet_radius != environment.atmos_planet_radius; + regenerate_sky |= self.last_environment.atmos_terrain_albedo != environment.atmos_terrain_albedo; self.last_environment = environment; auto vbgtao = ls::option(); diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 74b98012..9052ae80 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -81,7 +81,7 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { .format = vuk::Format::eR16G16B16A16Sfloat, .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, .type = vuk::ImageType::e2D, - .extent = { .width = 32, .height = 32, .depth = 1 }, + .extent = { .width = 64, .height = 64, .depth = 1 }, .name = "Sky Multiscatter LUT", }; std::tie(self.sky_multiscatter_lut, self.sky_multiscatter_lut_view) = Image::create_with_view(device, sky_multiscatter_lut_info).value(); @@ -500,7 +500,7 @@ auto SceneRenderer::prepare_frame(this SceneRenderer &self, FramePrepareInfo &in .bind_image(0, 1, sky_transmittance_lut) .bind_buffer(0, 2, environment) .bind_image(0, 3, sky_multiscatter_lut) - .dispatch_invocations_per_pixel(sky_multiscatter_lut); + .dispatch(sky_multiscatter_lut->extent.width, sky_multiscatter_lut->extent.height); return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, environment); } diff --git a/Lorr/Engine/Scene/SceneRenderer.hh 
b/Lorr/Engine/Scene/SceneRenderer.hh index 78d563eb..643ec6db 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -66,7 +66,7 @@ struct SceneRenderer { ImageView sky_transmittance_lut_view = {}; Image sky_multiscatter_lut = {}; ImageView sky_multiscatter_lut_view = {}; - vuk::Extent3D sky_view_lut_extent = { .width = 312, .height = 192, .depth = 1 }; + vuk::Extent3D sky_view_lut_extent = { .width = 192, .height = 108, .depth = 1 }; vuk::Extent3D sky_aerial_perspective_lut_extent = { .width = 32, .height = 32, .depth = 32 }; Image hiz = {}; diff --git a/xmake/packages.lua b/xmake/packages.lua index 9aba045a..f8bf45d0 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -55,7 +55,7 @@ add_requires("flecs v4.0.4") add_requires("libsdl3") -add_requires("shader-slang v2025.15") +add_requires("shader-slang v2025.16") add_requires("vuk 2025.09.01", { configs = { debug_allocations = false, disable_exceptions = false, diff --git a/xmake/repo/packages/s/shader-slang/xmake.lua b/xmake/repo/packages/s/shader-slang/xmake.lua index c35d2391..e07e6db8 100644 --- a/xmake/repo/packages/s/shader-slang/xmake.lua +++ b/xmake/repo/packages/s/shader-slang/xmake.lua @@ -10,6 +10,7 @@ package("shader-slang") add_versions("v2025.10.4", "f4199d9cb32f93410444713adfe880da2b665a9e13f2f8e23fdbff06068a9ff3") add_versions("v2025.12.1", "02018cc923a46c434e23b166ef13c14165b0a0c4b863279731c4f6c4898fbf8e") add_versions("v2025.15", "f37e7215e51bee4e8f5ec7b84a5d783deb6cbd0bd033c026b94f2d5a31e88d28") + add_versions("v2025.16", "5d6f01208e502d8365d905ba0f4102c9af476d36f33d834107e89ecf0463bc61") elseif is_host("linux") then add_urls("https://github.com/shader-slang/slang/releases/download/v$(version)/slang-$(version)-linux-x86_64.tar.gz", {version = function (version) return version:gsub("v", "") end}) @@ -17,6 +18,7 @@ package("shader-slang") add_versions("v2025.10.4", "c2edcfdada38feb345725613c516a842700437f6fa55910b567b9058c415ce8f") 
add_versions("v2025.12.1", "8f34b98391562ce6f97d899e934645e2c4466a02e66b69f69651ff1468553b27") add_versions("v2025.15", "1eaa24f1f0483f8b8cc4b95153c815394d2f6cae08dbaf8b18d6b7975b8bbe03") + add_versions("v2025.16", "2db64f788eadd2742280752334439c7f540581dfa59d23c1a56e06556e5b8405") end on_install("windows|x64", "linux|x86_64", function (package) From a46316003915873e22445308469b8aa0b8cb68db Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Wed, 10 Sep 2025 16:06:00 +0300 Subject: [PATCH 09/16] improve sky multiscattering LUT --- .../shaders/passes/sky_multiscattering.slang | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang index 0ec9d2ce..8aaa3f19 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_multiscattering.slang @@ -26,6 +26,7 @@ func cs_main( u32 group_index : SV_GroupIndex, uniform ParameterBlock params ) -> void { + let i = f32(thread_id.z); var uv = f32x2(f32x2(thread_id.xy) + 0.5) / f32x2(params.environment.multiscattering_lut_size.xy); uv = from_sub_uvs_to_unit(uv, f32x2(params.environment.multiscattering_lut_size.xy)); @@ -33,14 +34,14 @@ func cs_main( let altitude = params.environment.atmos_planet_radius + uv.y * atmosphere_thickness + PLANET_RADIUS_OFFSET; let sun_zenith_angle = uv.x * 2.0 - 1.0; - let sun_dir = f32x3(0.0, sun_zenith_angle, std::safe_sqrt(1.0 - saturate(sun_zenith_angle * sun_zenith_angle))); + let sun_dir = f32x3(0.0, sun_zenith_angle, std::safe_sqrt(saturate(1.0 - sun_zenith_angle * sun_zenith_angle))); let eye_pos = f32x3(0.0, altitude, 0.0); - let theta = acos(1.0f - 2.0f * (thread_id.z + 0.5f) / f32(SAMPLE_COUNT)); - let phi = (2 * PI * thread_id.z) / GOLDEN_RATIO; + let phi = acos(1.0 - 2.0 * (i + 0.5) / f32(SAMPLE_COUNT)); + let theta = TAU * i / GOLDEN_RATIO; let 
eye_dir = f32x3( - cos(phi) * sin(theta), - cos(theta), + sin(phi) * cos(theta), + cos(phi), sin(phi) * sin(theta), ); @@ -48,7 +49,7 @@ func cs_main( info.eye_pos = eye_pos; info.eye_dir = eye_dir; info.sun_dir = sun_dir; - info.eval_planet_luminance = false; + info.eval_planet_luminance = true; info.eval_multiscattering = false; info.step_count = 32.0; var result = integrate_single_scattered_luminance(info, params.environment, params.sampler, params.sky_transmittance_lut, params.sky_transmittance_lut); From 653893c4efdc2c81a3a07c807ef3bb31077e68c7 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Thu, 11 Sep 2025 17:28:40 +0300 Subject: [PATCH 10/16] bump slang --- .../shaders/passes/visbuffer_decode.slang | 6 ++-- Lorr/Engine/Resources/shaders/scene.slang | 26 +++++++++++++++-- Lorr/Engine/Scene/GPUScene.hh | 22 ++++++++++++++- Lorr/Engine/Scene/SceneRenderer.cc | 28 +++++++++++-------- xmake/packages.lua | 2 +- xmake/repo/packages/s/shader-slang/xmake.lua | 2 ++ 6 files changed, 67 insertions(+), 19 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang index ab57cfd9..f2de0234 100644 --- a/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang +++ b/Lorr/Engine/Resources/shaders/passes/visbuffer_decode.slang @@ -207,7 +207,7 @@ func fs_main( Nt = Nt * 2.0 - 1.0; } - if (true || material.flags & MaterialFlag::NormalFlipY) { + if (material.flags & MaterialFlag::NormalFlipY) { Nt.y = -Nt.y; } @@ -225,11 +225,11 @@ func fs_main( output.emission_color = material.sample_emissive_color(uv, uv_ddx, uv_ddy); // METALLIC ROUGHNESS ─────────────────────────────────────────────── - const f32x2 metallic_roughness_color = material.sample_metallic_roughness(uv, uv_ddx, uv_ddy); + let metallic_roughness_color = material.sample_metallic_roughness(uv, uv_ddx, uv_ddy); output.metallic_roughness_occlusion_color.xy = metallic_roughness_color; // 
AMBIENT OCCLUSION ──────────────────────────────────────────────── - const f32 occlusion_color = material.sample_occlusion_color(uv, uv_ddx, uv_ddy); + let occlusion_color = material.sample_occlusion_color(uv, uv_ddx, uv_ddy); output.metallic_roughness_occlusion_color.z = occlusion_color; return output; diff --git a/Lorr/Engine/Resources/shaders/scene.slang b/Lorr/Engine/Resources/shaders/scene.slang index fc151dd1..894f57f1 100644 --- a/Lorr/Engine/Resources/shaders/scene.slang +++ b/Lorr/Engine/Resources/shaders/scene.slang @@ -305,9 +305,29 @@ public struct Mesh { public Bounds bounds = {}; }; -public struct Light { - public f32x3 position = {}; - public f32x3 color = {}; +#ifndef MAX_DIRECTIONAL_LIGHT_CASCADES +#define MAX_DIRECTIONAL_LIGHT_CASCADES 6 +#endif + +public struct DirectionalLight { + public struct Cascade { + public f32x4 projection; + }; + + public Cascade cascades[MAX_DIRECTIONAL_LIGHT_CASCADES]; + public u32 cascade_count; + public f32x4 color; + public f32x3 direction; +}; + +#ifndef MAX_DIRECTIONAL_LIGHTS +#define MAX_DIRECTIONAL_LIGHTS 1 +#endif + +public struct Lights { + public u32 directional_light_count; + public u32 _unused; + public DirectionalLight directional_lights[MAX_DIRECTIONAL_LIGHTS]; }; public struct VBGTAO { diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index b8c36a96..0f784ebb 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -205,7 +205,7 @@ struct MeshLOD { }; struct Mesh { - constexpr static auto MAX_LODS = 1_sz; + constexpr static auto MAX_LODS = 8_sz; alignas(8) u64 vertex_positions = 0; alignas(8) u64 vertex_normals = 0; @@ -225,6 +225,26 @@ struct HistogramLuminance { alignas(4) f32 exposure = 0.0f; }; +struct DirectionalLight { + constexpr static auto MAX_DIRECTIONAL_LIGHT_CASCADES = 6_sz; + struct Cascade { + alignas(4) glm::vec4 projection = {}; + }; + + alignas(4) Cascade cascades[MAX_DIRECTIONAL_LIGHT_CASCADES] = {}; + alignas(4) u32 cascade_count = 0; + 
alignas(4) glm::vec4 color = {}; + alignas(4) glm::vec3 direction = {}; +}; + +struct Lights { + constexpr static auto MAX_DIRECTIONAL_LIGHTS = 1_u32; + + alignas(4) u32 directional_light_count = 0; + alignas(4) u32 _unused = 0; // for future light types + alignas(8) DirectionalLight directional_lights[MAX_DIRECTIONAL_LIGHTS] = {}; +}; + struct VBGTAO { alignas(4) f32 thickness = 0.25f; alignas(4) f32 depth_range_scale_factor = 0.75f; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 9052ae80..aa1fb1fd 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -46,6 +46,8 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { { "CULLING_MESHLET_COUNT", std::to_string(Model::MAX_MESHLET_INDICES) }, { "CULLING_TRIANGLE_COUNT", std::to_string(Model::MAX_MESHLET_PRIMITIVES) }, { "MESH_MAX_LODS", std::to_string(GPU::Mesh::MAX_LODS) }, + { "MAX_DIRECTIONAL_LIGHT_CASCADES", std::to_string(GPU::DirectionalLight::MAX_DIRECTIONAL_LIGHT_CASCADES) }, + { "MAX_DIRECTIONAL_LIGHTS", std::to_string(GPU::Lights::MAX_DIRECTIONAL_LIGHTS) }, { "HISTOGRAM_THREADS_X", std::to_string(GPU::HISTOGRAM_THREADS_X) }, { "HISTOGRAM_THREADS_Y", std::to_string(GPU::HISTOGRAM_THREADS_Y) }, }, @@ -1619,13 +1621,15 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valuedenoise_power](vuk::CommandBuffer &command_buffer, // - VUK_IA(vuk::eComputeSampled) noisy_occlusion, - VUK_IA(vuk::eComputeSampled) depth_differences, - VUK_IA(vuk::eComputeRW) ambient_occlusion) { + [power = frame.vbgtao->denoise_power]( + vuk::CommandBuffer &command_buffer, // + VUK_IA(vuk::eComputeSampled) noisy_occlusion, + VUK_IA(vuk::eComputeSampled) depth_differences, + VUK_IA(vuk::eComputeRW) ambient_occlusion + ) { auto nearest_clamp_sampler = vuk::SamplerCreateInfo{ .magFilter = vuk::Filter::eNearest, .minFilter = vuk::Filter::eNearest, diff --git a/xmake/packages.lua b/xmake/packages.lua index f8bf45d0..7df698da 100755 --- 
a/xmake/packages.lua +++ b/xmake/packages.lua @@ -55,7 +55,7 @@ add_requires("flecs v4.0.4") add_requires("libsdl3") -add_requires("shader-slang v2025.16") +add_requires("shader-slang v2025.16.1") add_requires("vuk 2025.09.01", { configs = { debug_allocations = false, disable_exceptions = false, diff --git a/xmake/repo/packages/s/shader-slang/xmake.lua b/xmake/repo/packages/s/shader-slang/xmake.lua index e07e6db8..abe6a89d 100644 --- a/xmake/repo/packages/s/shader-slang/xmake.lua +++ b/xmake/repo/packages/s/shader-slang/xmake.lua @@ -11,6 +11,7 @@ package("shader-slang") add_versions("v2025.12.1", "02018cc923a46c434e23b166ef13c14165b0a0c4b863279731c4f6c4898fbf8e") add_versions("v2025.15", "f37e7215e51bee4e8f5ec7b84a5d783deb6cbd0bd033c026b94f2d5a31e88d28") add_versions("v2025.16", "5d6f01208e502d8365d905ba0f4102c9af476d36f33d834107e89ecf0463bc61") + add_versions("v2025.16.1", "26a5acb8f03f0a664d04842df15567de9db6d46db17621efb94469a70d6dce70") elseif is_host("linux") then add_urls("https://github.com/shader-slang/slang/releases/download/v$(version)/slang-$(version)-linux-x86_64.tar.gz", {version = function (version) return version:gsub("v", "") end}) @@ -19,6 +20,7 @@ package("shader-slang") add_versions("v2025.12.1", "8f34b98391562ce6f97d899e934645e2c4466a02e66b69f69651ff1468553b27") add_versions("v2025.15", "1eaa24f1f0483f8b8cc4b95153c815394d2f6cae08dbaf8b18d6b7975b8bbe03") add_versions("v2025.16", "2db64f788eadd2742280752334439c7f540581dfa59d23c1a56e06556e5b8405") + add_versions("v2025.16.1", "059d5e5ccafd1107ac5965b95706426c68afccfe7f720f1359ee877b41b31a2a") end on_install("windows|x64", "linux|x86_64", function (package) From 68cdef036864c98e49b2038923f006985a0d18ae Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 12 Sep 2025 17:52:07 +0300 Subject: [PATCH 11/16] add sky cubemap --- .../passes/{brdf.slang => pbr_apply.slang} | 63 ++++---- .../shaders/passes/sky_cubemap.slang | 128 +++++++++++++++++ 
.../Resources/shaders/passes/sky_final.slang | 18 +-- Lorr/Engine/Resources/shaders/pbr.slang | 4 +- Lorr/Engine/Resources/shaders/sky.slang | 29 ++++ Lorr/Engine/Resources/shaders/std/wave.slang | 8 ++ Lorr/Engine/Scene/SceneRenderer.cc | 136 +++++++++++++----- Lorr/Engine/Scene/SceneRenderer.hh | 2 + shell.nix | 9 +- xmake/packages.lua | 44 +++--- 10 files changed, 340 insertions(+), 101 deletions(-) rename Lorr/Engine/Resources/shaders/passes/{brdf.slang => pbr_apply.slang} (67%) create mode 100644 Lorr/Engine/Resources/shaders/passes/sky_cubemap.slang diff --git a/Lorr/Engine/Resources/shaders/passes/brdf.slang b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang similarity index 67% rename from Lorr/Engine/Resources/shaders/passes/brdf.slang rename to Lorr/Engine/Resources/shaders/passes/pbr_apply.slang index 8d335366..51838c44 100644 --- a/Lorr/Engine/Resources/shaders/passes/brdf.slang +++ b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang @@ -1,5 +1,3 @@ -module brdf; - import gpu; import std; import pbr; @@ -12,7 +10,7 @@ struct ShaderParameters { Sampler linear_clamp_sampler; Sampler linear_repeat_sampler; Image2D sky_transmittance_lut; - Image2D sky_multiscattering_lut; + TextureCube sky_cubemap; Image2D depth_image; Image2D ambient_occlusion; Image2D albedo_image; @@ -42,9 +40,9 @@ func fs_main(VertexOutput input) -> f32x4 { let emission = params.emissive_image.Load(pixel_pos); let metallic_roughness_occlusion = params.metallic_roughness_occlusion_image.Load(pixel_pos); - let metallic = metallic_roughness_occlusion.x; - let roughness = metallic_roughness_occlusion.y; - let baked_occlusion = metallic_roughness_occlusion.z; + let metallic = metallic_roughness_occlusion.r; + let roughness = metallic_roughness_occlusion.g; + let baked_occlusion = metallic_roughness_occlusion.b; let screen_space_occlusion = params.ambient_occlusion.Load(pixel_pos).r; let occlusion = baked_occlusion * screen_space_occlusion; @@ -56,44 +54,47 @@ func fs_main(VertexOutput 
input) -> f32x4 { let V = normalize(params.camera.position - world_position); let L = normalize(params.environment.sun_direction); // temp let N = mapped_normal; + let R = reflect(-V, N); + + var indirect_illuminance = f32x3(1.0); var sun_illuminance = f32x3(1.0); - var sky_luminance = f32x3(0.1); - if (params.environment.flags & (EnvironmentFlags::HasSun | EnvironmentFlags::HasAtmosphere)) { - // SUN LIGHT COLOR ────────────────────────────────────────────────── + if ((params.environment.flags & EnvironmentFlags::HasSun) != 0u) { var eye_altitude = max(world_position.y, 0.0) * CAMERA_SCALE_UNIT; eye_altitude += params.environment.atmos_planet_radius + PLANET_RADIUS_OFFSET; - var eye_pos = f32x3(0.0, eye_altitude, 0.0); - let up_vec = f32x3(0.0, 1.0, 0.0); - f32 sun_cos_theta = dot(L, up_vec); + f32 sun_cos_theta = dot(L, f32x3(0.0, 1.0, 0.0)); + f32x2 transmittance_uv = transmittance_params_to_lut_uv( params.environment.atmos_atmos_radius, params.environment.atmos_planet_radius, f32x2(eye_altitude, sun_cos_theta)); f32x3 sun_transmittance = params.sky_transmittance_lut.SampleLevel(params.linear_clamp_sampler, transmittance_uv, 0.0).rgb; sun_illuminance = sun_transmittance * params.environment.sun_intensity; - - // SKY AMBIENT COLOR ──────────────────────────────────────────────── - AtmosphereIntegrateInfo sky_info = {}; - sky_info.eye_pos = eye_pos; - sky_info.eye_dir = up_vec; - sky_info.sun_dir = L; - sky_info.sun_intensity = params.environment.sun_intensity; - sky_info.step_count = 1; - sky_info.eval_multiscattering = true; - let sky_result = integrate_single_scattered_luminance( - sky_info, params.environment, params.linear_clamp_sampler, params.sky_transmittance_lut, params.sky_multiscattering_lut); - - var eye_gradient = dot(N, up_vec); - eye_gradient = (eye_gradient + 1.0) * 0.375 + 0.25; - sky_luminance = std::rec709_oetf(sky_result.luminance) * eye_gradient; } - let indirect_illuminance = sky_luminance * albedo_color * occlusion; + if 
((params.environment.flags & EnvironmentFlags::HasAtmosphere) != 0u) { + let specular_sample = params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, R, 0.0); + let diffuse_sample = params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, N, 0.0); + + let specular_sky = specular_sample.rgb * specular_sample.a; + let diffuse_sky = diffuse_sample.rgb * diffuse_sample.a; + + let NoV = abs(dot(N, V)) + 1e-5; + let reflectance = 0.04; + let F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + albedo_color * metallic; + let F = F_Schlick(NoV, F0); + let kD = (1.0 - metallic) * (1.0 - F); + let diffuse = kD * diffuse_sky * albedo_color * Fd_Lambert(); + + let specular_strength = (1.0 - roughness) * (1.0 - roughness); + let specular = F * specular_sky * specular_strength; + let sky_illuminance = diffuse + specular; + + indirect_illuminance = sky_illuminance * occlusion; + } // MATERIAL COLOR ─────────────────────────────────────────────────── // https://marmosetco.tumblr.com/post/81245981087 - let R = reflect(-V, N); let horizon_fade = 1.3; var horizon = saturate(1.0 + horizon_fade * dot(R, smooth_normal)); horizon *= horizon; @@ -106,7 +107,9 @@ func fs_main(VertexOutput input) -> f32x4 { } // FINAL ──────────────────────────────────────────────────────────── - let final_color = material_surface_color + indirect_illuminance + emission; + let base_ambient_color = f32x3(0.05); + let indirect_illuminance_sum = indirect_illuminance + (base_ambient_color * albedo_color * occlusion); + let final_color = material_surface_color + indirect_illuminance_sum + emission; return f32x4(final_color, 1.0); } diff --git a/Lorr/Engine/Resources/shaders/passes/sky_cubemap.slang b/Lorr/Engine/Resources/shaders/passes/sky_cubemap.slang new file mode 100644 index 00000000..2d7565d2 --- /dev/null +++ b/Lorr/Engine/Resources/shaders/passes/sky_cubemap.slang @@ -0,0 +1,128 @@ +// Credits: +// - 
https://github.com/Sunset-Flock/Timberdoodle/blob/a44dd4dd6b7f75e37be29a1178088c6b1b41b3dd/src/rendering/tasks/sky.glsl + +import std; +import gpu; +import scene; +import sky; + +typealias RWTextureCube = _Texture; + +struct ShaderParameters { + Sampler sampler; + Image2D sky_view_lut; + ConstantBuffer environment; + ConstantBuffer camera; + RWTextureCube ibl_cube; +}; + +float radical_inverse_vdc(uint bits) { + bits = (bits << 16u) | (bits >> 16u); + bits = ((bits & 0x55555555u) << 1u) | ((bits & 0xAAAAAAAAu) >> 1u); + bits = ((bits & 0x33333333u) << 2u) | ((bits & 0xCCCCCCCCu) >> 2u); + bits = ((bits & 0x0F0F0F0Fu) << 4u) | ((bits & 0xF0F0F0F0u) >> 4u); + bits = ((bits & 0x00FF00FFu) << 8u) | ((bits & 0xFF00FF00u) >> 8u); + return float(bits) * 2.3283064365386963e-10; // / 0x100000000 +} + +f32x2 hammersley(uint i, uint n) { + return f32x2(float(i + 1) / n, radical_inverse_vdc(i + 1)); +} + +mat3 CUBE_MAP_FACE_ROTATION(uint face) { + switch (face) { + case 0: return mat3(+0, +0, -1, +0, -1, +0, -1, +0, +0); + case 1: return mat3(+0, +0, +1, +0, -1, +0, +1, +0, +0); + case 2: return mat3(+1, +0, +0, +0, +0, -1, +0, +1, +0); + case 3: return mat3(+1, +0, +0, +0, +0, +1, +0, -1, +0); + case 4: return mat3(+1, +0, +0, +0, -1, +0, +0, +0, -1); + default: return mat3(-1, +0, +0, +0, -1, +0, +0, +0, +1); + } +} + +static uint _rand_state; +void rand_seed(uint seed) { + _rand_state = seed; +} + +float rand() { + // https://www.pcg-random.org/ + _rand_state = _rand_state * 747796405u + 2891336453u; + uint result = ((_rand_state >> ((_rand_state >> 28u) + 4u)) ^ _rand_state) * 277803737u; + result = (result >> 22u) ^ result; + return result / 4294967295.0; +} + +float rand_normal_dist() { + float theta = 2.0 * PI * rand(); + float rho = sqrt(-2.0 * log(rand())); + return rho * cos(theta); +} + +f32x3 rand_dir() { + return normalize(f32x3( + rand_normal_dist(), + rand_normal_dist(), + rand_normal_dist())); +} + +f32x3 rand_hemi_dir(f32x3 nrm) { + f32x3 result = rand_dir(); 
+ return result * sign(dot(nrm, result)); +} + +#define IBL_CUBE_RES 32 +#define IBL_CUBE_X 2 +#define IBL_CUBE_Y 2 + +[[shader("compute")]] +[[numthreads(IBL_CUBE_X, IBL_CUBE_Y, IBL_CUBE_RES)]] +func cs_main( + u32x3 thread_id : SV_DispatchThreadID, + u32x3 group_id : SV_GroupID, + uniform ParameterBlock params, + uniform u32 frame_index +) -> void { + let wg_base_pix_pos = group_id.xy * u32x2(IBL_CUBE_X, IBL_CUBE_Y); + let sg_index = std::subgroup_id(); + let sg_pix_pos = wg_base_pix_pos + u32x2(sg_index % IBL_CUBE_X, sg_index / IBL_CUBE_X); + let sg_thread_id = WaveGetLaneIndex(); + let face = group_id.z; + let uv = (f32x2(sg_pix_pos) + 0.5) / IBL_CUBE_RES; + let output_dir = normalize(mul(CUBE_MAP_FACE_ROTATION(face), f32x3(uv * 2.0 - 1.0, -1.0))); + + let up = f32x3(0.0, 1.0, 0.0); + var eye_altitude = params.camera.position.y * CAMERA_SCALE_UNIT; + eye_altitude += params.environment.atmos_planet_radius + PLANET_RADIUS_OFFSET; + let eye_pos = f32x3(0.0, eye_altitude, 0.0); + + // these values are hardcoded + let sample_count = 128; + let subgroup_size = 32; + let iter_count = sample_count / subgroup_size; + let global_thread_index = (thread_id.x * IBL_CUBE_RES * IBL_CUBE_RES + thread_id.y * IBL_CUBE_RES + thread_id.z); + let seed = global_thread_index + frame_index * IBL_CUBE_RES * IBL_CUBE_RES * 6; + var accumulated_result = f32x3(0.0); + for (uint i = 0; i < iter_count; ++i) { + rand_seed((i * subgroup_size + sg_thread_id + seed * sample_count)); + let input_dir = rand_hemi_dir(output_dir); + let result = get_atmosphere_illuminance_along_ray( + eye_pos, input_dir, params.environment.sun_direction, params.environment, params.sampler, params.sky_view_lut); + + let cos_weighed_result = result * dot(output_dir, input_dir); + accumulated_result += std::subgroup_inclusive_add(cos_weighed_result); + } + + if (sg_thread_id == 31) { + let this_frame_luminance = accumulated_result / sample_count; + let compressed_accumulated_luminance = 
params.ibl_cube.Load(i32x3(sg_pix_pos, group_id.z)); + // Could be nan for some reason + let unsafe_accumulated_luminance = compressed_accumulated_luminance.rgb * compressed_accumulated_luminance.a; + let accumulated_luminance = isnan(unsafe_accumulated_luminance.x) ? 0.0 : unsafe_accumulated_luminance; + + let luminance = 0.995 * accumulated_luminance + 0.005 * this_frame_luminance; + let inv_luminance = 1.0 / max(luminance, 1.0 / 1048576.0); + let inv_mult = min(1048576.0, max(inv_luminance.x, max(inv_luminance.y, inv_luminance.z))); + params.ibl_cube.Store(i32x3(sg_pix_pos, group_id.z), f32x4(luminance * inv_mult, 1.0 / inv_mult)); + } +} \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/passes/sky_final.slang b/Lorr/Engine/Resources/shaders/passes/sky_final.slang index 6ab0a460..d41f853a 100644 --- a/Lorr/Engine/Resources/shaders/passes/sky_final.slang +++ b/Lorr/Engine/Resources/shaders/passes/sky_final.slang @@ -74,24 +74,8 @@ func fs_main( var sun_dir = (params.environment.sun_direction); var eye_dir = normalize(world_pos - params.camera.position); - let view_zenith_cos_angle = acos(dot(eye_dir, up)); - let light_view_cos_angle = acos(clamp(dot( - normalize(f32x3(sun_dir.x, 0.0, sun_dir.z)), - normalize(f32x3(eye_dir.x, 0.0, eye_dir.z)), - ), -1.0, 1.0)); let planet_intersection = std::ray_sphere_intersect_nearest(eye_pos, eye_dir, params.environment.atmos_planet_radius); - let uv = sky_view_params_to_lut_uv( - params.environment.atmos_atmos_radius, - params.environment.atmos_planet_radius, - params.environment.sky_view_lut_size.xy, - planet_intersection != -1.0, - eye_altitude, - view_zenith_cos_angle, - light_view_cos_angle); - - let result = params.sky_view_lut.SampleLevel(params.sampler, uv, 0.0); - let atmos_luminance = result.rgb * result.a; - var color = atmos_luminance * params.environment.sun_intensity; + var color = get_atmosphere_illuminance_along_ray(eye_pos, eye_dir, sun_dir, params.environment, params.sampler, 
params.sky_view_lut); let sun_cos_theta = dot(sun_dir, up); let transmittance_uv = transmittance_params_to_lut_uv( diff --git a/Lorr/Engine/Resources/shaders/pbr.slang b/Lorr/Engine/Resources/shaders/pbr.slang index 986ad988..609a3060 100644 --- a/Lorr/Engine/Resources/shaders/pbr.slang +++ b/Lorr/Engine/Resources/shaders/pbr.slang @@ -13,8 +13,8 @@ public f32 Smith_G2_Height_Correlated_GGX_Lagarde(f32 NoV, f32 NoL, f32 roughnes return saturate(0.5f / (GGXV + GGXL)); } -public f32x3 F_Schlick(f32 LoH, f32x3 specular_albedo) { - return specular_albedo + (1.0 - specular_albedo) * pow(1.0 - LoH, 5.0); +public f32x3 F_Schlick(f32 u, f32x3 F0) { + return F0 + (1.0 - F0) * pow(1.0 - u, 5.0); } public constexpr f32 Fd_Lambert() { diff --git a/Lorr/Engine/Resources/shaders/sky.slang b/Lorr/Engine/Resources/shaders/sky.slang index 49921d48..9505c1a8 100644 --- a/Lorr/Engine/Resources/shaders/sky.slang +++ b/Lorr/Engine/Resources/shaders/sky.slang @@ -282,3 +282,32 @@ public func sample_aerial_perspective( return aerial_perspective; } + +public func get_atmosphere_illuminance_along_ray( + f32x3 eye_pos, + f32x3 eye_dir, + f32x3 sun_dir, + in Environment environment, + in Sampler sampler, + in Image2D sky_view_image +) -> f32x3 { + let height = length(eye_pos); + let view_zenith_cos_angle = acos(dot(eye_dir, f32x3(0.0, 1.0, 0.0))); + let light_view_cos_angle = acos(clamp(dot( + normalize(f32x3(sun_dir.x, 0.0, sun_dir.z)), + normalize(f32x3(eye_dir.x, 0.0, eye_dir.z)), + ), -1.0, 1.0)); + let planet_intersection = std::ray_sphere_intersect_nearest(eye_pos, eye_dir, environment.atmos_planet_radius); + let uv = sky_view_params_to_lut_uv( + environment.atmos_atmos_radius, + environment.atmos_planet_radius, + environment.sky_view_lut_size.xy, + planet_intersection != -1.0, + height, + view_zenith_cos_angle, + light_view_cos_angle); + + let result = sky_view_image.SampleLevel(sampler, uv, 0.0); + let atmos_luminance = result.rgb * result.a; + return atmos_luminance * 
environment.sun_intensity; +} \ No newline at end of file diff --git a/Lorr/Engine/Resources/shaders/std/wave.slang b/Lorr/Engine/Resources/shaders/std/wave.slang index 53062542..1e23efab 100644 --- a/Lorr/Engine/Resources/shaders/std/wave.slang +++ b/Lorr/Engine/Resources/shaders/std/wave.slang @@ -7,6 +7,14 @@ public func subgroup_id() -> u32 { }; } +__generic +public func subgroup_inclusive_add(vector expr) -> vector { + return spirv_asm { + OpCapability GroupNonUniformArithmetic; + OpGroupNonUniformFAdd $$vector result Subgroup InclusiveScan $expr + }; +} + public func wave_shuffle_xor(T value, u32 mask) -> T { return spirv_asm { OpCapability GroupNonUniformShuffle; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index aa1fb1fd..d10aa8b3 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -111,6 +111,12 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { }; Pipeline::create(device, default_slang_session, sky_final_pipeline_info).value(); + auto sky_cubemap_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.sky_cubemap", + .entry_points = { "cs_main" }, + }; + Pipeline::create(device, default_slang_session, sky_cubemap_pipeline_info).value(); + // ── VISBUFFER ─────────────────────────────────────────────────────── auto generate_cull_commands_pipeline_info = PipelineCompileInfo{ .module_name = "passes.generate_cull_commands", @@ -155,11 +161,11 @@ auto SceneRenderer::init(this SceneRenderer &self) -> bool { Pipeline::create(device, default_slang_session, vis_decode_pipeline_info, bindless_descriptor_set).value(); // ── PBR ───────────────────────────────────────────────────────────── - auto pbr_basic_pipeline_info = PipelineCompileInfo{ - .module_name = "passes.brdf", + auto pbr_apply_pipeline_info = PipelineCompileInfo{ + .module_name = "passes.pbr_apply", .entry_points = { "vs_main", "fs_main" }, }; - Pipeline::create(device, default_slang_session, 
pbr_basic_pipeline_info).value(); + Pipeline::create(device, default_slang_session, pbr_apply_pipeline_info).value(); // ── POST PROCESS ──────────────────────────────────────────────────── auto histogram_generate_pipeline_info = PipelineCompileInfo{ @@ -967,13 +973,11 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu static auto draw_sky( SceneRenderer &self, - vuk::Value &dst_attachment, - vuk::Value &depth_attachment, vuk::Value &sky_transmittance_lut_attachment, vuk::Value &sky_multiscatter_lut_attachment, vuk::Value &environment_buffer, vuk::Value &camera_buffer -) -> void { +) -> std::tuple, vuk::Value> { ZoneScoped; auto sky_view_lut_attachment = vuk::declare_ia( @@ -988,17 +992,18 @@ static auto draw_sky( .layer_count = 1 } ); - auto sky_aerial_perspective_attachment = vuk::declare_ia( - "sky aerial perspective", - { .image_type = vuk::ImageType::e3D, + auto sky_cubemap_attachment = vuk::declare_ia( + "sky cubemap", + { .image_flags = vuk::ImageCreateFlagBits::eCubeCompatible, + .image_type = vuk::ImageType::e2D, .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, - .extent = self.sky_aerial_perspective_lut_extent, + .extent = self.sky_cubemap_extent, + .format = vuk::Format::eR16G16B16A16Sfloat, .sample_count = vuk::Samples::e1, - .view_type = vuk::ImageViewType::e3D, + .view_type = vuk::ImageViewType::eCube, .level_count = 1, - .layer_count = 1 } + .layer_count = 6 } ); - sky_aerial_perspective_attachment.same_format_as(sky_view_lut_attachment); // ── SKY VIEW LUT ──────────────────────────────────────────────────── auto sky_view_pass = vuk::make_pass( @@ -1029,6 +1034,7 @@ static auto draw_sky( return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, environment, camera, sky_view_lut); } ); + std::tie(sky_transmittance_lut_attachment, sky_multiscatter_lut_attachment, environment_buffer, camera_buffer, sky_view_lut_attachment) = sky_view_pass( std::move(sky_transmittance_lut_attachment), @@ -1038,6 
+1044,69 @@ static auto draw_sky( std::move(sky_view_lut_attachment) ); + auto sky_cubemap_pass = vuk::make_pass( + "sky cubemap", + [frame_index = self.frame_index](vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) sky_view_lut, + VUK_BA(vuk::eComputeRead) environment, + VUK_BA(vuk::eComputeRead) camera, + VUK_IA(vuk::eComputeRW) sky_cubemap) { + auto linear_clamp_sampler = vuk::SamplerCreateInfo{ + .magFilter = vuk::Filter::eLinear, + .minFilter = vuk::Filter::eLinear, + .addressModeU = vuk::SamplerAddressMode::eClampToEdge, + .addressModeV = vuk::SamplerAddressMode::eClampToEdge, + .addressModeW = vuk::SamplerAddressMode::eClampToEdge, + }; + + cmd_list // + .bind_compute_pipeline("passes.sky_cubemap") + .bind_sampler(0, 0, linear_clamp_sampler) + .bind_image(0, 1, sky_view_lut) + .bind_buffer(0, 2, environment) + .bind_buffer(0, 3, camera) + .bind_image(0, 4, sky_cubemap) + .push_constants(vuk::ShaderStageFlagBits::eCompute, 0, frame_index) + .dispatch((sky_cubemap->extent.width + 2 - 1) / 2, (sky_cubemap->extent.height + 2 - 1) / 2, 6); + + return std::make_tuple(sky_view_lut, environment, camera, sky_cubemap); + } + ); + + std::tie(sky_view_lut_attachment, environment_buffer, camera_buffer, sky_cubemap_attachment) = sky_cubemap_pass( + std::move(sky_view_lut_attachment), + std::move(environment_buffer), + std::move(camera_buffer), + std::move(sky_cubemap_attachment) + ); + + return std::make_tuple(sky_view_lut_attachment, sky_cubemap_attachment); +} + +static auto apply_sky( + SceneRenderer &self, + vuk::Value &dst_attachment, + vuk::Value &depth_attachment, + vuk::Value &sky_view_lut_attachment, + vuk::Value &sky_transmittance_lut_attachment, + vuk::Value &sky_multiscatter_lut_attachment, + vuk::Value &environment_buffer, + vuk::Value &camera_buffer +) -> void { + ZoneScoped; + + auto sky_aerial_perspective_attachment = vuk::declare_ia( + "sky aerial perspective", + { .image_type = vuk::ImageType::e3D, + .usage = 
vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eStorage, + .extent = self.sky_aerial_perspective_lut_extent, + .sample_count = vuk::Samples::e1, + .view_type = vuk::ImageViewType::e3D, + .level_count = 1, + .layer_count = 1 } + ); + sky_aerial_perspective_attachment.same_format_as(sky_view_lut_attachment); + // ── SKY AERIAL PERSPECTIVE ────────────────────────────────────────── auto sky_aerial_perspective_pass = vuk::make_pass( "sky aerial perspective", @@ -1064,6 +1133,7 @@ static auto draw_sky( .bind_buffer(0, 4, camera) .bind_image(0, 5, sky_aerial_perspective_lut) .dispatch_invocations_per_pixel(sky_aerial_perspective_lut); + return std::make_tuple(sky_transmittance_lut, sky_multiscatter_lut, environment, camera, sky_aerial_perspective_lut); } ); @@ -1151,6 +1221,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value(); auto &transfer_man = device.transfer_man(); auto &bindless_descriptor_set = device.get_descriptor_set(); + self.frame_index += 1; // ────────────────────────────────────────────────────────────────────── auto final_attachment = vuk::declare_ia( @@ -1244,9 +1315,17 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value{}; + auto sky_cubemap_attachment = vuk::Value{}; auto sky_transmittance_lut_attachment = std::move(frame.sky_transmittance_lut); auto sky_multiscatter_lut_attachment = std::move(frame.sky_multiscatter_lut); + if (frame.environment_flags & GPU::EnvironmentFlags::HasAtmosphere) { + std::tie(sky_view_lut_attachment, sky_cubemap_attachment) = + draw_sky(self, sky_transmittance_lut_attachment, sky_multiscatter_lut_attachment, environment_buffer, camera_buffer); + } + if (frame.mesh_instance_count) { auto visbuffer_attachment = vuk::declare_ia( "visbuffer", @@ -1729,15 +1808,14 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value bool; auto destroy(this SceneRenderer &) -> void; diff --git a/shell.nix b/shell.nix index 8799823b..abd6cef8 100644 --- a/shell.nix +++ b/shell.nix @@ 
-27,7 +27,14 @@ pkgs.mkShell.override { stdenv = pkgs.llvmPackages_20.libcxxStdenv; } { pkgs.meshoptimizer # for SDL3 - pkgs.sdl3 + pkgs.xorg.libX11 + pkgs.xorg.libxcb + pkgs.xorg.libXScrnSaver + pkgs.xorg.libXcursor + pkgs.xorg.libXext + pkgs.xorg.libXfixes + pkgs.xorg.libXi + pkgs.xorg.libXrandr ]; shellHook = '' diff --git a/xmake/packages.lua b/xmake/packages.lua index 7df698da..0a743ee8 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -13,20 +13,23 @@ add_requireconfs("fmt", "fmtlog.fmt", { system = false }) -add_requires("xxhash v0.8.3") +add_requires("xxhash v0.8.3", {system = false}) add_requires("glm 1.0.1", { configs = { header_only = true, cxx_standard = "20", -} }) -add_requires("plf_colony v7.41") +}, system = false }) +add_requires("plf_colony v7.41", {system = false}) local imgui_version = "v1.92.0-docking" local imgui_configs = { wchar32 = true } -add_requires("imgui " .. imgui_version, { configs = imgui_configs }) +add_requires("imgui " .. imgui_version, { configs = imgui_configs, system = false }) add_requires("implot 3da8bd34299965d3b0ab124df743fe3e076fa222") add_requireconfs("imgui", "implot.imgui", { - override = true, version = imgui_version, configs = imgui_configs + override = true, + version = imgui_version, + configs = imgui_configs, + system = false, }) add_requires("imguizmo 1.91.3+wip") @@ -34,9 +37,9 @@ add_requireconfs("imgui", "imguizmo.imgui", { override = true, version = imgui_version, configs = imgui_configs }) -add_requires("simdutf v6.2.0") -add_requires("simdjson v3.12.2") -add_requires("unordered_dense v4.5.0") +add_requires("simdutf v6.2.0", {system = false}) +add_requires("simdjson v3.12.2", {system = false}) +add_requires("unordered_dense v4.5.0", {system = false}) add_requires("tracy v0.11.1", { configs = { tracy_enable = false, on_demand = true, @@ -45,23 +48,26 @@ add_requires("tracy v0.11.1", { configs = { code_transfer = false, exit = true, system_tracing = false, + system = false, } }) 
add_requires("vk-bootstrap v1.4.307", { system = false, debug = is_mode("debug") }) -add_requires("fastgltf v0.8.0") -add_requires("stb 2024.06.01") -add_requires("lz4 v1.10.0") -add_requires("zstd v1.5.6") -add_requires("flecs v4.0.4") +add_requires("fastgltf v0.8.0", {system = false}) +add_requires("stb 2024.06.01", {system = false}) +add_requires("lz4 v1.10.0", {system = false}) +add_requires("zstd v1.5.6", {system = false}) +add_requires("flecs v4.0.4", {system = false}) -add_requires("libsdl3") +add_requires("libsdl3 3.2.16", { configs = { + wayland = false +}, system = false}) -add_requires("shader-slang v2025.16.1") +add_requires("shader-slang v2025.15", {system = false}) add_requires("vuk 2025.09.01", { configs = { debug_allocations = false, disable_exceptions = false, -}, debug = is_mode("debug") }) +}, debug = is_mode("debug"), system = false }) -add_requires("meshoptimizer v0.24") -add_requires("ktx v4.4.0", { debug = is_plat("windows") }) +add_requires("meshoptimizer v0.24", {system = false}) +add_requires("ktx v4.4.0", { debug = is_plat("windows"), system = false }) -add_requires("svector v1.0.3") +add_requires("svector v1.0.3", {system = false}) From 467a1b57a8ae11c90cd6b617e5e0321aa0c8abca Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Fri, 12 Sep 2025 22:19:53 +0300 Subject: [PATCH 12/16] POT HiZ extent --- Lorr/Engine/Resources/shaders/passes/pbr_apply.slang | 12 ++++++------ Lorr/Engine/Resources/shaders/pbr.slang | 2 +- Lorr/Engine/Scene/Scene.cc | 6 +++--- Lorr/Engine/Scene/SceneRenderer.cc | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang index 51838c44..51d4ad92 100644 --- a/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang +++ b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang @@ -53,10 +53,10 @@ func fs_main(VertexOutput input) -> f32x4 { // PBR constants let V = 
normalize(params.camera.position - world_position); let L = normalize(params.environment.sun_direction); // temp - let N = mapped_normal; + let N = normalize(mapped_normal); let R = reflect(-V, N); - var indirect_illuminance = f32x3(1.0); + var indirect_illuminance = f32x3(0.0); var sun_illuminance = f32x3(1.0); if ((params.environment.flags & EnvironmentFlags::HasSun) != 0u) { @@ -75,13 +75,13 @@ func fs_main(VertexOutput input) -> f32x4 { if ((params.environment.flags & EnvironmentFlags::HasAtmosphere) != 0u) { let specular_sample = params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, R, 0.0); let diffuse_sample = params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, N, 0.0); - + let specular_sky = specular_sample.rgb * specular_sample.a; let diffuse_sky = diffuse_sample.rgb * diffuse_sample.a; - + let NoV = abs(dot(N, V)) + 1e-5; let reflectance = 0.04; - let F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + albedo_color * metallic; + let F0 = lerp(reflectance, albedo_color, metallic); let F = F_Schlick(NoV, F0); let kD = (1.0 - metallic) * (1.0 - F); let diffuse = kD * diffuse_sky * albedo_color * Fd_Lambert(); @@ -100,7 +100,7 @@ func fs_main(VertexOutput input) -> f32x4 { horizon *= horizon; var material_surface_color = f32x3(0.0); - let NoL = max(dot(N, L), 0.0001); + let NoL = max(dot(N, L), 0.0); if (NoL > 0.0) { let brdf = BRDF(V, N, L, albedo_color, roughness, metallic); material_surface_color = brdf * horizon * sun_illuminance * NoL * occlusion; diff --git a/Lorr/Engine/Resources/shaders/pbr.slang b/Lorr/Engine/Resources/shaders/pbr.slang index 609a3060..b4af3491 100644 --- a/Lorr/Engine/Resources/shaders/pbr.slang +++ b/Lorr/Engine/Resources/shaders/pbr.slang @@ -46,7 +46,7 @@ public func BRDF(f32x3 V, f32x3 N, f32x3 L, f32x3 albedo, f32 roughness, f32 met f32 LoH = max(dot(L, H), 0.0); let reflectance = 0.04; - let F0 = 0.16 * reflectance * reflectance * (1.0 - metallic) + albedo * metallic; + let F0 = lerp(reflectance, albedo, 
metallic); // Microfacet let roughness2 = roughness * roughness; diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 8dfd20c4..7ab460ad 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -600,9 +600,9 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< projection_mat[1][1] *= -1; auto direction = glm::vec3( - glm::cos(glm::radians(t.rotation.x)) * glm::sin(glm::radians(t.rotation.y)), - glm::sin(glm::radians(t.rotation.x)) * glm::sin(glm::radians(t.rotation.y)), - glm::cos(glm::radians(t.rotation.y)) + glm::cos(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.y)), + glm::sin(glm::radians(t.rotation.x)) * glm::cos(glm::radians(t.rotation.y)) ); direction = glm::normalize(direction); auto view_mat = glm::lookAt(t.position, t.position + direction, glm::vec3(0.0f, 1.0f, 0.0f)); diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index d10aa8b3..93d224ec 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -1243,8 +1243,8 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Valueextent.width + 1) >> 1, - .height = (dst_attachment->extent.height + 1) >> 1, + .width = std::bit_ceil((dst_attachment->extent.width + 1) >> 1), + .height = std::bit_ceil((dst_attachment->extent.height + 1) >> 1), .depth = 1, }; From 9a268e7278d897b03503b926bbd813bdd62203d9 Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sat, 13 Sep 2025 11:05:50 +0300 Subject: [PATCH 13/16] intersect planet during pbr lights --- Lorr/Engine/Resources/shaders/passes/pbr_apply.slang | 9 +++++++-- Lorr/Engine/Scene/SceneRenderer.cc | 3 --- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang index 51d4ad92..2ddb5df7 100644 --- 
a/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang +++ b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang @@ -59,9 +59,14 @@ func fs_main(VertexOutput input) -> f32x4 { var indirect_illuminance = f32x3(0.0); var sun_illuminance = f32x3(1.0); + var intersects_planet = false; if ((params.environment.flags & EnvironmentFlags::HasSun) != 0u) { var eye_altitude = max(world_position.y, 0.0) * CAMERA_SCALE_UNIT; eye_altitude += params.environment.atmos_planet_radius + PLANET_RADIUS_OFFSET; + let planet_intersection = std::ray_sphere_intersect_nearest( + f32x3(world_position.x, world_position.y + eye_altitude, world_position.z), L, params.environment.atmos_planet_radius); + intersects_planet = planet_intersection != -1.0; + f32 sun_cos_theta = dot(L, f32x3(0.0, 1.0, 0.0)); f32x2 transmittance_uv = transmittance_params_to_lut_uv( @@ -101,13 +106,13 @@ func fs_main(VertexOutput input) -> f32x4 { var material_surface_color = f32x3(0.0); let NoL = max(dot(N, L), 0.0); - if (NoL > 0.0) { + if (!intersects_planet && NoL > 0.0) { let brdf = BRDF(V, N, L, albedo_color, roughness, metallic); material_surface_color = brdf * horizon * sun_illuminance * NoL * occlusion; } // FINAL ──────────────────────────────────────────────────────────── - let base_ambient_color = f32x3(0.05); + let base_ambient_color = f32x3(0.0); let indirect_illuminance_sum = indirect_illuminance + (base_ambient_color * albedo_color * occlusion); let final_color = material_surface_color + indirect_illuminance_sum + emission; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 93d224ec..37df0b5e 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -1283,8 +1283,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value{}; auto debug_draw_aabb_buffer = vuk::Value{}; @@ -1465,7 +1463,6 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value Date: Sat, 13 Sep 2025 14:12:09 +0300 Subject: [PATCH 14/16] add image 
index and count to scene renderer --- Lorr/Editor/EditorModule.cc | 6 +++--- Lorr/Editor/EditorModule.hh | 2 +- Lorr/Editor/Window/AssetBrowserWindow.hh | 4 ++-- Lorr/Editor/Window/ConsoleWindow.hh | 4 ++-- Lorr/Editor/Window/IWindow.hh | 4 ++-- Lorr/Editor/Window/InspectorWindow.hh | 4 ++-- Lorr/Editor/Window/SceneBrowserWindow.hh | 4 ++-- Lorr/Editor/Window/ViewportWindow.cc | 12 +++++++----- Lorr/Editor/Window/ViewportWindow.hh | 8 ++++---- Lorr/Engine/Scene/Scene.cc | 3 ++- Lorr/Engine/Scene/Scene.hh | 3 ++- Lorr/Engine/Scene/SceneRenderer.hh | 2 ++ Lorr/Runtime/RuntimeModule.cc | 3 ++- 13 files changed, 33 insertions(+), 26 deletions(-) diff --git a/Lorr/Editor/EditorModule.cc b/Lorr/Editor/EditorModule.cc index 8e6e9f9c..32a175ed 100755 --- a/Lorr/Editor/EditorModule.cc +++ b/Lorr/Editor/EditorModule.cc @@ -286,7 +286,7 @@ bool EditorModule::update(this EditorModule &self, f64 delta_time) { } } - self.render(swapchain_attachment->format, swapchain_attachment->extent); + self.render(window.swap_chain.value()); self.frame_profiler.measure(&device, delta_time); @@ -509,7 +509,7 @@ static auto draw_profiler(EditorModule &self) -> void { ImGui::End(); } -auto EditorModule::render(this EditorModule &self, vuk::Format format, vuk::Extent3D extent) -> bool { +auto EditorModule::render(this EditorModule &self, vuk::Swapchain &swap_chain) -> bool { ZoneScoped; auto *viewport = ImGui::GetMainViewport(); @@ -550,7 +550,7 @@ auto EditorModule::render(this EditorModule &self, vuk::Format format, vuk::Exte ImGui::End(); for (auto &window : self.windows) { - window->do_render(format, extent); + window->do_render(swap_chain); } if (!self.active_project) { diff --git a/Lorr/Editor/EditorModule.hh b/Lorr/Editor/EditorModule.hh index 62a367ba..f39fe72c 100755 --- a/Lorr/Editor/EditorModule.hh +++ b/Lorr/Editor/EditorModule.hh @@ -36,7 +36,7 @@ struct EditorModule { auto init(this EditorModule &) -> bool; auto update(this EditorModule &, f64 delta_time) -> bool; - auto 
render(this EditorModule &, vuk::Format format, vuk::Extent3D extent) -> bool; + auto render(this EditorModule &, vuk::Swapchain &swap_chain) -> bool; auto destroy(this EditorModule &) -> void; template diff --git a/Lorr/Editor/Window/AssetBrowserWindow.hh b/Lorr/Editor/Window/AssetBrowserWindow.hh index d0c83fc6..52059dbb 100644 --- a/Lorr/Editor/Window/AssetBrowserWindow.hh +++ b/Lorr/Editor/Window/AssetBrowserWindow.hh @@ -33,8 +33,8 @@ struct AssetBrowserWindow : IWindow { auto find_directory(this AssetBrowserWindow &, const fs::path &path) -> AssetDirectory *; void render(this AssetBrowserWindow &); - void do_render(vuk::Format, vuk::Extent3D) override { + void do_render(vuk::Swapchain &) override { render(); } }; -} // namespace lr +} // namespace led diff --git a/Lorr/Editor/Window/ConsoleWindow.hh b/Lorr/Editor/Window/ConsoleWindow.hh index 827c8781..54dac432 100644 --- a/Lorr/Editor/Window/ConsoleWindow.hh +++ b/Lorr/Editor/Window/ConsoleWindow.hh @@ -7,8 +7,8 @@ struct ConsoleWindow : IWindow { ConsoleWindow(std::string name_, bool open_ = true); void render(this ConsoleWindow &); - void do_render(vuk::Format, vuk::Extent3D) override { + void do_render(vuk::Swapchain &) override { render(); } }; -} // namespace lr +} // namespace led diff --git a/Lorr/Editor/Window/IWindow.hh b/Lorr/Editor/Window/IWindow.hh index 0cd128ca..2e3d5d18 100755 --- a/Lorr/Editor/Window/IWindow.hh +++ b/Lorr/Editor/Window/IWindow.hh @@ -4,7 +4,7 @@ #include #include -#include "Engine/Graphics/VulkanTypes.hh" +#include namespace led { struct IWindow { @@ -14,7 +14,7 @@ struct IWindow { IWindow(std::string name_, bool open_ = true): name(std::move(name_)), open(open_) {}; virtual ~IWindow() = default; - virtual auto do_render(vuk::Format format, vuk::Extent3D extent) -> void = 0; + virtual auto do_render(vuk::Swapchain &swap_chain) -> void = 0; }; } // namespace lr diff --git a/Lorr/Editor/Window/InspectorWindow.hh b/Lorr/Editor/Window/InspectorWindow.hh index bcaa439f..e967c28a 
100644 --- a/Lorr/Editor/Window/InspectorWindow.hh +++ b/Lorr/Editor/Window/InspectorWindow.hh @@ -7,8 +7,8 @@ struct InspectorWindow : IWindow { InspectorWindow(std::string name_, bool open_ = true); auto render(this InspectorWindow &) -> void; - void do_render(vuk::Format, vuk::Extent3D) override { + void do_render(vuk::Swapchain &) override { render(); } }; -} // namespace lr +} // namespace led diff --git a/Lorr/Editor/Window/SceneBrowserWindow.hh b/Lorr/Editor/Window/SceneBrowserWindow.hh index 31cff075..50f98c1a 100644 --- a/Lorr/Editor/Window/SceneBrowserWindow.hh +++ b/Lorr/Editor/Window/SceneBrowserWindow.hh @@ -7,8 +7,8 @@ struct SceneBrowserWindow : IWindow { SceneBrowserWindow(std::string name_, bool open_ = true); void render(this SceneBrowserWindow &); - void do_render(vuk::Format, vuk::Extent3D) override { + void do_render(vuk::Swapchain &) override { render(); } }; -} // namespace lr +} // namespace led diff --git a/Lorr/Editor/Window/ViewportWindow.cc b/Lorr/Editor/Window/ViewportWindow.cc index f9d1e7f4..d615eb48 100755 --- a/Lorr/Editor/Window/ViewportWindow.cc +++ b/Lorr/Editor/Window/ViewportWindow.cc @@ -162,7 +162,7 @@ static auto draw_tools(ViewportWindow &self) -> void { } } -static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3D) -> void { +static auto draw_viewport(ViewportWindow &self, vuk::Swapchain &swap_chain) -> void { auto &asset_man = lr::App::mod(); auto &editor = lr::App::mod(); auto &scene_renderer = lr::App::mod(); @@ -242,13 +242,14 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 self.editor_camera.update(delta_time, target_velocity); } - auto prepared_frame = active_scene->prepare_frame(scene_renderer, self.editor_camera); // NOLINT(cppcoreguidelines-slicing) + auto prepared_frame = + active_scene->prepare_frame(scene_renderer, swap_chain.images.size(), self.editor_camera); // NOLINT(cppcoreguidelines-slicing) auto viewport_attachment_info = vuk::ImageAttachment{ 
.image_type = vuk::ImageType::e2D, .usage = vuk::ImageUsageFlagBits::eSampled | vuk::ImageUsageFlagBits::eColorAttachment, .extent = { .width = static_cast(window_size.x), .height = static_cast(window_size.y), .depth = 1 }, - .format = format, + .format = swap_chain.images[0].format, .sample_count = vuk::Samples::e1, .view_type = vuk::ImageViewType::e2D, .level_count = 1, @@ -257,6 +258,7 @@ static auto draw_viewport(ViewportWindow &self, vuk::Format format, vuk::Extent3 auto viewport_attachment = vuk::declare_ia("viewport", viewport_attachment_info); auto scene_render_info = lr::SceneRenderInfo{ .delta_time = delta_time, + .image_index = swap_chain.image_index, .cull_flags = active_scene->get_cull_flags(), .picking_texel = requested_texel_transform, }; @@ -347,14 +349,14 @@ ViewportWindow::ViewportWindow(std::string name_, bool open_) : IWindow(std::mov this->gizmo_op = ImGuizmo::TRANSLATE; } -auto ViewportWindow::render(this ViewportWindow &self, vuk::Format format, vuk::Extent3D extent) -> void { +auto ViewportWindow::render(this ViewportWindow &self, vuk::Swapchain &swap_chain) -> void { auto &editor = lr::App::mod(); const auto should_render = editor.active_project && editor.active_project->active_scene_uuid; ImGui::PushStyleVar(ImGuiStyleVar_WindowPadding, ImVec2(0.0, 0.0)); if (ImGui::Begin(self.name.data())) { if (should_render) { - draw_viewport(self, format, extent); + draw_viewport(self, swap_chain); draw_tools(self); } diff --git a/Lorr/Editor/Window/ViewportWindow.hh b/Lorr/Editor/Window/ViewportWindow.hh index cf7232e7..bdec8213 100644 --- a/Lorr/Editor/Window/ViewportWindow.hh +++ b/Lorr/Editor/Window/ViewportWindow.hh @@ -12,9 +12,9 @@ struct ViewportWindow : IWindow { ViewportWindow(std::string name_, bool open_ = true); - auto render(this ViewportWindow &, vuk::Format format, vuk::Extent3D extent) -> void; - void do_render(vuk::Format format, vuk::Extent3D extent) override { - render(format, extent); + auto render(this ViewportWindow &, 
vuk::Swapchain &swap_chain) -> void; + void do_render(vuk::Swapchain &swap_chain) override { + render(swap_chain); } }; -} // namespace lr +} // namespace led diff --git a/Lorr/Engine/Scene/Scene.cc b/Lorr/Engine/Scene/Scene.cc index 7ab460ad..fa63863c 100644 --- a/Lorr/Engine/Scene/Scene.cc +++ b/Lorr/Engine/Scene/Scene.cc @@ -572,7 +572,7 @@ auto Scene::get_cull_flags(this Scene &self) -> GPU::CullFlags & { return self.cull_flags; } -auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option override_camera) -> PreparedFrame { +auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, u32 image_count, ls::option override_camera) -> PreparedFrame { ZoneScoped; auto &asset_man = App::mod(); @@ -783,6 +783,7 @@ auto Scene::prepare_frame(this Scene &self, SceneRenderer &renderer, ls::option< } auto prepare_info = FramePrepareInfo{ + .image_count = image_count, .mesh_instance_count = self.mesh_instance_count, .max_meshlet_instance_count = self.max_meshlet_instance_count, .regenerate_sky = regenerate_sky, diff --git a/Lorr/Engine/Scene/Scene.hh b/Lorr/Engine/Scene/Scene.hh index e73bb321..df384f4a 100644 --- a/Lorr/Engine/Scene/Scene.hh +++ b/Lorr/Engine/Scene/Scene.hh @@ -79,7 +79,8 @@ public: auto find_entity(this Scene &, u32 transform_index) -> flecs::entity; // If we really want to render something, camera needs to be there - auto prepare_frame(this Scene &, SceneRenderer &renderer, ls::option override_camera = ls::nullopt) -> PreparedFrame; + auto prepare_frame(this Scene &, SceneRenderer &renderer, u32 image_count, ls::option override_camera = ls::nullopt) + -> PreparedFrame; auto tick(this Scene &, f32 delta_time) -> bool; auto set_name(this Scene &, const std::string &name) -> void; diff --git a/Lorr/Engine/Scene/SceneRenderer.hh b/Lorr/Engine/Scene/SceneRenderer.hh index 9ad9f87e..31f4b352 100644 --- a/Lorr/Engine/Scene/SceneRenderer.hh +++ b/Lorr/Engine/Scene/SceneRenderer.hh @@ -6,6 +6,7 @@ namespace lr { struct 
FramePrepareInfo { + u32 image_count = 0; u32 mesh_instance_count = 0; u32 max_meshlet_instance_count = 0; bool regenerate_sky = false; @@ -43,6 +44,7 @@ struct PreparedFrame { struct SceneRenderInfo { f32 delta_time = 0.0f; + u32 image_index = 0; GPU::CullFlags cull_flags = {}; ls::option picking_texel = ls::nullopt; diff --git a/Lorr/Runtime/RuntimeModule.cc b/Lorr/Runtime/RuntimeModule.cc index 224cc780..950a72ea 100644 --- a/Lorr/Runtime/RuntimeModule.cc +++ b/Lorr/Runtime/RuntimeModule.cc @@ -96,9 +96,10 @@ auto RuntimeModule::update(this RuntimeModule &self, f64 delta_time) -> void { active_scene->tick(static_cast(delta_time)); - auto prepared_frame = active_scene->prepare_frame(scene_renderer); + auto prepared_frame = active_scene->prepare_frame(scene_renderer, window.swap_chain->images.size()); auto scene_render_info = lr::SceneRenderInfo{ .delta_time = static_cast(delta_time), + .image_index = window.swap_chain->image_index, .cull_flags = active_scene->get_cull_flags(), }; swapchain_attachment = scene_renderer.render(std::move(swapchain_attachment), scene_render_info, prepared_frame); From 8a5c430d2150df8f2e11c45fccec119f04540cfc Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sat, 20 Sep 2025 18:41:09 +0300 Subject: [PATCH 15/16] move remote packages to local --- .gitignore | 1 + Lorr/Editor/EditorModule.cc | 2 +- Lorr/Engine/Scene/GPUScene.hh | 3 + Lorr/Engine/Scene/SceneRenderer.cc | 17 +++-- Lorr/Engine/Scene/SceneRenderer.hh | 2 +- Lorr/Engine/xmake.lua | 2 +- xmake.lua | 2 +- xmake/packages.lua | 6 +- xmake/repo/packages/f/fmtlog/port/xmake.lua | 16 ++++ xmake/repo/packages/f/fmtlog/xmake.lua | 41 ++++++++++ xmake/repo/packages/k/ktx/xmake.lua | 80 ++++++++++++++++++++ xmake/repo/packages/s/small_vector/xmake.lua | 16 ++++ xmake/repo/packages/s/spirv-cross/xmake.lua | 63 +++++++++++++++ 13 files changed, 237 insertions(+), 14 deletions(-) create mode 100644 xmake/repo/packages/f/fmtlog/port/xmake.lua create mode 
100644 xmake/repo/packages/f/fmtlog/xmake.lua create mode 100644 xmake/repo/packages/k/ktx/xmake.lua create mode 100644 xmake/repo/packages/s/small_vector/xmake.lua create mode 100644 xmake/repo/packages/s/spirv-cross/xmake.lua diff --git a/.gitignore b/.gitignore index 13b38cfb..0c4556ac 100755 --- a/.gitignore +++ b/.gitignore @@ -14,3 +14,4 @@ out/ compile_commands.json .luarc.json .notes/ +xmake-requires.lock diff --git a/Lorr/Editor/EditorModule.cc b/Lorr/Editor/EditorModule.cc index 32a175ed..b61b74c7 100755 --- a/Lorr/Editor/EditorModule.cc +++ b/Lorr/Editor/EditorModule.cc @@ -620,7 +620,7 @@ bool ImGui::drag_vec(i32 id, void *data, usize components, ImGuiDataType data_ty } ImGui::PushID(static_cast(i)); - value_changed |= ImGui::DragScalar("", data_type, data, 0.01f); + value_changed |= ImGui::DragScalar("", data_type, data, 0.1f); ImGui::PopItemWidth(); ImGui::PopID(); diff --git a/Lorr/Engine/Scene/GPUScene.hh b/Lorr/Engine/Scene/GPUScene.hh index 0f784ebb..1878921c 100644 --- a/Lorr/Engine/Scene/GPUScene.hh +++ b/Lorr/Engine/Scene/GPUScene.hh @@ -238,6 +238,9 @@ struct DirectionalLight { }; struct Lights { + // If we increase this, realistically we would need to support + // multiple lights for sky atmosphere as well, which would increase + // raymarch counts (per sun count), and this means less performance constexpr static auto MAX_DIRECTIONAL_LIGHTS = 1_u32; alignas(4) u32 directional_light_count = 0; diff --git a/Lorr/Engine/Scene/SceneRenderer.cc b/Lorr/Engine/Scene/SceneRenderer.cc index 37df0b5e..97b059fa 100644 --- a/Lorr/Engine/Scene/SceneRenderer.cc +++ b/Lorr/Engine/Scene/SceneRenderer.cc @@ -973,6 +973,7 @@ static auto draw_hiz(vuk::Value &hiz_attachment, vuk::Valu static auto draw_sky( SceneRenderer &self, + u32 frame_index, vuk::Value &sky_transmittance_lut_attachment, vuk::Value &sky_multiscatter_lut_attachment, vuk::Value &environment_buffer, @@ -1046,11 +1047,13 @@ static auto draw_sky( auto sky_cubemap_pass = vuk::make_pass( "sky
cubemap", - [frame_index = self.frame_index](vuk::CommandBuffer &cmd_list, // - VUK_IA(vuk::eComputeSampled) sky_view_lut, - VUK_BA(vuk::eComputeRead) environment, - VUK_BA(vuk::eComputeRead) camera, - VUK_IA(vuk::eComputeRW) sky_cubemap) { + [frame_index]( + vuk::CommandBuffer &cmd_list, // + VUK_IA(vuk::eComputeSampled) sky_view_lut, + VUK_BA(vuk::eComputeRead) environment, + VUK_BA(vuk::eComputeRead) camera, + VUK_IA(vuk::eComputeRW) sky_cubemap + ) { auto linear_clamp_sampler = vuk::SamplerCreateInfo{ .magFilter = vuk::Filter::eLinear, .minFilter = vuk::Filter::eLinear, @@ -1221,7 +1224,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value(); auto &transfer_man = device.transfer_man(); auto &bindless_descriptor_set = device.get_descriptor_set(); - self.frame_index += 1; + auto frame_index = self.frame_counter++; // ────────────────────────────────────────────────────────────────────── auto final_attachment = vuk::declare_ia( @@ -1321,7 +1324,7 @@ auto SceneRenderer::render(this SceneRenderer &self, vuk::Value bool; auto destroy(this SceneRenderer &) -> void; diff --git a/Lorr/Engine/xmake.lua b/Lorr/Engine/xmake.lua index 43deef24..74689e10 100755 --- a/Lorr/Engine/xmake.lua +++ b/Lorr/Engine/xmake.lua @@ -30,7 +30,7 @@ target("Lorr") add_packages( "fmt", - "fmtlog", + "fmtlog-lr", "libsdl3", "vk-bootstrap", "imgui", diff --git a/xmake.lua b/xmake.lua index 1961070e..cfc5c83d 100755 --- a/xmake.lua +++ b/xmake.lua @@ -1,5 +1,5 @@ +set_policy("package.requires_lock", true) set_policy("package.precompiled", false) -add_repositories("exdal https://github.com/exdal/xmake-repo.git") includes("xmake/*.lua") diff --git a/xmake/packages.lua b/xmake/packages.lua index 0a743ee8..affbab29 100755 --- a/xmake/packages.lua +++ b/xmake/packages.lua @@ -3,9 +3,9 @@ add_repositories("local repo", {rootdir = os.scriptdir()}) local fmt_version = "11.2.0" local fmt_configs = { header_only = false, shared = false } add_requires("fmt " .. 
fmt_version, { configs = fmt_configs, system = false }) -add_requires("fmtlog v2.3.0", { configs = { +add_requires("local@fmtlog v2.3.0", { configs = { shared = false, -}, system = false }) +}, system = false, alias = "fmtlog-lr" }) add_requireconfs("fmt", "fmtlog.fmt", { override = true, version = fmt_version, @@ -68,6 +68,6 @@ add_requires("vuk 2025.09.01", { configs = { }, debug = is_mode("debug"), system = false }) add_requires("meshoptimizer v0.24", {system = false}) -add_requires("ktx v4.4.0", { debug = is_plat("windows"), system = false }) +add_requires("ktx v4.4.0", { debug = false, system = false }) add_requires("svector v1.0.3", {system = false}) diff --git a/xmake/repo/packages/f/fmtlog/port/xmake.lua b/xmake/repo/packages/f/fmtlog/port/xmake.lua new file mode 100644 index 00000000..30aa222d --- /dev/null +++ b/xmake/repo/packages/f/fmtlog/port/xmake.lua @@ -0,0 +1,16 @@ +add_rules("mode.release", "mode.debug") +set_project("fmtlog") + +add_requires("fmt 11.1.3", { system = false, configs = { header_only = false } }) + +target("fmtlog") + set_kind("static") + add_languages("cxx20") + add_includedirs(".", { public = true }) + add_files("fmtlog.cc") + add_packages("fmt") + if is_plat("linux") then + add_syslinks("pthread") + end + +target_end() diff --git a/xmake/repo/packages/f/fmtlog/xmake.lua b/xmake/repo/packages/f/fmtlog/xmake.lua new file mode 100644 index 00000000..2bca7828 --- /dev/null +++ b/xmake/repo/packages/f/fmtlog/xmake.lua @@ -0,0 +1,41 @@ +package("fmtlog") + set_homepage("https://github.com/MengRao/fmtlog") + set_description("fmtlog is a performant fmtlib-style logging library with latency in nanoseconds.") + set_license("MIT") + + add_urls("https://github.com/MengRao/fmtlog/archive/refs/tags/$(version).tar.gz", + "https://github.com/MengRao/fmtlog.git", {submodules = false}) + + add_versions("v2.3.0", "769dee37a6375e2c4784c936c7191aaa755e669ef9ed311c412153305878ba56") + + add_deps("cmake") + add_deps("fmt") + + if is_plat("linux") then + 
add_syslinks("pthread") + end + + on_install("linux", "macosx", "windows|!arm64", function (package) + os.cp(path.join(os.scriptdir(), "port", "xmake.lua"), "xmake.lua") + + local configs = {} + import("package.tools.xmake").install(package, configs) + + if package:config("shared") then + os.tryrm(path.join(package:installdir("lib"), "*.a")) + else + os.tryrm(path.join(package:installdir("lib"), "*.dll")) + os.tryrm(path.join(package:installdir("lib"), "*.dylib")) + os.tryrm(path.join(package:installdir("lib"), "*.so")) + end + os.cp("*.h", package:installdir("include/fmtlog")) + end) + + on_test(function (package) + assert(package:check_cxxsnippets({test = [[ + void test() { + logi("A info msg"); + } + ]]}, {configs = {languages = "c++17"}, includes = "fmtlog/fmtlog.h"})) + end) + diff --git a/xmake/repo/packages/k/ktx/xmake.lua b/xmake/repo/packages/k/ktx/xmake.lua new file mode 100644 index 00000000..b15a6ef2 --- /dev/null +++ b/xmake/repo/packages/k/ktx/xmake.lua @@ -0,0 +1,80 @@ +package("ktx") + set_homepage("https://github.com/KhronosGroup/KTX-Software") + set_description("KTX (Khronos Texture) Library and Tools") + set_license("Apache-2.0") + + add_urls("https://github.com/KhronosGroup/KTX-Software/archive/refs/tags/$(version).tar.gz", + "https://github.com/KhronosGroup/KTX-Software.git", {submodules = false}) + + add_versions("v4.4.0", "3585d76edcdcbe3a671479686f8c81c1c10339f419e4b02a9a6f19cc6e4e0612") + + add_configs("tools", {description = "Create KTX tools", default = false, type = "boolean"}) + add_configs("decoder", {description = "ETC decoding support", default = false, type = "boolean"}) + add_configs("opencl", {description = "Compile with OpenCL support so applications can choose to use it.", default = false, type = "boolean"}) + add_configs("embed", {description = "Embed bitcode in binaries.", default = false, type = "boolean"}) + add_configs("ktx1", {description = "Enable KTX 1 support.", default = true, type = "boolean"}) + add_configs("ktx2", 
{description = "Enable KTX 2 support.", default = true, type = "boolean"}) + add_configs("vulkan", {description = "Enable Vulkan texture upload.", default = false, type = "boolean"}) + add_configs("opengl", {description = "Enable OpenGL texture upload.", default = is_plat("wasm"), type = "boolean"}) + -- This project .def file export 64-bit symbols only + if is_plat("wasm", "iphoneos") or (is_plat("windows", "mingw") and is_arch("x86", "i386")) then + add_configs("shared", {description = "Build shared library.", default = false, type = "boolean", readonly = true}) + end + + add_deps("cmake") + if is_subhost("windows") then + add_deps("pkgconf") + else + add_deps("pkg-config") + end + + on_check(function (package) + if is_subhost("windows") and os.arch() == "arm64" then + raise("package(ktx) require python (from pkgconf) for building, but windows arm64 python binaries are unsupported") + end + end) + + on_load(function (package) + if package:config("tools") then + package:add("deps", "fmt", "cxxopts", {private = true}) + end + if not package:config("shared") then + package:add("defines", "KHRONOS_STATIC") + end + if package:config("ktx1") then + package:add("defines", "KTX_FEATURE_KTX1") + end + if package:config("ktx2") then + package:add("defines", "KTX_FEATURE_KTX2") + end + end) + + on_install("!iphoneos and !wasm", function (package) + if package:has_runtime("MD", "MT") then + io.replace("CMakeLists.txt", "_DEBUG", "", {plain = true}) + end + + local configs = {"-DKTX_FEATURE_TESTS=OFF"} + table.insert(configs, "-DCMAKE_BUILD_TYPE=" .. (package:is_debug() and "Debug" or "Release")) + table.insert(configs, "-DBUILD_SHARED_LIBS=" .. (package:config("shared") and "ON" or "OFF")) + + if not package:gitref() and package:version():startswith("v") then + table.insert(configs, "-DKTX_GIT_VERSION_FULL=" .. package:version()) + end + + table.insert(configs, "-DKTX_FEATURE_TOOLS=" .. 
(package:config("tools") and "ON" or "OFF")) + table.insert(configs, "-DKTX_FEATURE_ETC_UNPACK=" .. (package:config("decoder") and "ON" or "OFF")) + table.insert(configs, "-DBASISU_SUPPORT_OPENCL=" .. (package:config("opencl") and "ON" or "OFF")) + table.insert(configs, "-DKTX_EMBED_BITCODE=" .. (package:config("embed") and "ON" or "OFF")) + table.insert(configs, "-DKTX_FEATURE_KTX1=" .. (package:config("ktx1") and "ON" or "OFF")) + table.insert(configs, "-DKTX_FEATURE_KTX2=" .. (package:config("ktx2") and "ON" or "OFF")) + table.insert(configs, "-DKTX_FEATURE_VK_UPLOAD=" .. (package:config("vulkan") and "ON" or "OFF")) + table.insert(configs, "-DKTX_FEATURE_GL_UPLOAD=" .. (package:config("opengl") and "ON" or "OFF")) + import("package.tools.cmake").install(package, configs) + end) + + on_test(function (package) + assert(package:has_cfuncs("ktxErrorString", {includes = "ktx.h"})) + end) + + diff --git a/xmake/repo/packages/s/small_vector/xmake.lua b/xmake/repo/packages/s/small_vector/xmake.lua new file mode 100644 index 00000000..b1cacc9d --- /dev/null +++ b/xmake/repo/packages/s/small_vector/xmake.lua @@ -0,0 +1,16 @@ +package("small_vector") + set_kind("library", {headeronly = true}) + set_homepage("https://github.com/gharveymn/small_vector") + set_description("MIT") + + add_urls("https://github.com/gharveymn/small_vector.git") + add_versions("2024.12.23", "5b4ad3bd3dc3e1593a7e95cb3843a87b5ae21000") + + add_includedirs("include", "include/stb") + + on_install(function (package) + os.cp("source/include/gch", package:installdir("include")) + end) +package_end() + + diff --git a/xmake/repo/packages/s/spirv-cross/xmake.lua b/xmake/repo/packages/s/spirv-cross/xmake.lua new file mode 100644 index 00000000..6c15ba0c --- /dev/null +++ b/xmake/repo/packages/s/spirv-cross/xmake.lua @@ -0,0 +1,63 @@ +package("spirv-cross") + set_homepage("https://github.com/KhronosGroup/SPIRV-Cross/") + set_description("SPIRV-Cross is a practical tool and library for performing reflection 
on SPIR-V and disassembling SPIR-V back to high level languages.") + set_license("Apache-2.0") + + add_urls("https://github.com/KhronosGroup/SPIRV-Cross.git") + + add_versions("1.2.154+1", "e6f5ce6b8998f551f3400ad743b77be51bbe3019") + add_versions("1.2.162+0", "6d10da0224bd3214c9a507832e62d9fb6ae9620d") + add_versions("1.2.189+1", "0e2880ab990e79ce6cc8c79c219feda42d98b1e8") + add_versions("1.3.231+1", "f09ba2777714871bddb70d049878af34b94fa54d") + add_versions("1.3.268+0", "2de1265fca722929785d9acdec4ab728c47a0254") + add_versions("1.4.309+0", "2c32b6bf86f3c4a5539aa1f0bacbd59fe61759cf") + + add_deps("cmake") + + if is_plat("windows") then + set_policy("platform.longpaths", true) + end + + on_load(function (package) + local links = { "spirv-cross-core" } + for _, link in ipairs(links) do + if package:is_plat("windows") and package:is_debug() then + link = link .. "d" + end + package:add("links", link) + end + end) + + on_install("windows", "linux", "macosx", "mingw", function (package) + local configs = { + "-DSPIRV_CROSS_ENABLE_TESTS=OFF", + "-DSPIRV_CROSS_CLI=OFF", + "-DSPIRV_CROSS_ENABLE_HLSL=OFF", + "-DSPIRV_CROSS_ENABLE_MSL=OFF", + "-DSPIRV_CROSS_ENABLE_CPP=OFF", + "-DSPIRV_CROSS_ENABLE_REFLECT=OFF", + "-DSPIRV_CROSS_ENABLE_C_API=OFF", + "-DSPIRV_CROSS_ENABLE_UTIL=OFF", + "-DSPIRV_CROSS_EXCEPTIONS_TO_ASSERTIONS=ON" + } + table.insert(configs, "-DCMAKE_BUILD_TYPE=" .. 
(package:is_debug() and "Debug" or "Release")) + + local cxflags + if package:is_plat("windows") and package:is_debug() then + cxflags = cxflags or {} + table.insert(cxflags, "/FS") + end + if package:config("shared") then + table.insert(configs, "-DSPIRV_CROSS_SHARED=ON") + else + table.insert(configs, "-DSPIRV_CROSS_SHARED=OFF") + end + + import("package.tools.cmake").install(package, configs, {cxflags = cxflags}) + + os.cp("*.hpp", package:installdir("include")) + package:addenv("PATH", "bin") + end) +package_end() + + From 888cbb80d74faa159c24b5b731e9834f843d51da Mon Sep 17 00:00:00 2001 From: exdal <63502313+exdal@users.noreply.github.com> Date: Sun, 21 Sep 2025 13:10:12 +0300 Subject: [PATCH 16/16] fix pbr lighting --- Lorr/Engine/Resources/shaders/passes/pbr_apply.slang | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang index 2ddb5df7..c58a8c84 100644 --- a/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang +++ b/Lorr/Engine/Resources/shaders/passes/pbr_apply.slang @@ -34,8 +34,8 @@ func fs_main(VertexOutput input) -> f32x4 { let albedo_color = params.albedo_image.SampleLevel(params.linear_repeat_sampler, input.tex_coord, 0).rgb; let mapped_smooth_normal = params.normal_image.Load(pixel_pos); - let mapped_normal = std::octahedral_decode(mapped_smooth_normal.xy); - let smooth_normal = std::octahedral_decode(mapped_smooth_normal.zw); + let mapped_normal = std::octahedral_decode(mapped_smooth_normal.rg); + let smooth_normal = std::octahedral_decode(mapped_smooth_normal.ba); let emission = params.emissive_image.Load(pixel_pos); @@ -78,7 +78,8 @@ func fs_main(VertexOutput input) -> f32x4 { } if ((params.environment.flags & EnvironmentFlags::HasAtmosphere) != 0u) { - let specular_sample = params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, R, 0.0); + let reflection_dir = lerp(N, R, 1.0 - roughness); + let specular_sample = 
params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, reflection_dir, 0.0); let diffuse_sample = params.sky_cubemap.SampleLevel(params.linear_clamp_sampler, N, 0.0); let specular_sky = specular_sample.rgb * specular_sample.a; @@ -112,7 +113,7 @@ func fs_main(VertexOutput input) -> f32x4 { } // FINAL ──────────────────────────────────────────────────────────── - let base_ambient_color = f32x3(0.0); + let base_ambient_color = f32x3(0.04) * sun_illuminance; let indirect_illuminance_sum = indirect_illuminance + (base_ambient_color * albedo_color * occlusion); let final_color = material_surface_color + indirect_illuminance_sum + emission;