From 387fe0fa823900fa011dd8dc69849c24de3f9a89 Mon Sep 17 00:00:00 2001 From: garamond13 <98652255+garamond13@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:48:26 +0100 Subject: [PATCH 1/3] Dishonored 2: Improve DLSS. --- Shaders/Dishonored 2/Luma_PostDLSS_CS.hlsl | 82 +++++++++++++++++++++ Shaders/Dishonored 2/Luma_PreDLSS_CS.hlsl | 84 ++++++++++++++++++++++ Source/Games/Dishonored 2/main.cpp | 56 ++++++++++++--- 3 files changed, 213 insertions(+), 9 deletions(-) create mode 100644 Shaders/Dishonored 2/Luma_PostDLSS_CS.hlsl create mode 100644 Shaders/Dishonored 2/Luma_PreDLSS_CS.hlsl diff --git a/Shaders/Dishonored 2/Luma_PostDLSS_CS.hlsl b/Shaders/Dishonored 2/Luma_PostDLSS_CS.hlsl new file mode 100644 index 0000000..eaa53cd --- /dev/null +++ b/Shaders/Dishonored 2/Luma_PostDLSS_CS.hlsl @@ -0,0 +1,82 @@ +struct postfx_luminance_autoexposure_t +{ + float EngineLuminanceFactor; // Offset: 0 + float LuminanceFactor; // Offset: 4 + float MinLuminanceLDR; // Offset: 8 + float MaxLuminanceLDR; // Offset: 12 + float MiddleGreyLuminanceLDR; // Offset: 16 + float EV; // Offset: 20 + float Fstop; // Offset: 24 + uint PeakHistogramValue; // Offset: 28 +}; + +cbuffer PerInstanceCB : register(b2) +{ + float4 cb_positiontoviewtexture : packoffset(c0); + float4 cb_taatexsize : packoffset(c1); + float4 cb_taaditherandviewportsize : packoffset(c2); + float4 cb_postfx_tonemapping_tonemappingparms : packoffset(c3); + float4 cb_postfx_tonemapping_tonemappingcoeffsinverse1 : packoffset(c4); + float4 cb_postfx_tonemapping_tonemappingcoeffsinverse0 : packoffset(c5); + float4 cb_postfx_tonemapping_tonemappingcoeffs1 : packoffset(c6); + float4 cb_postfx_tonemapping_tonemappingcoeffs0 : packoffset(c7); + uint2 cb_postfx_luminance_exposureindex : packoffset(c8); + float2 cb_prevresolutionscale : packoffset(c8.z); + float cb_env_tonemapping_white_level : packoffset(c9); + float cb_view_white_level : packoffset(c9.y); + float cb_taaamount : packoffset(c9.z); + float 
cb_postfx_luminance_customevbias : packoffset(c9.w); +} + +cbuffer PerViewCB : register(b1) +{ + float4 cb_alwaystweak : packoffset(c0); + float4 cb_viewrandom : packoffset(c1); + float4x4 cb_viewprojectionmatrix : packoffset(c2); + float4x4 cb_viewmatrix : packoffset(c6); + float4 cb_subpixeloffset : packoffset(c10); + float4x4 cb_projectionmatrix : packoffset(c11); + float4x4 cb_previousviewprojectionmatrix : packoffset(c15); + float4x4 cb_previousviewmatrix : packoffset(c19); + float4x4 cb_previousprojectionmatrix : packoffset(c23); + float4 cb_mousecursorposition : packoffset(c27); + float4 cb_mousebuttonsdown : packoffset(c28); + float4 cb_jittervectors : packoffset(c29); + float4x4 cb_inverseviewprojectionmatrix : packoffset(c30); + float4x4 cb_inverseviewmatrix : packoffset(c34); + float4x4 cb_inverseprojectionmatrix : packoffset(c38); + float4 cb_globalviewinfos : packoffset(c42); + float3 cb_wscamforwarddir : packoffset(c43); + uint cb_alwaysone : packoffset(c43.w); + float3 cb_wscamupdir : packoffset(c44); + uint cb_usecompressedhdrbuffers : packoffset(c44.w); + float3 cb_wscampos : packoffset(c45); + float cb_time : packoffset(c45.w); + float3 cb_wscamleftdir : packoffset(c46); + float cb_systime : packoffset(c46.w); + float2 cb_jitterrelativetopreviousframe : packoffset(c47); + float2 cb_worldtime : packoffset(c47.z); + float2 cb_shadowmapatlasslicedimensions : packoffset(c48); + float2 cb_resolutionscale : packoffset(c48.z); + float2 cb_parallelshadowmapslicedimensions : packoffset(c49); + float cb_framenumber : packoffset(c49.z); + uint cb_alwayszero : packoffset(c49.w); +} + +StructuredBuffer ro_postfx_luminance_buffautoexposure : register(t0); +RWTexture2D uav : register(u0); + +[numthreads(8, 8, 1)] +void main(uint3 dtid : SV_DispatchThreadID) +{ + // From the game's native TAA. 
+ const float r0z = ro_postfx_luminance_buffautoexposure[cb_postfx_luminance_exposureindex.y].EngineLuminanceFactor; + const float r1w = cb_view_white_level * r0z; // Compression factor? + + float4 color = uav[dtid.xy]; + + // As done in the game's native TAA. + color.rgb = cb_usecompressedhdrbuffers ? color.rgb * rcp(r1w) : color.rgb; + + uav[dtid.xy] = color; +} \ No newline at end of file diff --git a/Shaders/Dishonored 2/Luma_PreDLSS_CS.hlsl b/Shaders/Dishonored 2/Luma_PreDLSS_CS.hlsl new file mode 100644 index 0000000..a61c589 --- /dev/null +++ b/Shaders/Dishonored 2/Luma_PreDLSS_CS.hlsl @@ -0,0 +1,84 @@ +struct postfx_luminance_autoexposure_t +{ + float EngineLuminanceFactor; // Offset: 0 + float LuminanceFactor; // Offset: 4 + float MinLuminanceLDR; // Offset: 8 + float MaxLuminanceLDR; // Offset: 12 + float MiddleGreyLuminanceLDR; // Offset: 16 + float EV; // Offset: 20 + float Fstop; // Offset: 24 + uint PeakHistogramValue; // Offset: 28 +}; + +cbuffer PerInstanceCB : register(b2) +{ + float4 cb_positiontoviewtexture : packoffset(c0); + float4 cb_taatexsize : packoffset(c1); + float4 cb_taaditherandviewportsize : packoffset(c2); + float4 cb_postfx_tonemapping_tonemappingparms : packoffset(c3); + float4 cb_postfx_tonemapping_tonemappingcoeffsinverse1 : packoffset(c4); + float4 cb_postfx_tonemapping_tonemappingcoeffsinverse0 : packoffset(c5); + float4 cb_postfx_tonemapping_tonemappingcoeffs1 : packoffset(c6); + float4 cb_postfx_tonemapping_tonemappingcoeffs0 : packoffset(c7); + uint2 cb_postfx_luminance_exposureindex : packoffset(c8); + float2 cb_prevresolutionscale : packoffset(c8.z); + float cb_env_tonemapping_white_level : packoffset(c9); + float cb_view_white_level : packoffset(c9.y); + float cb_taaamount : packoffset(c9.z); + float cb_postfx_luminance_customevbias : packoffset(c9.w); +} + +cbuffer PerViewCB : register(b1) +{ + float4 cb_alwaystweak : packoffset(c0); + float4 cb_viewrandom : packoffset(c1); + float4x4 cb_viewprojectionmatrix : 
packoffset(c2); + float4x4 cb_viewmatrix : packoffset(c6); + float4 cb_subpixeloffset : packoffset(c10); + float4x4 cb_projectionmatrix : packoffset(c11); + float4x4 cb_previousviewprojectionmatrix : packoffset(c15); + float4x4 cb_previousviewmatrix : packoffset(c19); + float4x4 cb_previousprojectionmatrix : packoffset(c23); + float4 cb_mousecursorposition : packoffset(c27); + float4 cb_mousebuttonsdown : packoffset(c28); + float4 cb_jittervectors : packoffset(c29); + float4x4 cb_inverseviewprojectionmatrix : packoffset(c30); + float4x4 cb_inverseviewmatrix : packoffset(c34); + float4x4 cb_inverseprojectionmatrix : packoffset(c38); + float4 cb_globalviewinfos : packoffset(c42); + float3 cb_wscamforwarddir : packoffset(c43); + uint cb_alwaysone : packoffset(c43.w); + float3 cb_wscamupdir : packoffset(c44); + uint cb_usecompressedhdrbuffers : packoffset(c44.w); + float3 cb_wscampos : packoffset(c45); + float cb_time : packoffset(c45.w); + float3 cb_wscamleftdir : packoffset(c46); + float cb_systime : packoffset(c46.w); + float2 cb_jitterrelativetopreviousframe : packoffset(c47); + float2 cb_worldtime : packoffset(c47.z); + float2 cb_shadowmapatlasslicedimensions : packoffset(c48); + float2 cb_resolutionscale : packoffset(c48.z); + float2 cb_parallelshadowmapslicedimensions : packoffset(c49); + float cb_framenumber : packoffset(c49.z); + uint cb_alwayszero : packoffset(c49.w); +} + +StructuredBuffer ro_postfx_luminance_buffautoexposure : register(t0); +RWTexture2D uav : register(u0); + +[numthreads(8, 8, 1)] +void main(uint3 dtid : SV_DispatchThreadID) +{ + // From the game's native TAA. + const float r0z = ro_postfx_luminance_buffautoexposure[cb_postfx_luminance_exposureindex.y].EngineLuminanceFactor; + const float r1w = cb_view_white_level * r0z; // Compression factor? + + float4 color = uav[dtid.xy]; + + // As done in the game's native TAA. + color.rgb = cb_usecompressedhdrbuffers ? 
color.rgb * r1w : color.rgb; + color.rgb = max(0.0, color.rgb); + color.rgb = min(cb_env_tonemapping_white_level, color.rgb); + + uav[dtid.xy] = color; +} \ No newline at end of file diff --git a/Source/Games/Dishonored 2/main.cpp b/Source/Games/Dishonored 2/main.cpp index 170a52c..17f5dd3 100644 --- a/Source/Games/Dishonored 2/main.cpp +++ b/Source/Games/Dishonored 2/main.cpp @@ -56,7 +56,6 @@ namespace ShaderHashesList shader_hashes_DownsampleDepth; ShaderHashesList shader_hashes_UnprojectDepth; ShaderHashesList shader_hashes_SSAO; - ShaderHashesList shader_hashes_Downsample; // XeGTAO constexpr size_t XE_GTAO_DEPTH_MIP_LEVELS = 5; @@ -100,7 +99,10 @@ struct GameDeviceDataDishonored2 final : public GameDeviceData com_ptr sr_motion_vectors; //com_ptr sr_output_color_2; //TODOFT: delete this and related code + // We are getting these from the game's TAA. + ComPtr cb_taa_b1; ComPtr cb_taa_b2; + ComPtr srv_ro_postfx_luminance_buffautoexposure; // Game state com_ptr depth_buffer; @@ -156,6 +158,8 @@ class Dishonored2 final : public Game native_shaders_definitions.emplace(CompileTimeStringHash("DS2 XeGTAO Prefilter Depths CS"), ShaderDefinition{ "Luma_XeGTAO", reshade::api::pipeline_subobject_type::compute_shader, nullptr, "prefilter_depths16x16_cs" }); native_shaders_definitions.emplace(CompileTimeStringHash("DS2 XeGTAO Main Pass PS"), ShaderDefinition{ "Luma_XeGTAO", reshade::api::pipeline_subobject_type::pixel_shader, nullptr, "main_pass_ps" }); + native_shaders_definitions.emplace(CompileTimeStringHash("DS2 Pre DLSS CS"), ShaderDefinition{ "Luma_PreDLSS_CS", reshade::api::pipeline_subobject_type::compute_shader }); + native_shaders_definitions.emplace(CompileTimeStringHash("DS2 Post DLSS CS"), ShaderDefinition{ "Luma_PostDLSS_CS", reshade::api::pipeline_subobject_type::compute_shader }); } // This needs to be overridden with your own "GameDeviceData" sub-class (destruction is automatically handled) @@ -553,13 +557,6 @@ class Dishonored2 final : public Game } } - if 
(original_shader_hashes.Contains(shader_hashes_Downsample)) - { - native_device_context->PSSetConstantBuffers(3, 1, &game_device_data.cb_taa_b2); - - return DrawOrDispatchOverrideType::None; - } - if (original_shader_hashes.Contains(shader_hashes_TAA)) { // Not thread safe? @@ -572,7 +569,9 @@ class Dishonored2 final : public Game // SR/TAA if (device_data.sr_type != SR::Type::None && !device_data.sr_suppressed && original_shader_hashes.Contains(shader_hashes_TAA)) { + native_device_context->CSGetConstantBuffers(1, 1, game_device_data.cb_taa_b1.put()); native_device_context->CSGetConstantBuffers(2, 1, game_device_data.cb_taa_b2.put()); + native_device_context->CSGetShaderResources(3, 1, game_device_data.srv_ro_postfx_luminance_buffautoexposure.put()); // TODO: Clean up all this, I think game will always use deferred rendering, so most of this is not needed. assert(native_device_context->GetType() == D3D11_DEVICE_CONTEXT_DEFERRED); @@ -948,6 +947,29 @@ class Dishonored2 final : public Game uint32_t render_width_dlss = std::lrintf(device_data.render_resolution.x); uint32_t render_height_dlss = std::lrintf(device_data.render_resolution.y); + // PreDLSS pass + // + // The game does some HDR compression/decompression before and after the TAA, + // so we replicate that here in PreDLSS pass and PostDLSS pass (immediately after DLSS draw). + // + + // Create UAV. + ComPtr native_device; + native_device_context->GetDevice(native_device.put()); + ComPtr uav; + ensure(native_device->CreateUnorderedAccessView(game_device_data.sr_source_color.get(), nullptr, uav.put()), >= 0); + + // Bindings. 
+ native_device_context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + native_device_context->CSSetShader(device_data.native_compute_shaders.at(CompileTimeStringHash("DS2 Pre DLSS CS")).get(), nullptr, 0); + native_device_context->CSSetConstantBuffers(1, 1, &game_device_data.cb_taa_b1); + native_device_context->CSSetConstantBuffers(2, 1, &game_device_data.cb_taa_b2); + native_device_context->CSSetShaderResources(0, 1, &game_device_data.srv_ro_postfx_luminance_buffautoexposure); + + native_device_context->Dispatch((render_width_dlss + 8 - 1) / 8, (render_height_dlss + 8 - 1) / 8, 1); + + // + SR::SettingsData settings_data; settings_data.output_width = device_data.output_resolution.x; settings_data.output_height = device_data.output_resolution.y; @@ -991,6 +1013,23 @@ class Dishonored2 final : public Game bool dlss_succeeded = sr_implementations[device_data.sr_type]->Draw(sr_instance_data, native_device_context.get(), draw_data); ASSERT_ONCE(dlss_succeeded); // We can't restore the original TAA pass at this point (well, we could, but it's pointless, we'll just skip one frame) // TODO: copy the resource instead? + // PostDLSS pass + // + + // Create UAV. + ensure(native_device->CreateUnorderedAccessView(device_data.sr_output_color.get(), nullptr, uav.put()), >= 0); + + // Bindings. 
+ native_device_context->CSSetUnorderedAccessViews(0, 1, &uav, nullptr); + native_device_context->CSSetShader(device_data.native_compute_shaders.at(CompileTimeStringHash("DS2 Post DLSS CS")).get(), nullptr, 0); + native_device_context->CSSetConstantBuffers(1, 1, &game_device_data.cb_taa_b1); + native_device_context->CSSetConstantBuffers(2, 1, &game_device_data.cb_taa_b2); + native_device_context->CSSetShaderResources(0, 1, &game_device_data.srv_ro_postfx_luminance_buffautoexposure); + + native_device_context->Dispatch((device_data.output_resolution.x + 8 - 1) / 8, (device_data.output_resolution.y + 8 - 1) / 8, 1); + + // + game_device_data.sr_source_color = nullptr; game_device_data.sr_motion_vectors = nullptr; device_data.sr_output_color = nullptr; @@ -1178,7 +1217,6 @@ BOOL APIENTRY DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserv shader_hashes_UnprojectDepth.compute_shaders.emplace(std::stoul("223FB9DA", nullptr, 16)); // DH2 shader_hashes_UnprojectDepth.compute_shaders.emplace(std::stoul("74E15FB8", nullptr, 16)); // DH DOTO shader_hashes_SSAO.pixel_shaders.emplace(0x94445D2D); // DH2 + DH DOTO - shader_hashes_Downsample.pixel_shaders.emplace(0x42873B15); // All UI pixel shaders (these are all Shader Model 4.0, as opposed to the rest of the rendering using SM5.0) shader_hashes_UI.pixel_shaders = { std::stoul("6FE8114D", nullptr, 16), From 01a6626776a246dcd710748a60fabd541a4d9fcc Mon Sep 17 00:00:00 2001 From: garamond13 <98652255+garamond13@users.noreply.github.com> Date: Fri, 6 Mar 2026 19:38:38 +0100 Subject: [PATCH 2/3] Dishonored 2: Add FSR. 
--- .../Games/Dishonored 2/Dishonored 2.vcxproj | 24 +++++++++---------- Source/Games/Dishonored 2/main.cpp | 6 +++++ 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/Source/Games/Dishonored 2/Dishonored 2.vcxproj b/Source/Games/Dishonored 2/Dishonored 2.vcxproj index 4944a79..d790c5f 100644 --- a/Source/Games/Dishonored 2/Dishonored 2.vcxproj +++ b/Source/Games/Dishonored 2/Dishonored 2.vcxproj @@ -105,7 +105,7 @@ - ..\..\External\reshade;..\..\External\NGX;%(AdditionalIncludeDirectories) + ..\..\External\reshade;..\..\External\NGX;$(SolutionDir)\Source\External\FidelityFX;%(AdditionalIncludeDirectories) EnableFastChecks ProgramDatabase Sync @@ -136,8 +136,8 @@ %(Filename)_p.c - nvsdk_ngx_d_dbg.lib - ..\..\External\NGX\libs + nvsdk_ngx_d_dbg.lib;ffx_backend_dx11_x64d.lib;ffx_fsr3_x64d.lib;ffx_frameinterpolation_x64d.lib;ffx_fsr3upscaler_x64d.lib;ffx_opticalflow_x64d.lib + ..\..\External\NGX\libs;..\..\External\FidelityFX\libs %(AdditionalOptions) /machine:x64 DebugFull @@ -155,7 +155,7 @@ - ..\..\External\reshade;..\..\External\NGX;%(AdditionalIncludeDirectories) + ..\..\External\reshade;..\..\External\NGX;$(SolutionDir)\Source\External\FidelityFX;%(AdditionalIncludeDirectories) Sync Default stdcpplatest @@ -184,8 +184,8 @@ %(Filename)_p.c - nvsdk_ngx_d.lib - ..\..\External\NGX\libs + nvsdk_ngx_d.lib;ffx_backend_dx11_x64.lib;ffx_fsr3_x64.lib;ffx_frameinterpolation_x64.lib;ffx_fsr3upscaler_x64.lib;ffx_opticalflow_x64.lib + ..\..\External\NGX\libs;..\..\External\FidelityFX\libs %(AdditionalOptions) /machine:x64 false @@ -201,7 +201,7 @@ - ..\..\External\reshade;..\..\External\NGX;%(AdditionalIncludeDirectories) + ..\..\External\reshade;..\..\External\NGX;$(SolutionDir)\Source\External\FidelityFX;%(AdditionalIncludeDirectories) Sync Default stdcpplatest @@ -230,8 +230,8 @@ %(Filename)_p.c - nvsdk_ngx_d.lib - ..\..\External\NGX\libs + 
nvsdk_ngx_d.lib;ffx_backend_dx11_x64.lib;ffx_fsr3_x64.lib;ffx_frameinterpolation_x64.lib;ffx_fsr3upscaler_x64.lib;ffx_opticalflow_x64.lib + ..\..\External\NGX\libs;..\..\External\FidelityFX\libs %(AdditionalOptions) /machine:x64 false @@ -247,7 +247,7 @@ - ..\..\External\reshade;..\..\External\NGX;%(AdditionalIncludeDirectories) + ..\..\External\reshade;..\..\External\NGX;$(SolutionDir)\Source\External\FidelityFX;%(AdditionalIncludeDirectories) Sync Default stdcpplatest @@ -276,8 +276,8 @@ %(Filename)_p.c - nvsdk_ngx_d.lib - ..\..\External\NGX\libs + nvsdk_ngx_d.lib;ffx_backend_dx11_x64.lib;ffx_fsr3_x64.lib;ffx_frameinterpolation_x64.lib;ffx_fsr3upscaler_x64.lib;ffx_opticalflow_x64.lib + ..\..\External\NGX\libs;..\..\External\FidelityFX\libs %(AdditionalOptions) /machine:x64 false diff --git a/Source/Games/Dishonored 2/main.cpp b/Source/Games/Dishonored 2/main.cpp index 17f5dd3..096cdb6 100644 --- a/Source/Games/Dishonored 2/main.cpp +++ b/Source/Games/Dishonored 2/main.cpp @@ -1,6 +1,8 @@ #define GAME_DISHONORED_2 1 #define ENABLE_NGX 1 +#define ENABLE_FIDELITY_SK 1 + // Hangs on boot #define DISABLE_AUTO_DEBUGGER // Previously disabled as it made boot extremely slow, it should now be fine as we optimized the code @@ -1009,6 +1011,10 @@ class Dishonored2 final : public Game draw_data.reset = reset_dlss; draw_data.render_width = render_width_dlss; draw_data.render_height = render_height_dlss; + draw_data.vert_fov = std::atan(1.0f / projection_matrix.m11) * 2.0; + draw_data.near_plane = cb_per_view_global.cb_globalviewinfos.z; + draw_data.far_plane = cb_per_view_global.cb_globalviewinfos.w; + draw_data.frame_index = cb_luma_global_settings.FrameIndex; bool dlss_succeeded = sr_implementations[device_data.sr_type]->Draw(sr_instance_data, native_device_context.get(), draw_data); ASSERT_ONCE(dlss_succeeded); // We can't restore the original TAA pass at this point (well, we could, but it's pointless, we'll just skip one frame) // TODO: copy the resource instead? 
From 94041d959e94c5951007cecb4d8e49281060087a Mon Sep 17 00:00:00 2001 From: garamond13 <98652255+garamond13@users.noreply.github.com> Date: Fri, 6 Mar 2026 21:29:26 +0100 Subject: [PATCH 3/3] Dishonored 2: Replace TAA. --- .../AA_TAA_0x06BBC941_0x9F77B624.cs_5_0.hlsl | 642 ++++++------------ 1 file changed, 192 insertions(+), 450 deletions(-) diff --git a/Shaders/Dishonored 2/AA_TAA_0x06BBC941_0x9F77B624.cs_5_0.hlsl b/Shaders/Dishonored 2/AA_TAA_0x06BBC941_0x9F77B624.cs_5_0.hlsl index 75d187d..04e644f 100644 --- a/Shaders/Dishonored 2/AA_TAA_0x06BBC941_0x9F77B624.cs_5_0.hlsl +++ b/Shaders/Dishonored 2/AA_TAA_0x06BBC941_0x9F77B624.cs_5_0.hlsl @@ -1,5 +1,3 @@ -#include "../Includes/Common.hlsl" - struct postfx_luminance_autoexposure_t { float EngineLuminanceFactor; // Offset: 0 @@ -50,7 +48,7 @@ cbuffer PerViewCB : register(b1) float4x4 cb_previousprojectionmatrix : packoffset(c23); float4 cb_mousecursorposition : packoffset(c27); float4 cb_mousebuttonsdown : packoffset(c28); - // xy and the jitter offsets in uv space (y is flipped), zw might be the same in another space or the ones from the previous frame + // Jitters in UV space. xy current frame, zw previous frame. 
float4 cb_jittervectors : packoffset(c29); float4x4 cb_inverseviewprojectionmatrix : packoffset(c30); float4x4 cb_inverseviewmatrix : packoffset(c34); @@ -75,471 +73,215 @@ cbuffer PerViewCB : register(b1) uint cb_alwayszero : packoffset(c49.w); } -#define DISPATCH_BLOCK 16 +SamplerState smp_linearclamp_s : register(s0); +Texture2D ro_taahistory_read : register(t0); +Texture2D ro_motionvectors : register(t1); +Texture2D ro_viewcolormap : register(t2); +StructuredBuffer ro_postfx_luminance_buffautoexposure : register(t3); +RWTexture2D rw_taahistory_write : register(u0); +RWTexture2D rw_taaresult : register(u1); -#define DISABLE_TAA 0 -// This fixes highlights being clipped in a couple places, and possibly avoids HDR colors from being clipped, and blacks from being raised -#define DISABLE_CLAMP 1 -// This is "optional" as the output looks similar with and without it, but it should help improve the quality of TAA, -// the only reason they tonemapped was to theoretically store in UNORM textures (which only take a 0-1 SDR range), -// but then they actually stored TAA in float textures, and undid the tonemapping when reading them back, -// so it would have just lowered the quality of it really (I don't think they did this to normalize the history by the exposure of the previous frames, as that's not part of TM). -// Somehow this causes raised blacks when on... -#define DISABLE_TONEMAP 0 -#define ALLOW_HDR_DITHER 1 -// This fixes the red/blue/green random colors that generate in bloom -#define DISABLE_DITHER 1 +#ifndef VIEWPORT_SIZE +#define VIEWPORT_SIZE cb_taatexsize.xy +#endif -groupshared struct { float val[36]; } g1[DISPATCH_BLOCK + 2]; -groupshared struct { float val[72]; } g0[DISPATCH_BLOCK + 2]; +#ifndef INV_VIEWPORT_SIZE +#define INV_VIEWPORT_SIZE cb_taatexsize.zw +#endif -SamplerState smp_linearclamp_s : register(s0); -// The history of TAA (the accumulation of the previous frames). 
Either R11G11B10F or R16G16B16A16F (same format as rw_taahistory_write) -Texture2D ro_taahistory_read : register(t0); -// MVs on x and y, default initialized values on z and w -Texture2D ro_motionvectors : register(t1); -// Jittered color buffer (pre-TAA) -Texture2D ro_viewcolormap : register(t2); -StructuredBuffer ro_postfx_luminance_buffautoexposure : register(t3); -// The output history of our TAA. Either R11G11B10F or R16G16B16A16F -RWTexture2D rw_taahistory_write : register(u0); -// De-jittered color output. Either R11G11B10F or R16G16B16A16F -RWTexture2D rw_taaresult : register(u1); +#ifndef MIN_ALPHA +#define MIN_ALPHA 0.04 +#endif + +// Replicate what the game's native TAA is doing +// pre and post tonemap. +// -float3 linearize(float3 value) +float3 pre_tonemap(float3 color, const float exposure, const float factor) { -#if 0 // Disabled //TODO - return value; -#else // Made safe - return sqr(abs(value)) * sign(value); - //return sqr(value); -#endif + color = cb_usecompressedhdrbuffers ? color * factor : color; + color = max(0.0, color); + color = min(cb_env_tonemapping_white_level, color); + return color * exposure; } -float3 vanillaTonemap_Inverse(float3 inputColor, bool inverse = false) +float3 post_tonemap(float3 color, const float exposure, const float factor) { -#if 1 // OG inv TM - inverse = true; - // Some threshold to skip tonemapping, or treat highlights differently. - // Unless this threshold is 0, the tonemapper output won't be contiguous, - // unless both "cb_postfx_tonemapping_tonemappingcoeffs0" and "cb_postfx_tonemapping_tonemappingcoeffs1" were equal, - // or were specifically calculated to match at the threshold point (which is probably what's happening). - bool3 tonemapThreshold = inputColor < (inverse ? cb_postfx_tonemapping_tonemappingparms.y : cb_postfx_tonemapping_tonemappingparms.x); - - // This isn't the actual inverse tonemap formula, it's just called inverse anyway for some reason - float4 tonemappingCoeffs0 = inverse ? 
cb_postfx_tonemapping_tonemappingcoeffsinverse0 : cb_postfx_tonemapping_tonemappingcoeffs0; - float4 tonemappingCoeffs1 = inverse ? cb_postfx_tonemapping_tonemappingcoeffsinverse1 : cb_postfx_tonemapping_tonemappingcoeffs1; - - float3 outputColor; - float4 tonemappingCoeffs; - // This is an "advanced" version of Reinhard with levels and other kind of curves/scaling (it seemengly supports negative input values properly). - // Unless the coefficients have very specific values, this will not compress to exactly 0-1, and could end up clipping. - // The tonemap coefficients are probably something like this: - // x: exposure/brightness scaling (dividend). Likely close or identical to "y". Neutral value at 1. - // y: exposure/brightness scaling (divisor). Likely close or identical to "x". Neutral value at 1. - // z: additive brightness levelling. This can be used to raise or crush (clip) blacks. Neutral value at 0. This should generally be lower than "w". - // w: neutral Reinhard value at 1. It's likely that it often revolves around that value. - tonemappingCoeffs = tonemapThreshold.r ? tonemappingCoeffs0.xyzw : tonemappingCoeffs1.xyzw; - outputColor.r = ((tonemappingCoeffs.x * inputColor.r) + tonemappingCoeffs.z) / ((tonemappingCoeffs.y * inputColor.r) + tonemappingCoeffs.w); - - tonemappingCoeffs = tonemapThreshold.g ? tonemappingCoeffs0.xyzw : tonemappingCoeffs1.xyzw; - outputColor.g = ((tonemappingCoeffs.x * inputColor.g) + tonemappingCoeffs.z) / ((tonemappingCoeffs.y * inputColor.g) + tonemappingCoeffs.w); - - tonemappingCoeffs = tonemapThreshold.b ? tonemappingCoeffs0.xyzw : tonemappingCoeffs1.xyzw; - outputColor.b = ((tonemappingCoeffs.x * inputColor.b) + tonemappingCoeffs.z) / ((tonemappingCoeffs.y * inputColor.b) + tonemappingCoeffs.w); - - return outputColor; -#else - float4 tonemappingCoeffs0 = inverse ? cb_postfx_tonemapping_tonemappingcoeffsinverse0 : cb_postfx_tonemapping_tonemappingcoeffs0; - float4 tonemappingCoeffs1 = inverse ? 
cb_postfx_tonemapping_tonemappingcoeffsinverse1 : cb_postfx_tonemapping_tonemappingcoeffs1; - - float3 outputColor0 = (tonemappingCoeffs0.z - (tonemappingCoeffs0.w * inputColor.rgb)) / ((tonemappingCoeffs0.y * inputColor.rgb) - tonemappingCoeffs0.x); - float3 outputColor1 = (tonemappingCoeffs1.z - (tonemappingCoeffs1.w * inputColor.rgb)) / ((tonemappingCoeffs1.y * inputColor.rgb) - tonemappingCoeffs1.x); - - //TODO: add threshold? Or actually, pick based on the distance from validity... - bool3 valid0 = outputColor0 < (inverse ? cb_postfx_tonemapping_tonemappingparms.y : cb_postfx_tonemapping_tonemappingparms.x); - bool3 valid1 = outputColor1 >= (inverse ? cb_postfx_tonemapping_tonemappingparms.y : cb_postfx_tonemapping_tonemappingparms.x); - - float3 outputColor = 0.0; - if (valid0.r && valid1.r) - outputColor.r = max(outputColor0.r, outputColor1.r); - else if (valid0.r) - outputColor.r = outputColor0.r; - else if (valid1.r) - outputColor.r = outputColor1.r; - if (valid0.g && valid1.g) - outputColor.g = max(outputColor0.g, outputColor1.g); - else if (valid0.g) - outputColor.g = outputColor0.g; - else if (valid1.g) - outputColor.g = outputColor1.g; - if (valid0.b && valid1.b) - outputColor.b = max(outputColor0.b, outputColor1.b); - else if (valid0.b) - outputColor.b = outputColor0.b; - else if (valid1.b) - outputColor.b = outputColor1.b; - return outputColor; -#endif + color *= rcp(exposure); + color = cb_usecompressedhdrbuffers ? color * rcp(factor) : color; + return color; } -// This doesn't seem to make much sense given that TAA was running before tonemapping and storing its history on a linear (R11G11B10F) texture. -// My guess is that they first wrote TAA on a R8G8B8A8 UNORM texture, and hence applied gamma and tonemap to it, and then converted to storing it in linear space and forgot about it. -// The "inverse" parameter is to use the approximate inverse tonemapper TAA came with, but our implementation is more accurate. 
-float3 vanillaTonemap(float3 inputColor, bool inverse = false) +// + +float get_luma(float3 color) { -#if 0 // TAA doesn't need tonemapping to work properly, in fact, it's probably worse (and more expensive) to run it (actually this breaks the output) - return inputColor; -#endif -#if DISABLE_TONEMAP - if (inverse) - { - return inputColor * (vanillaTonemap_Inverse(MidGray) / MidGray); - } - else - { - return inputColor * (MidGray / vanillaTonemap_Inverse(MidGray)); - } -#endif -#if _9F77B624 && !DISABLE_CLAMP // LUMA: removed unnecessary clamp that clips colors (this was only done in DOTO) - if (inverse) - { - inputColor = min(inputColor, 1.0); - } -#endif - // Some threshold to skip tonemapping, or treat highlights differently. - // Unless this threshold is 0, the tonemapper output won't be contiguous, - // unless both "cb_postfx_tonemapping_tonemappingcoeffs0" and "cb_postfx_tonemapping_tonemappingcoeffs1" were equal, - // or were specifically calculated to match at the threshold point (which is probably what's happening). - bool3 tonemapThreshold = inputColor < (inverse ? cb_postfx_tonemapping_tonemappingparms.y : cb_postfx_tonemapping_tonemappingparms.x); - - // This isn't the actual inverse tonemap formula, it's just called inverse anyway for some reason - float4 tonemappingCoeffs0 = inverse ? cb_postfx_tonemapping_tonemappingcoeffsinverse0 : cb_postfx_tonemapping_tonemappingcoeffs0; - float4 tonemappingCoeffs1 = inverse ? cb_postfx_tonemapping_tonemappingcoeffsinverse1 : cb_postfx_tonemapping_tonemappingcoeffs1; - - float3 outputColor; - float4 tonemappingCoeffs; - // This is an "advanced" version of Reinhard with levels and other kind of curves/scaling (it seemengly supports negative input values properly). - // Unless the coefficients have very specific values, this will not compress to exactly 0-1, and could end up clipping. - // The tonemap coefficients are probably something like this: - // x: exposure/brightness scaling (dividend). 
Likely close or identical to "y". Neutral value at 1. - // y: exposure/brightness scaling (divisor). Likely close or identical to "x". Neutral value at 1. - // z: additive brightness levelling. This can be used to raise or crush (clip) blacks. Neutral value at 0. This should generally be lower than "w". - // w: neutral Reinhard value at 1. It's likely that it often revolves around that value. - tonemappingCoeffs = tonemapThreshold.r ? tonemappingCoeffs0.xyzw : tonemappingCoeffs1.xyzw; - outputColor.r = ((tonemappingCoeffs.x * inputColor.r) + tonemappingCoeffs.z) / ((tonemappingCoeffs.y * inputColor.r) + tonemappingCoeffs.w); - - tonemappingCoeffs = tonemapThreshold.g ? tonemappingCoeffs0.xyzw : tonemappingCoeffs1.xyzw; - outputColor.g = ((tonemappingCoeffs.x * inputColor.g) + tonemappingCoeffs.z) / ((tonemappingCoeffs.y * inputColor.g) + tonemappingCoeffs.w); - - tonemappingCoeffs = tonemapThreshold.b ? tonemappingCoeffs0.xyzw : tonemappingCoeffs1.xyzw; - outputColor.b = ((tonemappingCoeffs.x * inputColor.b) + tonemappingCoeffs.z) / ((tonemappingCoeffs.y * inputColor.b) + tonemappingCoeffs.w); - - return outputColor; + return dot(color, float3(0.2126, 0.7152, 0.0722)); } -// Runs before tonemapping -[numthreads(DISPATCH_BLOCK, DISPATCH_BLOCK, 1)] -void main(uint3 vThreadID : SV_DispatchThreadID, uint3 vGroupID : SV_GroupID, uint3 vThreadIDInGroup : SV_GroupThreadID)
+// Compresses a color by dividing it by (1 + luma), with the divisor floored at 1e-6.
+float3 tonemap(float3 color)
 {
-#if DISABLE_TAA - const uint2 vPixelPosUInt = vGroupID.xy * uint2(DISPATCH_BLOCK, DISPATCH_BLOCK) + vThreadIDInGroup.xy; // Equal to "vThreadID.xy" - GroupMemoryBarrierWithGroupSync(); - //rw_taaresult[vThreadID.xy] = ro_viewcolormap[vThreadID.xy].rgb; - rw_taaresult[vThreadID.xy] = ro_viewcolormap.SampleLevel(smp_linearclamp_s, ((vThreadID.xy + 0.5) * cb_taatexsize.zw) - (cb_jittervectors.xy * float2(1, -1)), 0).rgb; // "fast" dejitter -#else - - // Read the source color in chunks, in some weird misaligned way - - // Pixel coordinates over a single number (from the top
left, going right and then down) - float2 groupTopLeftCoords; - groupTopLeftCoords.x = (vThreadIDInGroup.y * DISPATCH_BLOCK) + vThreadIDInGroup.x; - // Center around the texel uv - groupTopLeftCoords.x += 0.5; - - // Round coordinates (or something like that, split them by block) - groupTopLeftCoords.x *= 1.f / (DISPATCH_BLOCK + 2); - groupTopLeftCoords.y = floor(groupTopLeftCoords.x); - groupTopLeftCoords.x = frac(groupTopLeftCoords.x); - groupTopLeftCoords.x *= DISPATCH_BLOCK + 2; - groupTopLeftCoords.x = floor(groupTopLeftCoords.x); - - int2 groupTopLeftCoordsInt = (int2)groupTopLeftCoords.xy; - int2 groupTopLeftPixelCoords = -(int2)vThreadIDInGroup.xy + (int2)vThreadID.xy - int2(1, 1); - float exposure = ro_postfx_luminance_buffautoexposure[cb_postfx_luminance_exposureindex.y].EngineLuminanceFactor; //TODO: rename. This isn't exposure? - float biased_exposure = exposure * exp2(-cb_postfx_luminance_customevbias); - bool someBool = groupTopLeftCoordsInt.y < (DISPATCH_BLOCK - 2); //TODO - const float exposure_view_white_level = cb_view_white_level * exposure; - // Even horizontal lines (or uneven) - if (someBool) - { - int2 pixelCoords = groupTopLeftPixelCoords.xy + groupTopLeftCoordsInt.xy; // Not the same as "vThreadID.xy" - float3 sourceColor = ro_viewcolormap.Load(int3(pixelCoords, 0)).xyz; - sourceColor *= cb_usecompressedhdrbuffers ?
exposure_view_white_level : 1.f; -#if !DISABLE_CLAMP // Disable vanilla clamping (without disabling this, HDR is clamped) - sourceColor = max(0.0, sourceColor); // Clamp colors below 0 - sourceColor = min(cb_env_autoexp_adapt_max_luminance, sourceColor); // Clip colors beyond 1 (or whatever the white level was) (hopefully this isn't used to do fades to black) -#endif - // It's unclear why they'd apply exposure twice (at least in case "cb_usecompressedhdrbuffers" was true) - sourceColor *= biased_exposure; - - float3 tonemappedColor = vanillaTonemap(sourceColor); - - float2 motionVectors = ro_motionvectors.Load(int3(pixelCoords, 0)).xy; - float motionVectorsSquaredLength = dot(motionVectors.xy, motionVectors.xy); // Sum of x and y squares (needs sqrt to find the length) - uint2 someCoords = (uint2)groupTopLeftCoordsInt.xx << int2(4, 3); - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4] = tonemappedColor.r; - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4 + 1] = tonemappedColor.g; - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4 + 2] = tonemappedColor.b; - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4 + 3] = motionVectorsSquaredLength; - g1[groupTopLeftCoordsInt.y].val[someCoords.y / 4] = motionVectors.x; - g1[groupTopLeftCoordsInt.y].val[someCoords.y / 4 + 1] = motionVectors.y; - } - groupTopLeftCoordsInt.y += DISPATCH_BLOCK - 2; - someBool = groupTopLeftCoordsInt.y < (DISPATCH_BLOCK + 2); - // Uneven horizontal lines (or even) - if (someBool) - { - int2 pixelCoords = (int2)groupTopLeftPixelCoords.xy + (int2)groupTopLeftCoordsInt.xy; // Not the same as "vThreadID.xy" - float3 sourceColor = ro_viewcolormap.Load(int3(pixelCoords, 0)).xyz; - sourceColor *= cb_usecompressedhdrbuffers ?
exposure_view_white_level : 1.f; -#if !DISABLE_CLAMP // Disable vanilla clamping (without disabling this, HDR is clamped) - sourceColor = max(0.0, sourceColor); // Clamp colors below 0 - sourceColor = min(cb_env_autoexp_adapt_max_luminance, sourceColor); // Clip colors beyond 1 (or whatever the white level was) (hopefully this isn't used to do fades to black) -#endif - // It's unclear why they'd apply exposure twice (at least in case "cb_usecompressedhdrbuffers" was true) - sourceColor *= biased_exposure; - - float3 tonemappedColor = vanillaTonemap(sourceColor); - - float2 motionVectors = ro_motionvectors.Load(int3(pixelCoords, 0)).xy; - float motionVectorsSquaredLength = dot(motionVectors.xy, motionVectors.xy); // Sum of x and y squares (needs sqrt to find the length) - uint2 someCoords = (uint2)groupTopLeftCoordsInt.xx << int2(4, 3); - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4] = tonemappedColor.r; - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4 + 1] = tonemappedColor.g; - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4 + 2] = tonemappedColor.b; - g0[groupTopLeftCoordsInt.y].val[someCoords.x / 4 + 3] = motionVectorsSquaredLength; - g1[groupTopLeftCoordsInt.y].val[someCoords.y / 4] = motionVectors.x; - g1[groupTopLeftCoordsInt.y].val[someCoords.y / 4 + 1] = motionVectors.y; - }
+    const float divisor = max(1e-6, 1.0 + get_luma(color));
+    return color * rcp(divisor);
+}
- GroupMemoryBarrierWithGroupSync();
+// Counterpart of tonemap(): multiplies the color by 1 / (1 - luma), with the
+// divisor floored at 1e-6.
+float3 inv_tonemap(float3 color)
+{
+    const float divisor = max(1e-6, 1.0 - get_luma(color));
+    return color * rcp(divisor);
+}
- // Actually do TAA
+// Projects RGB onto the Y, Co and Cg weight vectors; returns float3(Y, Co, Cg).
+float3 rgb_to_ycocg(float3 color)
+{
+    const float3 y_weights = float3(0.25, 0.5, 0.25);
+    const float3 co_weights = float3(0.5, 0.0, -0.5);
+    const float3 cg_weights = float3(-0.25, 0.5, -0.25);
+    return float3(dot(color, y_weights), dot(color, co_weights), dot(color, cg_weights));
+}
- float2 pixelCoords = (int2)vThreadID.xy + 0.5; // Center around the texel uv
+// Undoes rgb_to_ycocg(): R = Y + Co - Cg, G = Y + Cg, B = Y - Co - Cg.
+float3 ycocg_to_rgb(float3 color)
+{
+    const float3 r_weights = float3(1.0, 1.0, -1.0);
+    const float3 g_weights = float3(1.0, 0.0, 1.0);
+    const float3 b_weights = float3(1.0, -1.0, -1.0);
+    return float3(dot(color, r_weights), dot(color, g_weights), dot(color, b_weights));
+}
- // Dithering (or film grain) -#if 1 // Double the size of film grain - float2 ditherPixelCoords = (((uint2)vThreadID.xy / 2) * 2) + 0.5; -#else - float2 ditherPixelCoords = pixelCoords; -#endif - int ditherI = asint(dot(ditherPixelCoords.xy + cb_taaditherandviewportsize.xy, float2(2531011.75, 214013.15625))); - ditherI = ((ditherI * ditherI * 0x00003d73) + 0x000c0ae5) * ditherI; - ditherI = (uint)ditherI >> 9; - ditherI = ditherI + 0x3f800000; - float dither = (2.0 - asfloat(ditherI)) * 0.6 - 0.3; - - uint3 indexes; - indexes.x = (uint)vThreadIDInGroup.x << 4; - indexes.y = indexes.x + DISPATCH_BLOCK; - indexes.z = indexes.y + DISPATCH_BLOCK; - - // Read back the colors in chunks - float3 tonemappedColorSumGamma = 0; - float3 tonemappedColorSumLinear = 0; - float4 r3, r4; - r3.xyz = float3(-1, 0, 0); // x is "minMotionVectorsSquaredLength" - r4.xy = (int2)vThreadIDInGroup.xy; - - //TODO: make for loop? by 9 or 3 - float3 tonemappedColor = float3(g0[r4.y].val[indexes.x / 4], g0[r4.y].val[indexes.x / 4 + 1], g0[r4.y].val[indexes.x / 4 + 2]); - float motionVectorsSquaredLength = g0[r4.y].val[indexes.x / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); // Linearize (not needed with LUMA? Actually was it needed at all in DH2 given render here was still linear?) - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.xy) : r3.xyz; - r4.xyzw = (int4)vThreadIDInGroup.xxyy + int4(1, 2, 0, 0); - - tonemappedColor = float3(g0[r4.w].val[indexes.y / 4], g0[r4.w].val[indexes.y / 4 + 1], g0[r4.w].val[indexes.y / 4 + 2]); - motionVectorsSquaredLength = g0[r4.w].val[indexes.y / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ?
float3(motionVectorsSquaredLength, r4.xw) : r3.xyz; - - tonemappedColor = float3(g0[r4.z].val[indexes.z / 4], g0[r4.z].val[indexes.z / 4 + 1], g0[r4.z].val[indexes.z / 4 + 2]); - motionVectorsSquaredLength = g0[r4.z].val[indexes.z / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.yz) : r3.xyz; - r4.xyzw = (int4)vThreadIDInGroup.xxyy + int4(0, 1, 1, 1); - - tonemappedColor = float3(g0[r4.w].val[indexes.x / 4], g0[r4.w].val[indexes.x / 4 + 1], g0[r4.w].val[indexes.x / 4 + 2]); - motionVectorsSquaredLength = g0[r4.w].val[indexes.x / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.xw) : r3.xyz; - - tonemappedColor = float3(g0[r4.z].val[indexes.y / 4], g0[r4.z].val[indexes.y / 4 + 1], g0[r4.z].val[indexes.y / 4 + 2]); - motionVectorsSquaredLength = g0[r4.z].val[indexes.y / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.yz) : r3.xyz; - r4.xyzw = (int4)vThreadIDInGroup.xxyy + int4(2, 0, 2, 1); - - float3 tonemappedCenterColor = tonemappedColor; - - tonemappedColor = float3(g0[r4.w].val[indexes.z / 4], g0[r4.w].val[indexes.z / 4 + 1], g0[r4.w].val[indexes.z / 4 + 2]); - motionVectorsSquaredLength = g0[r4.w].val[indexes.z / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? 
float3(motionVectorsSquaredLength, r4.xw) : r3.xyz; - - tonemappedColor = float3(g0[r4.z].val[indexes.x / 4], g0[r4.z].val[indexes.x / 4 + 1], g0[r4.z].val[indexes.x / 4 + 2]); - motionVectorsSquaredLength = g0[r4.z].val[indexes.x / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.yz) : r3.xyz; - r4.xyzw = (int4)vThreadIDInGroup.xyxy + int4(1, 2, 2, 2); - - tonemappedColor = float3(g0[r4.y].val[indexes.y / 4], g0[r4.y].val[indexes.y / 4 + 1], g0[r4.y].val[indexes.y / 4 + 2]); - motionVectorsSquaredLength = g0[r4.y].val[indexes.y / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.xy) : r3.xyz; - - tonemappedColor = float3(g0[r4.w].val[indexes.z / 4], g0[r4.w].val[indexes.z / 4 + 1], g0[r4.w].val[indexes.z / 4 + 2]); - motionVectorsSquaredLength = g0[r4.w].val[indexes.z / 4 + 3]; - tonemappedColorSumGamma += tonemappedColor; - tonemappedColorSumLinear += linearize(tonemappedColor); - r3.xyz = (r3.x < motionVectorsSquaredLength) ? float3(motionVectorsSquaredLength, r4.zw) : r3.xyz; - - const uint samples_num = 9; - float4 r0, r1, r2, r6, r7, r8; - r2.w = (uint)r3.y << 3; //TODO: cast int? 
- float2 motionVectors = float2(g1[r3.z].val[r2.w / 4], g1[r3.z].val[r2.w / 4 + 1]); - float2 jitteredPixelCoords = -motionVectors.xy * cb_taaditherandviewportsize.zw + pixelCoords.xy; - float2 prevRelativeResScale = cb_prevresolutionscale.xy / cb_resolutionscale.xy; - r3.zw = (jitteredPixelCoords * prevRelativeResScale) - 0.5; - r3.zw = floor(r3.zw); - r6.xyzw = float4(0.5, 0.5, -0.5, -0.5) + r3.zwzw; - r0.xy = jitteredPixelCoords * prevRelativeResScale - r6.xy; - r3.xy = sqr(r0.yx); - r7.xy = r3.xy * r0.yx; - r7.zw = r3.yx * r0.xy + r0.xy; - r7.zw = -r7.zw * 0.5 + r3.yx; - r8.xy = 2.5 * r3.yx; - r7.xy = r7.yx * 1.5 - r8.xy; - r7.xy = 1 + r7.xy; - r0.xy = r3.xy * r0.yx - r3.xy; - r3.xy = 0.5 * r0.xy; - r8.xy = 1 - r7.wz; - r8.xy = r8.xy - r7.yx; - r0.xy = -r0.xy * 0.5 + r8.xy; - r7.xy = r7.xy + r0.yx; - r0.xy = r0.xy / r7.yx; - r0.xy = r6.xy + r0.xy; - r3.zw = 2.5 + r3.zw; - r6.xw = cb_taatexsize.zw * r6.zw; - r6.yz = cb_taatexsize.wz * r0.yx; - r8.xy = cb_taatexsize.zw * r3.zw; - const float2 maxUV = -cb_positiontoviewtexture.zw * 0.5 + cb_prevresolutionscale.xy; - - float historyTotalColorLuminance = 0; - float historyColorMinLuminance = 1.00000003e+032; - float historyColorMaxLuminance = -1.00000003e+032; - float3 historyColorTMSum = 0; - [unroll] - for (uint i = 0; i < samples_num; i++) - { - float2 historyUV; - float localExposure; - switch (i) - { - default: - case 0: - historyUV = r6.xw; - localExposure = r7.w * r7.z; - break; - case 1: - historyUV = r6.zw; - localExposure = r7.y * r7.z; - break; - case 2: - historyUV = float2(r8.x, r6.w); - localExposure = r3.x * r7.z; - break; - case 3: - historyUV = r6.xy; - localExposure = r7.x * r7.w; - break; - case 4: - historyUV = r6.zy; - localExposure = r7.y * r7.x; - break; - case 5: - historyUV = float2(r8.x, r6.y); - localExposure = r7.x * r3.x; - break; - case 6: - historyUV = float2(r6.x, r8.y); - localExposure = r3.y * r7.w; - break; - case 7: - historyUV = float2(r6.z, r8.y); - localExposure = r7.y * r3.y; 
- break; - case 8: - historyUV = r8.xy; - localExposure = r3.x * r3.y; - break; - } - historyUV = clamp(cb_prevresolutionscale.xy * historyUV, 0.0, maxUV.xy); - float3 historyColor = ro_taahistory_read.SampleLevel(smp_linearclamp_s, historyUV, 0).xyz; - float3 historyColorTM = historyColor * (cb_usecompressedhdrbuffers ? exposure_view_white_level : 1.0); - historyColorTMSum += historyColorTM * localExposure; - float historyColorLuminance = GetLuminance(historyColorTM); - historyTotalColorLuminance += historyColorLuminance; - historyColorMinLuminance = min(historyColorMinLuminance, historyColorLuminance); - historyColorMaxLuminance = max(historyColorMaxLuminance, historyColorLuminance); - } - - // Lower film grain (up to zero) as we get closer to white -#if ALLOW_HDR_DITHER - dither = (dither * clamp(abs(1.0 - GetLuminance(tonemappedCenterColor)), 0.1, 1.0)) + 1.0; // LUMA: allow a bit of film grain even on white and beyond! -#else - dither = (dither * saturate(1.0 - GetLuminance(tonemappedCenterColor))) + 1.0; // Original version made HDR "compatible" -#endif -#if DISABLE_DITHER - dither = 1.0; -#endif
+// Five-tap approximation of a Catmull-Rom filtered fetch of `tex` at `uv`.
+// The two middle weights of each axis are merged into one bilinear tap and the
+// four corner taps are left out; the result is renormalized by the weights
+// that were actually used.
+// NOTE(review): assumes INV_VIEWPORT_SIZE is the reciprocal of VIEWPORT_SIZE
+// and that both match the texel grid of `tex` (defined outside this hunk) - confirm.
+float3 sample_catmull_rom_aprox(Texture2D tex, SamplerState smp, float2 uv)
+{
+    // Fractional position inside the texel grid, and the uv with that fraction removed.
+    const float2 t = frac(uv * VIEWPORT_SIZE - 0.5);
+    const float2 base_uv = uv - t * INV_VIEWPORT_SIZE;
+    const float2 t2 = t * t;
+    const float2 t3 = t2 * t;
+
+    // Per-axis Catmull-Rom weights for the texels at -1, 0, +1 and +2.
+    const float2 w_m1 = t2 - 0.5 * (t3 + t);
+    const float2 w_0 = 1.5 * t3 - 2.5 * t2 + 1.0;
+    const float2 w_p2 = 0.5 * (t3 - t2);
+    const float2 w_p1 = 1.0 - w_m1 - w_0 - w_p2;
+    const float2 w_mid = w_0 + w_p1;
+
+    // Sample positions: outer texels, plus one merged position for the middle pair.
+    const float2 uv_m1 = base_uv - 1.0 * INV_VIEWPORT_SIZE;
+    const float2 uv_p2 = base_uv + 2.0 * INV_VIEWPORT_SIZE;
+    const float2 uv_mid = base_uv + w_p1 / w_mid * INV_VIEWPORT_SIZE;
+
+    // 2D weights of the five taps (top, left, center, right, bottom).
+    const float w_top = w_mid.x * w_m1.y;
+    const float w_left = w_m1.x * w_mid.y;
+    const float w_center = w_mid.x * w_mid.y;
+    const float w_right = w_p2.x * w_mid.y;
+    const float w_bottom = w_mid.x * w_p2.y;
+
+    float3 result = tex.SampleLevel(smp, float2(uv_mid.x, uv_m1.y), 0.0).xyz * w_top;
+    result += tex.SampleLevel(smp, float2(uv_m1.x, uv_mid.y), 0.0).xyz * w_left;
+    result += tex.SampleLevel(smp, uv_mid, 0.0).xyz * w_center;
+    result += tex.SampleLevel(smp, float2(uv_p2.x, uv_mid.y), 0.0).xyz * w_right;
+    result += tex.SampleLevel(smp, float2(uv_mid.x, uv_p2.y), 0.0).xyz * w_bottom;
+
+    // Renormalize, since the corner taps were dropped.
+    const float weight_sum = w_top + w_left + w_center + w_right + w_bottom;
+    result *= rcp(weight_sum);
+
+    return result;
+}
- // Scale the sums of samples we summed up to normalize it - float3 tonemappedColorAverageGamma = tonemappedColorSumGamma / samples_num; - float3 tonemappedColorAverageLinear = tonemappedColorSumLinear / samples_num; - float3 linearMinusGamma = tonemappedColorAverageLinear - linearize(tonemappedColorAverageGamma); -#if !DISABLE_CLAMP // Disable clamping and gammification - linearMinusGamma = max(0.0, linearMinusGamma); - linearMinusGamma = sqrt(linearMinusGamma); -#else - linearMinusGamma = sqrt(abs(linearMinusGamma)) * sign(linearMinusGamma); // Made safe for negative values -#endif
+// Pulls `color` toward the center of the box [minc, maxc] until it lies inside
+// it (the half-extent is padded by 1e-3). Colors already inside are returned
+// unchanged because the scale factor never drops below 1.
+float3 clip_to_aabb(float3 color, float3 minc, float3 maxc)
+{
+    const float3 box_center = 0.5 * (minc + maxc);
+    const float3 half_size = 0.5 * (maxc - minc) + 1e-3;
+    const float3 delta = color - box_center;
+    const float3 ratio = abs(delta * rcp(half_size));
+    const float scale = max(1.0, max(ratio.x, max(ratio.y, ratio.z)));
+    return box_center + delta * rcp(scale);
+}
- float3 someTMColor1 = min(tonemappedCenterColor * dither, tonemappedColorAverageGamma - linearMinusGamma); - float3 someTMColor2 = max(tonemappedCenterColor * dither, tonemappedColorAverageGamma + linearMinusGamma); - const float minLuminance = 0.1f; - float colorLuminanceDiff = max(minLuminance, historyColorMaxLuminance - historyColorMinLuminance); - float colorSomeLuminance = max(minLuminance,
GetLuminance(tonemappedColorAverageGamma)); - float taaHistoryAmount = (max(minLuminance, historyTotalColorLuminance / samples_num) * max(minLuminance, GetLuminance(someTMColor2) - GetLuminance(someTMColor1))) / (colorLuminanceDiff * colorSomeLuminance); - taaHistoryAmount = saturate(-1.f + taaHistoryAmount); - taaHistoryAmount = (taaHistoryAmount * 2.f - 3.f) * sqr(taaHistoryAmount) + 1.f; - float inverseTaaHistoryAmount = cb_taaamount * taaHistoryAmount + (1.f / 100.f); - float3 semiFinalTAAColor = lerp(clamp(historyColorTMSum, someTMColor1, someTMColor2), tonemappedCenterColor * dither, inverseTaaHistoryAmount); - float3 finalTAAColor = semiFinalTAAColor / (cb_usecompressedhdrbuffers ? exposure_view_white_level : 1.f); - rw_taahistory_write[vThreadID.xy] = finalTAAColor; - rw_taaresult[vThreadID.xy] = (vanillaTonemap(semiFinalTAAColor, true) / biased_exposure) / (cb_usecompressedhdrbuffers ? exposure_view_white_level : 1.f); - //rw_taaresult[vThreadID.xy] = tonemappedCenterColor; - //rw_taaresult[vThreadID.xy] = tonemappedColorAverageGamma * 7; - //rw_taaresult[vThreadID.xy] = tonemappedColorSumLinear * 3; -#endif
+// TAA resolve, one pixel per thread:
+//   1. load the current color and convert it to tonemapped YCoCg,
+//   2. scan the 3x3 neighborhood for the longest motion vector and the color
+//      statistics (mean, variance, min, max),
+//   3. fetch the history at the reprojected uv and clip it to those statistics,
+//   4. blend history and current color, then write the new history and the result.
+// pre_tonemap(), post_tonemap(), VIEWPORT_SIZE, INV_VIEWPORT_SIZE, MIN_ALPHA and
+// the bound resources are declared earlier in the file, outside this hunk.
+[numthreads(16, 16, 1)]
+void main(uint3 dtid : SV_DispatchThreadID)
+{
+    // Neighborhood offsets (the 8 texels around the center).
+    const int2 offsets[8] = {
+        int2(-1, -1),
+        int2(0, -1),
+        int2(1, -1),
+        int2(-1, 0),
+        int2(1, 0),
+        int2(-1, 1),
+        int2(0, 1),
+        int2(1, 1)
+    };
+
+    // Neighbor taps are clamped to the texture. Load() returns zero for
+    // out-of-range coordinates, which would otherwise drag black samples into
+    // the variance box and the min/max of border pixels.
+    uint tex_width, tex_height;
+    ro_viewcolormap.GetDimensions(tex_width, tex_height);
+    const int2 max_coord = int2(tex_width, tex_height) - 1;
+    const int2 pixel = int2(dtid.xy);
+
+    const float2 uv = (dtid.xy + 0.5) * INV_VIEWPORT_SIZE;
+
+    // From the game's native TAA.
+    const float r0z = ro_postfx_luminance_buffautoexposure[cb_postfx_luminance_exposureindex.y].EngineLuminanceFactor;
+    float r0w = exp2(-cb_postfx_luminance_customevbias);
+    r0w = r0z * r0w; // Exposure?
+    const float r1w = cb_view_white_level * r0z; // Compression factor?
+
+    // Current color in tonemapped YCoCg. The .w channel is carried through
+    // unchanged to rw_taaresult.
+    float4 color = ro_viewcolormap.Load(int3(dtid.xy, 0)).xyzw;
+    color.xyz = pre_tonemap(color.xyz, r0w, r1w);
+    color.xyz = tonemap(color.xyz);
+    color.xyz = rgb_to_ycocg(color.xyz);
+
+    // Find the longest motion vector and
+    // calculate variance box in 3x3 neighborhood.
+    //
+
+    // The longest motion vector.
+    float2 longest_mv = ro_motionvectors.Load(int3(dtid.xy, 0)).xy;
+
+    // Variance box: first and second moments, plus the plain min/max.
+    float3 m1 = color.xyz;
+    float3 m2 = color.xyz * color.xyz;
+    float3 minn = color.xyz;
+    float3 maxn = color.xyz;
+
+    [unroll]
+    for (int i = 0; i < 8; ++i) {
+        const int3 tap = int3(clamp(pixel + offsets[i], int2(0, 0), max_coord), 0);
+
+        // The longest motion vector.
+        const float2 mv = ro_motionvectors.Load(tap).xy;
+        longest_mv = dot(mv, mv) > dot(longest_mv, longest_mv) ? mv : longest_mv;
+
+        // Variance box.
+        float3 c = ro_viewcolormap.Load(tap).xyz;
+        c = pre_tonemap(c, r0w, r1w);
+        c = tonemap(c);
+        c = rgb_to_ycocg(c);
+        m1 += c;
+        m2 += c * c;
+        minn = min(minn, c);
+        maxn = max(maxn, c);
+    }
+
+    // Variance box: mean +/- gamma * sigma over the 9 samples. gamma shrinks
+    // from 1.5 to 0.75 as velocity (longest_mv scaled by VIEWPORT_SIZE) goes
+    // from 0 to 20.
+    m1 /= 9.0;
+    m2 /= 9.0;
+    const float3 sigma = sqrt(max(0.0, m2 - m1 * m1));
+    const float velocity = length(longest_mv * VIEWPORT_SIZE);
+    const float gamma = lerp(1.5, 0.75, saturate(velocity / 20.0));
+    const float3 minc = m1 - gamma * sigma;
+    const float3 maxc = m1 + gamma * sigma;
+
+    //
+
+    // Sample history at the uv displaced by the longest motion vector.
+    const float2 prev_uv = uv - longest_mv;
+    float3 history = sample_catmull_rom_aprox(ro_taahistory_read, smp_linearclamp_s, prev_uv);
+    history = tonemap(history);
+    history = rgb_to_ycocg(history);
+
+    // Clip history: first to the neighborhood min/max, then to the variance box.
+    history = clamp(history, minn, maxn);
+    history = clip_to_aabb(history, minc, maxc);
+
+    // Calculate the alpha (final blend).
+    float alpha = lerp(MIN_ALPHA, 0.2, saturate(velocity / 30.0));
+    // Floor the luma difference so rcp() never receives zero; that would give
+    // inf, or NaN when history.x is zero as well.
+    const float luma_diff = max(abs(color.x - history.x), 1e-6);
+    alpha = max(alpha, saturate(0.01 * history.x * rcp(luma_diff)));
+
+    // The final color.
+    color.xyz = lerp(history, color.xyz, alpha);
+
+    color.xyz = ycocg_to_rgb(color.xyz);
+    color.xyz = inv_tonemap(color.xyz);
+    // The history is stored before post_tonemap(), matching how it is read back above.
+    rw_taahistory_write[dtid.xy] = float4(color.xyz, 1.0);
+    color.xyz = post_tonemap(color.xyz, r0w, r1w);
+    rw_taaresult[dtid.xy] = color;
 }
\ No newline at end of file