diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f83f19def..605ff4786c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ option(IGL_WITH_OPENGLES "Enable IGL/OpenGL ES" OFF) option(IGL_WITH_VULKAN "Enable IGL/Vulkan" ON) option(IGL_WITH_METAL "Enable IGL/Metal" ON) option(IGL_WITH_WEBGL "Enable IGL/WebGL" OFF) +option(IGL_WITH_D3D12 "Enable IGL/DirectX 12" OFF) option(IGL_WITH_IGLU "Enable IGLU utils" ON) option(IGL_WITH_SHELL "Enable Shell utils" ON) @@ -49,6 +50,10 @@ if(NOT APPLE) set(IGL_WITH_METAL OFF) endif() +if(NOT WIN32) + set(IGL_WITH_D3D12 OFF) +endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") # disable for all targets due to warnings in third-party code add_definitions(-Wno-nullability-completeness) @@ -100,6 +105,7 @@ message(STATUS "IGL_WITH_OPENGLES = ${IGL_WITH_OPENGLES}") message(STATUS "IGL_WITH_VULKAN = ${IGL_WITH_VULKAN}") message(STATUS "IGL_WITH_METAL = ${IGL_WITH_METAL}") message(STATUS "IGL_WITH_WEBGL = ${IGL_WITH_WEBGL}") +message(STATUS "IGL_WITH_D3D12 = ${IGL_WITH_D3D12}") message(STATUS "IGL_WITH_IGLU = ${IGL_WITH_IGLU}") message(STATUS "IGL_WITH_SHELL = ${IGL_WITH_SHELL}") @@ -120,8 +126,8 @@ if(APPLE) message(FATAL_ERROR "At least one rendering backend should be defined (OpenGL, Vulkan or Metal).") endif() else() - if(NOT (IGL_WITH_OPENGL OR IGL_WITH_VULKAN OR IGL_WITH_OPENGLES OR IGL_WITH_WEBGL)) - message(FATAL_ERROR "At least one rendering backend should be defined (OpenGL or Vulkan).") + if(NOT (IGL_WITH_OPENGL OR IGL_WITH_VULKAN OR IGL_WITH_OPENGLES OR IGL_WITH_WEBGL OR IGL_WITH_D3D12)) + message(FATAL_ERROR "At least one rendering backend should be defined (OpenGL, Vulkan, or DirectX 12).") endif() endif() @@ -193,6 +199,16 @@ if(IGL_WITH_OPENXR) igl_set_folder(openxr_loader "third-party/OpenXR") endif() +if(WIN32 AND IGL_WITH_D3D12) + set(DIRECTX_HEADERS_ROOT "${IGL_ROOT_DIR}/third-party/deps/src/DirectX-Headers") + if(EXISTS "${DIRECTX_HEADERS_ROOT}/CMakeLists.txt") + 
add_subdirectory("${DIRECTX_HEADERS_ROOT}" "${CMAKE_BINARY_DIR}/DirectX-Headers") + set(DIRECTX_HEADERS_INCLUDE_DIR "${DIRECTX_HEADERS_ROOT}/include/directx") + else() + message(FATAL_ERROR "DirectX-Headers dependency not found. Run deploy_deps.py to download third-party/deps/src/DirectX-Headers.") + endif() +endif() + add_subdirectory(src/igl) if(IGL_WITH_TRACY) @@ -234,11 +250,20 @@ endif() if(APPLE AND IGL_WITH_METAL) target_compile_definitions(IGLLibrary PUBLIC "IGL_BACKEND_ENABLE_METAL=1") endif() +if(WIN32 AND IGL_WITH_D3D12) + target_compile_definitions(IGLLibrary PUBLIC "IGL_BACKEND_ENABLE_D3D12=1") +endif() target_compile_definitions(IGLLibrary PUBLIC "IGL_CMAKE_BUILD=1") include_directories(.) +# Enable CTest at top-level when tests are requested so `ctest` can discover tests +if(IGL_WITH_TESTS) + include(CTest) + enable_testing() +endif() + if(IGL_WITH_IGLU OR IGL_WITH_SAMPLES) add_library(IGLstb third-party/deps/patches/stb_impl/stb_image.c third-party/deps/patches/stb_impl/stb_image_resize.c third-party/deps/patches/stb_impl/stb_image_write.c) diff --git a/samples/desktop/CMakeLists.txt b/samples/desktop/CMakeLists.txt index d4a2d579db..48ff15312c 100644 --- a/samples/desktop/CMakeLists.txt +++ b/samples/desktop/CMakeLists.txt @@ -35,19 +35,52 @@ macro(ADD_DEMO app) target_link_libraries(${app} PRIVATE EGL) endif() target_link_libraries(${app} PRIVATE IGLstb) + + # For D3D12 builds on Windows, ensure dxil.dll is deployed next to sample + # executables so that DXC/DXIL validation and signed DXIL shaders work in + # both Debug and Release configurations. This mirrors the behavior used for + # render sessions (shell/windows/CMakeLists.txt) and unit tests + # (test_all_unittests.bat). 
+ if(IGL_WITH_D3D12 AND WIN32 AND MSVC) + find_file(DXIL_DLL_FOR_${app} + NAMES dxil.dll + PATHS + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22621.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22000.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64" + "$ENV{WindowsSdkBinPath}/x64" + NO_DEFAULT_PATH + ) + if(DXIL_DLL_FOR_${app}) + add_custom_command(TARGET ${app} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL_FOR_${app}}" + "$<TARGET_FILE_DIR:${app}>/" + COMMENT "Copying dxil.dll for ${app}" + ) + endif() + endif() endmacro() -add_demo("Tiny") +if(IGL_WITH_OPENGL OR IGL_WITH_VULKAN) + add_demo("Tiny") +endif() if(IGL_WITH_VULKAN) # this demo app does not work without Vulkan (yet) add_demo("Tiny_Mesh") endif() -add_demo("Tiny_MeshLarge") +# Tiny_MeshLarge can run on Vulkan/OpenGL; expose it for D3D12 configs too so the binary is available. +if(IGL_WITH_OPENGL OR IGL_WITH_VULKAN OR IGL_WITH_D3D12) + add_demo("Tiny_MeshLarge") -target_sources(Tiny_MeshLarge - PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/3D-Graphics-Rendering-Cookbook/shared/UtilsCubemap.cpp") -if(NOT IGL_WITH_VULKAN) - target_sources(Tiny_MeshLarge PUBLIC "${IGL_ROOT_DIR}/src/igl/vulkan/util/TextureFormat.cpp") + target_sources( + Tiny_MeshLarge + PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/3D-Graphics-Rendering-Cookbook/shared/UtilsCubemap.cpp") + if(NOT IGL_WITH_VULKAN) + target_sources(Tiny_MeshLarge PUBLIC "${IGL_ROOT_DIR}/src/igl/vulkan/util/TextureFormat.cpp") + endif() +else() + message(STATUS "Skipping Tiny_MeshLarge: no compatible backend enabled (needs OpenGL/Vulkan/D3D12)") endif() diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt index ef205a131a..63c974c1ba 100644 --- a/shell/CMakeLists.txt +++ b/shell/CMakeLists.txt @@ -82,6 +82,7 @@ if(IGL_WITH_OPENXR) add_subdirectory(openxr) endif() + macro(ADD_SHELL_SESSION target libs) set(shell_srcs apps/SessionApp.cpp renderSessions/${target}.cpp renderSessions/${target}.h) 
add_shell_session_with_srcs(${target} "${shell_srcs}" "${libs}") diff --git a/shell/windows/CMakeLists.txt b/shell/windows/CMakeLists.txt index b0ef48b155..f4fd278fed 100644 --- a/shell/windows/CMakeLists.txt +++ b/shell/windows/CMakeLists.txt @@ -41,6 +41,12 @@ endif() if(IGL_WITH_OPENGLES) add_shell_app(opengles) endif() +# Only add D3D12 shell app if the sources are present (removed in this branch) +set(IGL_D3D12_APP_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/../windows/d3d12/App.cpp") +if(IGL_WITH_D3D12 AND EXISTS "${IGL_D3D12_APP_SOURCE}") + add_shell_app(d3d12) + target_link_libraries(IGLShellApp_d3d12 PUBLIC IGLD3D12) +endif() function(ADD_SHELL_SESSION_BACKEND targetApp backend srcs libs) set(target ${targetApp}_${backend}) @@ -50,6 +56,27 @@ function(ADD_SHELL_SESSION_BACKEND targetApp backend srcs libs) target_compile_definitions(${target} PRIVATE "IGL_SHELL_SESSION=${targetApp}") target_link_libraries(${target} PUBLIC ${libs}) target_link_libraries(${target} PUBLIC IGLShellApp_${backend}) + + # Copy dxil.dll for D3D12 executables (required for DXIL signing) + if(backend STREQUAL "d3d12" AND WIN32 AND MSVC) + find_file(DXIL_DLL_FOR_${target} + NAMES dxil.dll + PATHS + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22621.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22000.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64" + "$ENV{WindowsSdkBinPath}/x64" + NO_DEFAULT_PATH + ) + if(DXIL_DLL_FOR_${target}) + add_custom_command(TARGET ${target} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL_FOR_${target}}" + "$<TARGET_FILE_DIR:${target}>/" + COMMENT "Copying dxil.dll for ${target}" + ) + endif() + endif() endfunction() function(ADD_SHELL_SESSION_BACKEND_OPENXR_SIM targetApp backend srcs libs compileDefs) @@ -72,4 +99,7 @@ macro(ADD_SHELL_SESSION_WITH_SRCS target srcs libs) if(IGL_WITH_OPENGLES) add_shell_session_backend(${target} opengles "${srcs}" "${libs}") endif() + if(IGL_WITH_D3D12 AND TARGET IGLShellApp_d3d12) + 
add_shell_session_backend(${target} d3d12 "${srcs}" "${libs}") + endif() endmacro() diff --git a/src/igl/Buffer.h b/src/igl/Buffer.h index 38d2c7e008..44ab481873 100644 --- a/src/igl/Buffer.h +++ b/src/igl/Buffer.h @@ -80,6 +80,21 @@ struct BufferDesc { /** @brief Identifier used for debugging */ std::string debugName; + /** + * @brief Element stride in bytes for storage buffers. + * + * For buffers created with BufferTypeBits::Storage, this describes the size of a single + * structured element when the buffer is viewed as a StructuredBuffer / RWStructuredBuffer. + * + * Backends that create structured SRV/UAV views (such as D3D12) use this value to populate + * D3D12_BUFFER_SRV / D3D12_BUFFER_UAV StructureByteStride and to compute NumElements. + * + * A value of 0 means "unknown/unspecified" and backends may fall back to a default + * element size (typically 4 bytes) for compatibility with existing code that assumes + * float / uint elements. + */ + size_t storageStride = 0; + BufferDesc(BufferType type = 0, const void* IGL_NULLABLE data = nullptr, size_t length = 0, diff --git a/src/igl/CMakeLists.txt b/src/igl/CMakeLists.txt index 27c02d3f54..acf1e99f77 100644 --- a/src/igl/CMakeLists.txt +++ b/src/igl/CMakeLists.txt @@ -63,9 +63,16 @@ if(IGL_WITH_METAL) target_link_libraries(IGLLibrary PUBLIC IGLMetal) endif() +if(IGL_WITH_D3D12) + add_subdirectory(d3d12) + target_link_libraries(IGLLibrary PUBLIC IGLD3D12) +endif() + # OpenGL tests use GLES on Windows and we do not use Angle with CMake - so OGL # tests are disabled for now on Windows -if(IGL_WITH_TESTS AND IGL_WITH_IGLU AND (IGL_WITH_VULKAN OR (NOT WIN32))) +# Enable tests when requested. On Windows, allow tests if either Vulkan or D3D12 is enabled +# (previously required Vulkan on Windows, which blocked D3D12-only test runs). 
+if(IGL_WITH_TESTS AND IGL_WITH_IGLU AND (IGL_WITH_VULKAN OR IGL_WITH_D3D12 OR (NOT WIN32))) add_subdirectory(tests) if((IGL_WITH_OPENGL OR IGL_WITH_OPENGLES) AND NOT APPLE) target_sources(IGLTests PRIVATE opengl/egl/Context.cpp opengl/egl/Device.cpp opengl/egl/HWDevice.cpp diff --git a/src/igl/Common.h b/src/igl/Common.h index a5d505d311..127f4e4f67 100644 --- a/src/igl/Common.h +++ b/src/igl/Common.h @@ -28,7 +28,14 @@ using Deleter = void (*)(void* IGL_NULLABLE); /// Device Capabilities or Metal Features constexpr uint32_t IGL_TEXTURE_SAMPLERS_MAX = 16; -constexpr uint32_t IGL_VERTEX_ATTRIBUTES_MAX = 24; + +// Maximum vertex attributes across all backends +// - D3D12: D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32 +// - Vulkan: VkPhysicalDeviceLimits::maxVertexInputAttributes (typically >= 16, commonly 32) +// - Metal: 31 (Metal Feature Set Tables) +// - OpenGL: GL_MAX_VERTEX_ATTRIBS (typically >= 16) +// Setting to 32 ensures compatibility with D3D12 (the most widely-supported modern API) +constexpr uint32_t IGL_VERTEX_ATTRIBUTES_MAX = 32; // maximum number of buffers that can be bound to a shader stage // See maximum number of entries in the buffer argument table, per graphics or kernel function @@ -136,6 +143,7 @@ enum class BackendType { OpenGL, Metal, Vulkan, + D3D12, // @fb-only Custom, }; @@ -146,6 +154,7 @@ enum class BackendFlavor : uint8_t { OpenGL_ES, Metal, Vulkan, + D3D12, // @fb-only }; diff --git a/src/igl/Device.cpp b/src/igl/Device.cpp index 027a990be7..a10831b942 100644 --- a/src/igl/Device.cpp +++ b/src/igl/Device.cpp @@ -50,6 +50,8 @@ Color IDevice::backendDebugColor() const noexcept { return {1.f, 0.f, 1.f, 1.f}; case BackendType::Vulkan: return {0.f, 1.f, 1.f, 1.f}; + case BackendType::D3D12: + return {0.f, 1.f, 1.f, 1.f}; // Match Vulkan for parity testing // @fb-only // @fb-only case BackendType::Custom: diff --git a/src/igl/DeviceFeatures.h b/src/igl/DeviceFeatures.h index 0a065e09f1..8c7fb77cbc 100644 --- a/src/igl/DeviceFeatures.h +++ 
b/src/igl/DeviceFeatures.h @@ -154,18 +154,30 @@ enum class DeviceRequirement { * @brief DeviceFeatureLimits provides specific limitations on certain features supported on the * device * - * BufferAlignment Required byte alignment for buffer data - * BufferNoCopyAlignment Required byte alignment for no copy buffer data - * MaxBindBytesBytes Maximum number of bytes that can be bound with bindBytes - * MaxCubeMapDimension Maximum cube map dimensions - * MaxFragmentUniformVectors Maximum fragment uniform vectors - * MaxMultisampleCount Maximum number of samples - * MaxPushConstantBytes Maximum number of bytes for Push Constants - * MaxTextureDimension1D2D Maximum texture dimensions - * MaxUniformBufferBytes Maximum number of bytes for a uniform buffer - * MaxStorageBufferBytes Maximum number of bytes for storage buffers - * MaxVertexUniformVectors Maximum vertex uniform vectors - * PushConstantsAlignment Required byte alignment for push constants data + * BufferAlignment Required byte alignment for buffer data + * BufferNoCopyAlignment Required byte alignment for no copy buffer data + * MaxBindBytesBytes Maximum number of bytes that can be bound with bindBytes + * MaxCubeMapDimension Maximum cube map dimensions + * MaxFragmentUniformVectors Maximum fragment uniform vectors + * MaxMultisampleCount Maximum number of samples + * MaxPushConstantBytes Maximum number of bytes for Push Constants + * MaxTextureDimension1D2D Maximum texture dimensions for 1D and 2D textures + * MaxTextureDimension3D Maximum texture dimensions for 3D textures + * MaxStorageBufferBytes Maximum number of bytes for storage buffers + * MaxUniformBufferBytes Maximum number of bytes for a uniform buffer + * MaxVertexUniformVectors Maximum vertex uniform vectors + * PushConstantsAlignment Required byte alignment for push constants data + * ShaderStorageBufferOffsetAlignment Required byte alignment for shader storage buffer offset + * MaxComputeWorkGroupSizeX Maximum compute work group size in X 
dimension + * MaxComputeWorkGroupSizeY Maximum compute work group size in Y dimension + * MaxComputeWorkGroupSizeZ Maximum compute work group size in Z dimension + * MaxComputeWorkGroupInvocations Maximum total compute work group invocations + * MaxVertexInputAttributes Maximum number of vertex input attributes + * MaxColorAttachments Maximum number of color attachments (render targets) + * MaxDescriptorHeapCbvSrvUav Maximum CBV/SRV/UAV descriptors in shader-visible heap (I-005) + * MaxDescriptorHeapSamplers Maximum sampler descriptors in shader-visible heap (I-005) + * MaxDescriptorHeapRtvs Maximum RTV descriptors in CPU-visible heap (I-005) + * MaxDescriptorHeapDsvs Maximum DSV descriptors in CPU-visible heap (I-005) */ enum class DeviceFeatureLimits { BufferAlignment = 0, @@ -176,11 +188,23 @@ enum class DeviceFeatureLimits { MaxMultisampleCount, MaxPushConstantBytes, MaxTextureDimension1D2D, + MaxTextureDimension3D, MaxStorageBufferBytes, MaxUniformBufferBytes, MaxVertexUniformVectors, PushConstantsAlignment, ShaderStorageBufferOffsetAlignment, + MaxComputeWorkGroupSizeX, + MaxComputeWorkGroupSizeY, + MaxComputeWorkGroupSizeZ, + MaxComputeWorkGroupInvocations, + MaxVertexInputAttributes, + MaxColorAttachments, + // I-005: Descriptor heap size limits for cross-platform compatibility + MaxDescriptorHeapCbvSrvUav, + MaxDescriptorHeapSamplers, + MaxDescriptorHeapRtvs, + MaxDescriptorHeapDsvs, }; /** @@ -192,7 +216,7 @@ enum class DeviceFeatureLimits { * Metal Metal API (macOS, iOS, etc.) 
* SpirV Standard Portable Intermediate Representation open standard format */ -enum class ShaderFamily : uint8_t { Unknown, Glsl, GlslEs, Metal, SpirV }; +enum class ShaderFamily : uint8_t { Unknown, Glsl, GlslEs, Metal, SpirV, Hlsl }; /** * @brief ShaderVersion provides information on the shader family type and version diff --git a/src/igl/PlatformDevice.h b/src/igl/PlatformDevice.h index 087df19d24..03e1d86edc 100644 --- a/src/igl/PlatformDevice.h +++ b/src/igl/PlatformDevice.h @@ -27,6 +27,7 @@ enum class PlatformDeviceType { OpenGLMacOS, OpenGLWebGL, Vulkan, + D3D12, // @fb-only }; diff --git a/src/igl/d3d12/Buffer.cpp b/src/igl/d3d12/Buffer.cpp new file mode 100644 index 0000000000..e9ae928303 --- /dev/null +++ b/src/igl/d3d12/Buffer.cpp @@ -0,0 +1,601 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +constexpr D3D12_RESOURCE_DESC makeBufferDesc(UINT64 size, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = flags; + return desc; +} +} // namespace + +Buffer::Buffer(Device& device, + igl::d3d12::ComPtr resource, + const BufferDesc& desc, + D3D12_RESOURCE_STATES initialState) + : device_(&device), + resource_(std::move(resource)), + desc_(desc), + defaultState_(computeDefaultState(desc)), + currentState_(initialState) { + // Determine storage type based on heap properties + if (resource_.Get()) { + D3D12_HEAP_PROPERTIES heapProps; + D3D12_HEAP_FLAGS 
heapFlags; + resource_->GetHeapProperties(&heapProps, &heapFlags); + + if (heapProps.Type == D3D12_HEAP_TYPE_UPLOAD) { + storage_ = ResourceStorage::Shared; + } else if (heapProps.Type == D3D12_HEAP_TYPE_READBACK) { + storage_ = ResourceStorage::Shared; + } else { + storage_ = ResourceStorage::Private; + } + + if (storage_ != ResourceStorage::Private) { + currentState_ = D3D12_RESOURCE_STATE_GENERIC_READ; + } + + // Track resource creation + D3D12Context::trackResourceCreation("Buffer", desc_.length); + } +} + +Buffer::~Buffer() { + if (resource_.Get()) { + // Track resource destruction + D3D12Context::trackResourceDestruction("Buffer", desc_.length); + } + if (mappedPtr_) { + unmap(); + } +} + +Result Buffer::upload(const void* data, const BufferRange& range) { + if (resource_.Get() == nullptr) { + return Result(Result::Code::ArgumentInvalid, "Buffer resource is null"); + } + + if (!data) { + IGL_LOG_ERROR("Buffer::upload: data is NULL!\n"); + return Result(Result::Code::ArgumentInvalid, "Upload data is null"); + } + + // Validate range + if (range.size == 0 || range.offset + range.size > desc_.length) { + return Result(Result::Code::ArgumentOutOfRange, "Upload range is out of bounds"); + } + + // For UPLOAD heap, map, copy, unmap + if (storage_ == ResourceStorage::Shared) { + void* mappedData = nullptr; + D3D12_RANGE readRange = {0, 0}; // Not reading from GPU + + HRESULT hr = resource_->Map(0, &readRange, &mappedData); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to map buffer"); + } + + uint8_t* dest = static_cast(mappedData) + range.offset; + std::memcpy(dest, data, range.size); + + D3D12_RANGE writtenRange = {range.offset, range.offset + range.size}; + resource_->Unmap(0, &writtenRange); + + return Result(Result::Code::Ok); + } + + // For DEFAULT heap, need upload via intermediate buffer + if (!device_) { + return Result(Result::Code::RuntimeError, "Buffer device is null"); + } + + auto& ctx = device_->getD3D12Context(); + 
ID3D12Device* d3dDevice = ctx.getDevice(); + ID3D12CommandQueue* queue = ctx.getCommandQueue(); + if (!d3dDevice || !queue) { + return Result(Result::Code::RuntimeError, "D3D12 device or command queue unavailable"); + } + + // Reclaim completed upload buffers before allocating new ones. + device_->processCompletedUploads(); + + UploadRingBuffer* ringBuffer = device_->getUploadRingBuffer(); + UploadRingBuffer::Allocation ringAllocation; + bool useRingBuffer = false; + + // Get fence value that will signal when this upload completes + const UINT64 uploadFenceValue = device_->getNextUploadFenceValue(); + + if (ringBuffer) { + // D3D12 requires 256-byte alignment (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + ringAllocation = ringBuffer->allocate(range.size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, uploadFenceValue); + + if (ringAllocation.valid) { + // Successfully allocated from ring buffer + std::memcpy(ringAllocation.cpuAddress, data, range.size); + useRingBuffer = true; + } + } + + // Fallback: create temporary upload buffer if ring buffer allocation failed + igl::d3d12::ComPtr uploadBuffer; + HRESULT hr = S_OK; + + if (!useRingBuffer) { + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + const auto uploadDesc = makeBufferDesc(range.size); + hr = d3dDevice->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadBuffer.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create upload buffer"); + } + + void* mapped = nullptr; + D3D12_RANGE rr = {0, 0}; + hr = uploadBuffer->Map(0, &rr, &mapped); + if (FAILED(hr) || mapped == nullptr) { + return Result(Result::Code::RuntimeError, "Failed to map upload buffer"); + } + std::memcpy(mapped, data, range.size); + uploadBuffer->Unmap(0, nullptr); + } + + igl::d3d12::ComPtr allocator = device_->getUploadCommandAllocator(); + if 
(!allocator.Get()) { + return Result(Result::Code::RuntimeError, "Failed to get command allocator from pool"); + } + + igl::d3d12::ComPtr cmdList; + hr = d3dDevice->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())); + if (FAILED(hr)) { + // Return allocator to pool with fence value 0 (immediately available) + device_->returnUploadCommandAllocator(allocator, 0); + return Result(Result::Code::RuntimeError, "Failed to create command list for upload"); + } + + if (currentState_ != D3D12_RESOURCE_STATE_COPY_DEST) { + // Validate state transition and insert intermediate state if needed + const bool needsIntermediate = !D3D12StateTransition::isLegalDirectTransition( + currentState_, D3D12_RESOURCE_STATE_COPY_DEST); + + if (needsIntermediate) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = currentState_; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + currentState_ = D3D12_RESOURCE_STATE_COMMON; + } + + // Now transition to COPY_DEST (guaranteed legal from COMMON or if direct was legal) + D3D12_RESOURCE_BARRIER toCopyDest = {}; + toCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCopyDest.Transition.pResource = resource_.Get(); + toCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCopyDest.Transition.StateBefore = currentState_; + toCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + cmdList->ResourceBarrier(1, &toCopyDest); + } + + // Copy from either ring buffer or temporary upload buffer + if (useRingBuffer) { + cmdList->CopyBufferRegion(resource_.Get(), range.offset, + ringBuffer->getUploadHeap(), ringAllocation.offset, + range.size); + } 
else { + cmdList->CopyBufferRegion(resource_.Get(), range.offset, uploadBuffer.Get(), 0, range.size); + } + + // Prepare state transition barriers but defer state update until after GPU completes + D3D12_RESOURCE_STATES postState = + (defaultState_ == D3D12_RESOURCE_STATE_COMMON) ? D3D12_RESOURCE_STATE_GENERIC_READ : defaultState_; + + if (postState != D3D12_RESOURCE_STATE_COPY_DEST) { + // Validate state transition and insert intermediate state if needed + const bool needsIntermediate = !D3D12StateTransition::isLegalDirectTransition( + D3D12_RESOURCE_STATE_COPY_DEST, postState); + + if (needsIntermediate) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + + // Then transition to final state + D3D12_RESOURCE_BARRIER toFinal = {}; + toFinal.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toFinal.Transition.pResource = resource_.Get(); + toFinal.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toFinal.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + toFinal.Transition.StateAfter = postState; + cmdList->ResourceBarrier(1, &toFinal); + } else { + // Direct transition is legal + D3D12_RESOURCE_BARRIER toDefault = {}; + toDefault.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toDefault.Transition.pResource = resource_.Get(); + toDefault.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toDefault.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + toDefault.Transition.StateAfter = postState; + cmdList->ResourceBarrier(1, &toDefault); + } + } + + hr = cmdList->Close(); + if (FAILED(hr)) { + // Return allocator to pool with fence value 0 
(immediately available) + device_->returnUploadCommandAllocator(allocator, 0); + return Result(Result::Code::RuntimeError, "Failed to close upload command list"); + } + + ID3D12CommandList* lists[] = {cmdList.Get()}; + queue->ExecuteCommandLists(1, lists); + + // Ensure the allocator is not reused until GPU completes execution + ID3D12Fence* uploadFence = device_->getUploadFence(); + + // Signal must succeed; otherwise fence will never reach uploadFenceValue + hr = queue->Signal(uploadFence, uploadFenceValue); + if (FAILED(hr)) { + // Return allocator immediately (no fence wait needed) + device_->returnUploadCommandAllocator(allocator, 0); + + // Check for device removal to provide richer diagnostics + Result deviceStatus = device_->checkDeviceRemoval(); + if (!deviceStatus.isOk()) { + return deviceStatus; // Device removed - return specific error + } + + return Result(Result::Code::RuntimeError, "Failed to signal upload fence"); + } + + // Return allocator to pool with fence value (will be reused after fence is signaled) + device_->returnUploadCommandAllocator(allocator, uploadFenceValue); + + // Only track temporary upload buffers (ring buffer is persistent) + // Pass uploadFenceValue (already signaled above) to track with correct fence + if (!useRingBuffer && uploadBuffer.Get()) { + device_->trackUploadBuffer(std::move(uploadBuffer), uploadFenceValue); + } + + // Wait for upload fence to signal before returning. + // This ensures the buffer upload completes before the caller uses it. + Result waitResult = device_->waitForUploadFence(uploadFenceValue); + if (!waitResult.isOk()) { + return waitResult; + } + + // Now safe to update resource state; GPU upload has completed + currentState_ = (postState != D3D12_RESOURCE_STATE_COPY_DEST) ? 
postState : D3D12_RESOURCE_STATE_COPY_DEST; + + return Result(Result::Code::Ok); +} + +void* Buffer::map(const BufferRange& range, Result* IGL_NULLABLE outResult) { + if (resource_.Get() == nullptr) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Buffer resource is null"); + return nullptr; + } + + // Validate range + if (range.offset > desc_.length || range.size > desc_.length || + (range.offset + range.size) > desc_.length) { + Result::setResult(outResult, Result::Code::ArgumentOutOfRange, "Map range is out of bounds"); + return nullptr; + } + + // Handle mapping of DEFAULT heap storage buffers requested as Shared + // This happens when compute shader output buffers need to be read back + const bool isStorageBuffer = (desc_.type & BufferDesc::BufferTypeBits::Storage) != 0; + const bool requestedShared = (desc_.storage == ResourceStorage::Shared || + desc_.storage == ResourceStorage::Managed); + const bool needsReadbackStaging = (storage_ != ResourceStorage::Shared) && + isStorageBuffer && requestedShared; + + if (needsReadbackStaging) { + // Storage buffer in DEFAULT heap but requested as Shared - need staging + if (!device_) { + Result::setResult(outResult, Result::Code::RuntimeError, "Device is null"); + return nullptr; + } + + auto& ctx = device_->getD3D12Context(); + auto* d3dDevice = ctx.getDevice(); + auto* queue = ctx.getCommandQueue(); + + if (!d3dDevice || !queue) { + Result::setResult(outResult, Result::Code::RuntimeError, "D3D12 device or queue is null"); + return nullptr; + } + + // Create READBACK staging buffer if not already created + if (!readbackStagingBuffer_.Get()) { + D3D12_HEAP_PROPERTIES readbackHeap = {}; + readbackHeap.Type = D3D12_HEAP_TYPE_READBACK; + readbackHeap.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + readbackHeap.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Alignment = 0; + bufferDesc.Width 
= desc_.length; + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + HRESULT hr = d3dDevice->CreateCommittedResource( + &readbackHeap, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(readbackStagingBuffer_.GetAddressOf())); + + if (FAILED(hr)) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create readback staging buffer"); + return nullptr; + } + } + + // ALWAYS copy from DEFAULT buffer to readback staging when mapping + // The DEFAULT buffer content may have changed since the last map() call + // (e.g., via copyTextureToBuffer or compute shader writes) + IGL_D3D12_LOG_VERBOSE("Buffer::map() - Copying from DEFAULT buffer (resource=%p) to readback staging\n", + resource_.Get()); + + // D-001: Use pooled allocator instead of creating transient one + auto allocator = device_->getUploadCommandAllocator(); + if (!allocator.Get()) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to get allocator from pool"); + return nullptr; + } + + igl::d3d12::ComPtr cmdList; + if (FAILED(d3dDevice->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())))) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create command list for buffer copy"); + // D-001: Return allocator to pool even on failure + device_->returnUploadCommandAllocator(allocator, 0); + return nullptr; + } + + // Transition source buffer to COPY_SOURCE with validation. 
+ const D3D12_RESOURCE_STATES assumedState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + const bool needsIntermediate = !D3D12StateTransition::isLegalDirectTransition( + assumedState, D3D12_RESOURCE_STATE_COPY_SOURCE); + + if (needsIntermediate) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = assumedState; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + + // Then to COPY_SOURCE + D3D12_RESOURCE_BARRIER toCopySource = {}; + toCopySource.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCopySource.Transition.pResource = resource_.Get(); + toCopySource.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCopySource.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + toCopySource.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + cmdList->ResourceBarrier(1, &toCopySource); + } else { + // Direct transition is legal + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = assumedState; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + cmdList->ResourceBarrier(1, &barrier); + } + + // Copy entire buffer. + cmdList->CopyBufferRegion(readbackStagingBuffer_.Get(), 0, resource_.Get(), 0, desc_.length); + + // Transition back with validation. 
+ const bool needsIntermediateBack = !D3D12StateTransition::isLegalDirectTransition( + D3D12_RESOURCE_STATE_COPY_SOURCE, assumedState); + + if (needsIntermediateBack) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + + // Then back to original state + D3D12_RESOURCE_BARRIER toOriginal = {}; + toOriginal.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toOriginal.Transition.pResource = resource_.Get(); + toOriginal.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toOriginal.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + toOriginal.Transition.StateAfter = assumedState; + cmdList->ResourceBarrier(1, &toOriginal); + } else { + // Direct transition is legal + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = assumedState; + cmdList->ResourceBarrier(1, &barrier); + } + + cmdList->Close(); + ID3D12CommandList* lists[] = {cmdList.Get()}; + queue->ExecuteCommandLists(1, lists); + + // Wait for copy to complete + ctx.waitForGPU(); + + // D-001: Return allocator to pool after synchronous GPU wait + device_->returnUploadCommandAllocator(allocator, 0); + + // Map the READBACK staging buffer + D3D12_RANGE readRange = {static_cast(range.offset), + static_cast(range.offset + range.size)}; + HRESULT hr = readbackStagingBuffer_->Map(0, &readRange, &mappedPtr_); + + if (FAILED(hr)) { + 
Result::setResult(outResult, Result::Code::RuntimeError, "Failed to map readback staging buffer"); + return nullptr; + } + + Result::setOk(outResult); + return static_cast(mappedPtr_) + range.offset; + } + + // Standard path for UPLOAD/READBACK heap buffers + if (storage_ != ResourceStorage::Shared) { + Result::setResult(outResult, Result::Code::Unsupported, + "Cannot map GPU-only buffer (use ResourceStorage::Shared)"); + return nullptr; + } + + if (mappedPtr_) { + // Already mapped, return offset pointer + Result::setOk(outResult); + return static_cast(mappedPtr_) + range.offset; + } + + D3D12_RANGE readRange = {0, 0}; // Not reading from GPU + HRESULT hr = resource_->Map(0, &readRange, &mappedPtr_); + + if (FAILED(hr)) { + Result::setResult(outResult, Result::Code::RuntimeError, "Failed to map buffer"); + return nullptr; + } + + Result::setOk(outResult); + return static_cast(mappedPtr_) + range.offset; +} + +void Buffer::unmap() { + if (!mappedPtr_) { + return; + } + + // Unmap the appropriate resource (staging buffer or main buffer) + if (readbackStagingBuffer_.Get()) { + readbackStagingBuffer_->Unmap(0, nullptr); + } else if (resource_.Get()) { + resource_->Unmap(0, nullptr); + } + + mappedPtr_ = nullptr; +} + +BufferDesc::BufferAPIHint Buffer::requestedApiHints() const noexcept { + return desc_.hint; +} + +BufferDesc::BufferAPIHint Buffer::acceptedApiHints() const noexcept { + return desc_.hint; +} + +ResourceStorage Buffer::storage() const noexcept { + return storage_; +} + +size_t Buffer::getSizeInBytes() const { + return desc_.length; +} + +uint64_t Buffer::gpuAddress(size_t offset) const { + if (resource_.Get() == nullptr) { + return 0; + } + + return resource_->GetGPUVirtualAddress() + offset; +} + +BufferDesc::BufferType Buffer::getBufferType() const { + return desc_.type; +} + +D3D12_RESOURCE_STATES Buffer::computeDefaultState(const BufferDesc& desc) const { + D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COMMON; + + if ((desc.type & 
BufferDesc::BufferTypeBits::Storage) != 0) { + state |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + if ((desc.type & BufferDesc::BufferTypeBits::Vertex) != 0 || + (desc.type & BufferDesc::BufferTypeBits::Uniform) != 0) { + state |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if ((desc.type & BufferDesc::BufferTypeBits::Index) != 0) { + state |= D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if ((desc.type & BufferDesc::BufferTypeBits::Indirect) != 0) { + state |= D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + + if (state == D3D12_RESOURCE_STATE_COMMON) { + return D3D12_RESOURCE_STATE_GENERIC_READ; + } + + // Remove COMMON bit if other bits are set. + state &= ~D3D12_RESOURCE_STATE_COMMON; + return state; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Buffer.h b/src/igl/d3d12/Buffer.h new file mode 100644 index 0000000000..1560fee8c2 --- /dev/null +++ b/src/igl/d3d12/Buffer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +class Buffer final : public IBuffer, public std::enable_shared_from_this { + public: + Buffer(Device& device, + igl::d3d12::ComPtr resource, + const BufferDesc& desc, + D3D12_RESOURCE_STATES initialState); + ~Buffer() override; + + Result upload(const void* data, const BufferRange& range) override; + void* map(const BufferRange& range, Result* IGL_NULLABLE outResult) override; + void unmap() override; + + BufferDesc::BufferAPIHint requestedApiHints() const noexcept override; + BufferDesc::BufferAPIHint acceptedApiHints() const noexcept override; + ResourceStorage storage() const noexcept override; + + size_t getSizeInBytes() const override; + uint64_t gpuAddress(size_t offset = 0) const override; + + BufferDesc::BufferType getBufferType() const override; + + // D3D12-specific accessors + ID3D12Resource* getResource() const { return resource_.Get(); } + // Returns the element stride in bytes for storage buffers, as provided in BufferDesc. + // A value of 0 means "unspecified"; callers should fall back to a reasonable default + // (e.g. 4 bytes) when this occurs. 
+ size_t getStorageElementStride() const noexcept { return desc_.storageStride; } + + private: + [[nodiscard]] D3D12_RESOURCE_STATES computeDefaultState(const BufferDesc& desc) const; + + Device* device_ = nullptr; + igl::d3d12::ComPtr resource_; + BufferDesc desc_; + void* mappedPtr_ = nullptr; + ResourceStorage storage_ = ResourceStorage::Private; + D3D12_RESOURCE_STATES defaultState_ = D3D12_RESOURCE_STATE_GENERIC_READ; + // State tracking: single non-mutable field, updated only via non-const methods + D3D12_RESOURCE_STATES currentState_ = D3D12_RESOURCE_STATE_COMMON; + + // Staging buffer for mapping DEFAULT heap storage buffers requested as Shared + igl::d3d12::ComPtr readbackStagingBuffer_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/CMakeLists.txt b/src/igl/d3d12/CMakeLists.txt new file mode 100644 index 0000000000..9d1e8bf8c8 --- /dev/null +++ b/src/igl/d3d12/CMakeLists.txt @@ -0,0 +1,55 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +cmake_minimum_required(VERSION 3.19) + +project(IGLD3D12 CXX C) + +file(GLOB SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) +file(GLOB HEADER_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.h) + +add_library(IGLD3D12 ${SRC_FILES} ${HEADER_FILES}) + +target_link_libraries(IGLD3D12 PRIVATE IGLLibrary IGLGlslang) + +igl_set_cxxstd(IGLD3D12 20) +igl_set_folder(IGLD3D12 "IGL") + +# Link DirectX 12 system libraries +target_link_libraries(IGLD3D12 PUBLIC + d3d12.lib + dxgi.lib + dxguid.lib + dxcompiler.lib + d3dcompiler.lib +) + +# DirectX headers (fetched via bootstrap-deps) +if(NOT DIRECTX_HEADERS_INCLUDE_DIR) + set(DIRECTX_HEADERS_INCLUDE_DIR "${IGL_ROOT_DIR}/third-party/deps/src/DirectX-Headers/include/directx") +endif() +target_include_directories(IGLD3D12 PUBLIC "${DIRECTX_HEADERS_INCLUDE_DIR}") +if(TARGET Microsoft::DirectX-Headers) + target_link_libraries(IGLD3D12 PUBLIC Microsoft::DirectX-Headers) +endif() + +# Include SPIRV-Cross for potential SPIR-V to HLSL conversion +target_include_directories(IGLD3D12 PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/SPIRV-Cross") +target_include_directories(IGLD3D12 PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/glslang") + +# Windows-specific definitions (removed - defined in D3D12Headers.h to avoid conflicts) + +if(WIN32 AND MSVC) + # Enable multithreaded compilation + target_compile_options(IGLD3D12 PRIVATE "/MP") + # Disable NOMINMAX warning (already defined in D3D12Headers.h) + target_compile_options(IGLD3D12 PRIVATE "/wd4005") + # Note: /Zc:preprocessor (conformant preprocessor) causes issues with + # some d3dx12 headers and WRL, so we're using the traditional preprocessor + + # Note: dxil.dll deployment is now handled by individual executable targets + # (shell sessions and tests) to ensure it's copied to the correct directories. 
+ # See shell/windows/CMakeLists.txt and src/igl/tests/CMakeLists.txt +endif() diff --git a/src/igl/d3d12/CommandBuffer.cpp b/src/igl/d3d12/CommandBuffer.cpp new file mode 100644 index 0000000000..a42afcac9f --- /dev/null +++ b/src/igl/d3d12/CommandBuffer.cpp @@ -0,0 +1,760 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +CommandBuffer::CommandBuffer(Device& device, const CommandBufferDesc& desc) + : ICommandBuffer(desc), device_(device) { + auto* d3dDevice = device_.getD3D12Context().getDevice(); + + if (!d3dDevice) { + IGL_DEBUG_ASSERT(false, "D3D12 device is null - context not initialized"); + IGL_LOG_ERROR("D3D12 device is null - context not initialized"); + return; // Leave commandList_ null to indicate failure + } + + // Check if device is in good state + HRESULT deviceRemovedReason = d3dDevice->GetDeviceRemovedReason(); + if (FAILED(deviceRemovedReason)) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "D3D12 device was removed before creating command buffer. 
Reason: 0x%08X\n" + " 0x887A0005 = DXGI_ERROR_DEVICE_REMOVED\n" + " 0x887A0006 = DXGI_ERROR_DEVICE_HUNG\n" + " 0x887A0007 = DXGI_ERROR_DEVICE_RESET\n" + " 0x887A0020 = DXGI_ERROR_DRIVER_INTERNAL_ERROR", + static_cast(deviceRemovedReason)); + IGL_LOG_ERROR(errorMsg); + IGL_DEBUG_ASSERT(false, "Device removed - see error above"); + return; // Leave commandList_ null to indicate failure + } + + // Use the current frame's command allocator - allocators are created ready-to-use + // Following Microsoft's D3D12HelloFrameBuffering: each frame has its own allocator + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto* frameAllocator = ctx.getFrameContexts()[frameIdx].allocator.Get(); + + HRESULT hr = d3dDevice->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + frameAllocator, // Use frame allocator directly - it's in ready-to-use state after creation + nullptr, + IID_PPV_ARGS(commandList_.GetAddressOf())); + + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), "Failed to create command list: HRESULT = 0x%08X", static_cast(hr)); + IGL_DEBUG_ASSERT(false, "%s", errorMsg); + IGL_LOG_ERROR(errorMsg); + return; // Leave commandList_ null to indicate failure + } + + // Command lists are created in recording state, close it for now + commandList_->Close(); + + // Create scheduling fence for waitUntilScheduled() support + // D-003: Fence event is now created per-wait in waitUntilScheduled() to eliminate TOCTOU race + hr = d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(scheduleFence_.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), "Failed to create scheduling fence: HRESULT = 0x%08X", static_cast(hr)); + IGL_DEBUG_ASSERT(false, "%s", errorMsg); + IGL_LOG_ERROR(errorMsg); + return; // Leave fence null to indicate failure + } +} + +CommandBuffer::~CommandBuffer() { + // D-003: No need to clean up scheduleFenceEvent_ - now using 
dedicated events per wait + // scheduleFence_ is a ComPtr and will be automatically released +} + +// Pre-allocated descriptor heap with fail-fast on exhaustion. +// Allocates from pre-allocated pages, switching pages as needed. +// Fails immediately if all pages are exhausted (Vulkan fail-fast pattern). +Result CommandBuffer::getNextCbvSrvUavDescriptor(uint32_t* outDescriptorIndex) { + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + auto& pages = frameCtx.cbvSrvUavHeapPages; + uint32_t currentPageIdx = frameCtx.currentCbvSrvUavPageIndex; + + // Validate we have at least one page + if (pages.empty()) { + return Result{Result::Code::RuntimeError, "No CBV/SRV/UAV descriptor heap pages available"}; + } + + // Get current page index validation + if (currentPageIdx >= pages.size()) { + return Result{Result::Code::RuntimeError, "Invalid descriptor heap page index"}; + } + + // Check current offset before acquiring reference (avoid use-after-reallocation) + const uint32_t currentOffset = frameCtx.nextCbvSrvUavDescriptor; + + // Check if current page has space; fail fast if pre-allocation is enabled. + if (currentOffset >= pages[currentPageIdx].capacity) { + // Current page is full - check if we can move to next page + const uint32_t nextPageIdx = currentPageIdx + 1; + + // Fail-fast if pre-allocated pages are exhausted (Vulkan pattern). + if (nextPageIdx >= pages.size()) { + char errorMsg[512]; + // All pages exhausted - fail immediately (no mid-frame allocation). + // Calculate actual descriptor capacity from allocated pages. + uint32_t totalCapacity = 0; + for (const auto& page : pages) { + totalCapacity += page.capacity; + } + snprintf(errorMsg, sizeof(errorMsg), + "CBV/SRV/UAV descriptor heap exhausted! Frame %u used all %zu pre-allocated pages (%u descriptors total). " + "This frame requires more descriptors than available. 
" + "Increase D3D12ContextConfig::maxHeapPages or enable preAllocateDescriptorPages=true, or optimize descriptor usage.", + frameIdx, pages.size(), totalCapacity); + return Result{Result::Code::RuntimeError, errorMsg}; + } + + // Move to next pre-allocated page. + currentPageIdx = nextPageIdx; + frameCtx.currentCbvSrvUavPageIndex = currentPageIdx; + frameCtx.nextCbvSrvUavDescriptor = 0; // Reset offset for new page + + IGL_D3D12_LOG_VERBOSE("D3D12: Switching to pre-allocated CBV/SRV/UAV page %u for frame %u\n", + currentPageIdx, frameIdx); + + // Update active heap when switching pages. + frameCtx.activeCbvSrvUavHeap = pages[currentPageIdx].heap; + + // Rebind heap on the command list when switching pages. + if (commandList_.Get()) { + ID3D12DescriptorHeap* heaps[] = { + frameCtx.activeCbvSrvUavHeap.Get(), + frameCtx.samplerHeap.Get() + }; + commandList_->SetDescriptorHeaps(2, heaps); + IGL_D3D12_LOG_VERBOSE("D3D12: Rebound descriptor heaps after switching to page %u\n", currentPageIdx); + } + } + + // SAFE: Acquire reference AFTER any potential reallocation from emplace_back + auto& currentPage = pages[currentPageIdx]; + + // Allocate from current page + const uint32_t descriptorIndex = frameCtx.nextCbvSrvUavDescriptor++; + currentPage.used = frameCtx.nextCbvSrvUavDescriptor; + + // Track peak usage for telemetry + const uint32_t totalUsed = static_cast(currentPageIdx * kDescriptorsPerPage + descriptorIndex); + if (totalUsed > frameCtx.peakCbvSrvUavUsage) { + frameCtx.peakCbvSrvUavUsage = totalUsed; + } + + // Return the descriptor index within the current page + *outDescriptorIndex = descriptorIndex; + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::getNextCbvSrvUavDescriptor() - frame %u, page %u, descriptor %u (total allocated: %u)\n", + frameIdx, currentPageIdx, descriptorIndex, totalUsed); +#endif + + return Result{}; +} + +// Allocate a contiguous range of CBV/SRV/UAV descriptors from pre-allocated pages. 
+// Ensures the range can be bound as a single descriptor table. +// Fails immediately if all pages are exhausted (Vulkan fail-fast pattern). +Result CommandBuffer::allocateCbvSrvUavRange(uint32_t count, uint32_t* outBaseDescriptorIndex) { + if (count == 0) { + return Result{Result::Code::ArgumentInvalid, "Cannot allocate zero descriptors"}; + } + + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + auto& pages = frameCtx.cbvSrvUavHeapPages; + uint32_t currentPageIdx = frameCtx.currentCbvSrvUavPageIndex; + + if (pages.empty()) { + return Result{Result::Code::RuntimeError, "No CBV/SRV/UAV descriptor heap pages available"}; + } + + if (currentPageIdx >= pages.size()) { + return Result{Result::Code::RuntimeError, "Invalid descriptor heap page index"}; + } + + // Check space before acquiring reference (avoid use-after-reallocation) + const uint32_t currentOffset = frameCtx.nextCbvSrvUavDescriptor; + const uint32_t spaceRemaining = pages[currentPageIdx].capacity - currentOffset; + + // Check if the requested range fits in the current page; fail fast on exhaustion. + if (count > spaceRemaining) { + // Not enough space in current page - validate range and check for next page + if (count > kDescriptorsPerPage) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Requested descriptor range (%u) exceeds page capacity (%u)", + count, kDescriptorsPerPage); + return Result{Result::Code::ArgumentOutOfRange, errorMsg}; + } + + // Move to next pre-allocated page (fail-fast if exhausted). + const uint32_t nextPageIdx = currentPageIdx + 1; + if (nextPageIdx >= pages.size()) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "CBV/SRV/UAV descriptor heap exhausted! Frame %u needs page %u for contiguous range of %u descriptors, " + "but only %zu pages are pre-allocated. 
" + "Increase D3D12ContextConfig::maxHeapPages or optimize descriptor usage.", + frameIdx, nextPageIdx, count, pages.size()); + return Result{Result::Code::RuntimeError, errorMsg}; + } + + // Move to next pre-allocated page + currentPageIdx = nextPageIdx; + frameCtx.currentCbvSrvUavPageIndex = currentPageIdx; + frameCtx.nextCbvSrvUavDescriptor = 0; + + IGL_D3D12_LOG_VERBOSE("D3D12: Switching to pre-allocated CBV/SRV/UAV page %u for contiguous range of %u descriptors\n", + currentPageIdx, count); + + // Rebind heap on command list when switching pages + frameCtx.activeCbvSrvUavHeap = pages[currentPageIdx].heap; + if (commandList_.Get()) { + ID3D12DescriptorHeap* heaps[] = { + frameCtx.activeCbvSrvUavHeap.Get(), + frameCtx.samplerHeap.Get() + }; + commandList_->SetDescriptorHeaps(2, heaps); + } + } + + // SAFE: Acquire reference AFTER any potential reallocation from emplace_back + auto& currentPage = pages[currentPageIdx]; + + // Allocate the range from current page + const uint32_t baseIndex = frameCtx.nextCbvSrvUavDescriptor; + frameCtx.nextCbvSrvUavDescriptor += count; + currentPage.used = frameCtx.nextCbvSrvUavDescriptor; + + // Track peak usage + const uint32_t totalUsed = static_cast(currentPageIdx * kDescriptorsPerPage + baseIndex + count); + if (totalUsed > frameCtx.peakCbvSrvUavUsage) { + frameCtx.peakCbvSrvUavUsage = totalUsed; + } + + *outBaseDescriptorIndex = baseIndex; + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::allocateCbvSrvUavRange() - frame %u, page %u, base %u, count %u\n", + frameIdx, currentPageIdx, baseIndex, count); +#endif + + return Result{}; +} + +uint32_t& CommandBuffer::getNextSamplerDescriptor() { + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + // Add bounds checking to prevent sampler descriptor heap overflow. + // The sampler heap is allocated with kSamplerHeapSize descriptors. 
+ const uint32_t currentValue = frameCtx.nextSamplerDescriptor; + + // Track peak usage for telemetry (before incrementing) + if (currentValue > frameCtx.peakSamplerUsage) { + frameCtx.peakSamplerUsage = currentValue; + + // Warn if approaching capacity (>80%) + const float usage = static_cast(currentValue) / static_cast(kSamplerHeapSize); + if (usage > 0.8f) { + IGL_LOG_ERROR("D3D12: Sampler descriptor usage at %.1f%% capacity (%u/%u) for frame %u\n", + usage * 100.0f, currentValue, kSamplerHeapSize, frameIdx); + } + } + + // CRITICAL: Assert on overflow in debug builds + IGL_DEBUG_ASSERT(currentValue < kSamplerHeapSize, + "D3D12: Sampler descriptor heap overflow! Allocated: %u, Capacity: %u (frame %u). " + "This will cause memory corruption and device removal. Increase heap size or optimize descriptor usage.", + currentValue, kSamplerHeapSize, frameIdx); + + // Graceful degradation in release builds: clamp to last valid descriptor + if (currentValue >= kSamplerHeapSize) { + IGL_LOG_ERROR("D3D12: Sampler descriptor heap overflow! Allocated: %u, Capacity: %u (frame %u)\n" + "Clamping to last valid descriptor. 
Rendering artifacts expected.\n", + currentValue, kSamplerHeapSize, frameIdx); + // Return reference to a clamped value to prevent further damage + // This will cause rendering artifacts but prevent crashes + static uint32_t clampedValue = kSamplerHeapSize - 1; + return clampedValue; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::getNextSamplerDescriptor() - frame %u, current value=%u\n", + frameIdx, currentValue); +#endif + return frameCtx.nextSamplerDescriptor; +} + +void CommandBuffer::trackTransientBuffer(std::shared_ptr buffer) { + // Add to the CURRENT frame's transient buffer list + // These will be kept alive until the frame completes GPU execution + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + frameCtx.transientBuffers.push_back(std::move(buffer)); + + // Track high-water mark for telemetry. + const size_t currentCount = frameCtx.transientBuffers.size(); + if (currentCount > frameCtx.transientBuffersHighWater) { + frameCtx.transientBuffersHighWater = currentCount; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::trackTransientBuffer() - Added buffer to frame %u (total=%zu, high-water=%zu)\n", + frameIdx, currentCount, frameCtx.transientBuffersHighWater); +#endif +} + +void CommandBuffer::trackTransientResource(ID3D12Resource* resource) { + if (!resource) { + return; + } + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + igl::d3d12::ComPtr keepAlive; + resource->AddRef(); + keepAlive.Attach(resource); + frameCtx.transientResources.push_back(std::move(keepAlive)); + + // Track high-water mark for telemetry. 
+ const size_t currentCount = frameCtx.transientResources.size(); + if (currentCount > frameCtx.transientResourcesHighWater) { + frameCtx.transientResourcesHighWater = currentCount; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::trackTransientResource() - Added resource to frame %u (total=%zu, high-water=%zu)\n", + frameIdx, currentCount, frameCtx.transientResourcesHighWater); +#endif +} + +Result CommandBuffer::begin() { + if (recording_) { + return Result(); + } + + // NOTE: Transient buffers are now stored in FrameContext and cleared when advancing frames + // NOTE: Descriptor counters are now stored in FrameContext and shared across all CommandBuffers + // They are reset at the start of each frame in CommandQueue::submit(), not here + + // Reset per-command-buffer draw count for this recording + currentDrawCount_ = 0; + + // CRITICAL: Set the per-frame descriptor heaps before recording commands. + // Each frame has its own isolated heaps to prevent descriptor conflicts. + // Uses the current page's heap (will be updated if we grow to new pages). + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + // Initialize active heap to current page at frame start. 
+ if (frameCtx.cbvSrvUavHeapPages.empty()) { + const char* msg = "No CBV/SRV/UAV heap pages available"; + IGL_LOG_ERROR("CommandBuffer::begin() - %s for frame %u\n", msg, frameIdx); + return Result(Result::Code::RuntimeError, msg); + } + + frameCtx.activeCbvSrvUavHeap = frameCtx.cbvSrvUavHeapPages[frameCtx.currentCbvSrvUavPageIndex].heap; + + if (!frameCtx.activeCbvSrvUavHeap.Get()) { + const char* msg = "No CBV/SRV/UAV heap available"; + IGL_LOG_ERROR("CommandBuffer::begin() - %s for frame %u\n", msg, frameIdx); + return Result(Result::Code::RuntimeError, msg); + } + + // Use the CURRENT FRAME's command allocator from FrameContext + // Following Microsoft's D3D12HelloFrameBuffering pattern + auto* frameAllocator = ctx.getFrameContexts()[frameIdx].allocator.Get(); + + // Microsoft pattern: Reset allocator THEN reset command list + // Allocator was reset in CommandQueue::submit() after fence wait, OR is in initial ready state +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::begin() - Frame %u: Resetting command list with allocator...\n", frameIdx); +#endif + HRESULT hr = commandList_->Reset(frameAllocator, nullptr); + if (FAILED(hr)) { + IGL_LOG_ERROR("CommandBuffer::begin() - Reset command list FAILED: 0x%08X\n", static_cast(hr)); + return getResultFromHRESULT(hr); + } +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::begin() - Command list reset OK\n"); +#endif + recording_ = true; + + // Bind heaps using active heap, not legacy accessor, now that the command list + // has been reset and is in the recording state. + ID3D12DescriptorHeap* heaps[] = { + frameCtx.activeCbvSrvUavHeap.Get(), + frameCtx.samplerHeap.Get() + }; + commandList_->SetDescriptorHeaps(2, heaps); + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::begin() - Set per-frame descriptor heaps for frame %u\n", frameIdx); +#endif + + // Record timer start timestamp after reset and before any GPU work is recorded. 
+ // This ensures the timer measures the actual command buffer workload. + if (desc.timer) { + auto* timer = static_cast(desc.timer.get()); + timer->begin(commandList_.Get()); + } + + return Result(); +} + +void CommandBuffer::end() { + if (!recording_) { + return; + } + + // No timer recording here; timer->begin() was called in begin(), + // and timer->end() will be called in CommandQueue::submit() before close. + + // Close the command list - all recording is complete + commandList_->Close(); + recording_ = false; +} + +D3D12Context& CommandBuffer::getContext() { + return device_.getD3D12Context(); +} + +const D3D12Context& CommandBuffer::getContext() const { + return device_.getD3D12Context(); +} + +// Device draw count is incremented by CommandQueue::submit() using this buffer's count + +std::unique_ptr CommandBuffer::createRenderCommandEncoder( + const RenderPassDesc& renderPass, + const std::shared_ptr& framebuffer, + const Dependencies& /*dependencies*/, + Result* IGL_NULLABLE outResult) { + // Begin command buffer if not already begun + Result beginResult = begin(); + if (!beginResult.isOk()) { + Result::setResult(outResult, std::move(beginResult)); + return nullptr; + } + + // Create encoder with lightweight constructor, then initialize with render pass + auto encoder = std::make_unique(*this, framebuffer); + encoder->begin(renderPass); + Result::setOk(outResult); + return encoder; +} + +std::unique_ptr CommandBuffer::createComputeCommandEncoder() { + // Begin command buffer if not already begun + Result beginResult = begin(); + if (!beginResult.isOk()) { + IGL_LOG_ERROR("CommandBuffer::createComputeCommandEncoder() - begin() failed: %s\n", + beginResult.message.c_str()); + return nullptr; + } + + return std::make_unique(*this); +} + +void CommandBuffer::present(const std::shared_ptr& /*surface*/) const { + // Note: Actual present happens in CommandQueue::submit(). 
This call serves + // as a marker indicating that this command buffer should trigger a swapchain + // Present when submitted. + willPresent_ = true; +} + +void CommandBuffer::waitUntilScheduled() { + // If scheduleValue_ is 0, the command buffer hasn't been submitted yet + if (scheduleValue_ == 0) { +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Not yet submitted, returning immediately\n"); +#endif + return; + } + + // Check if the scheduling fence has already been signaled + if (!scheduleFence_.Get()) { + IGL_LOG_ERROR("CommandBuffer::waitUntilScheduled() - Scheduling fence is null\n"); + return; + } + + const UINT64 completedValue = scheduleFence_->GetCompletedValue(); + if (completedValue >= scheduleValue_) { +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Already scheduled (completed=%llu, target=%llu)\n", + completedValue, scheduleValue_); +#endif + return; + } + + // Wait for the scheduling fence to be signaled +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Waiting for scheduling (completed=%llu, target=%llu)\n", + completedValue, scheduleValue_); +#endif + + // Use FenceWaiter RAII wrapper for proper fence waiting with TOCTOU protection + FenceWaiter waiter(scheduleFence_.Get(), scheduleValue_); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("CommandBuffer::waitUntilScheduled() - Fence wait failed: %s\n", + waitResult.message.c_str()); + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Scheduling complete (fence now=%llu)\n", + scheduleFence_->GetCompletedValue()); +#endif +} + +void CommandBuffer::waitUntilCompleted() { + // Wait for all submitted GPU work to complete + // The CommandQueue tracks frame completion via fences, so we need to wait for the current frame + auto& ctx = getContext(); + auto* queue = ctx.getCommandQueue(); + if (!queue) { + return; + } + + // Signal a 
fence and wait for it + // This ensures all previously submitted command lists have completed on the GPU + igl::d3d12::ComPtr fence; + auto* device = ctx.getDevice(); + if (!device || FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())))) { + return; + } + + queue->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("CommandBuffer::waitUntilCompleted() - Fence wait failed: %s\n", + waitResult.message.c_str()); + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilCompleted() - GPU work completed\n"); +#endif +} + +void CommandBuffer::pushDebugGroupLabel(const char* label, + const igl::Color& /*color*/) const { + // Only emit GPU debug markers while the command list is in recording state. + if (!recording_ || !commandList_.Get() || !label) { + return; + } + + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList_->BeginEvent( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void CommandBuffer::popDebugGroupLabel() const { + // Only pop GPU debug markers while the command list is in recording state. 
+ if (!recording_ || !commandList_.Get()) { + return; + } + + commandList_->EndEvent(); +} + +void CommandBuffer::copyBuffer(IBuffer& source, + IBuffer& destination, + uint64_t sourceOffset, + uint64_t destinationOffset, + uint64_t size) { + auto* src = static_cast(&source); + auto* dst = static_cast(&destination); + ID3D12Resource* srcRes = src->getResource(); + ID3D12Resource* dstRes = dst->getResource(); + if (!srcRes || !dstRes || size == 0) { + return; + } + + // Use a transient copy with appropriate heap handling + auto& ctx = getContext(); + ID3D12Device* device = ctx.getDevice(); + ID3D12CommandQueue* queue = ctx.getCommandQueue(); + if (!device || !queue) { + return; + } + + auto doCopyOnList = [&](ID3D12GraphicsCommandList* list, + ID3D12Resource* dstResLocal, + uint64_t dstOffsetLocal) { + D3D12_RESOURCE_BARRIER barriers[2] = {}; + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[0].Transition.pResource = srcRes; + barriers[0].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[1].Transition.pResource = dstResLocal; + barriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + list->ResourceBarrier(2, barriers); + + list->CopyBufferRegion(dstResLocal, dstOffsetLocal, srcRes, sourceOffset, size); + + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + list->ResourceBarrier(2, barriers); + }; + + if 
(dst->storage() == ResourceStorage::Shared) { + // GPU cannot write into UPLOAD heap; use a READBACK staging buffer and then memcpy into UPLOAD + D3D12_HEAP_PROPERTIES readbackHeap{}; + readbackHeap.Type = D3D12_HEAP_TYPE_READBACK; + D3D12_RESOURCE_DESC desc{}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = size + destinationOffset; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + igl::d3d12::ComPtr<ID3D12Resource> readback; + HRESULT hr = device->CreateCommittedResource(&readbackHeap, + D3D12_HEAP_FLAG_NONE, + &desc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(readback.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("copyBuffer: Failed to create READBACK buffer, hr=0x%08X\n", static_cast<unsigned int>(hr)); + return; + } + + igl::d3d12::ComPtr<ID3D12CommandAllocator> allocator; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> list; + if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(allocator.GetAddressOf()))) || + FAILED(device->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(list.GetAddressOf())))) { + IGL_LOG_ERROR("copyBuffer: Failed to create transient command list\n"); + return; + } + + // Transition source to COPY_SOURCE + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = srcRes; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + list->ResourceBarrier(1, &barrier); + + // Copy from source to readback (readback is already in COPY_DEST state) + list->CopyBufferRegion(readback.Get(), destinationOffset, srcRes, sourceOffset, size); + + // Transition source back + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + 
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + list->ResourceBarrier(1, &barrier); + list->Close(); + ID3D12CommandList* lists[] = {list.Get()}; + queue->ExecuteCommandLists(1, lists); + ctx.waitForGPU(); + + // Map readback and copy into the UPLOAD buffer + void* rbPtr = nullptr; + D3D12_RANGE readRange{static_cast<SIZE_T>(destinationOffset), static_cast<SIZE_T>(destinationOffset + size)}; + if (SUCCEEDED(readback->Map(0, &readRange, &rbPtr)) && rbPtr) { + // Map destination upload buffer + Result r1; + void* dstPtr = dst->map(BufferRange(size, destinationOffset), &r1); + if (dstPtr && r1.isOk()) { + std::memcpy(dstPtr, static_cast<const uint8_t*>(rbPtr) + destinationOffset, size); + dst->unmap(); + } + readback->Unmap(0, nullptr); + } + return; + } + + // Default path: copy using a transient command list to DEFAULT/COMMON destinations + igl::d3d12::ComPtr<ID3D12CommandAllocator> allocator; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> list; + if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(allocator.GetAddressOf()))) || + FAILED(device->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(list.GetAddressOf())))) { + return; + } + + doCopyOnList(list.Get(), dstRes, destinationOffset); + list->Close(); + ID3D12CommandList* lists2[] = {list.Get()}; + queue->ExecuteCommandLists(1, lists2); + ctx.waitForGPU(); +} + +// Public API: Record texture-to-buffer copy for deferred execution +void CommandBuffer::copyTextureToBuffer(ITexture& source, + IBuffer& destination, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer) { + // Like Vulkan, defer the copy operation until command buffer submission + // D3D12 requires this to execute AFTER render commands complete, not during recording + // (Unlike Vulkan which can record into the command buffer, D3D12 has closed command list and padding constraints) + + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: Recording deferred copy operation (will execute in CommandQueue::submit)\n"); + + 
deferredTextureCopies_.push_back({ + &source, + &destination, + destinationOffset, + mipLevel, + layer + }); +} + +} // namespace igl::d3d12 + diff --git a/src/igl/d3d12/CommandBuffer.h b/src/igl/d3d12/CommandBuffer.h new file mode 100644 index 0000000000..18dc1d0249 --- /dev/null +++ b/src/igl/d3d12/CommandBuffer.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +class CommandBuffer final : public ICommandBuffer { + public: + CommandBuffer(Device& device, const CommandBufferDesc& desc); + ~CommandBuffer() override; + + std::unique_ptr<IRenderCommandEncoder> createRenderCommandEncoder( + const RenderPassDesc& renderPass, + const std::shared_ptr<IFramebuffer>& framebuffer, + const Dependencies& dependencies, + Result* IGL_NULLABLE outResult) override; + + std::unique_ptr<IComputeCommandEncoder> createComputeCommandEncoder() override; + + void present(const std::shared_ptr<ITexture>& surface) const override; + + void waitUntilScheduled() override; + void waitUntilCompleted() override; + + void pushDebugGroupLabel(const char* label, const igl::Color& color) const override; + void popDebugGroupLabel() const override; + + void copyBuffer(IBuffer& source, + IBuffer& destination, + uint64_t sourceOffset, + uint64_t destinationOffset, + uint64_t size) override; + void copyTextureToBuffer(ITexture& source, + IBuffer& destination, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer) override; + + Result begin(); + void end(); + bool isRecording() const { return recording_; } + + ID3D12GraphicsCommandList* getCommandList() const { return commandList_.Get(); } + D3D12Context& getContext(); + const D3D12Context& getContext() const; + Device& getDevice() { return device_; } + + size_t getCurrentDrawCount() const { return currentDrawCount_; } + void 
incrementDrawCount(size_t count = 1) { currentDrawCount_ += count; } + + // Track transient resources (e.g., push constants buffers) that need to be kept alive + // until this FRAME completes GPU execution (not just until this command buffer is destroyed) + void trackTransientBuffer(std::shared_ptr buffer); + void trackTransientResource(ID3D12Resource* resource); + + // ============================================================================ + // INTERNAL API: Descriptor Allocation (Transient Descriptor Allocator) + // ============================================================================ + // + // These methods are implementation details of the per-frame descriptor heap + // management system (Strategy 1 in D3D12ResourcesBinder.h). + // + // WARNING: Do NOT call these methods directly. Use D3D12ResourcesBinder instead. + // + // These methods delegate to D3D12Context::FrameContext to share descriptor heaps + // across all command buffers in the current frame, ensuring efficient utilization + // and automatic cleanup at frame boundaries. + // + // Access: public for friend class D3D12ResourcesBinder, conceptually private. + // Returns Result to allow error handling on heap overflow. + // ============================================================================ + + /** + * @brief Allocate a single CBV/SRV/UAV descriptor from per-frame heap + * @internal This is an implementation detail - use D3D12ResourcesBinder instead + */ + Result getNextCbvSrvUavDescriptor(uint32_t* outDescriptorIndex); + + /** + * @brief Allocate a contiguous range of CBV/SRV/UAV descriptors on a single page + * @internal This is an implementation detail - use D3D12ResourcesBinder instead + * + * This ensures the range can be bound as a single descriptor table. 
+ * Returns the base descriptor index; descriptors are [baseIndex, baseIndex+count) + */ + Result allocateCbvSrvUavRange(uint32_t count, uint32_t* outBaseDescriptorIndex); + + /** + * @brief Get reference to next sampler descriptor index (for increment) + * @internal This is an implementation detail - use D3D12ResourcesBinder instead + */ + uint32_t& getNextSamplerDescriptor(); + + // Deferred texture-to-buffer copy operations + // These are recorded during command buffer recording and executed in CommandQueue::submit() + // AFTER all render/compute commands have been executed by the GPU + struct DeferredTextureCopy { + ITexture* source; + IBuffer* destination; + uint64_t destinationOffset; + uint32_t mipLevel; + uint32_t layer; + }; + const std::vector<DeferredTextureCopy>& getDeferredTextureCopies() const { + return deferredTextureCopies_; + } + + // Whether this command buffer requested a swapchain present via present(). + bool willPresent() const { return willPresent_; } + + private: + Device& device_; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> commandList_; + // NOTE: Command allocators are now managed per-frame in FrameContext, not per-CommandBuffer + size_t currentDrawCount_ = 0; + bool recording_ = false; + + // Scheduling fence infrastructure (separate from completion fence) + // Used to track when command buffer is submitted to GPU queue (not when GPU completes) + // D-003: Removed scheduleFenceEvent_ - now using dedicated events per wait operation + igl::d3d12::ComPtr<ID3D12Fence> scheduleFence_; + uint64_t scheduleValue_ = 0; + + // Deferred copy operations to execute after command buffer submission + std::vector<DeferredTextureCopy> deferredTextureCopies_; + + // Tracks whether present(surface) was called on this command buffer. + // Mutable to allow modification from the logically-const present() override. 
+ mutable bool willPresent_ = false; + + friend class CommandQueue; // Allow CommandQueue to signal scheduleFence_ +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/CommandQueue.cpp b/src/igl/d3d12/CommandQueue.cpp new file mode 100644 index 0000000000..49435f18aa --- /dev/null +++ b/src/igl/d3d12/CommandQueue.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { + +/** + * @brief Execute deferred texture-to-buffer copies after render commands + */ +void executeDeferredCopies(D3D12Context& ctx, Device& device, const CommandBuffer& cmdBuffer) { + const auto& deferredCopies = cmdBuffer.getDeferredTextureCopies(); + if (deferredCopies.empty()) { + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue: Executing %zu deferred copyTextureToBuffer operations\n", + deferredCopies.size()); +#endif + + for (const auto& copy : deferredCopies) { + auto* srcTex = static_cast(copy.source); + auto* dstBuf = static_cast(copy.destination); + + Result copyResult = TextureCopyUtils::executeCopyTextureToBuffer( + ctx, device, *srcTex, *dstBuf, copy.destinationOffset, copy.mipLevel, copy.layer); + if (!copyResult.isOk()) { + IGL_LOG_ERROR("Failed to copy texture to buffer: %s\n", copyResult.message.c_str()); + } + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue: All deferred copies executed successfully\n"); +#endif +} + +// No standalone helper needed - this is now inlined in CommandQueue::submit() + +/** + * @brief Update per-frame fence tracking + */ +void updateFrameFences(D3D12Context& ctx, UINT64 currentFenceValue) { + auto& frameCtx = ctx.getFrameContexts()[ctx.getCurrentFrameIndex()]; + + // Update frame fence (backward 
compatibility) + if (frameCtx.fenceValue == 0) { + frameCtx.fenceValue = currentFenceValue; + } + + // Update max allocator fence (critical for safe allocator reset) + if (currentFenceValue > frameCtx.maxAllocatorFence) { + frameCtx.maxAllocatorFence = currentFenceValue; + } + + // Track command buffer count + frameCtx.commandBufferCount++; + +#ifdef IGL_DEBUG + // Keep this at IGL_LOG_INFO because the test harness (test_all_sessions.bat) parses + // "Signaled fence" from the INFO log; do not downgrade to VERBOSE + IGL_LOG_INFO("CommandQueue: Signaled fence for frame %u " + "(value=%llu, maxAllocatorFence=%llu, cmdBufCount=%u)\n", + ctx.getCurrentFrameIndex(), currentFenceValue, + frameCtx.maxAllocatorFence, frameCtx.commandBufferCount); +#endif +} + +} // namespace + +CommandQueue::CommandQueue(Device& device) : device_(device) {} + +std::shared_ptr<ICommandBuffer> CommandQueue::createCommandBuffer(const CommandBufferDesc& desc, + Result* IGL_NULLABLE outResult) { + auto cmdBuffer = std::make_shared<CommandBuffer>(device_, desc); + + // Check if CommandBuffer was successfully initialized + // CommandBuffer leaves commandList_ null on failure + if (!cmdBuffer->getCommandList()) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create D3D12 command list. " + "Possible causes: device removed, out of memory, or device initialization failed. " + "Check debug output for HRESULT error code."); + return nullptr; + } + + Result::setOk(outResult); + return cmdBuffer; +} + +// Error handling behavior for submit(). +// This function executes command lists and presents frames. Error handling: +// - Device removal: Detected via checkDeviceRemoval(), logs diagnostics, sets device.isDeviceLost() +// flag, and triggers IGL_DEBUG_ASSERT. Returns SubmitHandle normally (legacy API limitation). +// - Present failures: Logged with IGL_LOG_ERROR via PresentManager. Device removal during Present +// also triggers IGL_DEBUG_ASSERT. 
Non-removal failures (swapchain/window issues) are logged but +// do not assert. Present result is checked but not propagated as Result (legacy API limitation). +// - Return value: The SubmitHandle is always returned regardless of errors and does NOT reflect +// submission success/failure. Use device.checkDeviceRemoval() or device.isDeviceLost() as the +// authoritative source for fatal error detection. +// Future: Consider Result-based submission API for explicit error propagation. +// +// Refactored from 614 lines to under 100 lines using helper classes: +// - FenceWaiter: RAII fence waiting with TOCTOU protection +// - FrameManager: Frame advancement and resource management +// - PresentManager: Swapchain presentation with device removal detection +SubmitHandle CommandQueue::submit(const ICommandBuffer& commandBuffer, bool /*endOfFrame*/) { + auto& cmdBuffer = const_cast(static_cast(commandBuffer)); + auto& ctx = device_.getD3D12Context(); + auto* commandList = cmdBuffer.getCommandList(); + auto* fence = ctx.getFence(); + + // Defensive: Ensure we have a valid command list + IGL_DEBUG_ASSERT(commandList, "D3D12 CommandQueue::submit() with null command list"); + + // Record timer end timestamp before closing command list + if (commandBuffer.desc.timer) { + auto* timer = static_cast(commandBuffer.desc.timer.get()); + const UINT64 timerFenceValue = ctx.getFenceValue() + 1; + timer->end(commandList, fence, timerFenceValue); + } + + // Close command list + cmdBuffer.end(); + + // Signal scheduling fence + ++scheduleFenceValue_; + cmdBuffer.scheduleValue_ = scheduleFenceValue_; + ctx.getCommandQueue()->Signal(cmdBuffer.scheduleFence_.Get(), scheduleFenceValue_); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue: Signaled scheduling fence (value=%llu)\n", scheduleFenceValue_); +#endif + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Executing command list...\n"); +#endif + + // Execute command list + ID3D12CommandList* commandLists[] = 
{commandList}; + ctx.getCommandQueue()->ExecuteCommandLists(1, commandLists); + + // Execute deferred texture-to-buffer copies + executeDeferredCopies(ctx, device_, cmdBuffer); + + // Check device status + Result deviceCheck = device_.checkDeviceRemoval(); + if (!deviceCheck.isOk()) { + IGL_LOG_ERROR("CommandQueue::submit() - Device removal detected: %s\n", + deviceCheck.message.c_str()); + } + + // Present only if this command buffer requested it (via present()) + // and we have a swapchain. This avoids advancing the swapchain for + // intermediate offscreen passes that do not render to the back buffer. + bool presentOk = true; + if (ctx.getSwapChain() && cmdBuffer.willPresent()) { + PresentManager presentMgr(ctx); + presentOk = presentMgr.present(); + if (!presentOk) { + IGL_LOG_ERROR("CommandQueue::submit() - Present failed; frame advancement may be unsafe\n"); + // Note: Continue with fence signaling for now to maintain legacy behavior, + // but future work should consider early-return or recovery strategy + } + } + + // Signal fence for current frame + const UINT64 currentFenceValue = ++ctx.getFenceValue(); + ctx.getCommandQueue()->Signal(ctx.getFence(), currentFenceValue); + + // Update frame fence tracking + updateFrameFences(ctx, currentFenceValue); + + // Advance to next frame with proper synchronization + if (ctx.getSwapChain()) { + FrameManager frameMgr(ctx); + frameMgr.advanceFrame(currentFenceValue); + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Complete!\n"); +#endif + + // Aggregate per-command-buffer draw count into the device, matching GL/Vulkan behavior + const auto cbDraws = cmdBuffer.getCurrentDrawCount(); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Aggregating %zu draws from CB into device\n", cbDraws); +#endif + device_.incrementDrawCount(cbDraws); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Device drawCount now=%zu\n", device_.getCurrentDrawCount()); + + // Log 
resource stats every 30 draws to track leaks + const size_t drawCount = device_.getCurrentDrawCount(); + if (drawCount == 30 || drawCount == 60 || drawCount == 90 || drawCount == 120 || + drawCount == 150 || drawCount == 300 || drawCount == 600 || drawCount == 900 || + drawCount == 1200 || drawCount == 1500 || drawCount == 1800) { + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Logging resource stats at drawCount=%zu\n", drawCount); + D3D12Context::logResourceStats(); + } +#endif + + return 0; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/CommandQueue.h b/src/igl/d3d12/CommandQueue.h new file mode 100644 index 0000000000..26142503f7 --- /dev/null +++ b/src/igl/d3d12/CommandQueue.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +class CommandQueue final : public ICommandQueue { + public: + explicit CommandQueue(Device& device); + ~CommandQueue() override = default; + + std::shared_ptr createCommandBuffer(const CommandBufferDesc& desc, + Result* IGL_NULLABLE outResult) override; + SubmitHandle submit(const ICommandBuffer& commandBuffer, bool endOfFrame = false) override; + + Device& getDevice() { return device_; } + + private: + Device& device_; + uint64_t scheduleFenceValue_ = 0; // Monotonically increasing fence value used for scheduling. +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Common.cpp b/src/igl/d3d12/Common.cpp new file mode 100644 index 0000000000..5342c1db44 --- /dev/null +++ b/src/igl/d3d12/Common.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace igl::d3d12 { + +DXGI_FORMAT textureFormatToDXGIFormat(TextureFormat format) { + switch (format) { + case TextureFormat::Invalid: + return DXGI_FORMAT_UNKNOWN; + case TextureFormat::R_UNorm8: + return DXGI_FORMAT_R8_UNORM; + case TextureFormat::R_UNorm16: + return DXGI_FORMAT_R16_UNORM; + case TextureFormat::R_F16: + return DXGI_FORMAT_R16_FLOAT; + case TextureFormat::R_UInt16: + return DXGI_FORMAT_R16_UINT; + case TextureFormat::B5G5R5A1_UNorm: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case TextureFormat::B5G6R5_UNorm: + return DXGI_FORMAT_B5G6R5_UNORM; + case TextureFormat::RG_UNorm8: + return DXGI_FORMAT_R8G8_UNORM; + case TextureFormat::RG_UNorm16: + return DXGI_FORMAT_R16G16_UNORM; + case TextureFormat::R5G5B5A1_UNorm: + return DXGI_FORMAT_B5G5R5A1_UNORM; // DXGI closest match + case TextureFormat::BGRA_UNorm8: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case TextureFormat::RGBA_UNorm8: + case TextureFormat::RGBX_UNorm8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case TextureFormat::RGBA_SRGB: + return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + case TextureFormat::BGRA_SRGB: + return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; + case TextureFormat::RG_F16: + return DXGI_FORMAT_R16G16_FLOAT; + case TextureFormat::RG_UInt16: + return DXGI_FORMAT_R16G16_UINT; + case TextureFormat::RGB10_A2_UNorm_Rev: + return DXGI_FORMAT_R10G10B10A2_UNORM; + case TextureFormat::RGB10_A2_Uint_Rev: + return DXGI_FORMAT_R10G10B10A2_UINT; + case TextureFormat::R_F32: + return DXGI_FORMAT_R32_FLOAT; + case TextureFormat::R_UInt32: + return DXGI_FORMAT_R32_UINT; + case TextureFormat::RG_F32: + return DXGI_FORMAT_R32G32_FLOAT; + case TextureFormat::RGB_F16: + return DXGI_FORMAT_R16G16B16A16_FLOAT; // DXGI doesn't have RGB16, use RGBA16 + case TextureFormat::RGBA_F16: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case TextureFormat::RGB_F32: + return DXGI_FORMAT_R32G32B32_FLOAT; + case TextureFormat::RGBA_UInt32: + return DXGI_FORMAT_R32G32B32A32_UINT; + case TextureFormat::RGBA_F32: + return 
DXGI_FORMAT_R32G32B32A32_FLOAT; + // BC7 compressed color formats + case TextureFormat::RGBA_BC7_UNORM_4x4: + return DXGI_FORMAT_BC7_UNORM; + case TextureFormat::RGBA_BC7_SRGB_4x4: + return DXGI_FORMAT_BC7_UNORM_SRGB; + // Depth/stencil formats + case TextureFormat::Z_UNorm16: + return DXGI_FORMAT_D16_UNORM; + case TextureFormat::Z_UNorm24: + return DXGI_FORMAT_D24_UNORM_S8_UINT; // DXGI doesn't have D24 alone + case TextureFormat::Z_UNorm32: + return DXGI_FORMAT_D32_FLOAT; + case TextureFormat::S8_UInt_Z24_UNorm: + return DXGI_FORMAT_D24_UNORM_S8_UINT; + case TextureFormat::S8_UInt_Z32_UNorm: + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + case TextureFormat::S_UInt8: + // Stencil-only formats are not natively supported by D3D12. + // TODO: Implement via typed subresource views using stencil plane formats: + // - DXGI_FORMAT_X24_TYPELESS_G8_UINT (for D24_UNORM_S8_UINT backing resource) + // - DXGI_FORMAT_X32_TYPELESS_G8X24_UINT (for D32_FLOAT_S8X24_UINT backing resource) + // See: https://learn.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format + IGL_LOG_ERROR_ONCE("TextureFormat::S_UInt8 not supported on D3D12 (no stencil-only textures) - use S8_UInt_Z24_UNorm or S8_UInt_Z32_UNorm instead\n"); + return DXGI_FORMAT_UNKNOWN; + default: + return DXGI_FORMAT_UNKNOWN; + } +} + +namespace { +bool isDepthOrStencilFormat(TextureFormat format) { + switch (format) { + case TextureFormat::Z_UNorm16: + case TextureFormat::Z_UNorm24: + case TextureFormat::Z_UNorm32: + case TextureFormat::S8_UInt_Z24_UNorm: + case TextureFormat::S8_UInt_Z32_UNorm: + return true; + default: + return false; + } +} +} // namespace + +DXGI_FORMAT textureFormatToDXGIResourceFormat(TextureFormat format, bool sampledUsage) { + if (!sampledUsage || !isDepthOrStencilFormat(format)) { + return textureFormatToDXGIFormat(format); + } + + switch (format) { + case TextureFormat::Z_UNorm16: + return DXGI_FORMAT_R16_TYPELESS; + case TextureFormat::Z_UNorm24: + case 
TextureFormat::S8_UInt_Z24_UNorm: + return DXGI_FORMAT_R24G8_TYPELESS; + case TextureFormat::Z_UNorm32: + return DXGI_FORMAT_R32_TYPELESS; + case TextureFormat::S8_UInt_Z32_UNorm: + return DXGI_FORMAT_R32G8X24_TYPELESS; + default: + return textureFormatToDXGIFormat(format); + } +} + +DXGI_FORMAT textureFormatToDXGIShaderResourceViewFormat(TextureFormat format) { + if (!isDepthOrStencilFormat(format)) { + return textureFormatToDXGIFormat(format); + } + + switch (format) { + case TextureFormat::Z_UNorm16: + return DXGI_FORMAT_R16_UNORM; + case TextureFormat::Z_UNorm24: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + case TextureFormat::S8_UInt_Z24_UNorm: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + case TextureFormat::Z_UNorm32: + return DXGI_FORMAT_R32_FLOAT; + case TextureFormat::S8_UInt_Z32_UNorm: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + default: + return textureFormatToDXGIFormat(format); + } +} + +TextureFormat dxgiFormatToTextureFormat(DXGI_FORMAT format) { + switch (format) { + case DXGI_FORMAT_UNKNOWN: + return TextureFormat::Invalid; + case DXGI_FORMAT_R8_UNORM: + return TextureFormat::R_UNorm8; + case DXGI_FORMAT_R16_UNORM: + return TextureFormat::R_UNorm16; + case DXGI_FORMAT_R16_FLOAT: + return TextureFormat::R_F16; + case DXGI_FORMAT_R8G8_UNORM: + return TextureFormat::RG_UNorm8; + case DXGI_FORMAT_R8G8B8A8_UNORM: + return TextureFormat::RGBA_UNorm8; + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + return TextureFormat::RGBA_SRGB; + case DXGI_FORMAT_B8G8R8A8_UNORM: + return TextureFormat::BGRA_UNorm8; + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + return TextureFormat::BGRA_SRGB; + case DXGI_FORMAT_R16G16B16A16_FLOAT: + return TextureFormat::RGBA_F16; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + return TextureFormat::RGBA_F32; + case DXGI_FORMAT_D16_UNORM: + return TextureFormat::Z_UNorm16; + case DXGI_FORMAT_D24_UNORM_S8_UINT: + return TextureFormat::S8_UInt_Z24_UNorm; + case DXGI_FORMAT_D32_FLOAT: + return TextureFormat::Z_UNorm32; + case DXGI_FORMAT_BC7_UNORM: 
+ return TextureFormat::RGBA_BC7_UNORM_4x4; + case DXGI_FORMAT_BC7_UNORM_SRGB: + return TextureFormat::RGBA_BC7_SRGB_4x4; + default: + return TextureFormat::Invalid; + } +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Common.h b/src/igl/d3d12/Common.h new file mode 100644 index 0000000000..eef83bcb08 --- /dev/null +++ b/src/igl/d3d12/Common.h @@ -0,0 +1,374 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#ifndef IGL_D3D12_COMMON_H +#define IGL_D3D12_COMMON_H + +#include +#include + +#include +#include +#include +#include +#include + +// Set to 1 to see verbose debug console logs with D3D12 commands +#define IGL_D3D12_PRINT_COMMANDS 0 + +// Set to 1 to enable verbose logging (hot-path logs, detailed state tracking, etc.) +// This is disabled by default to reduce log volume. +#define IGL_D3D12_DEBUG_VERBOSE 0 + +namespace igl::d3d12 { + +// Configuration structure for D3D12 backend. +// Centralizes all size-related configuration with documented rationale. +struct D3D12ContextConfig { + // === Frame Buffering === + // Rationale: Triple buffering (3 frames) provides optimal GPU/CPU parallelism on modern hardware + // while maintaining reasonable memory overhead. Reducing to 2 can save memory on constrained + // devices but may reduce throughput. Increasing beyond 3 provides minimal benefit. + // D3D12 spec: Minimum 2, recommended 2-3 for flip model swapchains + // + // LIMITATION: Currently fixed at 3 due to fixed-size arrays (frameContexts_, renderTargets_). + // Attempting to change this value will be clamped by validate(). To enable true configurability, + // D3D12Context must be refactored to use std::vector instead of fixed-size arrays. 
+ uint32_t maxFramesInFlight = 3; + + // === Descriptor Heap Sizes (Per-Frame Shader-Visible) === + // Rationale: Following Microsoft MiniEngine pattern for dynamic per-frame allocation + // Samplers: D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE (2048) is the hardware limit. + // D3D12 spec limits: CBV/SRV/UAV up to 1,000,000, Samplers max 2048 + uint32_t samplerHeapSize = 2048; // Total sampler descriptors per frame (D3D12 spec limit) + + // === CBV/SRV/UAV Dynamic Heap Growth === + // Rationale: Prevents unbounded memory growth while supporting complex scenes + // Starts with one page, can grow up to maxHeapPages as needed + // 16 pages × 1024 descriptors = 16,384 max descriptors per frame + // This supports ~500-1000 draw calls per frame with typical descriptor usage patterns + uint32_t descriptorsPerPage = 1024; // CBV/SRV/UAV descriptors per heap page + uint32_t maxHeapPages = 16; // Maximum pages per frame (total capacity = pages × descriptorsPerPage) + + // Pre-allocation policy for descriptor pages. + // Rationale: Following Vulkan fail-fast pattern to prevent mid-frame descriptor invalidation. + // When true: All maxHeapPages are pre-allocated at init (recommended). + // When false: Only 1 page pre-allocated at init (minimal memory footprint). + // Both modes fail-fast when pages are exhausted - no dynamic growth to prevent descriptor invalidation. + // Default: true for safety (matches Vulkan behavior and supports complex scenes). 
+ bool preAllocateDescriptorPages = true; + + // DEPRECATED: Use descriptorsPerPage instead + // This field is kept for backward compatibility but has the same value as descriptorsPerPage + uint32_t cbvSrvUavHeapSize = 1024; // Alias for descriptorsPerPage (deprecated) + + // === CPU-Visible Descriptor Heaps (Static) === + // Rationale: RTVs/DSVs are created once per texture and persist across frames + // 256 RTVs: Supports ~128 textures with mips/array layers (typical for games) + // 128 DSVs: Sufficient for depth buffers, shadow maps, and multi-pass rendering + // These values should be tuned based on application texture usage patterns + uint32_t rtvHeapSize = 256; + uint32_t dsvHeapSize = 128; + + // === Upload Ring Buffer === + // Rationale: 128MB provides good balance for streaming resources (textures, constant buffers) + // Smaller values (64MB) reduce memory footprint but increase allocation failures + // Larger values (256MB) reduce failures but waste memory on simple scenes + // Microsoft MiniEngine uses similar sizes (64-256MB range) + uint64_t uploadRingBufferSize = 128 * 1024 * 1024; // 128 MB + + // === Validation Helpers === + // Clamp values to D3D12 spec limits and provide warnings for unusual configurations + void validate() { + // Frame buffering: Allow 2-4 buffers (double/triple/quad buffering) + // T43: Now that renderTargets_ and frameContexts_ are std::vector, we can support runtime counts. + // Practical range: 2 (double-buffer, higher latency), 3 (triple-buffer, balanced), 4 (lower latency, more memory) + // Note: DXGI may adjust the requested count; actual runtime count comes from GetDesc1(). + constexpr uint32_t kMinFrames = 2; + constexpr uint32_t kMaxFrames = 4; + if (maxFramesInFlight < kMinFrames || maxFramesInFlight > kMaxFrames) { + IGL_LOG_ERROR("D3D12ContextConfig: maxFramesInFlight=%u out of range [%u, %u], clamping to %u\n", + maxFramesInFlight, kMinFrames, kMaxFrames, + (maxFramesInFlight < kMinFrames) ? 
kMinFrames : kMaxFrames); + maxFramesInFlight = (maxFramesInFlight < kMinFrames) ? kMinFrames : kMaxFrames; + } + + // Sampler heap: Use D3D12 constant instead of magic number + if (samplerHeapSize > D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE) { + IGL_LOG_INFO("D3D12ContextConfig: samplerHeapSize=%u exceeds D3D12 limit (%u), clamping\n", + samplerHeapSize, D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE); + samplerHeapSize = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; + } + + // Descriptor page limits: Prevent absurd/invalid values + if (descriptorsPerPage == 0) { + IGL_LOG_ERROR("D3D12ContextConfig: descriptorsPerPage=0 is invalid, setting to 1024\n"); + descriptorsPerPage = 1024; + } + if (maxHeapPages == 0) { + IGL_LOG_ERROR("D3D12ContextConfig: maxHeapPages=0 is invalid, setting to 16\n"); + maxHeapPages = 16; + } + + // CBV/SRV/UAV heap: D3D12 spec limit (generic, tier-independent upper bound) + // Note: Actual device limits may be lower depending on resource binding tier; + // use CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS) for precise caps + constexpr uint32_t kMaxCbvSrvUavDescriptors = 1000000; + if (descriptorsPerPage > kMaxCbvSrvUavDescriptors) { + IGL_LOG_INFO("D3D12ContextConfig: descriptorsPerPage=%u exceeds D3D12 limit (%u), clamping\n", + descriptorsPerPage, kMaxCbvSrvUavDescriptors); + descriptorsPerPage = kMaxCbvSrvUavDescriptors; + } + + // Keep deprecated cbvSrvUavHeapSize in sync with descriptorsPerPage + cbvSrvUavHeapSize = descriptorsPerPage; + + // Upload ring buffer: Warn if too small (may cause allocation failures) + constexpr uint64_t kMinRecommendedSize = 32 * 1024 * 1024; // 32 MB + if (uploadRingBufferSize < kMinRecommendedSize) { + IGL_LOG_INFO("D3D12ContextConfig: uploadRingBufferSize=%llu MB is small, " + "may cause allocation failures (recommended minimum: %llu MB)\n", + uploadRingBufferSize / (1024 * 1024), kMinRecommendedSize / (1024 * 1024)); + } + } + + // === Preset Configurations === + // Factory methods for common use cases + 
+ // Default configuration (balanced for most applications) + static D3D12ContextConfig defaultConfig() { + return D3D12ContextConfig{}; // Uses default member initializers + } + + // Low memory configuration (mobile, integrated GPUs, constrained devices) + static D3D12ContextConfig lowMemoryConfig() { + D3D12ContextConfig config; + config.maxFramesInFlight = 2; // Double-buffering to reduce memory (T43) + config.descriptorsPerPage = 512; // Smaller pages + config.cbvSrvUavHeapSize = 512; // Keep in sync (deprecated field) + config.maxHeapPages = 8; // Fewer pages (total: 512 × 8 = 4K descriptors) + config.rtvHeapSize = 128; // Fewer RTVs + config.dsvHeapSize = 64; // Fewer DSVs + config.uploadRingBufferSize = 64 * 1024 * 1024; // 64 MB + config.validate(); + return config; + } + + // High performance configuration (discrete GPUs, desktop, complex scenes) + static D3D12ContextConfig highPerformanceConfig() { + D3D12ContextConfig config; + config.maxFramesInFlight = 3; // Triple-buffering (balanced, default) (T43) + config.descriptorsPerPage = 2048; // Larger pages + config.cbvSrvUavHeapSize = 2048; // Keep in sync (deprecated field) + config.maxHeapPages = 32; // More pages (total: 2048 × 32 = 64K descriptors) + config.rtvHeapSize = 512; // More RTVs for render targets + config.dsvHeapSize = 256; // More DSVs for shadow maps + config.uploadRingBufferSize = 256 * 1024 * 1024; // 256 MB + config.validate(); + return config; + } +}; + +// Default frame buffering count (triple buffering). +// T43: D3D12Context now uses runtime swapchainBufferCount_ queried from the swapchain. +// This constant serves as the default value for D3D12ContextConfig::maxFramesInFlight +// and is used by headless contexts (which have no swapchain to query). +// Applications can configure 2-4 buffers via D3D12ContextConfig::maxFramesInFlight. 
+constexpr uint32_t kMaxFramesInFlight = 3; + +// Maximum number of descriptor sets (matching IGL's Vulkan backend) +constexpr uint32_t kMaxDescriptorSets = 4; + +// Maximum number of samplers; increased to D3D12 spec limit to support complex scenes. +// D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE is defined as 2048 in d3d12.h. +constexpr uint32_t kMaxSamplers = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; + +// Descriptor heap sizes (per-frame shader-visible heaps) +// Following Microsoft MiniEngine pattern for dynamic per-frame allocation +constexpr uint32_t kCbvSrvUavHeapSize = 1024; // CBV/SRV/UAV descriptors per page +constexpr uint32_t kSamplerHeapSize = kMaxSamplers; // Sampler descriptors per frame + +// Dynamic heap growth limits (prevent unbounded memory usage). +constexpr uint32_t kDescriptorsPerPage = kCbvSrvUavHeapSize; // 1024 descriptors per page +constexpr uint32_t kMaxHeapPages = 16; // Maximum 16 pages = 16K descriptors per frame +constexpr uint32_t kMaxDescriptorsPerFrame = kMaxHeapPages * kDescriptorsPerPage; // 16384 total + +// Maximum number of vertex attributes (D3D12 spec limit). +// Uses D3D12 spec constant instead of a hard-coded value. +constexpr uint32_t kMaxVertexAttributes = D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; // 32 + +// Normalized error macros - single log per error (no double logging). 
+// Debug builds: IGL_DEBUG_ASSERT logs via _IGLDebugAbort +// Release builds: IGL_LOG_ERROR provides visibility +#if IGL_DEBUG_ABORT_ENABLED + #define D3D12_CHECK(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_DEBUG_ASSERT(false, "D3D12 API call failed: %s, HRESULT: 0x%08X", \ + #func, \ + static_cast(d3d12_check_result)); \ + } \ + } while (0) + + #define D3D12_CHECK_RETURN(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_DEBUG_ASSERT(false, "D3D12 API call failed: %s, HRESULT: 0x%08X", \ + #func, \ + static_cast(d3d12_check_result)); \ + return getResultFromHRESULT(d3d12_check_result); \ + } \ + } while (0) +#else + #define D3D12_CHECK(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_LOG_ERROR("D3D12 API call failed: %s, HRESULT: 0x%08X\n", \ + #func, \ + static_cast(d3d12_check_result)); \ + } \ + } while (0) + + #define D3D12_CHECK_RETURN(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_LOG_ERROR("D3D12 API call failed: %s, HRESULT: 0x%08X\n", \ + #func, \ + static_cast(d3d12_check_result)); \ + return getResultFromHRESULT(d3d12_check_result); \ + } \ + } while (0) +#endif + +// Verbose logging macro (hot-path logs, detailed state tracking). +// Only logs when IGL_D3D12_DEBUG_VERBOSE is enabled (disabled by default) +#if IGL_D3D12_DEBUG_VERBOSE + #define IGL_D3D12_LOG_VERBOSE(format, ...) IGL_LOG_INFO(format, ##__VA_ARGS__) +#else + #define IGL_D3D12_LOG_VERBOSE(format, ...) ((void)0) +#endif + +// Command logging macro (D3D12 API command traces). 
+// Only logs when IGL_D3D12_PRINT_COMMANDS is enabled (disabled by default) +// Use for command recording, state transitions, and D3D12 API call traces +// Note: Treated as INFO-level severity but controlled separately from DEBUG_VERBOSE +// to allow independent toggling of command traces vs general verbose output +#if IGL_D3D12_PRINT_COMMANDS + #define IGL_D3D12_LOG_CMD(format, ...) IGL_LOG_INFO(format, ##__VA_ARGS__) +#else + #define IGL_D3D12_LOG_CMD(format, ...) ((void)0) +#endif + +// Convert HRESULT to IGL Result +inline Result getResultFromHRESULT(HRESULT hr) { + if (SUCCEEDED(hr)) { + return Result(Result::Code::Ok); + } + + // Map common HRESULT codes to IGL Result codes + switch (hr) { + case E_OUTOFMEMORY: + return Result(Result::Code::RuntimeError, "Out of memory"); + case E_INVALIDARG: + return Result(Result::Code::ArgumentInvalid, "Invalid argument"); + case E_NOTIMPL: + return Result(Result::Code::Unimplemented, "Not implemented"); + case DXGI_ERROR_DEVICE_REMOVED: + return Result(Result::Code::RuntimeError, "Device removed"); + case DXGI_ERROR_DEVICE_RESET: + return Result(Result::Code::RuntimeError, "Device reset"); + default: { + // Include HRESULT code for better debugging of unexpected errors. + char buf[64]; + snprintf(buf, sizeof(buf), "D3D12 error (hr=0x%08X)", static_cast(hr)); + return Result(Result::Code::RuntimeError, buf); + } + } +} + +// Helper for COM resource release +template +void SafeRelease(T*& ptr) { + if (ptr) { + ptr->Release(); + ptr = nullptr; + } +} + +// Texture format conversion +DXGI_FORMAT textureFormatToDXGIFormat(TextureFormat format); +DXGI_FORMAT textureFormatToDXGIResourceFormat(TextureFormat format, bool sampledUsage); +DXGI_FORMAT textureFormatToDXGIShaderResourceViewFormat(TextureFormat format); +TextureFormat dxgiFormatToTextureFormat(DXGI_FORMAT format); + +// Align value to specified alignment (must be power-of-two) +// Template allows use with different integer types (UINT64, size_t, etc.) 
+// IMPORTANT: alignment must be a power of 2 (e.g., 256, 4096, 65536) +template +inline T AlignUp(T value, T alignment) { + IGL_DEBUG_ASSERT((alignment & (alignment - 1)) == 0, "AlignUp: alignment must be power-of-two"); + return (value + alignment - 1) & ~(alignment - 1); +} + +// Hash combining utility (boost::hash_combine pattern) +// Used for hashing complex structures like root signatures and pipeline descriptors +template +inline void hashCombine(size_t& seed, const T& value) { + seed ^= std::hash{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +// Feature level to string conversion +inline const char* featureLevelToString(D3D_FEATURE_LEVEL level) { + switch (level) { + case D3D_FEATURE_LEVEL_12_2: return "12.2"; + case D3D_FEATURE_LEVEL_12_1: return "12.1"; + case D3D_FEATURE_LEVEL_12_0: return "12.0"; + case D3D_FEATURE_LEVEL_11_1: return "11.1"; + case D3D_FEATURE_LEVEL_11_0: return "11.0"; + default: return "Unknown"; + } +} + +// Shader target helper. +// Convert D3D_SHADER_MODEL enum to shader target string (e.g., "vs_6_6", "ps_5_1"). 
+inline std::string getShaderTarget(D3D_SHADER_MODEL shaderModel, ShaderStage stage) { + // Extract major and minor version from D3D_SHADER_MODEL enum + // Format: 0xMm where M = major, m = minor (e.g., 0x66 = SM 6.6, 0x51 = SM 5.1) + int major = (shaderModel >> 4) & 0xF; + int minor = shaderModel & 0xF; + + // Get stage prefix + const char* stagePrefix = nullptr; + switch (stage) { + case ShaderStage::Vertex: + stagePrefix = "vs"; + break; + case ShaderStage::Fragment: + stagePrefix = "ps"; // DirectX uses "ps" for pixel/fragment shaders + break; + case ShaderStage::Compute: + stagePrefix = "cs"; + break; + default: + return ""; + } + + // Build target string (e.g., "vs_6_6", "ps_5_1", "cs_6_0") + char target[16]; + snprintf(target, sizeof(target), "%s_%d_%d", stagePrefix, major, minor); + return std::string(target); +} + +} // namespace igl::d3d12 + +#endif // IGL_D3D12_COMMON_H diff --git a/src/igl/d3d12/ComputeCommandEncoder.cpp b/src/igl/d3d12/ComputeCommandEncoder.cpp new file mode 100644 index 0000000000..19d225c738 --- /dev/null +++ b/src/igl/d3d12/ComputeCommandEncoder.cpp @@ -0,0 +1,671 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +ComputeCommandEncoder::ComputeCommandEncoder(CommandBuffer& commandBuffer) : + commandBuffer_(commandBuffer), resourcesBinder_(commandBuffer, true /* isCompute */), isEncoding_(true) { + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder created\n"); + + // Set descriptor heaps for this command list. + // Use active heap from frame context, not the legacy accessor. 
+ auto& context = commandBuffer_.getContext(); + auto& frameCtx = context.getFrameContexts()[context.getCurrentFrameIndex()]; + + ID3D12DescriptorHeap* cbvSrvUavHeap = frameCtx.activeCbvSrvUavHeap.Get(); + ID3D12DescriptorHeap* samplerHeap = frameCtx.samplerHeap.Get(); + + // Legacy fallback: if the context does not provide per-frame heaps, try the manager once + if ((!cbvSrvUavHeap || !samplerHeap) && context.getDescriptorHeapManager()) { + auto* heapMgr = context.getDescriptorHeapManager(); + if (!cbvSrvUavHeap) { + cbvSrvUavHeap = heapMgr->getCbvSrvUavHeap(); + } + if (!samplerHeap) { + samplerHeap = heapMgr->getSamplerHeap(); + } + } + + if (cbvSrvUavHeap && samplerHeap) { + auto* commandList = commandBuffer_.getCommandList(); + if (commandList) { + ID3D12DescriptorHeap* heaps[] = {cbvSrvUavHeap, samplerHeap}; + commandList->SetDescriptorHeaps(2, heaps); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Descriptor heaps set (active heap from FrameContext)\n"); + } + } +} + +void ComputeCommandEncoder::endEncoding() { + if (!isEncoding_) { + return; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::endEncoding()\n"); + isEncoding_ = false; +} + +void ComputeCommandEncoder::bindComputePipelineState( + const std::shared_ptr& pipelineState) { + if (!pipelineState) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindComputePipelineState - null pipeline state\n"); + return; + } + + currentPipeline_ = static_cast(pipelineState.get()); + + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindComputePipelineState - command list is closed or null\n"); + return; + } + + // Set compute root signature and pipeline state + commandList->SetComputeRootSignature(currentPipeline_->getRootSignature()); + commandList->SetPipelineState(currentPipeline_->getPipelineState()); + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindComputePipelineState - PSO and root signature set\n"); +} + 
+void ComputeCommandEncoder::dispatchThreadGroups(const Dimensions& threadgroupCount, + const Dimensions& /*threadgroupSize*/, + const Dependencies& dependencies) { + if (!currentPipeline_) { + IGL_LOG_ERROR("ComputeCommandEncoder::dispatchThreadGroups - no pipeline state bound\n"); + return; + } + + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList) { + IGL_LOG_ERROR("ComputeCommandEncoder::dispatchThreadGroups - command list is closed or null\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::dispatchThreadGroups(%u, %u, %u)\n", + threadgroupCount.width, threadgroupCount.height, threadgroupCount.depth); + + // Process dependencies - insert barriers for buffers and textures + const Dependencies* deps = &dependencies; + std::vector uavResources; + + while (deps) { + // Handle buffer dependencies + for (IBuffer* buf : deps->buffers) { + if (!buf) { + break; + } + auto* d3dBuffer = static_cast(buf); + uavResources.push_back(d3dBuffer->getResource()); + } + + // Handle texture dependencies + for (ITexture* tex : deps->textures) { + if (!tex) { + break; + } + auto* d3dTexture = static_cast(tex); + // Ensure texture is in proper state for compute access + d3dTexture->transitionAll(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + uavResources.push_back(d3dTexture->getResource()); + } + + deps = deps->next; + } + + // Insert UAV barriers for dependent resources before dispatch + if (!uavResources.empty()) { + std::vector barriers; + barriers.reserve(uavResources.size()); + + for (ID3D12Resource* resource : uavResources) { + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = resource; + barriers.push_back(barrier); + } + + commandList->ResourceBarrier(static_cast(barriers.size()), barriers.data()); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Inserted %zu UAV barriers before dispatch\n", barriers.size()); + } + + // Apply all 
resource bindings (textures, samplers, buffers, UAVs) before dispatch. + // For compute pipelines, pass nullptr since there's no RenderPipelineState + Result bindResult; + if (!resourcesBinder_.updateBindings(nullptr, &bindResult)) { + IGL_LOG_ERROR("dispatchThreadGroups: Failed to update resource bindings: %s\n", bindResult.message.c_str()); + return; + } + + // Bind all cached resources to root parameters + // Root signature layout (from Device::createComputePipeline): + // - Parameter 0: Root Constants (b0) - 16 DWORDs + // - Parameter 1: UAV table (u0-uN) + // - Parameter 2: SRV table (t0-tN) + // - Parameter 3: CBV table (b1-bN) + // - Parameter 4: Sampler table (s0-sN) + + // Bind UAVs (parameter 1), with debug validation to catch sparse binding. + if (boundUavCount_ > 0) { + IGL_DEBUG_ASSERT(cachedUavHandles_[0].ptr != 0, + "UAV count > 0 but base handle is null - did you bind only higher slots?"); + if (cachedUavHandles_[0].ptr != 0) { + commandList->SetComputeRootDescriptorTable(1, cachedUavHandles_[0]); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu UAVs\n", boundUavCount_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: UAV count > 0 but base handle is null - skipping binding and clearing boundUavCount_ to 0\n"); + // Clear count to avoid repeated errors on subsequent dispatches + boundUavCount_ = 0; + } + } + + // Bind SRVs (Parameter 2) + if (boundSrvCount_ > 0) { + IGL_DEBUG_ASSERT(cachedSrvHandles_[0].ptr != 0, + "SRV count > 0 but base handle is null - did you bind only higher slots?"); + if (cachedSrvHandles_[0].ptr != 0) { + commandList->SetComputeRootDescriptorTable(2, cachedSrvHandles_[0]); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu SRVs\n", boundSrvCount_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: SRV count > 0 but base handle is null - skipping binding and clearing boundSrvCount_ to 0\n"); + // Clear count to avoid repeated errors on subsequent dispatches + boundSrvCount_ = 0; + } + } + + // Bind CBVs 
(parameter 3). Only create/allocate CBV descriptors when bindings have + // changed or the heap page has changed. + if (boundCbvCount_ > 0) { + auto& context = commandBuffer_.getContext(); + auto& frameCtx = context.getFrameContexts()[context.getCurrentFrameIndex()]; + const uint32_t currentPageIdx = frameCtx.currentCbvSrvUavPageIndex; + + // Check if heap page changed - invalidates cached descriptors + const bool heapPageChanged = (cachedCbvPageIndex_ != currentPageIdx); + if (heapPageChanged) { + cbvBindingsDirty_ = true; + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Heap page changed (%u -> %u), invalidating CBV cache\n", + cachedCbvPageIndex_, currentPageIdx); + } + + // Only recreate descriptors if bindings are dirty or heap changed + if (cbvBindingsDirty_) { + auto* device = context.getDevice(); + + // Allocate descriptors for CBV table - use fixed-size array to avoid heap allocation + uint32_t cbvIndices[kMaxComputeBuffers] = {}; + for (size_t i = 0; i < boundCbvCount_; ++i) { + uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("ComputeCommandEncoder: Failed to allocate CBV descriptor %zu: %s\n", i, allocResult.message.c_str()); + return; + } + cbvIndices[i] = descriptorIndex; + } + + // Create CBV descriptors for all bound constant buffers + for (size_t i = 0; i < boundCbvCount_; ++i) { + if (cachedCbvAddresses_[i] != 0 && cachedCbvSizes_[i] > 0) { + const uint32_t descriptorIndex = cbvIndices[i]; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + // Enforce 64 KB limit for CBVs. 
+ constexpr size_t kMaxCBVSize = 65536; // 64 KB (D3D12 spec limit) + if (cachedCbvSizes_[i] > kMaxCBVSize) { + IGL_LOG_ERROR("ComputeCommandEncoder: Constant buffer %zu size (%zu bytes) exceeds D3D12 64 KB limit\n", + i, cachedCbvSizes_[i]); + continue; // Skip this CBV + } + + // Align size to 256-byte boundary (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + const size_t alignedSize = (cachedCbvSizes_[i] + 255) & ~255; + + IGL_DEBUG_ASSERT(alignedSize <= kMaxCBVSize, "CBV size exceeds 64 KB after alignment"); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = cachedCbvAddresses_[i]; + cbvDesc.SizeInBytes = static_cast(alignedSize); + + device->CreateConstantBufferView(&cbvDesc, cpuHandle); + } + } + + // Cache the base index and page for reuse + cachedCbvBaseIndex_ = cbvIndices[0]; + cachedCbvPageIndex_ = currentPageIdx; + cbvBindingsDirty_ = false; + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Created %zu CBV descriptors at page %u (descriptors %u-%u)\n", + boundCbvCount_, currentPageIdx, cbvIndices[0], cbvIndices[boundCbvCount_ - 1]); + } + + // Recompute GPU handle from cached base index for current heap + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(cachedCbvBaseIndex_); + + // Defensive check: ensure handle is valid before binding + IGL_DEBUG_ASSERT(gpuHandle.ptr != 0, "CBV count > 0 but GPU handle is null"); + if (gpuHandle.ptr != 0) { + commandList->SetComputeRootDescriptorTable(3, gpuHandle); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu CBVs via descriptor table (base index %u)\n", + boundCbvCount_, cachedCbvBaseIndex_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: CBV GPU handle is null, skipping binding\n"); + } + } + + // Bind Samplers (Parameter 4) + if (boundSamplerCount_ > 0) { + IGL_DEBUG_ASSERT(cachedSamplerHandles_[0].ptr != 0, + "Sampler count > 0 but base handle is null - did you bind only higher slots?"); + if (cachedSamplerHandles_[0].ptr != 0) { + 
commandList->SetComputeRootDescriptorTable(4, cachedSamplerHandles_[0]); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu samplers\n", boundSamplerCount_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: Sampler count > 0 but base handle is null - skipping binding and clearing boundSamplerCount_ to 0\n"); + // Clear count to avoid repeated errors on subsequent dispatches + boundSamplerCount_ = 0; + } + } + + // Dispatch compute work + // Note: threadgroupSize is embedded in the compute shader ([numthreads(...)]) + commandList->Dispatch(threadgroupCount.width, threadgroupCount.height, threadgroupCount.depth); + + // Insert resource-specific UAV barriers for bound UAVs to ensure compute writes are visible. + // Only barrier UAVs that were actually bound (more efficient than a global barrier). + if (boundUavCount_ > 0) { + // Use fixed-size array to avoid heap allocation in hot path + D3D12_RESOURCE_BARRIER barriers[kMaxComputeBuffers]; + UINT barrierCount = 0; + + for (size_t i = 0; i < boundUavCount_; ++i) { + if (boundUavResources_[i] != nullptr) { + D3D12_RESOURCE_BARRIER& barrier = barriers[barrierCount++]; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.UAV.pResource = boundUavResources_[i]; // Resource-specific UAV barrier + } + } + + if (barrierCount > 0) { + commandList->ResourceBarrier(barrierCount, barriers); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::dispatchThreadGroups - dispatch complete, %u resource-specific UAV barriers inserted\n", + barrierCount); + } + } +} + +void ComputeCommandEncoder::bindPushConstants(const void* data, + size_t length, + size_t offset) { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList || !data || length == 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindPushConstants: Invalid parameters or closed command list (list=%p, data=%p, len=%zu)\n", + commandList, data, length); + return; + } + + 
// Compute root signature parameter 0 is declared as D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS (b0). + // Increased to 32 DWORDs (128 bytes) to match Vulkan. + constexpr size_t kMaxPushConstantBytes = 128; + + if (length + offset > kMaxPushConstantBytes) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindPushConstants: size %zu + offset %zu exceeds maximum %zu bytes\n", + length, offset, kMaxPushConstantBytes); + return; + } + + // Calculate number of 32-bit values and offset in DWORDs + const uint32_t num32BitValues = static_cast((length + 3) / 4); // Round up to DWORDs + const uint32_t destOffsetIn32BitValues = static_cast(offset / 4); + + // Use SetComputeRoot32BitConstants to directly write data to root constants + // Root parameter 0 = b0 (Push Constants), as declared in compute root signature + commandList->SetComputeRoot32BitConstants( + 0, // Root parameter index (push constants at parameter 0) + num32BitValues, // Number of 32-bit values to set + data, // Source data + destOffsetIn32BitValues); // Destination offset in 32-bit values + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindPushConstants: Set %u DWORDs (%zu bytes) at offset %zu to root parameter 0 (b0)\n", + num32BitValues, length, offset); +} + +void ComputeCommandEncoder::bindTexture(uint32_t index, ITexture* texture) { + // Delegate to D3D12ResourcesBinder for centralized descriptor management. 
+ resourcesBinder_.bindTexture(index, texture); +} + +void ComputeCommandEncoder::bindBuffer(uint32_t index, IBuffer* buffer, size_t offset, size_t /*bufferSize*/) { + if (!buffer) { + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer: null buffer\n"); + return; + } + + auto* d3dBuffer = static_cast(buffer); + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + + if (!device || context.getCbvSrvUavHeap() == nullptr) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: missing device or per-frame descriptor heap\n"); + return; + } + + // Determine buffer type + const auto bufferType = d3dBuffer->getBufferType(); + const bool isUniformBuffer = (bufferType & BufferDesc::BufferTypeBits::Uniform) != 0; + const bool isStorageBuffer = (bufferType & BufferDesc::BufferTypeBits::Storage) != 0; + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer(%u): isUniform=%d, isStorage=%d\n", + index, isUniformBuffer, isStorageBuffer); + + if (isStorageBuffer) { + // Storage buffer - bind as UAV (unordered access view) for read/write + if (index >= kMaxComputeBuffers) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: UAV index %u exceeds max %zu\n", + index, kMaxComputeBuffers); + return; + } + + // Determine element stride for structured buffer views + // If storageStride is not specified, default to 4 bytes to preserve existing behavior + size_t elementStride = d3dBuffer->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + + // D3D12 requires UAV buffer views to use element-aligned offsets + if (offset % elementStride != 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Storage buffer offset %zu is not aligned to " + "element stride (%zu bytes). 
UAV FirstElement will be truncated (offset/stride).\n", + offset, elementStride); + // Continue but log warning – FirstElement below uses integer division + } + + // Validate offset doesn't exceed buffer size to prevent underflow + const size_t bufferSizeBytes = d3dBuffer->getSizeInBytes(); + if (offset > bufferSizeBytes) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Storage buffer offset %zu exceeds buffer size %zu; skipping UAV binding\n", + offset, bufferSizeBytes); + return; + } + const size_t remaining = bufferSizeBytes - offset; + + // Check for undersized buffer (would create empty or partial view) + if (remaining < elementStride) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Remaining buffer size %zu is less than element stride %zu; " + "UAV will have NumElements=0 (empty view). Check buffer size and offset.\n", + remaining, elementStride); + // Continue to create the descriptor, but it will be empty (NumElements=0) + } + + // Use Result-based allocation with dynamic heap growth. 
+ uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Failed to allocate UAV descriptor: %s\n", allocResult.message.c_str()); + return; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor for RWStructuredBuffer (structured buffer) + // D3D12 compute shaders expect structured buffers, not raw buffers + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; // Required for structured buffers + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + // Element index and count are expressed in units of elementStride bytes + // Division truncates if offset is not aligned; see warning above + uavDesc.Buffer.FirstElement = static_cast(offset / elementStride); + // CRITICAL: NumElements must be (size - offset) / stride, not total size / stride + uavDesc.Buffer.NumElements = static_cast(remaining / elementStride); + uavDesc.Buffer.StructureByteStride = static_cast(elementStride); + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; // No flags for structured buffers + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateUnorderedAccessView"); + IGL_DEBUG_ASSERT(d3dBuffer->getResource() != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "UAV descriptor handle is invalid"); + + device->CreateUnorderedAccessView(d3dBuffer->getResource(), nullptr, &uavDesc, cpuHandle); + + cachedUavHandles_[index] = gpuHandle; + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + cachedUavHandles_[i] = {}; + } + boundUavCount_ = static_cast(index + 1); + + // Track UAV resource for precise barrier synchronization. 
+ // Note: UAV bindings are assumed to be dense (slots 0..boundUavCount_-1). + // Both cachedUavHandles_ and boundUavResources_ rely on this invariant. + boundUavResources_[index] = d3dBuffer->getResource(); + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + boundUavResources_[i] = nullptr; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer: Created UAV at index %u, descriptor slot %u\n", + index, descriptorIndex); + + commandBuffer_.trackTransientResource(d3dBuffer->getResource()); + } else if (isUniformBuffer) { + // Uniform buffer - bind as CBV (constant buffer view) + if (index >= kMaxComputeBuffers) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: CBV index %u exceeds max %zu\n", + index, kMaxComputeBuffers); + return; + } + + // Enforce dense binding: CBVs must start at slot 0 with no gaps + if (index > 0 && cachedCbvAddresses_[0] == 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: CBV bindings must be dense starting from slot 0. " + "Cannot bind slot %u when slot 0 is not bound.\n", index); + return; + } + + // Check for gaps in bindings + for (size_t i = 0; i < index; ++i) { + if (cachedCbvAddresses_[i] == 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: CBV bindings must be dense. " + "Cannot bind slot %u when slot %zu is not bound (gap detected).\n", index, i); + return; + } + } + + // D3D12 requires constant buffer addresses to be 256-byte aligned + // (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + if ((offset & 255) != 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: ERROR - CBV offset %zu is not 256-byte aligned " + "(required by D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT). " + "Constant buffers must be created at aligned offsets. Ignoring bind request.\n", offset); + return; + } + + cachedCbvAddresses_[index] = d3dBuffer->gpuAddress(offset); + // Store buffer size for CBV descriptor creation on the next dispatch. 
+ // Actual descriptor creation happens in dispatchThreadGroups when cbvBindingsDirty_ is set. + size_t bufferSize = d3dBuffer->getSizeInBytes() - offset; + + // D3D12 spec: Constant buffers must be ≤ 64 KB + constexpr size_t kMaxCBVSize = 65536; // 64 KB + if (bufferSize > kMaxCBVSize) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Buffer size (%zu bytes) exceeds D3D12 64 KB limit for constant buffers at index %u. Clamping to 64 KB.\n", + bufferSize, index); + bufferSize = kMaxCBVSize; + } + + cachedCbvSizes_[index] = bufferSize; + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + cachedCbvAddresses_[i] = 0; + cachedCbvSizes_[i] = 0; + } + boundCbvCount_ = static_cast(index + 1); + + // Mark CBV bindings as dirty to trigger descriptor recreation on the next dispatch. + cbvBindingsDirty_ = true; + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer: Cached CBV at index %u, address 0x%llx, size %zu\n", + index, cachedCbvAddresses_[index], cachedCbvSizes_[index]); + + commandBuffer_.trackTransientResource(d3dBuffer->getResource()); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Buffer must be Uniform or Storage type\n"); + } +} + +void ComputeCommandEncoder::bindUniform(const UniformDesc& /*uniformDesc*/, const void* /*data*/) { + // Single uniform binding not supported in D3D12 + // Use uniform buffers (CBVs) instead + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindUniform - not supported, use uniform buffers\n"); +} + +void ComputeCommandEncoder::bindBytes(uint32_t /*index*/, const void* /*data*/, size_t /*length*/) { + + // D3D12 backend does not support bindBytes + // Applications should use uniform buffers (bindBuffer) instead + // This is a no-op to maintain compatibility with cross-platform code + IGL_DEBUG_ASSERT_NOT_IMPLEMENTED(); + IGL_LOG_INFO_ONCE("bindBytes is not supported in D3D12 backend. 
Use bindBuffer with uniform buffers instead.\n"); +} + +void ComputeCommandEncoder::bindImageTexture(uint32_t index, ITexture* texture, TextureFormat /*format*/) { + if (!texture) { + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindImageTexture: null texture\n"); + return; + } + + if (index >= kMaxComputeBuffers) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: index %u exceeds max %zu\n", + index, kMaxComputeBuffers); + return; + } + + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + auto* d3dTexture = static_cast(texture); + + if (!device || !d3dTexture->getResource() || context.getCbvSrvUavHeap() == nullptr) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: missing device, resource, or per-frame heap\n"); + return; + } + + // Transition texture to UAV state for compute shader read/write access + auto* commandList = commandBuffer_.getCommandList(); + if (commandList) { + d3dTexture->transitionAll(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + + // Allocate descriptor and create UAV using Result-based allocation with dynamic heap growth. 
+ uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: Failed to allocate UAV descriptor: %s\n", allocResult.message.c_str()); + return; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = textureFormatToDXGIFormat(d3dTexture->getFormat()); + + auto resourceDesc = d3dTexture->getResource()->GetDesc(); + if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uavDesc.Texture3D.MipSlice = 0; + uavDesc.Texture3D.FirstWSlice = 0; + uavDesc.Texture3D.WSize = resourceDesc.DepthOrArraySize; + } else if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + if (resourceDesc.DepthOrArraySize > 1) { + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uavDesc.Texture2DArray.MipSlice = 0; + uavDesc.Texture2DArray.FirstArraySlice = 0; + uavDesc.Texture2DArray.ArraySize = resourceDesc.DepthOrArraySize; + uavDesc.Texture2DArray.PlaneSlice = 0; + } else { + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + uavDesc.Texture2D.PlaneSlice = 0; + } + } else { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: unsupported dimension\n"); + return; + } + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateUnorderedAccessView"); + IGL_DEBUG_ASSERT(d3dTexture->getResource() != nullptr, "Texture resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "UAV descriptor handle is invalid"); + + device->CreateUnorderedAccessView(d3dTexture->getResource(), nullptr, &uavDesc, cpuHandle); + + cachedUavHandles_[index] = gpuHandle; + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + cachedUavHandles_[i] = {}; + } + boundUavCount_ = static_cast(index + 1); + + // Track UAV resources for precise barrier synchronization. + // Note: UAV bindings are assumed to be dense (slots 0..boundUavCount_-1). + // Both cachedUavHandles_ and boundUavResources_ rely on this invariant. + boundUavResources_[index] = d3dTexture->getResource(); + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + boundUavResources_[i] = nullptr; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindImageTexture: Created UAV at index %u, descriptor slot %u\n", + index, descriptorIndex); +} + +void ComputeCommandEncoder::bindSamplerState(uint32_t index, ISamplerState* samplerState) { + // Delegate to D3D12ResourcesBinder for centralized descriptor management. 
+ resourcesBinder_.bindSamplerState(index, samplerState); +} + +void ComputeCommandEncoder::pushDebugGroupLabel(const char* label, const Color& /*color*/) const { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList || !label) { + return; + } + // PIX debug markers + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList->BeginEvent( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void ComputeCommandEncoder::insertDebugEventLabel(const char* label, const Color& /*color*/) const { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList || !label) { + return; + } + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList->SetMarker( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void ComputeCommandEncoder::popDebugGroupLabel() const { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList) { + return; + } + commandList->EndEvent(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ComputeCommandEncoder.h b/src/igl/d3d12/ComputeCommandEncoder.h new file mode 100644 index 0000000000..6c13aae126 --- /dev/null +++ b/src/igl/d3d12/ComputeCommandEncoder.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class CommandBuffer; +class ComputePipelineState; + +class ComputeCommandEncoder final : public IComputeCommandEncoder { + public: + explicit ComputeCommandEncoder(CommandBuffer& commandBuffer); + ~ComputeCommandEncoder() override = default; + + void endEncoding() override; + + void bindComputePipelineState(const std::shared_ptr& pipelineState) override; + void dispatchThreadGroups(const Dimensions& threadgroupCount, + const Dimensions& threadgroupSize, + const Dependencies& dependencies = {}) override; + void bindPushConstants(const void* data, size_t length, size_t offset = 0) override; + void bindTexture(uint32_t index, ITexture* texture) override; + + /** + * @brief Bind a buffer to a compute shader slot + * + * IMPORTANT: For constant buffers (uniform buffers) in compute shaders, bindings MUST be DENSE + * starting from index 0 with NO GAPS. For example: + * - VALID: bindBuffer(0, ...), bindBuffer(1, ...), bindBuffer(2, ...) + * - INVALID: bindBuffer(0, ...), bindBuffer(2, ...) // gap at index 1 + * - INVALID: bindBuffer(1, ...), bindBuffer(2, ...) // index 0 not bound + * + * This constraint is enforced by D3D12ResourcesBinder and will return InvalidOperation if violated. + * See D3D12ResourcesBinder::updateBufferBindings for implementation details. + * + * @param index Buffer slot index (maps to HLSL register b0, b1, etc. 
for CBVs) + * @param buffer Buffer to bind + * @param offset Offset in bytes into the buffer + * @param bufferSize Size of the buffer region to bind + */ + void bindBuffer(uint32_t index, IBuffer* buffer, size_t offset = 0, size_t bufferSize = 0) override; + void bindUniform(const UniformDesc& uniformDesc, const void* data) override; + void bindBytes(uint32_t index, const void* data, size_t length) override; + void bindImageTexture(uint32_t index, ITexture* texture, TextureFormat format) override; + void bindSamplerState(uint32_t index, ISamplerState* samplerState) override; + + // Debug labels + void pushDebugGroupLabel(const char* label, const Color& color) const override; + void insertDebugEventLabel(const char* label, const Color& color) const override; + void popDebugGroupLabel() const override; + + private: + CommandBuffer& commandBuffer_; + const ComputePipelineState* currentPipeline_ = nullptr; + bool isEncoding_ = false; + + // Centralized resource binding management. + D3D12ResourcesBinder resourcesBinder_; + + // Cached GPU handles for resources + // IMPORTANT: Bindings must be DENSE and start at slot 0 for each table. + // SetComputeRootDescriptorTable always uses cached*Handles_[0] as the base, + // so binding only higher slots (e.g., slot 1 without slot 0) will fail. + static constexpr size_t kMaxComputeBuffers = 8; + // Increased from 8 to 16 to match IGL_TEXTURE_SAMPLERS_MAX contract. + static constexpr size_t kMaxComputeTextures = IGL_TEXTURE_SAMPLERS_MAX; // 16 + // Increased from 4 to 16 to match IGL_TEXTURE_SAMPLERS_MAX contract. 
+ static constexpr size_t kMaxComputeSamplers = IGL_TEXTURE_SAMPLERS_MAX; // 16 + + D3D12_GPU_DESCRIPTOR_HANDLE cachedUavHandles_[kMaxComputeBuffers] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSrvHandles_[kMaxComputeTextures] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSamplerHandles_[kMaxComputeSamplers] = {}; + D3D12_GPU_VIRTUAL_ADDRESS cachedCbvAddresses_[kMaxComputeBuffers] = {}; + // Track CBV sizes for descriptor creation. + size_t cachedCbvSizes_[kMaxComputeBuffers] = {}; + + size_t boundUavCount_ = 0; + size_t boundSrvCount_ = 0; + size_t boundCbvCount_ = 0; + size_t boundSamplerCount_ = 0; + + // Cache CBV descriptor indices to avoid per-dispatch allocation. + uint32_t cachedCbvBaseIndex_ = 0; + uint32_t cachedCbvPageIndex_ = UINT32_MAX; // Track heap page for invalidation + bool cbvBindingsDirty_ = true; // Track if CBV bindings have changed + + // Track UAV resources for precise synchronization barriers. + // Tracks UAV resources bound via bindBuffer (storage buffers) and bindImageTexture (RW textures). + ID3D12Resource* boundUavResources_[kMaxComputeBuffers] = {}; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ComputePipelineState.cpp b/src/igl/d3d12/ComputePipelineState.cpp new file mode 100644 index 0000000000..b23547c2c5 --- /dev/null +++ b/src/igl/d3d12/ComputePipelineState.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +ComputePipelineState::ComputePipelineState(const ComputePipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature) + : desc_(desc), + pipelineState_(std::move(pipelineState)), + rootSignature_(std::move(rootSignature)) { + // Set D3D12 object names for PIX debugging + const std::string& debugName = desc_.debugName; + if (pipelineState_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + pipelineState_->SetName((L"ComputePSO_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("ComputePipelineState: Set PIX debug name 'ComputePSO_%s'\n", debugName.c_str()); + } + if (rootSignature_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + rootSignature_->SetName((L"ComputeRootSig_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("ComputePipelineState: Set PIX root signature name 'ComputeRootSig_%s'\n", debugName.c_str()); + } +} + +std::shared_ptr +ComputePipelineState::computePipelineReflection() { + // Return cached reflection if already created + if (reflection_) { + return reflection_; + } + + // Reflection implementation following the pattern from RenderPipelineState + struct ReflectionImpl final : public IComputePipelineReflection { + std::vector ubs; + std::vector samplers; + std::vector textures; + const std::vector& allUniformBuffers() const override { return ubs; } + const std::vector& allSamplers() const override { return samplers; } + const std::vector& allTextures() const override { return textures; } + }; + + auto out = std::make_shared(); + + // Get compute shader module and reflect it + if (!desc_.shaderStages) { + return out; + } + + auto computeModule = desc_.shaderStages->getComputeModule(); + if (!computeModule) { + return out; + } + + auto* d3dMod = dynamic_cast(computeModule.get()); + if (!d3dMod) { + return out; + } + + const auto& bc = 
d3dMod->getBytecode(); + if (bc.empty()) { + return out; + } + + // Create shader reflection interface using D3DReflect + igl::d3d12::ComPtr refl; + if (FAILED(D3DReflect(bc.data(), bc.size(), IID_PPV_ARGS(refl.GetAddressOf())))) { + return out; + } + + D3D12_SHADER_DESC sd{}; + if (FAILED(refl->GetDesc(&sd))) { + return out; + } + + // Extract constant buffer information + for (UINT i = 0; i < sd.ConstantBuffers; ++i) { + auto* cb = refl->GetConstantBufferByIndex(i); + D3D12_SHADER_BUFFER_DESC cbd{}; + if (FAILED(cb->GetDesc(&cbd))) { + continue; + } + + // Find the bind point for this constant buffer + int bufferIndex = -1; + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (SUCCEEDED(refl->GetResourceBindingDesc(r, &bind))) { + if (bind.Type == D3D_SIT_CBUFFER && + std::string(bind.Name) == std::string(cbd.Name)) { + bufferIndex = static_cast(bind.BindPoint); + break; + } + } + } + + BufferArgDesc ub; + ub.name = igl::genNameHandle(cbd.Name ? cbd.Name : ""); + ub.bufferAlignment = 256; // D3D12 constant buffer alignment + ub.bufferDataSize = cbd.Size; + ub.bufferIndex = bufferIndex; + ub.shaderStage = ShaderStage::Compute; + ub.isUniformBlock = true; + + // Extract member variables from constant buffer + for (UINT v = 0; v < cbd.Variables; ++v) { + auto* var = cb->GetVariableByIndex(v); + D3D12_SHADER_VARIABLE_DESC vd{}; + if (FAILED(var->GetDesc(&vd))) { + continue; + } + + auto* t = var->GetType(); + if (!t) { + continue; + } + + D3D12_SHADER_TYPE_DESC td{}; + if (FAILED(t->GetDesc(&td))) { + continue; + } + + BufferArgDesc::BufferMemberDesc m; + m.name = igl::genNameHandle(vd.Name ? vd.Name : ""); + m.type = ReflectionUtils::mapUniformType(td); + m.offset = vd.StartOffset; + m.arrayLength = td.Elements ? 
td.Elements : 1; + ub.members.push_back(std::move(m)); + } + + out->ubs.push_back(std::move(ub)); + } + + // Extract texture and sampler bindings + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (FAILED(refl->GetResourceBindingDesc(r, &bind))) { + continue; + } + + if (bind.Type == D3D_SIT_TEXTURE) { + TextureArgDesc t; + t.name = bind.Name ? bind.Name : ""; + t.type = TextureType::TwoD; + t.textureIndex = bind.BindPoint; + t.shaderStage = ShaderStage::Compute; + out->textures.push_back(std::move(t)); + } else if (bind.Type == D3D_SIT_SAMPLER) { + SamplerArgDesc s; + s.name = bind.Name ? bind.Name : ""; + s.samplerIndex = bind.BindPoint; + s.shaderStage = ShaderStage::Compute; + out->samplers.push_back(std::move(s)); + } + } + + // Cache the reflection for future calls + reflection_ = out; + return reflection_; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ComputePipelineState.h b/src/igl/d3d12/ComputePipelineState.h new file mode 100644 index 0000000000..6cd8c6e0dd --- /dev/null +++ b/src/igl/d3d12/ComputePipelineState.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class ComputePipelineState final : public IComputePipelineState { + public: + ComputePipelineState(const ComputePipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature); + ~ComputePipelineState() override = default; + + std::shared_ptr computePipelineReflection() override; + + // D3D12-specific accessors + ID3D12PipelineState* getPipelineState() const { return pipelineState_.Get(); } + ID3D12RootSignature* getRootSignature() const { return rootSignature_.Get(); } + + private: + ComputePipelineDesc desc_; + igl::d3d12::ComPtr pipelineState_; + igl::d3d12::ComPtr rootSignature_; + std::shared_ptr reflection_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12AllocatorPool.cpp b/src/igl/d3d12/D3D12AllocatorPool.cpp new file mode 100644 index 0000000000..fcef51b7c5 --- /dev/null +++ b/src/igl/d3d12/D3D12AllocatorPool.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +void D3D12AllocatorPool::initialize(D3D12Context& ctx, IFenceProvider* fenceProvider) { + auto* device = ctx.getDevice(); + if (!device) { + IGL_LOG_ERROR("D3D12AllocatorPool::initialize: D3D12 device is null\n"); + return; + } + + HRESULT hr = device->CreateFence( + 0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(uploadFence_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::initialize: Failed to create upload fence: 0x%08X\n", + hr); + } else { + uploadFenceValue_ = 0; + IGL_D3D12_LOG_VERBOSE( + "D3D12AllocatorPool::initialize: Upload fence created successfully\n"); + } + + // Use upload ring buffer size from D3D12ContextConfig instead of hardcoding. 
+ const uint64_t uploadRingBufferSize = ctx.getConfig().uploadRingBufferSize; + uploadRingBuffer_ = + std::make_unique(device, uploadRingBufferSize); + + auto* commandQueue = ctx.getCommandQueue(); + if (commandQueue && uploadFence_.Get() && fenceProvider) { + immediateCommands_ = std::make_unique( + device, commandQueue, uploadFence_.Get(), fenceProvider); + stagingDevice_ = std::make_unique( + device, uploadFence_.Get(), uploadRingBuffer_.get()); + } +} + +void D3D12AllocatorPool::processCompletedUploads() { + if (!uploadFence_.Get()) { + return; + } + + const UINT64 completed = uploadFence_->GetCompletedValue(); + + { + std::lock_guard lock(pendingUploadsMutex_); + auto it = pendingUploads_.begin(); + while (it != pendingUploads_.end()) { + if (it->fenceValue <= completed) { + it = pendingUploads_.erase(it); + } else { + ++it; + } + } + } + + if (uploadRingBuffer_) { + uploadRingBuffer_->retire(completed); + } +} + +void D3D12AllocatorPool::trackUploadBuffer( + ComPtr buffer, + UINT64 fenceValue) { + if (!buffer.Get()) { + return; + } + + std::lock_guard lock(pendingUploadsMutex_); + pendingUploads_.push_back(PendingUpload{fenceValue, std::move(buffer)}); +} + +ComPtr D3D12AllocatorPool::getUploadCommandAllocator( + D3D12Context& ctx) { + if (!uploadFence_.Get()) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: Upload fence not " + "initialized\n"); + return nullptr; + } + + std::lock_guard lock(commandAllocatorPoolMutex_); + + const UINT64 completedValue = uploadFence_->GetCompletedValue(); + + for (size_t i = 0; i < commandAllocatorPool_.size(); ++i) { + auto& tracked = commandAllocatorPool_[i]; + + if (completedValue >= tracked.fenceValue) { + auto allocator = tracked.allocator; + + commandAllocatorPool_[i] = commandAllocatorPool_.back(); + commandAllocatorPool_.pop_back(); + + HRESULT hr = allocator->Reset(); + if (FAILED(hr)) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: " + "CommandAllocator::Reset failed: 
0x%08X\n", + hr); + return nullptr; + } + + totalAllocatorReuses_++; + return allocator; + } + } + + static constexpr size_t kMaxCommandAllocators = 256; + + if (totalCommandAllocatorsCreated_ >= kMaxCommandAllocators) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: Command allocator " + "pool exhausted\n"); + return nullptr; + } + + auto* device = ctx.getDevice(); + if (!device) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: D3D12 device is null\n"); + return nullptr; + } + + ComPtr newAllocator; + HRESULT hr = device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(newAllocator.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: " + "CreateCommandAllocator failed: 0x%08X\n", + hr); + return nullptr; + } + + totalCommandAllocatorsCreated_++; + return newAllocator; +} + +void D3D12AllocatorPool::returnUploadCommandAllocator( + ComPtr allocator, + UINT64 fenceValue) { + if (!allocator.Get()) { + return; + } + + std::lock_guard lock(commandAllocatorPoolMutex_); + + TrackedCommandAllocator tracked; + tracked.allocator = allocator; + tracked.fenceValue = fenceValue; + commandAllocatorPool_.push_back(tracked); + + if (commandAllocatorPool_.size() > peakPoolSize_) { + peakPoolSize_ = commandAllocatorPool_.size(); + } +} + +::igl::Result D3D12AllocatorPool::waitForUploadFence( + const Device& device, + UINT64 fenceValue) const { + if (!uploadFence_.Get()) { + return ::igl::Result( + ::igl::Result::Code::InvalidOperation, "Upload fence not initialized"); + } + + if (uploadFence_->GetCompletedValue() >= fenceValue) { + return ::igl::Result(); + } + + FenceWaiter waiter(uploadFence_.Get(), fenceValue); + ::igl::Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + ::igl::Result deviceStatus = device.checkDeviceRemoval(); + if (!deviceStatus.isOk()) { + return deviceStatus; + } + return waitResult; + } + + return Result(); +} + +void 
D3D12AllocatorPool::clearOnDeviceDestruction() { + { + std::lock_guard lock(commandAllocatorPoolMutex_); + commandAllocatorPool_.clear(); + totalCommandAllocatorsCreated_ = 0; + peakPoolSize_ = 0; + totalAllocatorReuses_ = 0; + } + { + std::lock_guard lock(pendingUploadsMutex_); + pendingUploads_.clear(); + } + + uploadRingBuffer_.reset(); + stagingDevice_.reset(); + immediateCommands_.reset(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12AllocatorPool.h b/src/igl/d3d12/D3D12AllocatorPool.h new file mode 100644 index 0000000000..61bca25c89 --- /dev/null +++ b/src/igl/d3d12/D3D12AllocatorPool.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace igl { +class Result; +} // namespace igl + +namespace igl::d3d12 { + +class Device; +class D3D12Context; +class UploadRingBuffer; +class D3D12ImmediateCommands; +class D3D12StagingDevice; +class IFenceProvider; + +class D3D12AllocatorPool { + public: + D3D12AllocatorPool() = default; + + void initialize(D3D12Context& ctx, IFenceProvider* fenceProvider); + + void processCompletedUploads(); + void trackUploadBuffer(ComPtr buffer, UINT64 fenceValue); + + ComPtr getUploadCommandAllocator(D3D12Context& ctx); + void returnUploadCommandAllocator(ComPtr allocator, + UINT64 fenceValue); + + ID3D12Fence* getUploadFence() const { + return uploadFence_.Get(); + } + + UINT64 getNextUploadFenceValue() { + return ++uploadFenceValue_; + } + + UINT64 getLastUploadFenceValue() const { + return uploadFenceValue_; + } + + UploadRingBuffer* getUploadRingBuffer() const { + return uploadRingBuffer_.get(); + } + + D3D12ImmediateCommands* getImmediateCommands() const { + return immediateCommands_.get(); + } + + D3D12StagingDevice* getStagingDevice() const { + return stagingDevice_.get(); + } + + 
::igl::Result waitForUploadFence(const Device& device, UINT64 fenceValue) const; + + void clearOnDeviceDestruction(); + + private: + struct PendingUpload { + UINT64 fenceValue = 0; + ComPtr resource; + }; + + struct TrackedCommandAllocator { + ComPtr allocator; + UINT64 fenceValue = 0; + }; + + std::mutex pendingUploadsMutex_; + std::vector pendingUploads_; + + std::mutex commandAllocatorPoolMutex_; + std::vector commandAllocatorPool_; + size_t totalCommandAllocatorsCreated_ = 0; + size_t peakPoolSize_ = 0; + size_t totalAllocatorReuses_ = 0; + + ComPtr uploadFence_; + UINT64 uploadFenceValue_ = 0; + + std::unique_ptr uploadRingBuffer_; + std::unique_ptr immediateCommands_; + std::unique_ptr stagingDevice_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12Context.cpp b/src/igl/d3d12/D3D12Context.cpp new file mode 100644 index 0000000000..11875700b5 --- /dev/null +++ b/src/igl/d3d12/D3D12Context.cpp @@ -0,0 +1,1468 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include + +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +// Static member initialization +D3D12Context::ResourceStats D3D12Context::resourceStats_; +std::mutex D3D12Context::resourceStatsMutex_; + +// AdapterInfo helper methods +uint64_t D3D12Context::AdapterInfo::getDedicatedVideoMemoryMB() const { + return desc.DedicatedVideoMemory / (1024 * 1024); +} + +const char* D3D12Context::AdapterInfo::getVendorName() const { + switch (desc.VendorId) { + case 0x10DE: return "NVIDIA"; + case 0x1002: case 0x1022: return "AMD"; + case 0x8086: return "Intel"; + case 0x1414: return "Microsoft"; + default: return "Unknown"; + } +} + +// MemoryBudget helper methods +uint64_t D3D12Context::MemoryBudget::totalAvailableMemory() const { + return dedicatedVideoMemory + sharedSystemMemory; +} + +double D3D12Context::MemoryBudget::getUsagePercentage() const { + if (totalAvailableMemory() == 0) return 0.0; + return (static_cast(estimatedUsage) / totalAvailableMemory()) * 100.0; +} + +bool D3D12Context::MemoryBudget::isMemoryCritical() const { + return getUsagePercentage() > 90.0; +} + +bool D3D12Context::MemoryBudget::isMemoryLow() const { + return getUsagePercentage() > 70.0; +} + +// A-011: Helper function to probe highest supported feature level for an adapter +D3D_FEATURE_LEVEL D3D12Context::getHighestFeatureLevel(IDXGIAdapter1* adapter) { + const D3D_FEATURE_LEVEL featureLevels[] = { + D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + }; + + for (D3D_FEATURE_LEVEL fl : featureLevels) { + if (SUCCEEDED(D3D12CreateDevice(adapter, fl, _uuidof(ID3D12Device), nullptr))) { + return fl; + } + } + + return static_cast(0); // No supported feature level +} + +D3D12Context::~D3D12Context() { + // Wait for GPU to finish before cleanup + waitForGPU(); + + // Explicitly release 
all frame context resources to prevent leaks. + for (uint32_t i = 0; i < frameContexts_.size(); ++i) { + frameContexts_[i].transientBuffers.clear(); + frameContexts_[i].transientResources.clear(); + + // Explicitly reset heaps inside each page before clearing the vector. + for (auto& page : frameContexts_[i].cbvSrvUavHeapPages) { + page.heap.Reset(); + } + frameContexts_[i].cbvSrvUavHeapPages.clear(); + + frameContexts_[i].samplerHeap.Reset(); + frameContexts_[i].activeCbvSrvUavHeap.Reset(); + frameContexts_[i].allocator.Reset(); + } + + // Release render targets explicitly. + for (uint32_t i = 0; i < renderTargets_.size(); ++i) { + renderTargets_[i].Reset(); + } + + // Release command signatures. + drawIndirectSignature_.Reset(); + drawIndexedIndirectSignature_.Reset(); + + // Release core resources explicitly. + rtvHeap_.Reset(); + swapChain_.Reset(); + fence_.Reset(); + commandQueue_.Reset(); + + // Clean up descriptor heap manager's heaps before deleting it. + // Note: heapMgr_ may point to either ownedHeapMgr_ OR external heap manager + // (e.g., HeadlessContext owns it via unique_ptr). We cleanup the heaps regardless. + if (heapMgr_) { + heapMgr_->cleanup(); + } + + // Clean up owned descriptor heap manager (if we own it) + delete ownedHeapMgr_; + ownedHeapMgr_ = nullptr; + heapMgr_ = nullptr; + + // Release device last, after all dependent resources are freed. + device_.Reset(); + adapter_.Reset(); + dxgiFactory_.Reset(); + +#ifdef IGL_DEBUG + IGL_LOG_INFO("[D3D12Context] All resources released\n"); +#endif +} + +Result D3D12Context::initialize(HWND hwnd, uint32_t width, uint32_t height, + const D3D12ContextConfig& config) { + width_ = width; + height_ = height; + + // Store and validate configuration. + config_ = config; + config_.validate(); + + // Pre-allocate vectors to config size (T43). Will be verified/resized after swapchain creation. 
+ swapchainBufferCount_ = config_.maxFramesInFlight; + renderTargets_.resize(swapchainBufferCount_); + frameContexts_.resize(swapchainBufferCount_); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating D3D12 device...\n"); + Result deviceResult = createDevice(); + if (!deviceResult.isOk()) { + return deviceResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Device created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating command queue...\n"); + Result queueResult = createCommandQueue(); + if (!queueResult.isOk()) { + return queueResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Command queue created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating swapchain (%ux%u)...\n", width, height); + Result swapChainResult = createSwapChain(hwnd, width, height); + if (!swapChainResult.isOk()) { + return swapChainResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating RTV heap...\n"); + Result rtvResult = createRTVHeap(); + if (!rtvResult.isOk()) { + return rtvResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: RTV heap created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating back buffers...\n"); + Result backBufferResult = createBackBuffers(); + if (!backBufferResult.isOk()) { + return backBufferResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Back buffers created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating descriptor heaps...\n"); + Result descriptorHeapResult = createDescriptorHeaps(); + if (!descriptorHeapResult.isOk()) { + return descriptorHeapResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Descriptor heaps created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating command signatures...\n"); + Result commandSigResult = createCommandSignatures(); + if (!commandSigResult.isOk()) { + return commandSigResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Command signatures created successfully\n"); + + 
IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating fence for GPU synchronization...\n"); + HRESULT hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create fence (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create fence"); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Fence created successfully\n"); + + // Create per-frame command allocators using runtime buffer count (T43). + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating per-frame command allocators...\n"); + for (UINT i = 0; i < swapchainBufferCount_; i++) { + hr = device_->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(frameContexts_[i].allocator.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create command allocator for frame %u (HRESULT: 0x%08X)\n", i, static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create command allocator for frame " + std::to_string(i)); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Created command allocator for frame %u\n", i); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Per-frame command allocators created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Initialization complete!\n"); + + return Result(); +} + +Result D3D12Context::resize(uint32_t width, uint32_t height) { + // Validate dimensions + if (width == 0 || height == 0) { + return Result{Result::Code::ArgumentInvalid, + "Invalid resize dimensions: width and height must be non-zero"}; + } + + if (width == width_ && height == height_) { + return Result(); + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Resizing swapchain from %ux%u to %ux%u\n", + width_, height_, width, height); + + width_ = width; + height_ = height; + + // Wait for all GPU work to complete before releasing backbuffers + // This prevents DXGI_ERROR_DEVICE_REMOVED when GPU is still 
rendering to old buffers + if (fence_.Get() && commandQueue_.Get()) { + const UINT64 currentFence = fenceValue_; + commandQueue_->Signal(fence_.Get(), currentFence); + + FenceWaiter waiter(fence_.Get(), currentFence); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("D3D12Context::resize() - Fence wait failed: %s\n", + waitResult.message.c_str()); + // Continue with resize despite error - old buffers will be released anyway + } + } + + // Release old back buffers (T43: use runtime buffer count) + for (UINT i = 0; i < swapchainBufferCount_; i++) { + renderTargets_[i].Reset(); + } + + // Store swapchain format and flags for potential recreation + DXGI_SWAP_CHAIN_DESC1 currentDesc = {}; + if (swapChain_.Get()) { + swapChain_->GetDesc1(¤tDesc); + } + + // Try to resize existing swapchain (T43: use runtime buffer count) + HRESULT hr = swapChain_->ResizeBuffers( + swapchainBufferCount_, + width, + height, + currentDesc.Format ? currentDesc.Format : DXGI_FORMAT_B8G8R8A8_UNORM, + currentDesc.Flags); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: ResizeBuffers failed (HRESULT=0x%08X), attempting to recreate swapchain\n", + static_cast(hr)); + + // Graceful fallback: Recreate swapchain from scratch + Result result = recreateSwapChain(width, height); + if (!result.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to recreate swapchain: %s\n", result.message.c_str()); + return Result{Result::Code::RuntimeError, + "Failed to resize or recreate swapchain"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain recreated successfully\n"); + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: ResizeBuffers succeeded\n"); + } + + // Recreate back buffer views + Result backBufferResult = createBackBuffers(); + if (!backBufferResult.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to recreate back buffers: %s\n", backBufferResult.message.c_str()); + return backBufferResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain resize complete\n"); + + 
return Result(); +} + +Result D3D12Context::recreateSwapChain(uint32_t width, uint32_t height) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Recreating swapchain with dimensions %ux%u\n", width, height); + + // Get window handle from existing swapchain before releasing it + DXGI_SWAP_CHAIN_DESC1 oldDesc = {}; + if (!swapChain_.Get()) { + return Result{Result::Code::RuntimeError, "No existing swapchain to recreate"}; + } + + HRESULT hr = swapChain_->GetDesc1(&oldDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to get swapchain description (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, "Failed to get swapchain description"}; + } + + // Try to get HWND via GetHwnd (IDXGISwapChain3) + HWND hwnd = nullptr; + hr = swapChain_->GetHwnd(&hwnd); + if (FAILED(hr) || !hwnd) { + IGL_LOG_ERROR("D3D12Context: Failed to get HWND from swapchain (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, "Failed to get HWND from swapchain"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Retrieved HWND=%p from existing swapchain\n", hwnd); + + // Release old swapchain completely + swapChain_.Reset(); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Old swapchain released\n"); + + // Create new swapchain with updated dimensions + DXGI_SWAP_CHAIN_DESC1 newDesc = {}; + newDesc.Width = width; + newDesc.Height = height; + newDesc.Format = oldDesc.Format ? 
oldDesc.Format : DXGI_FORMAT_B8G8R8A8_UNORM; + newDesc.Stereo = FALSE; + newDesc.SampleDesc.Count = 1; + newDesc.SampleDesc.Quality = 0; + newDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + newDesc.BufferCount = swapchainBufferCount_; // T43: use runtime buffer count + newDesc.Scaling = DXGI_SCALING_STRETCH; + newDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + newDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; + newDesc.Flags = oldDesc.Flags; // Preserve tearing support flag + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating new swapchain (format=%u, flags=0x%X)\n", + newDesc.Format, newDesc.Flags); + + igl::d3d12::ComPtr swapChain1; + hr = dxgiFactory_->CreateSwapChainForHwnd( + commandQueue_.Get(), + hwnd, + &newDesc, + nullptr, + nullptr, + swapChain1.GetAddressOf()); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: CreateSwapChainForHwnd failed (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, + "Failed to recreate swapchain with CreateSwapChainForHwnd"}; + } + + // Query IDXGISwapChain3 interface + hr = swapChain1->QueryInterface(IID_PPV_ARGS(swapChain_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to query IDXGISwapChain3 (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, + "Failed to query IDXGISwapChain3 interface"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain recreated successfully\n"); + return Result{}; +} + +Result D3D12Context::createDevice() { + // DO NOT enable experimental features in windowed mode - it breaks swapchain creation! 
+ // Experimental features are ONLY enabled in HeadlessD3D12Context for unit tests + // Windowed render sessions use signed DXIL (via IDxcValidator) which doesn't need experimental mode + + // A-007: Read debug configuration from environment variables + // Helper function to read boolean env var (returns defaultValue if not set) + auto getEnvBool = [](const char* name, bool defaultValue) -> bool { + const char* value = std::getenv(name); + if (!value) return defaultValue; + return (std::string(value) == "1") || (std::string(value) == "true"); + }; + + // A-007: Debug configuration from environment variables. + // Defaults are tuned for aggressive validation in debug builds so that + // issues like PSO creation failures and binding mismatches are surfaced + // without requiring the user to set environment variables manually. + bool enableDebugLayer = getEnvBool("IGL_D3D12_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableGPUValidation = getEnvBool("IGL_D3D12_GPU_VALIDATION", +#ifdef _DEBUG + true // Default ON in debug builds for better diagnostics +#else + false // Default OFF in release builds +#endif + ); + bool enableDRED = getEnvBool("IGL_D3D12_DRED", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableDXGIDebug = getEnvBool("IGL_DXGI_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool breakOnError = getEnvBool("IGL_D3D12_BREAK_ON_ERROR", +#ifdef _DEBUG + true // Default BREAK on error in debug builds +#else + false // Default LOG only in release builds +#endif + ); + bool breakOnWarning = getEnvBool("IGL_D3D12_BREAK_ON_WARNING", +#ifdef _DEBUG + false // Default LOG warnings in debug builds (can be overridden) +#else + false +#endif + ); + + IGL_D3D12_LOG_VERBOSE("=== D3D12 Debug Configuration ===\n"); + IGL_D3D12_LOG_VERBOSE(" 
Debug Layer: %s\n", enableDebugLayer ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" GPU Validation: %s\n", enableGPUValidation ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DRED: %s\n", enableDRED ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DXGI Debug: %s\n", enableDXGIDebug ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" Break on Error: %s\n", breakOnError ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" Break on Warning: %s\n", breakOnWarning ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE("=================================\n"); + + // Initialize DXGI factory flags + UINT dxgiFactoryFlags = 0; + + // A-007: Enable debug layer if configured + if (enableDebugLayer) { + igl::d3d12::ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(debugController.GetAddressOf())))) { + debugController->EnableDebugLayer(); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Debug layer ENABLED\n"); + + // Enable DXGI debug layer if configured + if (enableDXGIDebug) { + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + IGL_D3D12_LOG_VERBOSE("D3D12Context: DXGI debug layer ENABLED\n"); + } + + // A-007: Enable GPU-Based Validation if configured + // WARNING: This significantly impacts performance (10-100x slower) + if (enableGPUValidation) { + igl::d3d12::ComPtr debugController1; + if (SUCCEEDED(debugController->QueryInterface(IID_PPV_ARGS(debugController1.GetAddressOf())))) { + debugController1->SetEnableGPUBasedValidation(TRUE); + IGL_D3D12_LOG_VERBOSE("D3D12Context: GPU-Based Validation ENABLED (may slow down rendering 10-100x)\n"); + } else { + IGL_LOG_ERROR("D3D12Context: Failed to enable GPU-Based Validation (requires ID3D12Debug1)\n"); + } + } + } else { + IGL_LOG_ERROR("D3D12Context: Failed to get D3D12 debug interface - Graphics Tools may not be installed\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Debug layer DISABLED\n"); + } + + // A-007: Enable DRED if configured (Device Removed Extended Data for better crash 
diagnostics) + if (enableDRED) { + igl::d3d12::ComPtr dredSettings1; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(dredSettings1.GetAddressOf())))) { + dredSettings1->SetAutoBreadcrumbsEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetPageFaultEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetBreadcrumbContextEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + IGL_D3D12_LOG_VERBOSE("D3D12Context: DRED 1.2 fully configured (breadcrumbs + page faults + context)\n"); + } else { + IGL_LOG_ERROR("D3D12Context: Failed to configure DRED (requires Windows 10 19041+)\n"); + } + } + + // Create DXGI factory with debug flag in debug builds. + HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(dxgiFactory_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create DXGI factory (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create DXGI factory"); + } + + // A-011: Enumerate and select best adapter + Result enumResult = enumerateAndSelectAdapter(); + if (!enumResult.isOk()) { + return enumResult; + } + + // A-012: Detect memory budget + detectMemoryBudget(); + + // Create D3D12 device on selected adapter + hr = D3D12CreateDevice( + adapter_.Get(), + selectedFeatureLevel_, + IID_PPV_ARGS(device_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12CreateDevice failed on selected adapter: 0x%08X\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create D3D12 device on selected adapter"); + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Device created with Feature Level %s\n", + featureLevelToString(selectedFeatureLevel_)); + + // A-007: Setup info queue with configurable break-on-severity settings + if (enableDebugLayer) { + igl::d3d12::ComPtr infoQueue; + if (SUCCEEDED(device_->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + // A-007: Configure break-on-severity based 
on environment variables + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); // Always break on corruption + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, breakOnError ? TRUE : FALSE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, breakOnWarning ? TRUE : FALSE); + + // Filter out INFO messages and a small set of known performance-only + // clear warnings that are expected in this backend (no functional + // impact). Do NOT filter invalid-shader-bytecode or signature messages so + // that pipeline creation problems surface clearly during debugging. + D3D12_MESSAGE_SEVERITY severities[] = { + D3D12_MESSAGE_SEVERITY_INFO + }; + + // Filter out only clear-value performance hints (IDs 820/821) and the + // known PS float-to-uint RT bitcast warning (677). All other message IDs + // (including invalid shader bytecode or unparseable signatures) are kept. + D3D12_MESSAGE_ID denyIds[] = { + static_cast(820), // ClearRenderTargetView w/o optimized clear value + static_cast(821), // ClearDepthStencilView clear value mismatch + static_cast(677) // PS float output to UINT RT (bitcast) + }; + + D3D12_INFO_QUEUE_FILTER filter = {}; + filter.DenyList.NumSeverities = 1; + filter.DenyList.pSeverityList = severities; + filter.DenyList.NumIDs = static_cast(std::size(denyIds)); + filter.DenyList.pIDList = denyIds; + infoQueue->PushStorageFilter(&filter); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Info queue configured (Corruption=BREAK, Error=%s, Warning=%s)\n", + breakOnError ? "BREAK" : "LOG", breakOnWarning ? "BREAK" : "LOG"); + } + } + + // Query root signature capabilities. + // This is critical for Tier-1 devices which don't support unbounded descriptor ranges. 
+ IGL_D3D12_LOG_VERBOSE("D3D12Context: Querying root signature capabilities...\n"); + + // Query highest supported root signature version + D3D12_FEATURE_DATA_ROOT_SIGNATURE featureDataRootSig = {}; + featureDataRootSig.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; + + hr = device_->CheckFeatureSupport( + D3D12_FEATURE_ROOT_SIGNATURE, + &featureDataRootSig, + sizeof(featureDataRootSig)); + + if (SUCCEEDED(hr)) { + highestRootSignatureVersion_ = featureDataRootSig.HighestVersion; + IGL_D3D12_LOG_VERBOSE(" Highest Root Signature Version: %s\n", + highestRootSignatureVersion_ == D3D_ROOT_SIGNATURE_VERSION_1_1 ? "1.1" : "1.0"); + } else { + // If query fails, assume v1.0 (most conservative) + highestRootSignatureVersion_ = D3D_ROOT_SIGNATURE_VERSION_1_0; + IGL_D3D12_LOG_VERBOSE(" Root Signature query failed (assuming v1.0)\n"); + } + + // Query resource binding tier + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + hr = device_->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS, + &options, + sizeof(options)); + + if (SUCCEEDED(hr)) { + resourceBindingTier_ = options.ResourceBindingTier; + const char* tierName = "Unknown"; + switch (resourceBindingTier_) { + case D3D12_RESOURCE_BINDING_TIER_1: tierName = "Tier 1 (bounded descriptors required)"; break; + case D3D12_RESOURCE_BINDING_TIER_2: tierName = "Tier 2 (unbounded arrays except samplers)"; break; + case D3D12_RESOURCE_BINDING_TIER_3: tierName = "Tier 3 (fully unbounded)"; break; + } + IGL_D3D12_LOG_VERBOSE(" Resource Binding Tier: %s\n", tierName); + } else { + // If query fails, assume Tier 1 (most conservative) + resourceBindingTier_ = D3D12_RESOURCE_BINDING_TIER_1; + IGL_D3D12_LOG_VERBOSE(" Resource Binding Tier query failed (assuming Tier 1)\n"); + } + + // Query shader model support with progressive fallback (A-005) + // This is critical for FL11 hardware which only supports SM 5.1, not SM 6.0+ + IGL_D3D12_LOG_VERBOSE("D3D12Context: Querying shader model capabilities for Feature Level %d.%d...\n", + 
(selectedFeatureLevel_ >> 12) & 0xF, (selectedFeatureLevel_ >> 8) & 0xF); + + // Helper to map feature level to expected minimum shader model + auto getMinShaderModelForFeatureLevel = [](D3D_FEATURE_LEVEL fl) -> D3D_SHADER_MODEL { + switch (fl) { + case D3D_FEATURE_LEVEL_12_2: + return D3D_SHADER_MODEL_6_6; // FL 12.2 supports SM 6.6+ + case D3D_FEATURE_LEVEL_12_1: + return D3D_SHADER_MODEL_6_1; // FL 12.1 supports SM 6.1 (mesh shaders) + case D3D_FEATURE_LEVEL_12_0: + return D3D_SHADER_MODEL_6_0; // FL 12.0 supports SM 6.0 (wave operations) + case D3D_FEATURE_LEVEL_11_1: + case D3D_FEATURE_LEVEL_11_0: + return D3D_SHADER_MODEL_5_1; // FL 11.x only supports SM 5.1 + default: + return D3D_SHADER_MODEL_5_1; // Conservative fallback + } + }; + + auto shaderModelToString = [](D3D_SHADER_MODEL sm) -> const char* { + switch (sm) { + case D3D_SHADER_MODEL_6_6: return "6.6"; + case D3D_SHADER_MODEL_6_5: return "6.5"; + case D3D_SHADER_MODEL_6_4: return "6.4"; + case D3D_SHADER_MODEL_6_3: return "6.3"; + case D3D_SHADER_MODEL_6_2: return "6.2"; + case D3D_SHADER_MODEL_6_1: return "6.1"; + case D3D_SHADER_MODEL_6_0: return "6.0"; + case D3D_SHADER_MODEL_5_1: return "5.1"; + default: return "Unknown"; + } + }; + + // Shader models to attempt, from highest to lowest + const D3D_SHADER_MODEL shaderModels[] = { + D3D_SHADER_MODEL_6_6, + D3D_SHADER_MODEL_6_5, + D3D_SHADER_MODEL_6_4, + D3D_SHADER_MODEL_6_3, + D3D_SHADER_MODEL_6_2, + D3D_SHADER_MODEL_6_1, + D3D_SHADER_MODEL_6_0, + D3D_SHADER_MODEL_5_1, + }; + + D3D_SHADER_MODEL detectedShaderModel = D3D_SHADER_MODEL_5_1; + bool shaderModelDetected = false; + + // Try each shader model from highest to lowest + for (D3D_SHADER_MODEL sm : shaderModels) { + D3D12_FEATURE_DATA_SHADER_MODEL shaderModelData = { sm }; + hr = device_->CheckFeatureSupport( + D3D12_FEATURE_SHADER_MODEL, + &shaderModelData, + sizeof(shaderModelData)); + + if (SUCCEEDED(hr)) { + detectedShaderModel = shaderModelData.HighestShaderModel; + shaderModelDetected = 
true; + IGL_D3D12_LOG_VERBOSE(" Detected Shader Model: %s\n", shaderModelToString(detectedShaderModel)); + break; // Found highest supported, stop trying + } else { + IGL_D3D12_LOG_VERBOSE(" Shader Model %s not supported, trying lower version\n", + shaderModelToString(sm)); + } + } + + if (!shaderModelDetected) { + // Fallback based on feature level + D3D_SHADER_MODEL minimumSM = getMinShaderModelForFeatureLevel(selectedFeatureLevel_); + IGL_D3D12_LOG_VERBOSE(" WARNING: Shader model detection failed, using minimum for Feature Level: %s\n", + shaderModelToString(minimumSM)); + detectedShaderModel = minimumSM; + } + + // Validate shader model is appropriate for feature level + D3D_SHADER_MODEL minimumRequired = getMinShaderModelForFeatureLevel(selectedFeatureLevel_); + if (detectedShaderModel < minimumRequired) { + IGL_D3D12_LOG_VERBOSE(" WARNING: Detected Shader Model %s is below minimum for Feature Level: %s\n", + shaderModelToString(detectedShaderModel), + shaderModelToString(minimumRequired)); + } + + maxShaderModel_ = detectedShaderModel; + IGL_D3D12_LOG_VERBOSE("D3D12Context: Final Shader Model selected: %s\n", shaderModelToString(maxShaderModel_)); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Root signature capabilities detected successfully\n"); + + return Result(); +} + +// A-011: Enumerate and select best adapter +Result D3D12Context::enumerateAndSelectAdapter() { + enumeratedAdapters_.clear(); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Enumerating DXGI adapters...\n"); + + // Try IDXGIFactory6 first for high-performance GPU preference + igl::d3d12::ComPtr factory6; + (void)dxgiFactory_->QueryInterface(IID_PPV_ARGS(factory6.GetAddressOf())); + + if (factory6.Get()) { + for (UINT i = 0; ; ++i) { + igl::d3d12::ComPtr adapter; + if (FAILED(factory6->EnumAdapterByGpuPreference(i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + IID_PPV_ARGS(adapter.GetAddressOf())))) { + break; + } + + AdapterInfo info{}; + info.adapter = adapter; + info.index = i; + info.isWarp = false; + + 
adapter->GetDesc1(&info.desc); + + // Skip software adapters in main enumeration (we'll add WARP separately) + if (info.desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + // Determine feature level + info.featureLevel = getHighestFeatureLevel(adapter.Get()); + if (info.featureLevel == static_cast(0)) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Adapter %u does not support D3D12 (skipping)\n", i); + continue; + } + + enumeratedAdapters_.push_back(info); + + // Log adapter details + IGL_D3D12_LOG_VERBOSE("D3D12Context: Adapter %u:\n", i); + IGL_D3D12_LOG_VERBOSE(" Description: %ls\n", info.desc.Description); + IGL_D3D12_LOG_VERBOSE(" Vendor ID: 0x%04X (%s)\n", info.desc.VendorId, info.getVendorName()); + IGL_D3D12_LOG_VERBOSE(" Device ID: 0x%04X\n", info.desc.DeviceId); + IGL_D3D12_LOG_VERBOSE(" Dedicated VRAM: %llu MB\n", info.getDedicatedVideoMemoryMB()); + IGL_D3D12_LOG_VERBOSE(" Shared System Memory: %llu MB\n", info.desc.SharedSystemMemory / (1024 * 1024)); + IGL_D3D12_LOG_VERBOSE(" Feature Level: %s\n", featureLevelToString(info.featureLevel)); + IGL_D3D12_LOG_VERBOSE(" LUID: 0x%08X:0x%08X\n", info.desc.AdapterLuid.HighPart, info.desc.AdapterLuid.LowPart); + } + } + + // Fallback enumeration if Factory6 not available + if (enumeratedAdapters_.empty()) { + for (UINT i = 0; ; ++i) { + igl::d3d12::ComPtr adapter; + if (dxgiFactory_->EnumAdapters1(i, adapter.GetAddressOf()) == DXGI_ERROR_NOT_FOUND) { + break; + } + + AdapterInfo info{}; + info.adapter = adapter; + info.index = i; + info.isWarp = false; + + adapter->GetDesc1(&info.desc); + + // Skip software adapters + if (info.desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + // Determine feature level + info.featureLevel = getHighestFeatureLevel(adapter.Get()); + if (info.featureLevel == static_cast(0)) { + continue; + } + + enumeratedAdapters_.push_back(info); + + // Log adapter details + IGL_D3D12_LOG_VERBOSE("D3D12Context: Adapter %u:\n", i); + IGL_D3D12_LOG_VERBOSE(" Description: %ls\n", 
info.desc.Description); + IGL_D3D12_LOG_VERBOSE(" Vendor ID: 0x%04X (%s)\n", info.desc.VendorId, info.getVendorName()); + IGL_D3D12_LOG_VERBOSE(" Device ID: 0x%04X\n", info.desc.DeviceId); + IGL_D3D12_LOG_VERBOSE(" Dedicated VRAM: %llu MB\n", info.getDedicatedVideoMemoryMB()); + IGL_D3D12_LOG_VERBOSE(" Shared System Memory: %llu MB\n", info.desc.SharedSystemMemory / (1024 * 1024)); + IGL_D3D12_LOG_VERBOSE(" Feature Level: %s\n", featureLevelToString(info.featureLevel)); + } + } + + // Add WARP adapter as fallback option (software rasterizer) + igl::d3d12::ComPtr warpAdapter; + if (SUCCEEDED(dxgiFactory_->EnumWarpAdapter(IID_PPV_ARGS(warpAdapter.GetAddressOf())))) { + igl::d3d12::ComPtr warpAdapter1; + if (SUCCEEDED(warpAdapter->QueryInterface(IID_PPV_ARGS(warpAdapter1.GetAddressOf())))) { + AdapterInfo warpInfo{}; + warpInfo.adapter = warpAdapter1; + warpInfo.index = static_cast(enumeratedAdapters_.size()); + warpInfo.isWarp = true; + + warpAdapter1->GetDesc1(&warpInfo.desc); + warpInfo.featureLevel = getHighestFeatureLevel(warpAdapter1.Get()); + + enumeratedAdapters_.push_back(warpInfo); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: WARP Adapter (Software):\n"); + IGL_D3D12_LOG_VERBOSE(" Description: %ls\n", warpInfo.desc.Description); + IGL_D3D12_LOG_VERBOSE(" Feature Level: %s\n", featureLevelToString(warpInfo.featureLevel)); + } + } + + if (enumeratedAdapters_.empty()) { + IGL_LOG_ERROR("D3D12Context: No compatible D3D12 adapters found!\n"); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "No D3D12-compatible adapters available"); + } + + // Select adapter based on environment variable or heuristic + selectedAdapterIndex_ = 0; // Default to first adapter (discrete GPU on laptops) + + char adapterEnv[64] = {}; + DWORD envResult = GetEnvironmentVariableA("IGL_D3D12_ADAPTER", adapterEnv, sizeof(adapterEnv)); + if (envResult > 0 && envResult < sizeof(adapterEnv)) { + if (strcmp(adapterEnv, "WARP") == 0) { + // Find WARP adapter + for (size_t i = 
0; i < enumeratedAdapters_.size(); ++i) { + if (enumeratedAdapters_[i].isWarp) { + selectedAdapterIndex_ = static_cast(i); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Environment override - using WARP adapter\n"); + break; + } + } + } else { + // Parse adapter index + int requestedIndex = atoi(adapterEnv); + if (requestedIndex >= 0 && requestedIndex < static_cast(enumeratedAdapters_.size())) { + selectedAdapterIndex_ = static_cast(requestedIndex); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Environment override - using adapter %d\n", requestedIndex); + } else { + IGL_LOG_ERROR("D3D12Context: Invalid adapter index %d (available: 0-%zu)\n", + requestedIndex, enumeratedAdapters_.size() - 1); + } + } + } else { + // Heuristic: Choose adapter with highest feature level and most VRAM + D3D_FEATURE_LEVEL highestFL = enumeratedAdapters_[0].featureLevel; + uint64_t largestVRAM = enumeratedAdapters_[0].getDedicatedVideoMemoryMB(); + + for (size_t i = 1; i < enumeratedAdapters_.size(); ++i) { + if (enumeratedAdapters_[i].isWarp) { + continue; // Skip WARP for automatic selection + } + + uint64_t vram = enumeratedAdapters_[i].getDedicatedVideoMemoryMB(); + D3D_FEATURE_LEVEL fl = enumeratedAdapters_[i].featureLevel; + + // Prefer higher feature level, or same feature level with more VRAM + if (fl > highestFL || (fl == highestFL && vram > largestVRAM)) { + selectedAdapterIndex_ = static_cast(i); + highestFL = fl; + largestVRAM = vram; + } + } + } + + adapter_ = enumeratedAdapters_[selectedAdapterIndex_].adapter; + selectedFeatureLevel_ = enumeratedAdapters_[selectedAdapterIndex_].featureLevel; + + // T44: Concise single-line adapter log at INFO level (matches Vulkan/Metal minimalism) + const auto& selected = enumeratedAdapters_[selectedAdapterIndex_]; + IGL_LOG_INFO("D3D12 Adapter: %ls (FL %s, %llu MB VRAM)\n", + selected.desc.Description, + featureLevelToString(selectedFeatureLevel_), + selected.getDedicatedVideoMemoryMB()); + + // Verbose: Detailed adapter info (vendor, device ID, LUID, 
etc.) + IGL_D3D12_LOG_VERBOSE("D3D12Context: Selected adapter %u: %ls (FL %s)\n", + selectedAdapterIndex_, + selected.desc.Description, + featureLevelToString(selectedFeatureLevel_)); + + return Result(); +} + +// A-012: Detect memory budget from selected adapter +void D3D12Context::detectMemoryBudget() { + if (selectedAdapterIndex_ >= enumeratedAdapters_.size()) { + IGL_LOG_ERROR("D3D12Context: No adapter selected for memory budget detection\n"); + return; + } + + const auto& selectedAdapter = enumeratedAdapters_[selectedAdapterIndex_]; + + memoryBudget_.dedicatedVideoMemory = selectedAdapter.desc.DedicatedVideoMemory; + memoryBudget_.sharedSystemMemory = selectedAdapter.desc.SharedSystemMemory; + + IGL_D3D12_LOG_VERBOSE("D3D12Context: GPU Memory Budget:\n"); + IGL_D3D12_LOG_VERBOSE(" Dedicated Video Memory: %.2f MB\n", + memoryBudget_.dedicatedVideoMemory / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE(" Shared System Memory: %.2f MB\n", + memoryBudget_.sharedSystemMemory / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE(" Total Available: %.2f MB\n", + memoryBudget_.totalAvailableMemory() / (1024.0 * 1024.0)); + + // Recommend conservative budget (80% of available) + uint64_t recommendedBudget = static_cast(memoryBudget_.totalAvailableMemory() * 0.8); + IGL_D3D12_LOG_VERBOSE(" Recommended Budget (80%%): %.2f MB\n", + recommendedBudget / (1024.0 * 1024.0)); +} + +// A-010: Detect HDR output capabilities +void D3D12Context::detectHDRCapabilities() { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Detecting HDR output capabilities...\n"); + + // Reset to defaults + hdrCapabilities_ = HDRCapabilities{}; + + // Need a valid swapchain to query output + if (!swapChain_.Get()) { + IGL_D3D12_LOG_VERBOSE(" No swapchain available, HDR detection skipped\n"); + return; + } + + // Get the output (monitor) containing the swapchain + igl::d3d12::ComPtr output; + HRESULT hr = swapChain_->GetContainingOutput(output.GetAddressOf()); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE(" Failed to get 
containing output (0x%08X), HDR not available\n", static_cast(hr)); + return; + } + + // Query for IDXGIOutput6 (required for HDR queries) + igl::d3d12::ComPtr output6; + hr = output->QueryInterface(IID_PPV_ARGS(output6.GetAddressOf())); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE(" IDXGIOutput6 not available (needs Windows 10 1703+), HDR not supported\n"); + return; + } + + // Get output description with color space info + DXGI_OUTPUT_DESC1 outputDesc = {}; + hr = output6->GetDesc1(&outputDesc); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE(" Failed to get output description (0x%08X)\n", static_cast(hr)); + return; + } + + // Store native color space + hdrCapabilities_.nativeColorSpace = outputDesc.ColorSpace; + + // Store luminance information + hdrCapabilities_.maxLuminance = outputDesc.MaxLuminance; + hdrCapabilities_.minLuminance = outputDesc.MinLuminance; + hdrCapabilities_.maxFullFrameLuminance = outputDesc.MaxFullFrameLuminance; + + IGL_D3D12_LOG_VERBOSE(" Native Color Space: %u\n", outputDesc.ColorSpace); + IGL_D3D12_LOG_VERBOSE(" Max Luminance: %.2f nits\n", outputDesc.MaxLuminance); + IGL_D3D12_LOG_VERBOSE(" Min Luminance: %.4f nits\n", outputDesc.MinLuminance); + IGL_D3D12_LOG_VERBOSE(" Max Full Frame Luminance: %.2f nits\n", outputDesc.MaxFullFrameLuminance); + + // Check for HDR10 support (BT.2020 ST2084 - PQ curve) via swapchain + UINT colorSpaceSupport = 0; + hr = swapChain_->CheckColorSpaceSupport(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, &colorSpaceSupport); + if (SUCCEEDED(hr) && (colorSpaceSupport & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT)) { + hdrCapabilities_.hdrSupported = true; + IGL_D3D12_LOG_VERBOSE(" HDR10 (BT.2020 PQ): SUPPORTED\n"); + } else { + IGL_D3D12_LOG_VERBOSE(" HDR10 (BT.2020 PQ): NOT SUPPORTED\n"); + } + + // Check for scRGB support (linear floating-point HDR) + hr = swapChain_->CheckColorSpaceSupport(DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, &colorSpaceSupport); + if (SUCCEEDED(hr) && (colorSpaceSupport & 
DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT)) { + hdrCapabilities_.scRGBSupported = true; + IGL_D3D12_LOG_VERBOSE(" scRGB (Linear FP16): SUPPORTED\n"); + } else { + IGL_D3D12_LOG_VERBOSE(" scRGB (Linear FP16): NOT SUPPORTED\n"); + } + + // Summary + if (hdrCapabilities_.hdrSupported || hdrCapabilities_.scRGBSupported) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: HDR output AVAILABLE (max %.0f nits)\n", outputDesc.MaxLuminance); + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: HDR output NOT AVAILABLE (SDR display)\n"); + } +} + +Result D3D12Context::createCommandQueue() { + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + + HRESULT hr = device_->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(commandQueue_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create command queue (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create command queue"); + } + + return Result(); +} + +Result D3D12Context::createSwapChain(HWND hwnd, uint32_t width, uint32_t height) { + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.Width = width; + swapChainDesc.Height = height; + // Use BGRA_UNORM (non-sRGB) for maximum compatibility with all display adapters + // Vulkan baselines use BGRA channel ordering for swapchain and MRT targets + swapChainDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + swapChainDesc.Stereo = FALSE; + swapChainDesc.SampleDesc.Count = 1; + swapChainDesc.SampleDesc.Quality = 0; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.BufferCount = config_.maxFramesInFlight; // T43: use configured buffer count + swapChainDesc.Scaling = DXGI_SCALING_STRETCH; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; + + // Query tearing support capability (required for variable refresh rate 
displays) + // This capability must be queried before creating the swapchain + BOOL allowTearing = FALSE; + igl::d3d12::ComPtr factory5; + if (SUCCEEDED(dxgiFactory_.Get()->QueryInterface(IID_PPV_ARGS(factory5.GetAddressOf())))) { + if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &allowTearing, + sizeof(allowTearing)))) { + tearingSupported_ = (allowTearing == TRUE); + if (tearingSupported_) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Tearing support available (variable refresh rate)\n"); + } + } + } + + // Set swapchain tearing flag if supported (required to use DXGI_PRESENT_ALLOW_TEARING) + // Without this flag, using DXGI_PRESENT_ALLOW_TEARING in Present() is invalid + swapChainDesc.Flags = tearingSupported_ ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; + + igl::d3d12::ComPtr tempSwapChain; + HRESULT hr = dxgiFactory_->CreateSwapChainForHwnd( + commandQueue_.Get(), + hwnd, + &swapChainDesc, + nullptr, + nullptr, + tempSwapChain.GetAddressOf() + ); + + if (FAILED(hr)) { + IGL_LOG_ERROR("CreateSwapChainForHwnd failed: 0x%08X, trying legacy CreateSwapChain\n", (unsigned)hr); + // Fallback: legacy CreateSwapChain + DXGI_SWAP_CHAIN_DESC legacy = {}; + legacy.BufferDesc.Width = width; + legacy.BufferDesc.Height = height; + legacy.BufferDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + legacy.BufferDesc.RefreshRate.Numerator = 60; + legacy.BufferDesc.RefreshRate.Denominator = 1; + legacy.SampleDesc.Count = 1; + legacy.SampleDesc.Quality = 0; + legacy.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + legacy.BufferCount = config_.maxFramesInFlight; // T43: use configured buffer count + legacy.OutputWindow = hwnd; + legacy.Windowed = TRUE; + legacy.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + legacy.Flags = 0; + + igl::d3d12::ComPtr legacySwap; + HRESULT hr2 = dxgiFactory_->CreateSwapChain(commandQueue_.Get(), &legacy, legacySwap.GetAddressOf()); + if (FAILED(hr2)) { + IGL_LOG_ERROR("D3D12Context: Failed to create swapchain (hr=0x%08X / 0x%08X)\n", 
(unsigned)hr, (unsigned)hr2); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create swapchain"); + } + // Try to QI to IDXGISwapChain3 + hr2 = legacySwap->QueryInterface(IID_PPV_ARGS(swapChain_.GetAddressOf())); + if (FAILED(hr2)) { + IGL_LOG_ERROR("D3D12Context: Failed to query IDXGISwapChain3 (hr=0x%08X)\n", (unsigned)hr2); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to query IDXGISwapChain3"); + } + return Result(); + } + + // Cast to IDXGISwapChain3 + hr = tempSwapChain->QueryInterface(IID_PPV_ARGS(swapChain_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to query IDXGISwapChain3 interface (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to query IDXGISwapChain3 interface"); + } + + // A-009: Verify swapchain actually supports tearing after creation + if (tearingSupported_) { + DXGI_SWAP_CHAIN_DESC1 actualDesc = {}; + hr = swapChain_->GetDesc1(&actualDesc); + if (SUCCEEDED(hr)) { + const bool actualTearingFlag = (actualDesc.Flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) != 0; + const bool actualWindowedMode = (actualDesc.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD || + actualDesc.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL); + + if (!actualTearingFlag) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Tearing flag was NOT set on swapchain (downgraded by driver)\n"); + tearingSupported_ = false; + } else if (!actualWindowedMode) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain not in flip mode (tearing requires flip model)\n"); + tearingSupported_ = false; + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Tearing verified on swapchain (windowed flip model + tearing flag)\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Failed to verify swapchain desc, assuming tearing unavailable\n"); + tearingSupported_ = false; + } + } + + // A-010: Detect HDR capabilities now that swapchain is created + 
detectHDRCapabilities(); + + // Query swapchain buffer count for dynamic frame management (T43) + DXGI_SWAP_CHAIN_DESC1 swapDesc = {}; + hr = swapChain_->GetDesc1(&swapDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to query swapchain description (HRESULT: 0x%08X)\n", static_cast<unsigned>(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to query swapchain description"); + } + + swapchainBufferCount_ = swapDesc.BufferCount; + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain created with %u buffers\n", swapchainBufferCount_); + + // Resize frame management arrays to match swapchain buffer count + renderTargets_.resize(swapchainBufferCount_); + frameContexts_.resize(swapchainBufferCount_); + + return Result(); +} + +Result D3D12Context::createRTVHeap() { + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors = swapchainBufferCount_; // Use queried buffer count + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + + HRESULT hr = device_->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(rtvHeap_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create RTV heap (HRESULT: 0x%08X)\n", static_cast<unsigned>(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create RTV heap"); + } + + rtvDescriptorSize_ = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + + return Result(); +} + +Result D3D12Context::createBackBuffers() { + D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = rtvHeap_->GetCPUDescriptorHandleForHeapStart(); + + for (UINT i = 0; i < swapchainBufferCount_; i++) { // Use queried buffer count + HRESULT hr = swapChain_->GetBuffer(i, IID_PPV_ARGS(renderTargets_[i].GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to get swapchain buffer %u (HRESULT: 0x%08X)\n", i, static_cast<unsigned>(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to get 
swapchain buffer"); + } + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_.Get() != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(renderTargets_[i].Get() != nullptr, "Swapchain buffer is null"); + IGL_DEBUG_ASSERT(rtvHandle.ptr != 0, "RTV descriptor handle is invalid"); + + device_->CreateRenderTargetView(renderTargets_[i].Get(), nullptr, rtvHandle); + rtvHandle.ptr += rtvDescriptorSize_; + } + + return Result(); +} + +Result D3D12Context::createDescriptorHeaps() { + // Cache descriptor sizes + cbvSrvUavDescriptorSize_ = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + samplerDescriptorSize_ = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + // Create per-frame shader-visible descriptor heaps using configurable sizes. + // Each frame gets its own isolated heaps to prevent descriptor conflicts between frames. + // Use pre-allocation with fail-fast on exhaustion (Vulkan pattern, no dynamic growth). + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating per-frame descriptor heaps with fail-fast allocation...\n"); + IGL_D3D12_LOG_VERBOSE(" Config: bufferCount=%u, samplerHeapSize=%u, " + "descriptorsPerPage=%u, maxHeapPages=%u, preAllocate=%s\n", + swapchainBufferCount_, config_.samplerHeapSize, + config_.descriptorsPerPage, config_.maxHeapPages, + config_.preAllocateDescriptorPages ? "true" : "false"); + + for (UINT i = 0; i < swapchainBufferCount_; i++) { + // CBV/SRV/UAV heap: pre-allocate pages based on the configuration policy. + // When preAllocateDescriptorPages is true, allocate all maxHeapPages upfront + // to prevent mid-frame allocation and descriptor invalidation (Vulkan fail-fast pattern). + { + frameContexts_[i].cbvSrvUavHeapPages.clear(); + frameContexts_[i].currentCbvSrvUavPageIndex = 0; + + const uint32_t pagesToAllocate = config_.preAllocateDescriptorPages ? 
config_.maxHeapPages : 1; + + for (uint32_t pageIdx = 0; pageIdx < pagesToAllocate; ++pageIdx) { + igl::d3d12::ComPtr heap; + Result result = allocateDescriptorHeapPage( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + config_.descriptorsPerPage, + &heap); + + if (!result.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to create CBV/SRV/UAV heap page %u for frame %u: %s\n", + pageIdx, i, result.message.c_str()); + IGL_DEBUG_ASSERT(false); + return result; + } + + frameContexts_[i].cbvSrvUavHeapPages.emplace_back(heap, config_.descriptorsPerPage); + } + + const uint32_t allocatedDescriptors = pagesToAllocate * config_.descriptorsPerPage; + if (config_.preAllocateDescriptorPages) { + IGL_D3D12_LOG_VERBOSE(" Frame %u: Pre-allocated %u CBV/SRV/UAV heap pages (%u descriptors, fail-fast on exhaustion)\n", + i, pagesToAllocate, allocatedDescriptors); + } else { + IGL_D3D12_LOG_VERBOSE(" Frame %u: Allocated %u CBV/SRV/UAV heap page (%u descriptors, fail-fast on exhaustion)\n", + i, pagesToAllocate, allocatedDescriptors); + } + } + + // Sampler heap: samplerHeapSize descriptors + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + desc.NumDescriptors = config_.samplerHeapSize; // T14: Use configurable size + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + desc.NodeMask = 0; + + HRESULT hr = device_->CreateDescriptorHeap(&desc, + IID_PPV_ARGS(frameContexts_[i].samplerHeap.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create per-frame Sampler heap for frame %u (HRESULT: 0x%08X)\n", i, static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create per-frame Sampler heap for frame " + std::to_string(i)); + } + IGL_D3D12_LOG_VERBOSE(" Frame %u: Created Sampler heap (%u descriptors)\n", i, config_.samplerHeapSize); + } + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Per-frame descriptor heaps created successfully\n"); + // Memory calculation reflects actual 
pre-allocation (no dynamic growth). + const uint32_t pagesPerFrame = config_.preAllocateDescriptorPages ? config_.maxHeapPages : 1; + const uint32_t cbvSrvUavDescriptors = config_.descriptorsPerPage * pagesPerFrame; + const uint32_t totalDescriptorsPerFrame = cbvSrvUavDescriptors + config_.samplerHeapSize; + const uint32_t totalMemoryKB = (swapchainBufferCount_ * totalDescriptorsPerFrame * 32) / 1024; + IGL_D3D12_LOG_VERBOSE(" Allocated memory: %u frames * (%u CBV/SRV/UAV + %u Samplers) * 32 bytes = %u KB\n", + swapchainBufferCount_, cbvSrvUavDescriptors, + config_.samplerHeapSize, totalMemoryKB); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating descriptor heap manager...\n"); + + // Create descriptor heap manager using configuration values. + DescriptorHeapManager::Sizes sizes{}; + sizes.cbvSrvUav = 256; // For CPU-visible staging (not used for shader-visible). + sizes.samplers = 16; // For CPU-visible staging (not used for shader-visible). + sizes.rtvs = config_.rtvHeapSize; + sizes.dsvs = config_.dsvHeapSize; + + ownedHeapMgr_ = new DescriptorHeapManager(); + Result result = ownedHeapMgr_->initialize(device_.Get(), sizes); + if (!result.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to initialize descriptor heap manager: %s\n", + result.message.c_str()); + delete ownedHeapMgr_; + ownedHeapMgr_ = nullptr; + } else { + heapMgr_ = ownedHeapMgr_; + IGL_D3D12_LOG_VERBOSE("D3D12Context: Descriptor heap manager created successfully\n"); + } + + return Result(); +} + +Result D3D12Context::createCommandSignatures() { + // Create command signature for DrawInstanced (multiDrawIndirect) + // D3D12_DRAW_ARGUMENTS: { VertexCountPerInstance, InstanceCount, StartVertexLocation, StartInstanceLocation } + { + D3D12_INDIRECT_ARGUMENT_DESC drawArg = {}; + drawArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; + + D3D12_COMMAND_SIGNATURE_DESC drawSigDesc = {}; + drawSigDesc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS); // 16 bytes (4 x UINT) + drawSigDesc.NumArgumentDescs = 1; + 
drawSigDesc.pArgumentDescs = &drawArg; + drawSigDesc.NodeMask = 0; + + HRESULT hr = device_->CreateCommandSignature( + &drawSigDesc, + nullptr, // No root signature needed for simple draw commands + IID_PPV_ARGS(drawIndirectSignature_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create draw indirect command signature (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create draw indirect command signature"); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Created draw indirect command signature (stride: %u bytes)\n", + drawSigDesc.ByteStride); + } + + // Create command signature for DrawIndexedInstanced (multiDrawIndexedIndirect) + // D3D12_DRAW_INDEXED_ARGUMENTS: { IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation } + { + D3D12_INDIRECT_ARGUMENT_DESC drawIndexedArg = {}; + drawIndexedArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; + + D3D12_COMMAND_SIGNATURE_DESC drawIndexedSigDesc = {}; + drawIndexedSigDesc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); // 20 bytes (5 x UINT) + drawIndexedSigDesc.NumArgumentDescs = 1; + drawIndexedSigDesc.pArgumentDescs = &drawIndexedArg; + drawIndexedSigDesc.NodeMask = 0; + + HRESULT hr = device_->CreateCommandSignature( + &drawIndexedSigDesc, + nullptr, // No root signature needed for simple draw commands + IID_PPV_ARGS(drawIndexedIndirectSignature_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create draw indexed indirect command signature (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create draw indexed indirect command signature"); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Created draw indexed indirect command signature (stride: %u bytes)\n", + drawIndexedSigDesc.ByteStride); + } + + return Result(); +} + +uint32_t D3D12Context::getCurrentBackBufferIndex() 
const { + if (swapChain_.Get() == nullptr) { + return 0; + } + return swapChain_->GetCurrentBackBufferIndex(); +} + +ID3D12Resource* D3D12Context::getCurrentBackBuffer() const { + uint32_t index = getCurrentBackBufferIndex(); + if (index >= swapchainBufferCount_) { + IGL_LOG_ERROR("getCurrentBackBuffer(): index %u >= swapchainBufferCount %u\n", index, swapchainBufferCount_); + return nullptr; + } + + ID3D12Resource* resource = renderTargets_[index].Get(); + IGL_D3D12_LOG_VERBOSE("getCurrentBackBuffer(): index=%u, resource=%p\n", index, (void*)resource); + return resource; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3D12Context::getCurrentRTV() const { + if (rtvHeap_.Get() == nullptr) { + return {0}; + } + D3D12_CPU_DESCRIPTOR_HANDLE rtv = rtvHeap_->GetCPUDescriptorHandleForHeapStart(); + rtv.ptr += getCurrentBackBufferIndex() * rtvDescriptorSize_; + return rtv; +} + +void D3D12Context::waitForGPU() { + if (!fence_.Get() || !commandQueue_.Get()) { + return; + } + + // Signal and increment the fence value + const UINT64 fenceToWaitFor = ++fenceValue_; + commandQueue_->Signal(fence_.Get(), fenceToWaitFor); + + // Wait until the fence is crossed using FenceWaiter (TOCTOU-safe) + FenceWaiter waiter(fence_.Get(), fenceToWaitFor); + Result waitResult = waiter.wait(INFINITE); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("D3D12Context::waitForGPU() - Fence wait failed: %s (fence=%llu)\n", + waitResult.message.c_str(), fenceToWaitFor); + } +} + +void D3D12Context::trackResourceCreation(const char* type, size_t sizeBytes) { + std::lock_guard lock(resourceStatsMutex_); + if (strcmp(type, "Buffer") == 0) { + resourceStats_.totalBuffersCreated++; + resourceStats_.bufferMemoryBytes += sizeBytes; + } else if (strcmp(type, "Texture") == 0) { + resourceStats_.totalTexturesCreated++; + resourceStats_.textureMemoryBytes += sizeBytes; + } else if (strcmp(type, "SRV") == 0) { + resourceStats_.totalSRVsCreated++; + } else if (strcmp(type, "Sampler") == 0) { + resourceStats_.totalSamplersCreated++; 
+ } +} + +void D3D12Context::trackResourceDestruction(const char* type, size_t sizeBytes) { + std::lock_guard lock(resourceStatsMutex_); + if (strcmp(type, "Buffer") == 0) { + resourceStats_.totalBuffersDestroyed++; + resourceStats_.bufferMemoryBytes -= sizeBytes; + } else if (strcmp(type, "Texture") == 0) { + resourceStats_.totalTexturesDestroyed++; + resourceStats_.textureMemoryBytes -= sizeBytes; + } +} + +void D3D12Context::logResourceStats() { + std::lock_guard lock(resourceStatsMutex_); + IGL_D3D12_LOG_VERBOSE("=== D3D12 Resource Statistics ===\n"); + IGL_D3D12_LOG_VERBOSE(" Buffers: %zu created, %zu destroyed (leaked: %zd)\n", + resourceStats_.totalBuffersCreated, + resourceStats_.totalBuffersDestroyed, + (int64_t)resourceStats_.totalBuffersCreated - (int64_t)resourceStats_.totalBuffersDestroyed); + IGL_D3D12_LOG_VERBOSE(" Textures: %zu created, %zu destroyed (leaked: %zd)\n", + resourceStats_.totalTexturesCreated, + resourceStats_.totalTexturesDestroyed, + (int64_t)resourceStats_.totalTexturesCreated - (int64_t)resourceStats_.totalTexturesDestroyed); + IGL_D3D12_LOG_VERBOSE(" SRVs created: %zu\n", resourceStats_.totalSRVsCreated); + IGL_D3D12_LOG_VERBOSE(" Samplers created: %zu\n", resourceStats_.totalSamplersCreated); + IGL_D3D12_LOG_VERBOSE(" Buffer memory: %.2f MB\n", resourceStats_.bufferMemoryBytes / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE(" Texture memory: %.2f MB\n", resourceStats_.textureMemoryBytes / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE("==================================\n"); +} + +// Allocate a new descriptor heap page for dynamic growth. 
+Result D3D12Context::allocateDescriptorHeapPage( + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t numDescriptors, + igl::d3d12::ComPtr* outHeap) { + if (!device_.Get()) { + return Result{Result::Code::RuntimeError, "Device is null"}; + } + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.Type = type; + heapDesc.NumDescriptors = numDescriptors; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + heapDesc.NodeMask = 0; + + HRESULT hr = device_->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(outHeap->GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create descriptor heap page (type=%d, numDescriptors=%u): HRESULT=0x%08X", + static_cast(type), numDescriptors, static_cast(hr)); + return Result{Result::Code::RuntimeError, errorMsg}; + } + + return Result{}; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12Context.h b/src/igl/d3d12/D3D12Context.h new file mode 100644 index 0000000000..7ad8873167 --- /dev/null +++ b/src/igl/d3d12/D3D12Context.h @@ -0,0 +1,423 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl { +class IBuffer; // Forward declaration for igl::IBuffer +} + +namespace igl::d3d12 { + +class DescriptorHeapManager; // fwd decl in igl::d3d12 + +/** + * @brief Descriptor heap page for dynamic multi-page growth + * + * Part of Strategy 1 (Transient Descriptor Allocator) architecture. + * See D3D12ResourcesBinder.h for full architecture documentation. 
+ * + * Following Microsoft MiniEngine's DynamicDescriptorHeap pattern: + * - Start with 1 page of 1024 descriptors per frame + * - Grow to up to 16 pages (16,384 descriptors) on-demand + * - Reset all counters at frame boundary (no deallocation needed) + */ +struct DescriptorHeapPage { + igl::d3d12::ComPtr heap; + uint32_t capacity; // Total descriptors in this page + uint32_t used; // Currently allocated descriptors + + DescriptorHeapPage() : capacity(0), used(0) {} + DescriptorHeapPage(igl::d3d12::ComPtr h, uint32_t cap) + : heap(h), capacity(cap), used(0) {} +}; + +/** + * @brief Per-frame context for CPU/GPU parallelism and descriptor management + * + * ============================================================================ + * ARCHITECTURE: Strategy 1 - Transient Descriptor Allocator + * ============================================================================ + * + * FrameContext implements the per-frame descriptor heap management system + * (Strategy 1 in D3D12ResourcesBinder.h architecture). 
+ * + * **Key Design Decisions**: + * - 3 frames in flight: Prevents CPU/GPU stalls while enabling triple buffering + * - Per-frame isolation: Each frame gets independent descriptor heaps + * - Shared across command buffers: ALL command buffers in a frame share these heaps + * - Linear allocation: O(1) descriptor allocation with simple counter increment + * - Frame-boundary reset: Counters reset to 0, no per-descriptor deallocation + * - Dynamic growth: CBV/SRV/UAV heaps can grow from 1 to 16 pages on-demand + * + * **Descriptor Heap Layout**: + * - CBV/SRV/UAV: Multi-page array (1024 descriptors/page, up to 16 pages = 16K total) + * - Samplers: Single heap (2048 descriptors, D3D12 spec limit, no growth) + * + * **Access Pattern**: + * - CommandBuffer::getNextCbvSrvUavDescriptor() - allocates from current page + * - CommandBuffer::allocateCbvSrvUavRange() - allocates contiguous range + * - CommandBuffer::getNextSamplerDescriptor() - returns reference for increment + * - FrameManager::resetDescriptorCounters() - resets at frame boundary + * + * **Performance Characteristics**: + * - Allocation: O(1) with occasional page growth (O(n) for page vector resize) + * - Deallocation: None (bulk reset at frame boundary) + * - Memory: ~4MB worst case per frame (16 pages * 1024 descriptors * 32 bytes/descriptor) + * + * For architecture overview, see D3D12ResourcesBinder.h documentation. + */ +struct FrameContext { + igl::d3d12::ComPtr allocator; + UINT64 fenceValue = 0; // First fence signaled this frame (backward compatibility) + + // D-002: Track maximum fence value of ALL command lists using this allocator + // CRITICAL: Allocator can only be reset when GPU completes maxAllocatorFence + UINT64 maxAllocatorFence = 0; + + // D-002: Count command buffers submitted with this allocator (telemetry) + uint32_t commandBufferCount = 0; + + // Per-frame shader-visible descriptor heaps (following Microsoft MiniEngine pattern). 
+ // Supports multiple pages for dynamic growth to prevent overflow and corruption. + // Each frame gets its own isolated heap pages to prevent descriptor conflicts. + std::vector cbvSrvUavHeapPages; // Dynamic array of 1024-descriptor pages + igl::d3d12::ComPtr samplerHeap; // 2048 descriptors (kMaxSamplers) + + // Current active page index for CBV/SRV/UAV allocation. + uint32_t currentCbvSrvUavPageIndex = 0; + + // Track the currently active shader-visible heap for command list binding. + // This is updated when allocating new pages and must be rebound to the command list. + // This heap is returned by D3D12Context::getCbvSrvUavHeap() for binding. + igl::d3d12::ComPtr activeCbvSrvUavHeap; + + // Linear allocator counters - reset to 0 each frame + // Incremented by each command buffer's encoders as they allocate descriptors + uint32_t nextCbvSrvUavDescriptor = 0; + uint32_t nextSamplerDescriptor = 0; + + // Transient resources that must be kept alive until this frame completes GPU execution + // Examples: push constant buffers, temporary upload buffers + // CRITICAL: These are cleared when we advance to the next frame AFTER waiting for + // this frame's fence, ensuring the GPU has finished reading them + std::vector> transientBuffers; + std::vector> transientResources; + + // Telemetry for transient resource tracking. + // Tracks high-water mark to observe peak usage and detect unbounded growth. + size_t transientBuffersHighWater = 0; + size_t transientResourcesHighWater = 0; + + // Telemetry for descriptor heap usage tracking. + // Tracks peak descriptor usage per frame to detect heap overflow risks. 
+ uint32_t peakCbvSrvUavUsage = 0; + uint32_t peakSamplerUsage = 0; +}; + +class D3D12Context { + public: + // A-011: Multi-adapter enumeration and tracking + struct AdapterInfo { + igl::d3d12::ComPtr adapter; + DXGI_ADAPTER_DESC1 desc; + D3D_FEATURE_LEVEL featureLevel; + bool isWarp; // Software rasterizer + uint32_t index; // Original enumeration index + + // Helper methods + uint64_t getDedicatedVideoMemoryMB() const; + const char* getVendorName() const; + }; + + // A-012: Memory budget tracking + struct MemoryBudget { + uint64_t dedicatedVideoMemory = 0; // Dedicated GPU memory (bytes) + uint64_t sharedSystemMemory = 0; // Shared system memory accessible to GPU (bytes) + uint64_t estimatedUsage = 0; // Current estimated usage by this device (bytes) + uint64_t userDefinedBudgetLimit = 0; // Optional soft limit + + uint64_t totalAvailableMemory() const; + double getUsagePercentage() const; + bool isMemoryCritical() const; + bool isMemoryLow() const; + }; + + // A-010: HDR output capabilities + struct HDRCapabilities { + bool hdrSupported = false; // HDR10 support + bool scRGBSupported = false; // scRGB (FP16) support + DXGI_COLOR_SPACE_TYPE nativeColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; // SDR default + float maxLuminance = 80.0f; // Max luminance in nits (SDR default) + float minLuminance = 0.0f; // Min luminance in nits + float maxFullFrameLuminance = 80.0f; // Max full-frame luminance in nits + }; + + D3D12Context() = default; + ~D3D12Context(); + + // initialize() accepts optional D3D12ContextConfig for configurable sizes. 
+ Result initialize(HWND hwnd, uint32_t width, uint32_t height, + const D3D12ContextConfig& config = D3D12ContextConfig::defaultConfig()); + Result resize(uint32_t width, uint32_t height); + + ID3D12Device* getDevice() const { return device_.Get(); } + ID3D12CommandQueue* getCommandQueue() const { return commandQueue_.Get(); } + IDXGISwapChain3* getSwapChain() const { return swapChain_.Get(); } + + // Get currently active CBV/SRV/UAV descriptor heap for current frame. + // Returns the active heap used for descriptor allocation. Use this for heap binding. + // For multi-page access or diagnostics, use getFrameContexts(). + ID3D12DescriptorHeap* getCbvSrvUavHeap() const { + const auto& frameCtx = frameContexts_[currentFrameIndex_]; + return frameCtx.activeCbvSrvUavHeap.Get(); + } + ID3D12DescriptorHeap* getSamplerHeap() const { + return frameContexts_[currentFrameIndex_].samplerHeap.Get(); + } + + // Allocate a new descriptor heap page for dynamic growth. + Result allocateDescriptorHeapPage(D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t numDescriptors, + igl::d3d12::ComPtr* outHeap); + + // Get descriptor sizes + UINT getCbvSrvUavDescriptorSize() const { return cbvSrvUavDescriptorSize_; } + UINT getSamplerDescriptorSize() const { return samplerDescriptorSize_; } + + // Get root signature capabilities + D3D_ROOT_SIGNATURE_VERSION getHighestRootSignatureVersion() const { return highestRootSignatureVersion_; } + D3D12_RESOURCE_BINDING_TIER getResourceBindingTier() const { return resourceBindingTier_; } + + // Get shader model capability. + D3D_SHADER_MODEL getMaxShaderModel() const { return maxShaderModel_; } + + // Get selected feature level (A-004, A-005) + D3D_FEATURE_LEVEL getSelectedFeatureLevel() const { return selectedFeatureLevel_; } + + // Get tearing support capability + bool isTearingSupported() const { return tearingSupported_; } + + // Get command signatures for indirect drawing. 
+ ID3D12CommandSignature* getDrawIndirectSignature() const { return drawIndirectSignature_.Get(); } + ID3D12CommandSignature* getDrawIndexedIndirectSignature() const { return drawIndexedIndirectSignature_.Get(); } + + // Get descriptor handles from per-frame heaps using the current page for multi-heap support. + D3D12_CPU_DESCRIPTOR_HANDLE getCbvSrvUavCpuHandle(uint32_t descriptorIndex) const { + const auto& frameCtx = frameContexts_[currentFrameIndex_]; + const auto& pages = frameCtx.cbvSrvUavHeapPages; + const uint32_t pageIdx = frameCtx.currentCbvSrvUavPageIndex; + + if (pages.empty() || pageIdx >= pages.size()) { + return {0}; // Invalid handle + } + + auto h = pages[pageIdx].heap->GetCPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * cbvSrvUavDescriptorSize_; + return h; + } + + D3D12_GPU_DESCRIPTOR_HANDLE getCbvSrvUavGpuHandle(uint32_t descriptorIndex) const { + const auto& frameCtx = frameContexts_[currentFrameIndex_]; + const auto& pages = frameCtx.cbvSrvUavHeapPages; + const uint32_t pageIdx = frameCtx.currentCbvSrvUavPageIndex; + + if (pages.empty() || pageIdx >= pages.size()) { + return {0}; // Invalid handle + } + + auto h = pages[pageIdx].heap->GetGPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * cbvSrvUavDescriptorSize_; + return h; + } + + D3D12_CPU_DESCRIPTOR_HANDLE getSamplerCpuHandle(uint32_t descriptorIndex) const { + auto h = frameContexts_[currentFrameIndex_].samplerHeap->GetCPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * samplerDescriptorSize_; + return h; + } + + D3D12_GPU_DESCRIPTOR_HANDLE getSamplerGpuHandle(uint32_t descriptorIndex) const { + auto h = frameContexts_[currentFrameIndex_].samplerHeap->GetGPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * samplerDescriptorSize_; + return h; + } + + // Optional descriptor heap manager (provided by headless context) + DescriptorHeapManager* getDescriptorHeapManager() const { return heapMgr_; } + + uint32_t getCurrentBackBufferIndex() const; 
+ ID3D12Resource* getCurrentBackBuffer() const; + D3D12_CPU_DESCRIPTOR_HANDLE getCurrentRTV() const; + + void waitForGPU(); + + // Per-frame fence access for CommandQueue + FrameContext* getFrameContexts() { return frameContexts_.data(); } + UINT& getCurrentFrameIndex() { return currentFrameIndex_; } + UINT getSwapchainBufferCount() const { return swapchainBufferCount_; } + UINT64& getFenceValue() { return fenceValue_; } + ID3D12Fence* getFence() const { return fence_.Get(); } + + // Resource tracking for diagnostics + static void trackResourceCreation(const char* type, size_t sizeBytes); + static void trackResourceDestruction(const char* type, size_t sizeBytes); + static void logResourceStats(); + + // A-011: Adapter enumeration and selection + const std::vector& getEnumeratedAdapters() const { return enumeratedAdapters_; } + const AdapterInfo* getSelectedAdapter() const { + if (selectedAdapterIndex_ < enumeratedAdapters_.size()) { + return &enumeratedAdapters_[selectedAdapterIndex_]; + } + return nullptr; + } + uint32_t getSelectedAdapterIndex() const { return selectedAdapterIndex_; } + + // A-012: Memory budget tracking + MemoryBudget getMemoryBudget() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_; + } + + double getMemoryUsagePercentage() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_.getUsagePercentage(); + } + + bool isMemoryLow() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_.isMemoryLow(); + } + + bool isMemoryCritical() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_.isMemoryCritical(); + } + + void updateMemoryUsage(int64_t delta) { + std::lock_guard lock(memoryTrackingMutex_); + uint64_t newUsage = memoryBudget_.estimatedUsage; + if (delta < 0) { + uint64_t absDelta = static_cast(-delta); + newUsage = (absDelta > newUsage) ? 
0 : (newUsage - absDelta); + } else { + newUsage += static_cast(delta); + } + memoryBudget_.estimatedUsage = newUsage; + } + + // A-010: HDR output capabilities + const HDRCapabilities& getHDRCapabilities() const { return hdrCapabilities_; } + bool isHDRSupported() const { return hdrCapabilities_.hdrSupported; } + + // Accessor for configuration (sizes, frame buffering, etc.). + const D3D12ContextConfig& getConfig() const { return config_; } + + protected: + [[nodiscard]] Result createDevice(); + [[nodiscard]] Result createCommandQueue(); + [[nodiscard]] Result createSwapChain(HWND hwnd, uint32_t width, uint32_t height); + Result recreateSwapChain(uint32_t width, uint32_t height); + [[nodiscard]] Result createRTVHeap(); + [[nodiscard]] Result createBackBuffers(); + [[nodiscard]] Result createDescriptorHeaps(); + [[nodiscard]] Result createCommandSignatures(); + + // A-011: Adapter enumeration + [[nodiscard]] Result enumerateAndSelectAdapter(); + static D3D_FEATURE_LEVEL getHighestFeatureLevel(IDXGIAdapter1* adapter); + + // A-012: Memory budget detection + void detectMemoryBudget(); + + // A-010: HDR output detection + void detectHDRCapabilities(); + + igl::d3d12::ComPtr dxgiFactory_; + igl::d3d12::ComPtr adapter_; + igl::d3d12::ComPtr device_; + igl::d3d12::ComPtr commandQueue_; + igl::d3d12::ComPtr swapChain_; + UINT swapchainBufferCount_ = 0; // Queried from swapchain, replaces kMaxFramesInFlight + + igl::d3d12::ComPtr rtvHeap_; + std::vector> renderTargets_; // Sized to swapchainBufferCount_ + UINT rtvDescriptorSize_ = 0; + + // Descriptor sizes (cached from device) + UINT cbvSrvUavDescriptorSize_ = 0; + UINT samplerDescriptorSize_ = 0; + + // Feature detection for root signature capabilities + D3D_ROOT_SIGNATURE_VERSION highestRootSignatureVersion_ = D3D_ROOT_SIGNATURE_VERSION_1_0; + D3D12_RESOURCE_BINDING_TIER resourceBindingTier_ = D3D12_RESOURCE_BINDING_TIER_1; + + // Feature detection for device feature level (A-004) + D3D_FEATURE_LEVEL 
selectedFeatureLevel_ = D3D_FEATURE_LEVEL_11_0; + + // Feature detection for shader model. + // DXC requires SM 6.0 minimum (SM 5.x deprecated). + D3D_SHADER_MODEL maxShaderModel_ = D3D_SHADER_MODEL_6_0; + + // Feature detection for variable refresh rate (tearing) support + bool tearingSupported_ = false; + + // A-011: Multi-adapter tracking (structs defined in public section) + std::vector enumeratedAdapters_; + uint32_t selectedAdapterIndex_ = 0; + + // A-012: Memory budget tracking (struct defined in public section) + MemoryBudget memoryBudget_; + mutable std::mutex memoryTrackingMutex_; + + // A-010: HDR output capabilities (struct defined in public section) + HDRCapabilities hdrCapabilities_; + + // Command signatures for indirect drawing. + igl::d3d12::ComPtr drawIndirectSignature_; + igl::d3d12::ComPtr drawIndexedIndirectSignature_; + + // Descriptor heap manager for headless contexts (unit tests) + DescriptorHeapManager* ownedHeapMgr_ = nullptr; // Owned manager for windowed contexts (raw ptr, manually deleted) + DescriptorHeapManager* heapMgr_ = nullptr; // non-owning; points to ownedHeapMgr_ or external (headless) + + // Per-frame synchronization for CPU/GPU parallelism + std::vector frameContexts_; // Sized to swapchainBufferCount_ + UINT currentFrameIndex_ = 0; + + // Global synchronization + igl::d3d12::ComPtr fence_; + UINT64 fenceValue_ = 0; + + uint32_t width_ = 0; + uint32_t height_ = 0; + + // Configuration for customizable sizes. 
+  D3D12ContextConfig config_;
+
+  // Resource tracking (static for global tracking across all contexts)
+  struct ResourceStats {
+    size_t totalBuffersCreated = 0;
+    size_t totalBuffersDestroyed = 0;
+    size_t totalTexturesCreated = 0;
+    size_t totalTexturesDestroyed = 0;
+    size_t totalSRVsCreated = 0;
+    size_t totalSamplersCreated = 0;
+    size_t bufferMemoryBytes = 0;
+    size_t textureMemoryBytes = 0;
+  };
+  static ResourceStats resourceStats_;
+  static std::mutex resourceStatsMutex_;
+};
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12DeviceCapabilities.cpp b/src/igl/d3d12/D3D12DeviceCapabilities.cpp
new file mode 100644
index 0000000000..cccd98eec1
--- /dev/null
+++ b/src/igl/d3d12/D3D12DeviceCapabilities.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): include targets reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12DeviceCapabilities.h>
+
+#include <igl/d3d12/D3D12Context.h>
+
+namespace igl::d3d12 {
+
+// Queries and logs device capability/limit information for the given context.
+void D3D12DeviceCapabilities::initialize(D3D12Context& ctx) {
+  validateDeviceLimits(ctx);
+}
+
+void D3D12DeviceCapabilities::validateDeviceLimits(D3D12Context& ctx) {
+  auto* device = ctx.getDevice();
+  if (!device) {
+    IGL_LOG_ERROR("D3D12DeviceCapabilities::validateDeviceLimits: D3D12 device is null\n");
+    return;
+  }
+
+  IGL_D3D12_LOG_VERBOSE("=== D3D12 Device Capabilities and Limits Validation ===\n");
+
+  // Query D3D12_FEATURE_D3D12_OPTIONS for resource binding tier and other capabilities
+  HRESULT hr =
+      device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &deviceOptions_, sizeof(deviceOptions_));
+
+  if (SUCCEEDED(hr)) {
+    // Log resource binding tier; unrecognized (future) tiers report "Unknown"
+    const char* tierName = "Unknown";
+    switch (deviceOptions_.ResourceBindingTier) {
+    case D3D12_RESOURCE_BINDING_TIER_1:
+      tierName = "Tier 1 (bounded descriptors required)";
+      break;
+    case D3D12_RESOURCE_BINDING_TIER_2:
+      tierName = "Tier 2 (unbounded arrays except samplers)";
+      break;
+    case D3D12_RESOURCE_BINDING_TIER_3:
+      tierName = "Tier 3 (fully unbounded)";
+      break;
+    }
+    IGL_D3D12_LOG_VERBOSE("  Resource Binding Tier: %s\n", tierName);
+
+    // Log other relevant capabilities
+    IGL_D3D12_LOG_VERBOSE("  Standard Swizzle 64KB Supported: %s\n",
+                          deviceOptions_.StandardSwizzle64KBSupported ? "Yes" : "No");
+    IGL_D3D12_LOG_VERBOSE("  Cross-Node Sharing Tier: %d\n", deviceOptions_.CrossNodeSharingTier);
+    IGL_D3D12_LOG_VERBOSE("  Conservative Rasterization Tier: %d\n",
+                          deviceOptions_.ConservativeRasterizationTier);
+  } else {
+    // Cast: HRESULT is 'long'; %X expects unsigned (matches the casts used elsewhere in this patch)
+    IGL_LOG_ERROR(
+        "  Failed to query D3D12_FEATURE_D3D12_OPTIONS (HRESULT: 0x%08X)\n", static_cast<unsigned>(hr));
+  }
+
+  // Query D3D12_FEATURE_D3D12_OPTIONS1 for wave intrinsics support
+  hr = device->CheckFeatureSupport(
+      D3D12_FEATURE_D3D12_OPTIONS1, &deviceOptions1_, sizeof(deviceOptions1_));
+
+  if (SUCCEEDED(hr)) {
+    IGL_D3D12_LOG_VERBOSE("  Wave Intrinsics Supported: %s\n",
+                          deviceOptions1_.WaveOps ? "Yes" : "No");
+    IGL_D3D12_LOG_VERBOSE("  Wave Lane Count Min: %u\n", deviceOptions1_.WaveLaneCountMin);
+    IGL_D3D12_LOG_VERBOSE("  Wave Lane Count Max: %u\n", deviceOptions1_.WaveLaneCountMax);
+    IGL_D3D12_LOG_VERBOSE("  Total Lane Count: %u\n", deviceOptions1_.TotalLaneCount);
+  } else {
+    IGL_D3D12_LOG_VERBOSE(
+        "  D3D12_FEATURE_D3D12_OPTIONS1 query failed (not critical)\n");
+  }
+
+  // The rest of the original validation logic lives in Device::getFeatureLimits()
+  // and related capability queries, so no additional checks are needed here.
+}
+
+} // namespace igl::d3d12
+
diff --git a/src/igl/d3d12/D3D12DeviceCapabilities.h b/src/igl/d3d12/D3D12DeviceCapabilities.h
new file mode 100644
index 0000000000..338bc8f551
--- /dev/null
+++ b/src/igl/d3d12/D3D12DeviceCapabilities.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#pragma once + +#include + +namespace igl::d3d12 { + +class D3D12Context; + +class D3D12DeviceCapabilities { + public: + void initialize(D3D12Context& ctx); + + [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS& getOptions() const { + return deviceOptions_; + } + + [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS1& getOptions1() const { + return deviceOptions1_; + } + + [[nodiscard]] D3D12_RESOURCE_BINDING_TIER getResourceBindingTier() const { + return deviceOptions_.ResourceBindingTier; + } + + private: + void validateDeviceLimits(D3D12Context& ctx); + + D3D12_FEATURE_DATA_D3D12_OPTIONS deviceOptions_ = {}; + D3D12_FEATURE_DATA_D3D12_OPTIONS1 deviceOptions1_ = {}; +}; + +} // namespace igl::d3d12 + diff --git a/src/igl/d3d12/D3D12FenceWaiter.cpp b/src/igl/d3d12/D3D12FenceWaiter.cpp new file mode 100644 index 0000000000..8c018e088b --- /dev/null +++ b/src/igl/d3d12/D3D12FenceWaiter.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+// NOTE(review): include target reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12FenceWaiter.h>
+
+namespace igl::d3d12 {
+
+FenceWaiter::FenceWaiter(ID3D12Fence* fence, UINT64 targetValue)
+    : fence_(fence), targetValue_(targetValue) {
+  if (!fence_) {
+    IGL_LOG_ERROR("FenceWaiter: null fence provided\n");
+    setupErrorCode_ = Result::Code::ArgumentNull;
+    setupErrorMessage_ = "Null fence provided to FenceWaiter";
+    return;
+  }
+
+  // Auto-reset event, initially non-signaled
+  event_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+  if (!event_) {
+    const DWORD lastError = GetLastError();
+    IGL_LOG_ERROR("FenceWaiter: Failed to create event handle (LastError=0x%08X)\n",
+                  static_cast<unsigned>(lastError));
+    setupErrorCode_ = Result::Code::InvalidOperation;
+    char buf[128];
+    snprintf(buf, sizeof(buf), "CreateEvent failed (OS error 0x%08X)", static_cast<unsigned>(lastError));
+    setupErrorMessage_ = buf;
+    return;
+  }
+
+  HRESULT hr = fence_->SetEventOnCompletion(targetValue_, event_);
+  if (FAILED(hr)) {
+    IGL_LOG_ERROR("FenceWaiter: SetEventOnCompletion failed: 0x%08X\n", static_cast<unsigned>(hr));
+    CloseHandle(event_);
+    event_ = nullptr;
+    setupErrorCode_ = Result::Code::InvalidOperation;
+    char buf[128];
+    snprintf(buf, sizeof(buf), "SetEventOnCompletion failed (HRESULT=0x%08X)", static_cast<unsigned>(hr));
+    setupErrorMessage_ = buf;
+    return;
+  }
+
+  setupSucceeded_ = true;
+}
+
+FenceWaiter::~FenceWaiter() {
+  if (event_) {
+    CloseHandle(event_);
+  }
+}
+
+bool FenceWaiter::isComplete() const {
+  return fence_ && fence_->GetCompletedValue() >= targetValue_;
+}
+
+Result FenceWaiter::wait(DWORD timeoutMs) {
+  // Check if setup succeeded (constructor completed event creation and SetEventOnCompletion)
+  if (!setupSucceeded_ || !event_) {
+    return Result(setupErrorCode_, setupErrorMessage_);
+  }
+
+  // D-003: Re-check fence after SetEventOnCompletion to avoid TOCTOU race
+  if (isComplete()) {
+    return Result(); // Already complete, no wait needed
+  }
+
+  DWORD waitResult = WaitForSingleObject(event_, timeoutMs);
+
+  if (waitResult == WAIT_OBJECT_0) {
+    // Verify fence actually reached target value
+    UINT64 completedValue = fence_->GetCompletedValue();
+    if (completedValue < targetValue_) {
+      IGL_LOG_ERROR("FenceWaiter: Wait returned but fence incomplete (expected=%llu, got=%llu)\n",
+                    targetValue_, completedValue);
+
+      // CRITICAL: This indicates a GPU/driver issue (event signaled but fence not updated)
+      // For INFINITE timeout, try bounded recovery; otherwise honor the timeout contract
+      if (timeoutMs == INFINITE) {
+        // Bounded spin as last resort for INFINITE waits only (~10s at 1ms per spin)
+        const int maxSpins = 10000;
+        int spins = 0;
+        for (; spins < maxSpins && fence_->GetCompletedValue() < targetValue_; ++spins) {
+          Sleep(1);
+        }
+
+        if (fence_->GetCompletedValue() >= targetValue_) {
+          IGL_D3D12_LOG_VERBOSE("FenceWaiter: Fence completed after %d recovery spins\n", spins);
+          return Result(); // Success after recovery
+        }
+
+        IGL_LOG_ERROR("FenceWaiter: Fence still incomplete after %d bounded spins\n", maxSpins);
+      }
+
+      // Honor timeout contract: event signaled but fence incomplete = failure
+      return Result(Result::Code::RuntimeError,
+                    "Fence incomplete after wait (possible GPU hang or driver issue)");
+    }
+    return Result(); // Success
+  } else if (waitResult == WAIT_TIMEOUT) {
+    const UINT64 completedValue = fence_ ? fence_->GetCompletedValue() : 0;
+    IGL_LOG_ERROR("FenceWaiter: Timeout waiting for fence %llu (completed=%llu)\n",
+                  targetValue_, completedValue);
+    return Result(Result::Code::RuntimeError,
+                  "Fence wait timed out (possible GPU hang)");
+  } else {
+    const DWORD lastError = GetLastError();
+    IGL_LOG_ERROR("FenceWaiter: Wait failed with result 0x%08X (LastError=0x%08X)\n",
+                  static_cast<unsigned>(waitResult), static_cast<unsigned>(lastError));
+    char buf[128];
+    snprintf(buf, sizeof(buf), "WaitForSingleObject failed (result=0x%08X, OS error=0x%08X)",
+             static_cast<unsigned>(waitResult), static_cast<unsigned>(lastError));
+    return Result(Result::Code::RuntimeError, buf);
+  }
+}
+
+bool FenceWaiter::isTimeoutError(const Result& result) {
+  // NOTE: string matching is fragile; kept because the timeout message above is the
+  // single source of that text in this class.
+  return !result.isOk() && result.message.find("timed out") != std::string::npos;
+}
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12FenceWaiter.h b/src/igl/d3d12/D3D12FenceWaiter.h
new file mode 100644
index 0000000000..cb0c5cf3a1
--- /dev/null
+++ b/src/igl/d3d12/D3D12FenceWaiter.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// NOTE(review): include target reconstructed — TODO confirm it also provides igl::Result and std::string.
+#include <igl/d3d12/D3D12Headers.h>
+
+namespace igl::d3d12 {
+
+/**
+ * @brief RAII helper for D3D12 fence waiting operations
+ *
+ * Manages event creation, SetEventOnCompletion, and proper cleanup.
+ * Eliminates TOCTOU races by rechecking fence after SetEventOnCompletion.
+ *
+ * IMPORTANT: The fence pointer must remain valid for the lifetime of the FenceWaiter.
+ * Typical usage is with fences owned by long-lived context objects.
+ *
+ * Usage:
+ *   FenceWaiter waiter(fence, targetValue);
+ *   Result result = waiter.wait(timeoutMs);
+ *   if (!result.isOk()) {
+ *     // Handle specific error (timeout, setup failure, etc.)
+ * } + */ +class FenceWaiter final { + public: + FenceWaiter(ID3D12Fence* fence, UINT64 targetValue); + ~FenceWaiter(); + + // Delete copy/move to ensure single ownership of event handle + FenceWaiter(const FenceWaiter&) = delete; + FenceWaiter& operator=(const FenceWaiter&) = delete; + FenceWaiter(FenceWaiter&&) = delete; + FenceWaiter& operator=(FenceWaiter&&) = delete; + + /** + * @brief Wait for fence to reach target value with timeout + * @param timeoutMs Timeout in milliseconds (INFINITE for no timeout) + * @return Result with specific error code and message on failure: + * - ArgumentNull: Null fence provided to constructor + * - InvalidOperation: Event creation or SetEventOnCompletion failed + * - RuntimeError: Wait timed out (use isTimeoutError() to detect) + * - RuntimeError: Wait failed or fence incomplete after event signaled + */ + Result wait(DWORD timeoutMs = INFINITE); + + /** + * @brief Check if fence already reached target without waiting + */ + bool isComplete() const; + + /** + * @brief Check if a Result represents a timeout error + * @param result The Result to check + * @return true if the result indicates a timeout, false otherwise + */ + static bool isTimeoutError(const Result& result); + + private: + ID3D12Fence* fence_; + UINT64 targetValue_; + HANDLE event_ = nullptr; + bool setupSucceeded_ = false; + Result::Code setupErrorCode_ = Result::Code::Ok; + std::string setupErrorMessage_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12FrameManager.cpp b/src/igl/d3d12/D3D12FrameManager.cpp new file mode 100644 index 0000000000..098a8e2f14 --- /dev/null +++ b/src/igl/d3d12/D3D12FrameManager.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+// NOTE(review): include targets reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12FrameManager.h>
+#include <igl/d3d12/D3D12Context.h>
+#include <igl/d3d12/D3D12FenceWaiter.h>
+
+namespace igl::d3d12 {
+
+void FrameManager::advanceFrame(UINT64 currentFenceValue) {
+  // Calculate next frame index
+  const uint32_t bufferCount = context_.getSwapchainBufferCount();
+  const uint32_t nextFrameIndex = (context_.getCurrentFrameIndex() + 1) % bufferCount;
+
+  // STEP 1: Pipeline overload protection
+  waitForPipelineSync(currentFenceValue);
+
+  // STEP 2: Wait for next frame's resources to be available
+  if (!waitForFrame(nextFrameIndex)) {
+    IGL_LOG_ERROR("FrameManager: Skipping frame advancement due to fence wait failure\n");
+    return;
+  }
+
+  // STEP 3: Advance to next frame
+  context_.getCurrentFrameIndex() = nextFrameIndex;
+#ifdef IGL_DEBUG
+  IGL_D3D12_LOG_VERBOSE("FrameManager: Advanced to frame index %u\n", nextFrameIndex);
+#endif
+
+  // STEP 4: Reset allocator safely
+  resetAllocator(nextFrameIndex);
+
+  // STEP 5: Clear transient resources
+  clearTransientResources(nextFrameIndex);
+
+  // STEP 6: Reset descriptor counters
+  resetDescriptorCounters(nextFrameIndex);
+}
+
+void FrameManager::waitForPipelineSync(UINT64 currentFenceValue) {
+  auto* fence = context_.getFence();
+
+  // Ensure we don't have more frames in flight than swapchain buffers
+  const uint32_t bufferCount = context_.getSwapchainBufferCount();
+  const UINT64 minimumSafeFence = (currentFenceValue >= bufferCount)
+                                      ? (currentFenceValue - (bufferCount - 1))
+                                      : 0;
+
+  const UINT64 currentCompletedValue = fence->GetCompletedValue();
+  if (currentCompletedValue < minimumSafeFence) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: SAFETY WAIT - Pipeline overload protection (completed=%llu, need=%llu)\n",
+                          currentCompletedValue, minimumSafeFence);
+#endif
+
+    FenceWaiter waiter(fence, minimumSafeFence);
+    Result waitResult = waiter.wait(INFINITE);
+    if (!waitResult.isOk()) {
+      IGL_LOG_ERROR("FrameManager: CRITICAL - Pipeline safety wait failed: %s; continuing but overload protection compromised\n",
+                    waitResult.message.c_str());
+      // Continue anyway - this is a safety net, not a hard requirement
+      // But future work should consider aborting here as well
+    }
+#ifdef IGL_DEBUG
+    else {
+      IGL_D3D12_LOG_VERBOSE("FrameManager: Safety wait completed (fence now=%llu)\n",
+                            fence->GetCompletedValue());
+    }
+#endif
+  }
+}
+
+bool FrameManager::waitForFrame(uint32_t frameIndex) {
+  auto* fence = context_.getFence();
+  const UINT64 frameFence = context_.getFrameContexts()[frameIndex].fenceValue;
+
+  // fenceValue == 0 means the frame slot has never been submitted
+  if (frameFence != 0 && fence->GetCompletedValue() < frameFence) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Waiting for frame %u (fence=%llu, current=%llu)\n",
+                          frameIndex, frameFence, fence->GetCompletedValue());
+#endif
+
+    FenceWaiter waiter(fence, frameFence);
+
+    // Try with 5-second timeout first (handles window drag scenarios)
+    Result waitResult = waiter.wait(5000);
+    if (!waitResult.isOk()) {
+      // Check if it's a timeout or other error
+      if (FenceWaiter::isTimeoutError(waitResult)) {
+        IGL_LOG_ERROR("FrameManager: Wait for frame %u fence %llu timed out after 5s; forcing infinite wait\n",
+                      frameIndex, frameFence);
+      } else {
+        IGL_LOG_ERROR("FrameManager: Wait for frame %u fence %llu failed: %s; forcing infinite wait\n",
+                      frameIndex, frameFence, waitResult.message.c_str());
+      }
+      // Fall back to infinite wait
+      waitResult = waiter.wait(INFINITE);
+      if (!waitResult.isOk()) {
+        IGL_LOG_ERROR("FrameManager: CRITICAL - Infinite wait for frame %u failed: %s; aborting frame advancement\n",
+                      frameIndex, waitResult.message.c_str());
+        return false; // Abort frame advancement - unsafe to proceed
+      }
+    }
+
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Frame %u resources now available (completed=%llu)\n",
+                          frameIndex, fence->GetCompletedValue());
+#endif
+  } else {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Frame %u resources already available (fence=%llu, completed=%llu)\n",
+                          frameIndex, frameFence, fence->GetCompletedValue());
+#endif
+  }
+  return true;
+}
+
+void FrameManager::resetAllocator(uint32_t frameIndex) {
+  auto* fence = context_.getFence();
+  auto& frame = context_.getFrameContexts()[frameIndex];
+  auto* allocator = frame.allocator.Get();
+
+  const UINT64 allocatorFence = frame.maxAllocatorFence;
+
+  if (allocatorFence == 0) {
+    // First time using this allocator
+    HRESULT hr = allocator->Reset();
+    if (FAILED(hr)) {
+      IGL_LOG_ERROR("FrameManager: Failed to reset frame %u allocator: 0x%08X\n",
+                    frameIndex, static_cast<unsigned>(hr));
+    }
+  } else {
+    // Verify GPU completed all command lists using this allocator
+    const UINT64 completedValue = fence->GetCompletedValue();
+
+    if (completedValue < allocatorFence) {
+      IGL_LOG_ERROR("FrameManager: ALLOCATOR SYNC ISSUE - GPU not done with all command lists "
+                    "(completed=%llu, need=%llu, cmdBufCount=%u). Waiting...\n",
+                    completedValue, allocatorFence, frame.commandBufferCount);
+
+      FenceWaiter waiter(fence, allocatorFence);
+      Result waitResult = waiter.wait(INFINITE);
+      if (!waitResult.isOk()) {
+        IGL_LOG_ERROR("FrameManager: CRITICAL - Allocator wait failed: %s; skipping unsafe allocator reset for frame %u\n",
+                      waitResult.message.c_str(), frameIndex);
+        // Do not reset allocator if GPU hasn't completed - would cause sync violations
+        return;
+      }
+      IGL_D3D12_LOG_VERBOSE("FrameManager: Allocator wait completed (fence now=%llu)\n",
+                            fence->GetCompletedValue());
+    }
+
+    // Reset allocator (safe now - GPU has completed all command lists)
+    HRESULT hr = allocator->Reset();
+    if (FAILED(hr)) {
+      IGL_LOG_ERROR("FrameManager: Failed to reset frame %u allocator: 0x%08X "
+                    "(maxFence=%llu, completed=%llu, cmdBufCount=%u)\n",
+                    frameIndex, static_cast<unsigned>(hr),
+                    allocatorFence, fence->GetCompletedValue(),
+                    frame.commandBufferCount);
+    } else {
+#ifdef IGL_DEBUG
+      IGL_D3D12_LOG_VERBOSE("FrameManager: Reset frame %u allocator (waited for %u command buffers, maxFence=%llu)\n",
+                            frameIndex, frame.commandBufferCount, allocatorFence);
+#endif
+    }
+
+#ifdef _DEBUG
+    if (SUCCEEDED(hr)) {
+      const UINT64 currentCompleted = fence->GetCompletedValue();
+      IGL_DEBUG_ASSERT(currentCompleted >= allocatorFence,
+                       "Allocator reset before GPU completed all command lists!");
+    }
+#endif
+  }
+
+  // Reset frame tracking
+  frame.fenceValue = 0;
+  frame.maxAllocatorFence = 0;
+  frame.commandBufferCount = 0;
+}
+
+void FrameManager::clearTransientResources(uint32_t frameIndex) {
+  auto& frame = context_.getFrameContexts()[frameIndex];
+
+  if (!frame.transientBuffers.empty()) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Clearing %zu transient buffers from frame %u (high-water=%zu)\n",
+                          frame.transientBuffers.size(), frameIndex, frame.transientBuffersHighWater);
+#endif
+    frame.transientBuffers.clear();
+  }
+
+  if (!frame.transientResources.empty()) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Releasing %zu transient D3D resources from frame %u (high-water=%zu)\n",
+                          frame.transientResources.size(), frameIndex, frame.transientResourcesHighWater);
+#endif
+    frame.transientResources.clear();
+  }
+}
+
+void FrameManager::resetDescriptorCounters(uint32_t frameIndex) {
+  auto& frame = context_.getFrameContexts()[frameIndex];
+
+  const uint32_t cbvSrvUavUsage = frame.nextCbvSrvUavDescriptor;
+  const uint32_t samplerUsage = frame.nextSamplerDescriptor;
+  const uint32_t peakCbvSrvUav = frame.peakCbvSrvUavUsage;
+  const uint32_t peakSampler = frame.peakSamplerUsage;
+
+  if (cbvSrvUavUsage > 0 || samplerUsage > 0) {
+#ifdef IGL_DEBUG
+    const float cbvSrvUavPercent = (static_cast<float>(cbvSrvUavUsage) / kCbvSrvUavHeapSize) * 100.0f;
+    const float samplerPercent = (static_cast<float>(samplerUsage) / kSamplerHeapSize) * 100.0f;
+    const float peakCbvSrvUavPercent = (static_cast<float>(peakCbvSrvUav) / kCbvSrvUavHeapSize) * 100.0f;
+    const float peakSamplerPercent = (static_cast<float>(peakSampler) / kSamplerHeapSize) * 100.0f;
+
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Frame %u descriptor usage:\n"
+                          "  CBV/SRV/UAV: final=%u/%u (%.1f%%), peak=%u/%u (%.1f%%)\n"
+                          "  Samplers: final=%u/%u (%.1f%%), peak=%u/%u (%.1f%%)\n",
+                          frameIndex,
+                          cbvSrvUavUsage, kCbvSrvUavHeapSize, cbvSrvUavPercent,
+                          peakCbvSrvUav, kCbvSrvUavHeapSize, peakCbvSrvUavPercent,
+                          samplerUsage, kSamplerHeapSize, samplerPercent,
+                          peakSampler, kSamplerHeapSize, peakSamplerPercent);
+#endif
+  }
+
+  // Reset counters
+  frame.nextCbvSrvUavDescriptor = 0;
+  frame.nextSamplerDescriptor = 0;
+
+#ifdef IGL_DEBUG
+  IGL_D3D12_LOG_VERBOSE("FrameManager: Reset descriptor counters for frame %u to 0\n", frameIndex);
+#endif
+}
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12FrameManager.h b/src/igl/d3d12/D3D12FrameManager.h
new file mode 100644
index 0000000000..e416b8987b
--- /dev/null
+++ b/src/igl/d3d12/D3D12FrameManager.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) Meta Platforms, Inc.
and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// NOTE(review): include target reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12Headers.h>
+
+namespace igl::d3d12 {
+
+class D3D12Context;
+
+/**
+ * @brief Manages frame advancement, fence waiting, and resource cleanup
+ *
+ * Centralizes the complex logic for:
+ * - Waiting for next frame's resources to become available
+ * - Pipeline overload protection (ensuring max frames in flight)
+ * - Safe command allocator reset after GPU completion
+ * - Transient resource cleanup
+ * - Descriptor heap reset
+ */
+class FrameManager final {
+ public:
+  explicit FrameManager(D3D12Context& context) : context_(context) {}
+
+  /**
+   * @brief Advance to next frame with proper synchronization
+   *
+   * Handles:
+   * 1. Calculate next frame index
+   * 2. Wait for pipeline overload protection
+   * 3. Wait for next frame's resources
+   * 4. Update frame index
+   * 5. Reset allocator safely
+   * 6. Clear transient resources
+   * 7. Reset descriptor counters
+   *
+   * @param currentFenceValue The fence value just signaled
+   */
+  void advanceFrame(UINT64 currentFenceValue);
+
+ private:
+  /**
+   * @brief Wait for pipeline to avoid overload (max frames in flight)
+   */
+  void waitForPipelineSync(UINT64 currentFenceValue);
+
+  /**
+   * @brief Wait for specific frame's resources to become available
+   * @return true if wait succeeded, false if catastrophic wait failure
+   */
+  bool waitForFrame(uint32_t frameIndex);
+
+  /**
+   * @brief Safely reset command allocator after GPU completion
+   */
+  void resetAllocator(uint32_t frameIndex);
+
+  /**
+   * @brief Clear transient resources from completed frame
+   */
+  void clearTransientResources(uint32_t frameIndex);
+
+  /**
+   * @brief Log and reset descriptor usage counters
+   */
+  void resetDescriptorCounters(uint32_t frameIndex);
+
+  D3D12Context& context_; // borrowed; must outlive this object
+};
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12Headers.h b/src/igl/d3d12/D3D12Headers.h
new file mode 100644
index 0000000000..10e3b90f9f
--- /dev/null
+++ b/src/igl/d3d12/D3D12Headers.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#ifndef IGL_D3D12_D3D12HEADERS_H
+#define IGL_D3D12_D3D12HEADERS_H
+
+// Windows headers
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+// Don't use WIN32_LEAN_AND_MEAN - it excludes wrl/client.h
+// NOTE(review): every include target below was reconstructed (angle-bracket content
+// was lost in extraction) — TODO confirm exact paths against the repo.
+#include <Windows.h>
+
+// DirectX 12 headers
+#include <directx/d3d12.h>
+#include <dxgi1_6.h>
+
+// DirectX Shader Compiler
+#include <dxcapi.h>
+#include <d3dcompiler.h> // For D3DCompile (legacy HLSL compiler)
+
+// D3DX12 helper library (header-only)
+// Disable buggy helper classes that have preprocessor issues or require newer SDK
+#define D3DX12_NO_STATE_OBJECT_HELPERS
+#define D3DX12_NO_CHECK_FEATURE_SUPPORT_CLASS
+
+// Manually include only the d3dx12 headers we need (excluding incompatible ones)
+#include <directx/d3dx12_core.h>
+#include <directx/d3dx12_barriers.h>
+#include <directx/d3dx12_default.h>
+#include <directx/d3dx12_pipeline_state_stream.h>
+#include <directx/d3dx12_render_pass.h>
+#include <directx/d3dx12_root_signature.h>
+// Excluded: d3dx12_resource_helpers.h (requires property_format_table.h which needs newer SDK)
+// Excluded: d3dx12_property_format_table.h (requires newer SDK)
+#include <directx/d3dx12_state_object.h> // NOTE(review): reconstructed — verify this 7th include
+
+// ComPtr for COM object management
+// IGL's minimal ComPtr-like smart pointer implementing the subset of Microsoft::WRL::ComPtr
+// used by the D3D12 backend. We keep it custom to avoid preprocessor issues with <wrl/client.h>.
+//
+// WARNING: Do not include <wrl/client.h> before this header in the same translation unit.
+// Use igl::d3d12::ComPtr instead of Microsoft::WRL::ComPtr throughout the D3D12 backend.
+// (Including <wrl/client.h> after this header is technically safe but unsupported.)
+//
+// Supported operations: Get, GetAddressOf, ReleaseAndGetAddressOf, Reset, Attach, Detach,
+// As, TryAs, CopyTo, comparison operators, bool conversion, move/copy semantics.
+// Operations NOT implemented: ComPtrRef, operator&, CopyTo(REFIID, void**), and other WRL internals.
+
+// Compile-time guard: fail if <wrl/client.h> was already included before this header
+#if defined(__WRL_CLIENT_H__) || defined(_WRL_CLIENT_H_)
+#error "D3D12Headers.h must be included before <wrl/client.h>. The D3D12 backend uses igl::d3d12::ComPtr exclusively."
+#endif
+
+namespace igl {
+namespace d3d12 {
+  // Minimal intrusive-refcount smart pointer for COM interfaces (WRL-compatible subset).
+  template <typename T>
+  class ComPtr {
+   public:
+    ComPtr() noexcept : ptr_(nullptr) {}
+    ComPtr(T* ptr) noexcept : ptr_(ptr) { if (ptr_) ptr_->AddRef(); }
+
+    // Copy constructor - AddRef the pointer
+    ComPtr(const ComPtr& other) noexcept : ptr_(other.ptr_) {
+      if (ptr_) ptr_->AddRef();
+    }
+
+    // Copy assignment - AddRef new, Release old (AddRef first is self-assignment safe)
+    ComPtr& operator=(const ComPtr& other) noexcept {
+      if (this != &other) {
+        if (other.ptr_) other.ptr_->AddRef();
+        if (ptr_) ptr_->Release();
+        ptr_ = other.ptr_;
+      }
+      return *this;
+    }
+
+    // Move constructor
+    ComPtr(ComPtr&& other) noexcept : ptr_(other.ptr_) { other.ptr_ = nullptr; }
+
+    // Move assignment
+    ComPtr& operator=(ComPtr&& other) noexcept {
+      if (this != &other) {
+        if (ptr_) ptr_->Release();
+        ptr_ = other.ptr_;
+        other.ptr_ = nullptr;
+      }
+      return *this;
+    }
+
+    // Destructor
+    ~ComPtr() { if (ptr_) ptr_->Release(); }
+
+    // Accessor methods
+    T* Get() const noexcept { return ptr_; }
+    T** GetAddressOf() noexcept { return &ptr_; }
+    T* operator->() const noexcept { return ptr_; }
+
+    // Dereference operator (caller must ensure ptr_ != nullptr)
+    T& operator*() const noexcept { return *ptr_; }
+
+    // Comparison operators (operator< compares raw addresses for use in containers)
+    bool operator==(const ComPtr& other) const noexcept {
+      return ptr_ == other.ptr_;
+    }
+
+    bool operator!=(const ComPtr& other) const noexcept {
+      return ptr_ != other.ptr_;
+    }
+
+    bool operator<(const ComPtr& other) const noexcept {
+      return ptr_ < other.ptr_;
+    }
+
+    bool operator==(T* other) const noexcept {
+      return ptr_ == other;
+    }
+
+    bool operator!=(T* other) const noexcept {
+      return ptr_ != other;
+    }
+
+    // Boolean conversion for nullptr checks
+    explicit operator bool() const noexcept {
+      return ptr_ != nullptr;
+    }
+
+    // Reset to release current pointer
+    void Reset() noexcept {
+      if (ptr_) {
+        ptr_->Release();
+        ptr_ = nullptr;
+      }
+    }
+
+    // Attach a raw pointer without AddRef
+    void Attach(T* ptr) noexcept {
+      if (ptr_) {
+        ptr_->Release();
+      }
+      ptr_ = ptr;
+    }
+
+    // Detach and return raw pointer without Release
+    T* Detach() noexcept {
+      T* temp = ptr_;
+      ptr_ = nullptr;
+      return temp;
+    }
+
+    // ReleaseAndGetAddressOf - release current and return address for output
+    T** ReleaseAndGetAddressOf() noexcept {
+      Reset();
+      return &ptr_;
+    }
+
+    // As() - QueryInterface to another interface type
+    // Note: Unlike WRL's ComPtr::As, this implementation adds null-safety:
+    //   - Returns E_POINTER if 'other' is null
+    //   - Returns E_FAIL and resets 'other' if this->ptr_ is null (no object to query)
+    //   - Otherwise returns HRESULT from QueryInterface (S_OK or E_NOINTERFACE typically)
+    // WRL assumes non-null and relies only on QueryInterface return value.
+    // Rationale: Explicit null checks give more predictable behavior when pointers may be null.
+    // Callers should treat any non-S_OK result uniformly as "interface query failed".
+    template <typename U>
+    HRESULT As(ComPtr<U>* other) const noexcept {
+      if (!other) {
+        return E_POINTER;
+      }
+      if (!ptr_) {
+        other->Reset();
+        return E_FAIL; // No object to query
+      }
+      return ptr_->QueryInterface(__uuidof(U), reinterpret_cast<void**>(other->ReleaseAndGetAddressOf()));
+    }
+
+    // TryAs() - QueryInterface convenience method that returns ComPtr
+    // WARNING: Silently drops HRESULT; returns empty ComPtr on failure.
+    // For error-sensitive code, prefer the HRESULT-returning As(ComPtr* other) overload.
+    // Use case: Optional interface queries where failure is expected/acceptable and doesn't need diagnosis.
+    // In code paths that return igl::Result or log errors, prefer As() so you can propagate the HRESULT.
+    template <typename U>
+    ComPtr<U> TryAs() const noexcept {
+      ComPtr<U> result;
+      if (ptr_) {
+        ptr_->QueryInterface(__uuidof(U), reinterpret_cast<void**>(result.ReleaseAndGetAddressOf()));
+      }
+      return result;
+    }
+
+    // CopyTo - Copy pointer with AddRef
+    // Note: Returns S_OK even if ptr_ is null; *other will be set to nullptr (matches WRL)
+    HRESULT CopyTo(T** other) const noexcept {
+      if (!other) {
+        return E_POINTER;
+      }
+      *other = ptr_;
+      if (ptr_) {
+        ptr_->AddRef();
+      }
+      return S_OK;
+    }
+
+   private:
+    T* ptr_;
+  };
+} // namespace d3d12
+} // namespace igl
+
+// For convenience in D3D12 implementation files, you may add a local using declaration:
+//   namespace { template <typename T> using ComPtr = igl::d3d12::ComPtr<T>; }
+// This reduces verbosity without polluting the global or igl::d3d12 namespace.
+
+namespace Microsoft {
+namespace WRL {
+  // DO NOT define ComPtr here - it conflicts with <wrl/client.h>
+  // All D3D12 code should use igl::d3d12::ComPtr directly
+} // namespace WRL
+} // namespace Microsoft
+
+// Note: Library linking is handled by CMake (see src/igl/d3d12/CMakeLists.txt)
+// Required libraries: d3d12.lib, dxgi.lib, dxguid.lib, dxcompiler.lib, d3dcompiler.lib
+
+#endif // IGL_D3D12_D3D12HEADERS_H
diff --git a/src/igl/d3d12/D3D12ImmediateCommands.cpp b/src/igl/d3d12/D3D12ImmediateCommands.cpp
new file mode 100644
index 0000000000..266b02c236
--- /dev/null
+++ b/src/igl/d3d12/D3D12ImmediateCommands.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): include targets reconstructed (4 in the original); confirm against the repo.
+#include <igl/d3d12/D3D12ImmediateCommands.h>
+
+#include <igl/Common.h>
+#include <igl/d3d12/D3D12FenceWaiter.h>
+#include <igl/d3d12/D3D12Headers.h>
+
+namespace igl::d3d12 {
+
+D3D12ImmediateCommands::D3D12ImmediateCommands(ID3D12Device* device,
+                                               ID3D12CommandQueue* queue,
+                                               ID3D12Fence* fence,
+                                               IFenceProvider* fenceProvider)
+    : device_(device), queue_(queue), fence_(fence), fenceProvider_(fenceProvider) {
+  IGL_DEBUG_ASSERT(device_);
+  IGL_DEBUG_ASSERT(queue_);
+  IGL_DEBUG_ASSERT(fence_);
+  IGL_DEBUG_ASSERT(fenceProvider_);
+
+  IGL_D3D12_LOG_VERBOSE("D3D12ImmediateCommands: Initialized (using shared fence timeline)\n");
+}
+
+D3D12ImmediateCommands::~D3D12ImmediateCommands() {
+  // Wait for all in-flight operations to complete
+  if (fence_) {
+    for (const auto& entry : inFlightAllocators_) {
+      if (fence_->GetCompletedValue() < entry.fenceValue) {
+        FenceWaiter waiter(fence_, entry.fenceValue);
+        Result waitResult = waiter.wait();
+        if (!waitResult.isOk()) {
+          IGL_LOG_ERROR("D3D12ImmediateCommands::~D3D12ImmediateCommands() - Fence wait failed during cleanup: %s\n",
+                        waitResult.message.c_str());
+        }
+      }
+    }
+  }
+
+  IGL_D3D12_LOG_VERBOSE("D3D12ImmediateCommands: Destroyed\n");
+}
+
+ID3D12GraphicsCommandList* D3D12ImmediateCommands::begin(Result* outResult) {
+  std::lock_guard<std::mutex> lock(poolMutex_);
+
+  // Reclaim completed allocators first
+  reclaimCompletedAllocators();
+
+  // Get or create an allocator
+  Result result = getOrCreateAllocator(&currentAllocator_);
+  if (!result.isOk()) {
+    Result::setResult(outResult, result);
+    return nullptr;
+  }
+
+  // Reset the allocator for reuse
+  HRESULT hr = currentAllocator_->Reset();
+  if (FAILED(hr)) {
+    Result::setResult(outResult,
+                      Result{Result::Code::RuntimeError, "Failed to reset command allocator"});
+    return nullptr;
+  }
+
+  // Create or reset command list
+  if (!cmdList_.Get()) {
+    hr = device_->CreateCommandList(0,
+                                    D3D12_COMMAND_LIST_TYPE_DIRECT,
+                                    currentAllocator_.Get(),
+                                    nullptr,
+                                    IID_PPV_ARGS(cmdList_.GetAddressOf()));
+    if (FAILED(hr)) {
+      Result::setResult(outResult,
+                        Result{Result::Code::RuntimeError, "Failed to create command list"});
+      return nullptr;
+    }
+  } else {
+    hr = cmdList_->Reset(currentAllocator_.Get(), nullptr);
+    if (FAILED(hr)) {
+      Result::setResult(outResult,
+                        Result{Result::Code::RuntimeError, "Failed to reset command list"});
+      return nullptr;
+    }
+  }
+
+  Result::setOk(outResult);
+  return cmdList_.Get();
+}
+
+uint64_t D3D12ImmediateCommands::submit(bool wait, Result* outResult) {
+  if (!cmdList_.Get()) {
+    Result::setResult(outResult, Result{Result::Code::RuntimeError, "No active command list"});
+    return 0;
+  }
+
+  // Close the command list
+  HRESULT hr = cmdList_->Close();
+  if (FAILED(hr)) {
+    Result::setResult(outResult,
+                      Result{Result::Code::RuntimeError, "Failed to close command list"});
+    return 0;
+  }
+
+  // Execute command list
+  ID3D12CommandList* lists[] = {cmdList_.Get()};
+  queue_->ExecuteCommandLists(1, lists);
+
+  // Get next fence value from shared timeline
+  const uint64_t fenceValue = fenceProvider_->getNextFenceValue();
+
+  // Signal fence on shared timeline
+  hr = queue_->Signal(fence_, fenceValue);
+  if (FAILED(hr)) {
+    // NOTE(review): on this path the current allocator is never moved to the in-flight
+    // list while the GPU may still be executing it — TODO confirm intended handling.
+    Result::setResult(outResult,
+                      Result{Result::Code::RuntimeError, "Failed to signal fence"});
+    return 0;
+  }
+
+  // Move current allocator to in-flight list
+  {
+    std::lock_guard<std::mutex> lock(poolMutex_);
+    inFlightAllocators_.push_back({currentAllocator_, fenceValue});
+    currentAllocator_.Reset();
+  }
+
+  // Wait if requested
+  if (wait) {
+    Result waitResult = waitForFence(fenceValue);
+    if (!waitResult.isOk()) {
+      Result::setResult(outResult, waitResult);
+      return 0; // Return 0 to signal failure
+    }
+  }
+
+  Result::setOk(outResult);
+  return fenceValue;
+}
+
+bool D3D12ImmediateCommands::isComplete(uint64_t fenceValue) const {
+  if (!fence_) {
+    return false;
+  }
+  return fence_->GetCompletedValue() >= fenceValue;
+}
+
+Result D3D12ImmediateCommands::waitForFence(uint64_t fenceValue) {
+  if (!fence_) {
+    return Result{Result::Code::RuntimeError, "Fence is null"};
+  }
+
+  if (isComplete(fenceValue)) {
+    return Result{};
+  }
+
+  FenceWaiter waiter(fence_, fenceValue);
+  return waiter.wait(); // Directly return the detailed Result
+}
+
+void D3D12ImmediateCommands::reclaimCompletedAllocators() {
+  // Note: Internal helper called by begin() with poolMutex_ already held
+  if (!fence_) {
+    return;
+  }
+
+  const uint64_t completedValue = fence_->GetCompletedValue();
+
+  // Move completed allocators from in-flight to available
+  auto it = inFlightAllocators_.begin();
+  while (it != inFlightAllocators_.end()) {
+    if (it->fenceValue <= completedValue) {
+      availableAllocators_.push_back({it->allocator, 0});
+      it = inFlightAllocators_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+Result D3D12ImmediateCommands::getOrCreateAllocator(
+    igl::d3d12::ComPtr<ID3D12CommandAllocator>* outAllocator) {
+  // Try to reuse an available allocator
+  if (!availableAllocators_.empty()) {
+    *outAllocator = availableAllocators_.back().allocator;
+    availableAllocators_.pop_back();
+    return Result{};
+  }
+
+  // Create new allocator
+  HRESULT hr = device_->CreateCommandAllocator(
+      D3D12_COMMAND_LIST_TYPE_DIRECT,
+      IID_PPV_ARGS(outAllocator->GetAddressOf()));
+
+  if (FAILED(hr)) {
+    return Result{Result::Code::RuntimeError, "Failed to create command allocator"};
+  }
+
+  IGL_D3D12_LOG_VERBOSE("D3D12ImmediateCommands: Created new command allocator (pool size: %zu)\n",
+                        availableAllocators_.size() + inFlightAllocators_.size() + 1);
+
+  return Result{};
+}
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12ImmediateCommands.h b/src/igl/d3d12/D3D12ImmediateCommands.h
new file mode 100644
index 0000000000..4753e1a389
--- /dev/null
+++ b/src/igl/d3d12/D3D12ImmediateCommands.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief Interface for obtaining fence values from shared timeline + * + * This interface allows D3D12ImmediateCommands to participate in the + * device's shared fence timeline without managing its own counter. + */ +class IFenceProvider { +public: + virtual ~IFenceProvider() = default; + + /** + * @brief Get the next fence value from the shared timeline + * @return Monotonically increasing fence value + */ + virtual uint64_t getNextFenceValue() = 0; +}; + +/** + * @brief Centralized management of immediate copy operations + * + * Provides a pooled command allocator/list infrastructure for transient + * upload/readback operations, eliminating per-operation allocator creation + * and redundant GPU synchronization. + * + * Thread-safety: This class is NOT thread-safe for concurrent begin()/submit(). + * Only one begin()/submit() sequence may be active at a time. Multiple threads + * calling begin() concurrently will corrupt the shared command list. + * + * The allocator pool (reclaimCompletedAllocators) is internally synchronized. + * + * Inspired by Vulkan's VulkanImmediateCommands pattern. + */ +class D3D12ImmediateCommands { +public: + /** + * @brief Initialize the immediate commands infrastructure + * @param device D3D12 device for resource creation + * @param queue Command queue for submission + * @param fence Fence for completion tracking (shared with device) + * @param fenceProvider Provider for next fence values from shared timeline + */ + D3D12ImmediateCommands(ID3D12Device* device, + ID3D12CommandQueue* queue, + ID3D12Fence* fence, + IFenceProvider* fenceProvider); + + ~D3D12ImmediateCommands(); + + /** + * @brief Get command list for immediate copy operation + * + * Returns a ready-to-use command list from the pool. The command list + * is already reset and ready for recording. 
+ * + * @param outResult Optional result for error reporting + * @return Command list ready for recording, or nullptr on failure + */ + [[nodiscard]] ID3D12GraphicsCommandList* begin(Result* outResult = nullptr); + + /** + * @brief Submit command list and optionally wait for completion + * + * Closes, submits, and signals the fence. If wait=true, blocks until + * GPU completes the work. + * + * @param wait If true, block until GPU completes + * @param outResult Optional result for error reporting + * @return Fence value that will signal when work completes (0 on failure) + */ + [[nodiscard]] uint64_t submit(bool wait, Result* outResult = nullptr); + + /** + * @brief Check if a fence value has completed + * @param fenceValue Fence value to check + * @return true if GPU has completed this fence value + */ + [[nodiscard]] bool isComplete(uint64_t fenceValue) const; + + /** + * @brief Wait for a specific fence value to complete + * @param fenceValue Fence value to wait for + * @return Result indicating success or failure + */ + [[nodiscard]] Result waitForFence(uint64_t fenceValue); + +private: + /** + * @brief Reclaim completed command allocators back to pool + * + * Internal method called during begin() to recycle allocators. + * Must be called with poolMutex_ held. 
+ */ + void reclaimCompletedAllocators(); + struct AllocatorEntry { + igl::d3d12::ComPtr allocator; + uint64_t fenceValue = 0; // Fence value when this allocator was last used + }; + + ID3D12Device* device_ = nullptr; + ID3D12CommandQueue* queue_ = nullptr; + ID3D12Fence* fence_ = nullptr; // Shared fence (owned by Device) + IFenceProvider* fenceProvider_ = nullptr; // Provides fence values from shared timeline + + // Current command list for recording + igl::d3d12::ComPtr cmdList_; + + // Current allocator being used + igl::d3d12::ComPtr currentAllocator_; + + // Pool of available allocators + std::vector availableAllocators_; + + // Allocators in flight (waiting for GPU) + std::vector inFlightAllocators_; + + // Mutex for thread-safe allocator pool access + std::mutex poolMutex_; + + // Get or create an allocator from the pool + [[nodiscard]] Result getOrCreateAllocator( + igl::d3d12::ComPtr* outAllocator); +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineBuilder.cpp b/src/igl/d3d12/D3D12PipelineBuilder.cpp new file mode 100644 index 0000000000..b479335479 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineBuilder.cpp @@ -0,0 +1,706 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +namespace igl::d3d12 { + +namespace { + +// Helper function to calculate root signature cost in DWORDs +uint32_t calculateRootSignatureCostInternal(const D3D12_ROOT_SIGNATURE_DESC& desc) { + uint32_t totalCost = 0; + + for (uint32_t i = 0; i < desc.NumParameters; ++i) { + const auto& param = desc.pParameters[i]; + + switch (param.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + totalCost += param.Constants.Num32BitValues; + break; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + totalCost += 2; // Root descriptors cost 2 DWORDs + break; + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + totalCost += 1; // Descriptor tables cost 1 DWORD + break; + } + } + + return totalCost; +} + +// Helper to convert IGL blend factor to D3D12 +D3D12_BLEND toD3D12Blend(BlendFactor f) { + switch (f) { + case BlendFactor::Zero: return D3D12_BLEND_ZERO; + case BlendFactor::One: return D3D12_BLEND_ONE; + case BlendFactor::SrcColor: return D3D12_BLEND_SRC_COLOR; + case BlendFactor::OneMinusSrcColor: return D3D12_BLEND_INV_SRC_COLOR; + case BlendFactor::SrcAlpha: return D3D12_BLEND_SRC_ALPHA; + case BlendFactor::OneMinusSrcAlpha: return D3D12_BLEND_INV_SRC_ALPHA; + case BlendFactor::DstColor: return D3D12_BLEND_DEST_COLOR; + case BlendFactor::OneMinusDstColor: return D3D12_BLEND_INV_DEST_COLOR; + case BlendFactor::DstAlpha: return D3D12_BLEND_DEST_ALPHA; + case BlendFactor::OneMinusDstAlpha: return D3D12_BLEND_INV_DEST_ALPHA; + case BlendFactor::SrcAlphaSaturated: return D3D12_BLEND_SRC_ALPHA_SAT; + case BlendFactor::BlendColor: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendColor: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::BlendAlpha: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendAlpha: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::Src1Color: return D3D12_BLEND_SRC1_COLOR; + case 
BlendFactor::OneMinusSrc1Color: return D3D12_BLEND_INV_SRC1_COLOR; + case BlendFactor::Src1Alpha: return D3D12_BLEND_SRC1_ALPHA; + case BlendFactor::OneMinusSrc1Alpha: return D3D12_BLEND_INV_SRC1_ALPHA; + default: return D3D12_BLEND_ONE; + } +} + +// Helper to convert IGL blend operation to D3D12 +D3D12_BLEND_OP toD3D12BlendOp(BlendOp op) { + switch (op) { + case BlendOp::Add: return D3D12_BLEND_OP_ADD; + case BlendOp::Subtract: return D3D12_BLEND_OP_SUBTRACT; + case BlendOp::ReverseSubtract: return D3D12_BLEND_OP_REV_SUBTRACT; + case BlendOp::Min: return D3D12_BLEND_OP_MIN; + case BlendOp::Max: return D3D12_BLEND_OP_MAX; + default: return D3D12_BLEND_OP_ADD; + } +} + +} // anonymous namespace + +//============================================================================= +// D3D12GraphicsPipelineBuilder +//============================================================================= + +D3D12GraphicsPipelineBuilder::D3D12GraphicsPipelineBuilder() { + // Zero-initialize the descriptor + psoDesc_ = {}; + + // Set sensible defaults for rasterizer state + psoDesc_.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + psoDesc_.RasterizerState.FrontCounterClockwise = FALSE; + psoDesc_.RasterizerState.DepthBias = 0; + psoDesc_.RasterizerState.DepthBiasClamp = 0.0f; + psoDesc_.RasterizerState.SlopeScaledDepthBias = 0.0f; + psoDesc_.RasterizerState.DepthClipEnable = TRUE; + psoDesc_.RasterizerState.MultisampleEnable = FALSE; + psoDesc_.RasterizerState.AntialiasedLineEnable = FALSE; + psoDesc_.RasterizerState.ForcedSampleCount = 0; + psoDesc_.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + // Set sensible defaults for blend state + psoDesc_.BlendState.AlphaToCoverageEnable = FALSE; + psoDesc_.BlendState.IndependentBlendEnable = FALSE; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + psoDesc_.BlendState.RenderTarget[i].BlendEnable = FALSE; + 
psoDesc_.BlendState.RenderTarget[i].LogicOpEnable = FALSE; + psoDesc_.BlendState.RenderTarget[i].SrcBlend = D3D12_BLEND_ONE; + psoDesc_.BlendState.RenderTarget[i].DestBlend = D3D12_BLEND_ZERO; + psoDesc_.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; + psoDesc_.BlendState.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; + psoDesc_.BlendState.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_ZERO; + psoDesc_.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; + psoDesc_.BlendState.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + psoDesc_.BlendState.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + + // Set sensible defaults for depth-stencil state + psoDesc_.DepthStencilState.DepthEnable = FALSE; + psoDesc_.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + psoDesc_.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + psoDesc_.DepthStencilState.StencilEnable = FALSE; + psoDesc_.DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + psoDesc_.DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + psoDesc_.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc_.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc_.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + psoDesc_.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + psoDesc_.DepthStencilState.BackFace = psoDesc_.DepthStencilState.FrontFace; + + // Defaults for other fields + psoDesc_.SampleMask = UINT_MAX; + psoDesc_.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc_.NumRenderTargets = 1; + psoDesc_.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc_.DSVFormat = DXGI_FORMAT_UNKNOWN; + psoDesc_.SampleDesc.Count = 1; + psoDesc_.SampleDesc.Quality = 0; + psoDesc_.NodeMask = 0; + psoDesc_.CachedPSO.pCachedBlob = nullptr; + psoDesc_.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc_.Flags = 
D3D12_PIPELINE_STATE_FLAG_NONE; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::vertexShader( + const std::vector& bytecode) { + vsBytecode_ = bytecode; + psoDesc_.VS = {vsBytecode_.data(), vsBytecode_.size()}; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::pixelShader( + const std::vector& bytecode) { + psBytecode_ = bytecode; + psoDesc_.PS = {psBytecode_.data(), psBytecode_.size()}; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::shaderBytecode( + const std::vector& vs, + const std::vector& ps) { + return vertexShader(vs).pixelShader(ps); +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::vertexInputLayout( + const std::vector& elements) { + inputElements_ = elements; + psoDesc_.InputLayout = {inputElements_.data(), static_cast(inputElements_.size())}; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::blendState( + const D3D12_BLEND_DESC& desc) { + psoDesc_.BlendState = desc; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::blendStateForAttachment( + UINT attachmentIndex, + const RenderPipelineDesc::TargetDesc::ColorAttachment& attachment) { + if (attachmentIndex >= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT) { + return *this; + } + + // Enable independent blending when configuring attachments beyond RT0 + if (attachmentIndex > 0) { + psoDesc_.BlendState.IndependentBlendEnable = TRUE; + } + + auto& rt = psoDesc_.BlendState.RenderTarget[attachmentIndex]; + rt.BlendEnable = attachment.blendEnabled ? 
TRUE : FALSE; + rt.SrcBlend = toD3D12Blend(attachment.srcRGBBlendFactor); + rt.DestBlend = toD3D12Blend(attachment.dstRGBBlendFactor); + rt.BlendOp = toD3D12BlendOp(attachment.rgbBlendOp); + rt.SrcBlendAlpha = toD3D12Blend(attachment.srcAlphaBlendFactor); + rt.DestBlendAlpha = toD3D12Blend(attachment.dstAlphaBlendFactor); + rt.BlendOpAlpha = toD3D12BlendOp(attachment.alphaBlendOp); + + // Convert IGL color write mask to D3D12 + UINT8 writeMask = 0; + if (attachment.colorWriteMask & igl::kColorWriteBitsRed) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_RED; + } + if (attachment.colorWriteMask & igl::kColorWriteBitsGreen) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + } + if (attachment.colorWriteMask & igl::kColorWriteBitsBlue) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + } + if (attachment.colorWriteMask & igl::kColorWriteBitsAlpha) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + } + rt.RenderTargetWriteMask = writeMask; + + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::rasterizerState( + const D3D12_RASTERIZER_DESC& desc) { + psoDesc_.RasterizerState = desc; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::cullMode(CullMode mode) { + switch (mode) { + case CullMode::Back: + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + case CullMode::Front: + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CullMode::Disabled: + default: + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + } + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::frontFaceWinding(WindingMode mode) { + psoDesc_.RasterizerState.FrontCounterClockwise = + (mode == WindingMode::CounterClockwise) ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::polygonFillMode(PolygonFillMode mode) { + psoDesc_.RasterizerState.FillMode = + (mode == PolygonFillMode::Line) ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthStencilState( + const D3D12_DEPTH_STENCIL_DESC& desc) { + psoDesc_.DepthStencilState = desc; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthTestEnabled(bool enabled) { + psoDesc_.DepthStencilState.DepthEnable = enabled ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthWriteEnabled(bool enabled) { + psoDesc_.DepthStencilState.DepthWriteMask = + enabled ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthCompareFunc( + D3D12_COMPARISON_FUNC func) { + psoDesc_.DepthStencilState.DepthFunc = func; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::renderTargetFormat( + UINT index, + DXGI_FORMAT format) { + if (index < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT) { + psoDesc_.RTVFormats[index] = format; + // Auto-update NumRenderTargets to include this slot + if (index + 1 > psoDesc_.NumRenderTargets) { + psoDesc_.NumRenderTargets = index + 1; + // Enable independent blending when using multiple render targets + psoDesc_.BlendState.IndependentBlendEnable = (psoDesc_.NumRenderTargets > 1) ? TRUE : FALSE; + } + } + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::renderTargetFormats( + const std::vector& formats) { + const UINT count = static_cast( + std::min(formats.size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc_.NumRenderTargets = count; + // Enable independent blending when using multiple render targets + psoDesc_.BlendState.IndependentBlendEnable = (count > 1) ? 
TRUE : FALSE; + for (UINT i = 0; i < count; ++i) { + psoDesc_.RTVFormats[i] = formats[i]; + } + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthStencilFormat( + DXGI_FORMAT format) { + psoDesc_.DSVFormat = format; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::numRenderTargets(UINT count) { + const UINT clamped = std::min(count, static_cast(D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc_.NumRenderTargets = clamped; + // Enable independent blending when using multiple render targets + psoDesc_.BlendState.IndependentBlendEnable = (clamped > 1) ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::sampleCount(UINT count) { + psoDesc_.SampleDesc.Count = count; + psoDesc_.RasterizerState.MultisampleEnable = (count > 1) ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::sampleMask(UINT mask) { + psoDesc_.SampleMask = mask; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::primitiveTopologyType( + D3D12_PRIMITIVE_TOPOLOGY_TYPE type) { + psoDesc_.PrimitiveTopologyType = type; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::streamOutput( + const D3D12_STREAM_OUTPUT_DESC& desc) { + psoDesc_.StreamOutput = desc; + return *this; +} + +Result D3D12GraphicsPipelineBuilder::build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName) { + if (!device) { + return Result(Result::Code::ArgumentNull, "Device is null"); + } + if (!rootSignature) { + return Result(Result::Code::ArgumentNull, "Root signature is null"); + } + if (!outPipelineState) { + return Result(Result::Code::ArgumentNull, "Output pipeline state is null"); + } + + // Initialize output to null for safety + *outPipelineState = nullptr; + + // Validate shader bytecode + if (psoDesc_.VS.BytecodeLength == 0) { + return 
Result(Result::Code::ArgumentInvalid, "Vertex shader bytecode is required"); + } + if (psoDesc_.PS.BytecodeLength == 0) { + return Result(Result::Code::ArgumentInvalid, "Pixel shader bytecode is required"); + } + + // Set root signature + psoDesc_.pRootSignature = rootSignature; + + // Create pipeline state + igl::d3d12::ComPtr pipelineState; + HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc_, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create graphics pipeline state. HRESULT: 0x%08X", + static_cast(hr)); + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Set debug name if provided + if (debugName && debugName[0] != '\0') { + std::wstring wideName(debugName, debugName + strlen(debugName)); + pipelineState->SetName(wideName.c_str()); + } + + *outPipelineState = pipelineState.Get(); + pipelineState->AddRef(); // Transfer ownership + return Result(); +} + +//============================================================================= +// D3D12ComputePipelineBuilder +//============================================================================= + +D3D12ComputePipelineBuilder::D3D12ComputePipelineBuilder() { + // Zero-initialize the descriptor + psoDesc_ = {}; + psoDesc_.NodeMask = 0; + psoDesc_.CachedPSO.pCachedBlob = nullptr; + psoDesc_.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; +} + +D3D12ComputePipelineBuilder& D3D12ComputePipelineBuilder::shaderBytecode( + const std::vector& bytecode) { + csBytecode_ = bytecode; + psoDesc_.CS.pShaderBytecode = csBytecode_.data(); + psoDesc_.CS.BytecodeLength = csBytecode_.size(); + return *this; +} + +Result D3D12ComputePipelineBuilder::build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName) { + if (!device) { + return Result(Result::Code::ArgumentNull, "Device is null"); + } + if 
(!rootSignature) { + return Result(Result::Code::ArgumentNull, "Root signature is null"); + } + if (!outPipelineState) { + return Result(Result::Code::ArgumentNull, "Output pipeline state is null"); + } + + // Initialize output to null for safety + *outPipelineState = nullptr; + + // Validate shader bytecode + if (psoDesc_.CS.BytecodeLength == 0) { + return Result(Result::Code::ArgumentInvalid, "Compute shader bytecode is required"); + } + + // Set root signature + psoDesc_.pRootSignature = rootSignature; + + // Create pipeline state + igl::d3d12::ComPtr pipelineState; + HRESULT hr = device->CreateComputePipelineState(&psoDesc_, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create compute pipeline state. HRESULT: 0x%08X", + static_cast(hr)); + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Set debug name if provided + if (debugName && debugName[0] != '\0') { + std::wstring wideName(debugName, debugName + strlen(debugName)); + pipelineState->SetName(wideName.c_str()); + } + + *outPipelineState = pipelineState.Get(); + pipelineState->AddRef(); // Transfer ownership + return Result(); +} + +//============================================================================= +// D3D12RootSignatureBuilder +//============================================================================= + +D3D12RootSignatureBuilder::D3D12RootSignatureBuilder() { + flags_ = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootConstants( + UINT shaderRegister, + UINT num32BitValues, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + param.param.Constants.ShaderRegister = shaderRegister; + param.param.Constants.RegisterSpace = registerSpace; + param.param.Constants.Num32BitValues = num32BitValues; + 
param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootCBV(UINT shaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + param.param.Descriptor.ShaderRegister = shaderRegister; + param.param.Descriptor.RegisterSpace = registerSpace; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootSRV(UINT shaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + param.param.Descriptor.ShaderRegister = shaderRegister; + param.param.Descriptor.RegisterSpace = registerSpace; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootUAV(UINT shaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + param.param.Descriptor.ShaderRegister = shaderRegister; + param.param.Descriptor.RegisterSpace = registerSpace; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addDescriptorTable( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Create descriptor range + DescriptorRange range; + range.range.RangeType = rangeType; + range.range.NumDescriptors = numDescriptors; + 
range.range.BaseShaderRegister = baseShaderRegister; + range.range.RegisterSpace = registerSpace; + range.range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + param.ranges.push_back(range); + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::flags(D3D12_ROOT_SIGNATURE_FLAGS flags) { + flags_ = flags; + return *this; +} + +Result D3D12RootSignatureBuilder::build(ID3D12Device* device, + const D3D12Context* context, + ID3D12RootSignature** outRootSignature) { + if (!device) { + return Result(Result::Code::ArgumentNull, "Device is null"); + } + if (!outRootSignature) { + return Result(Result::Code::ArgumentNull, "Output root signature is null"); + } + + // Initialize output to null for safety + *outRootSignature = nullptr; + + // Build arrays of D3D12_ROOT_PARAMETER and descriptor ranges + std::vector d3d12Params; + std::vector> allRanges; + + d3d12Params.reserve(rootParameters_.size()); + allRanges.reserve(rootParameters_.size()); + + for (auto& param : rootParameters_) { + if (param.param.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { + // Store ranges for this table + std::vector ranges; + ranges.reserve(param.ranges.size()); + for (auto& r : param.ranges) { + D3D12_DESCRIPTOR_RANGE range = r.range; + if (context) { + const UINT maxCount = getMaxDescriptorCount(context, range.RangeType); + if (range.NumDescriptors == UINT_MAX || range.NumDescriptors > maxCount) { + range.NumDescriptors = maxCount; + } + } + ranges.push_back(range); + } + allRanges.push_back(std::move(ranges)); + + // Update descriptor table to point to the ranges + D3D12_ROOT_PARAMETER d3d12Param = param.param; + d3d12Param.DescriptorTable.NumDescriptorRanges = + static_cast(allRanges.back().size()); + d3d12Param.DescriptorTable.pDescriptorRanges = allRanges.back().data(); + d3d12Params.push_back(d3d12Param); + } else { + // Not a descriptor table, just copy + 
d3d12Params.push_back(param.param); + } + } + + // Build root signature descriptor + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = static_cast(d3d12Params.size()); + rootSigDesc.pParameters = d3d12Params.data(); + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = flags_; + + // Validate cost (64 DWORD limit) + const uint32_t cost = calculateRootSignatureCostInternal(rootSigDesc); + if (cost > 64) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Root signature cost exceeds 64 DWORD limit: %u DWORDs", cost); + return Result(Result::Code::ArgumentOutOfRange, errorMsg); + } + + // Serialize root signature + igl::d3d12::ComPtr signature; + igl::d3d12::ComPtr error; + HRESULT hr = D3D12SerializeRootSignature(&rootSigDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.GetAddressOf(), + error.GetAddressOf()); + if (FAILED(hr)) { + const char* errorStr = error.Get() ? static_cast(error->GetBufferPointer()) + : "Unknown error"; + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to serialize root signature. HRESULT: 0x%08X, Error: %s", + static_cast(hr), errorStr); + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Create root signature + igl::d3d12::ComPtr rootSignature; + hr = device->CreateRootSignature(0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(rootSignature.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create root signature. 
HRESULT: 0x%08X", + static_cast(hr)); + return Result(Result::Code::RuntimeError, errorMsg); + } + + *outRootSignature = rootSignature.Get(); + rootSignature->AddRef(); // Transfer ownership + return Result(); +} + +UINT D3D12RootSignatureBuilder::getMaxDescriptorCount(const D3D12Context* context, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType) { + if (!context) { + return 128; // Conservative default + } + + const D3D12_RESOURCE_BINDING_TIER bindingTier = context->getResourceBindingTier(); + const bool needsBoundedRanges = (bindingTier == D3D12_RESOURCE_BINDING_TIER_1); + + if (!needsBoundedRanges) { + return UINT_MAX; // Unbounded + } + + // Conservative bounds for Tier 1 devices + switch (rangeType) { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + return 128; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + return 64; + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + return 64; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + return 32; + default: + return 128; + } +} + +uint32_t D3D12RootSignatureBuilder::calculateCost() const { + // Build temporary descriptor for cost calculation + std::vector d3d12Params; + std::vector> allRanges; + + d3d12Params.reserve(rootParameters_.size()); + allRanges.reserve(rootParameters_.size()); + + for (const auto& param : rootParameters_) { + if (param.param.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { + std::vector ranges; + ranges.reserve(param.ranges.size()); + for (const auto& r : param.ranges) { + ranges.push_back(r.range); + } + allRanges.push_back(std::move(ranges)); + + D3D12_ROOT_PARAMETER d3d12Param = param.param; + d3d12Param.DescriptorTable.NumDescriptorRanges = + static_cast(allRanges.back().size()); + d3d12Param.DescriptorTable.pDescriptorRanges = allRanges.back().data(); + d3d12Params.push_back(d3d12Param); + } else { + d3d12Params.push_back(param.param); + } + } + + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = static_cast(d3d12Params.size()); + rootSigDesc.pParameters = d3d12Params.data(); + 
rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = flags_; + + return calculateRootSignatureCostInternal(rootSigDesc); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineBuilder.h b/src/igl/d3d12/D3D12PipelineBuilder.h new file mode 100644 index 0000000000..bb5e9c9467 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineBuilder.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +class D3D12Context; + +/** + * @brief Fluent builder for D3D12 graphics pipeline state objects + * + * Encapsulates the complex setup of D3D12_GRAPHICS_PIPELINE_STATE_DESC + * and provides a clean, chainable API similar to VulkanPipelineBuilder. + * + * Usage: + * D3D12GraphicsPipelineBuilder builder; + * builder.shaderBytecode(vsBytecode, psBytecode) + * .vertexInputLayout(inputElements) + * .blendState(blendDesc) + * .rasterizerState(rasterizerDesc) + * .depthStencilState(depthStencilDesc) + * .renderTargetFormats(rtvFormats) + * .sampleCount(sampleCount) + * .primitiveTopology(topology); + * auto result = builder.build(device, rootSignature, outPipelineState); + */ +class D3D12GraphicsPipelineBuilder final { + public: + D3D12GraphicsPipelineBuilder(); + ~D3D12GraphicsPipelineBuilder() = default; + + // Shader configuration + D3D12GraphicsPipelineBuilder& vertexShader(const std::vector& bytecode); + D3D12GraphicsPipelineBuilder& pixelShader(const std::vector& bytecode); + D3D12GraphicsPipelineBuilder& shaderBytecode(const std::vector& vs, + const std::vector& ps); + + // Vertex input layout + D3D12GraphicsPipelineBuilder& vertexInputLayout( + const std::vector& elements); + + // Blend state + D3D12GraphicsPipelineBuilder& blendState(const D3D12_BLEND_DESC& desc); + 
D3D12GraphicsPipelineBuilder& blendStateForAttachment( + UINT attachmentIndex, + const RenderPipelineDesc::TargetDesc::ColorAttachment& attachment); + + // Rasterizer state + D3D12GraphicsPipelineBuilder& rasterizerState(const D3D12_RASTERIZER_DESC& desc); + D3D12GraphicsPipelineBuilder& cullMode(CullMode mode); + D3D12GraphicsPipelineBuilder& frontFaceWinding(WindingMode mode); + D3D12GraphicsPipelineBuilder& polygonFillMode(PolygonFillMode mode); + + // Depth-stencil state + D3D12GraphicsPipelineBuilder& depthStencilState(const D3D12_DEPTH_STENCIL_DESC& desc); + D3D12GraphicsPipelineBuilder& depthTestEnabled(bool enabled); + D3D12GraphicsPipelineBuilder& depthWriteEnabled(bool enabled); + D3D12GraphicsPipelineBuilder& depthCompareFunc(D3D12_COMPARISON_FUNC func); + + // Render target configuration + D3D12GraphicsPipelineBuilder& renderTargetFormat(UINT index, DXGI_FORMAT format); + D3D12GraphicsPipelineBuilder& renderTargetFormats(const std::vector& formats); + D3D12GraphicsPipelineBuilder& depthStencilFormat(DXGI_FORMAT format); + D3D12GraphicsPipelineBuilder& numRenderTargets(UINT count); + + // Sample configuration + D3D12GraphicsPipelineBuilder& sampleCount(UINT count); + D3D12GraphicsPipelineBuilder& sampleMask(UINT mask); + + // Primitive topology + D3D12GraphicsPipelineBuilder& primitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE type); + + // Stream output (optional) + D3D12GraphicsPipelineBuilder& streamOutput(const D3D12_STREAM_OUTPUT_DESC& desc); + + // Build the pipeline state object + [[nodiscard]] Result build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName = nullptr); + + // Get the current PSO desc (for inspection/debugging) + [[nodiscard]] const D3D12_GRAPHICS_PIPELINE_STATE_DESC& getDesc() const { + return psoDesc_; + } + + private: + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc_; + std::vector inputElements_; + std::vector vsBytecode_; + std::vector psBytecode_; +}; + 
+/** + * @brief Fluent builder for D3D12 compute pipeline state objects + * + * Simplified builder for compute shaders. + * + * Usage: + * D3D12ComputePipelineBuilder builder; + * builder.shaderBytecode(csBytecode); + * auto result = builder.build(device, rootSignature, outPipelineState); + */ +class D3D12ComputePipelineBuilder final { + public: + D3D12ComputePipelineBuilder(); + ~D3D12ComputePipelineBuilder() = default; + + // Shader configuration + D3D12ComputePipelineBuilder& shaderBytecode(const std::vector& bytecode); + + // Build the pipeline state object + [[nodiscard]] Result build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName = nullptr); + + // Get the current PSO desc (for inspection/debugging) + [[nodiscard]] const D3D12_COMPUTE_PIPELINE_STATE_DESC& getDesc() const { + return psoDesc_; + } + + private: + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc_; + std::vector csBytecode_; +}; + +/** + * @brief Builder for D3D12 root signatures + * + * Encapsulates root signature creation with support for: + * - Root constants (push constants) + * - Root descriptors (CBVs) + * - Descriptor tables (CBV/SRV/UAV/Sampler) + * - Automatic cost calculation and validation + * + * Usage: + * D3D12RootSignatureBuilder builder; + * builder.addRootConstants(shaderRegister, num32BitValues) + * .addRootCBV(shaderRegister) + * .addDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, count, baseRegister); + * auto result = builder.build(device, context, outRootSignature); + */ +class D3D12RootSignatureBuilder final { + public: + D3D12RootSignatureBuilder(); + ~D3D12RootSignatureBuilder() = default; + + // Root constants (inline 32-bit values) + D3D12RootSignatureBuilder& addRootConstants(UINT shaderRegister, + UINT num32BitValues, + UINT registerSpace = 0); + + // Root descriptors (CBV/SRV/UAV accessed directly via GPU virtual address) + D3D12RootSignatureBuilder& addRootCBV(UINT shaderRegister, UINT 
registerSpace = 0); + D3D12RootSignatureBuilder& addRootSRV(UINT shaderRegister, UINT registerSpace = 0); + D3D12RootSignatureBuilder& addRootUAV(UINT shaderRegister, UINT registerSpace = 0); + + // Descriptor tables + D3D12RootSignatureBuilder& addDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0); + + // Flags + D3D12RootSignatureBuilder& flags(D3D12_ROOT_SIGNATURE_FLAGS flags); + + // Build the root signature + // Note: context parameter is reserved for future tier-based validation. + // Currently, callers should use getMaxDescriptorCount() when configuring + // descriptor tables to ensure hardware compatibility. + [[nodiscard]] Result build(ID3D12Device* device, + const D3D12Context* context, + ID3D12RootSignature** outRootSignature); + + // Query limits from device - use this when calling addDescriptorTable() + // to ensure descriptor counts are within hardware tier limits + static UINT getMaxDescriptorCount(const D3D12Context* context, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType); + + // Calculate root signature cost in DWORDs (must be <= 64) + [[nodiscard]] uint32_t calculateCost() const; + + private: + struct DescriptorRange { + D3D12_DESCRIPTOR_RANGE range; + }; + + struct RootParameter { + D3D12_ROOT_PARAMETER param; + std::vector ranges; // For descriptor tables + }; + + std::vector rootParameters_; + D3D12_ROOT_SIGNATURE_FLAGS flags_ = D3D12_ROOT_SIGNATURE_FLAG_NONE; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineCache.cpp b/src/igl/d3d12/D3D12PipelineCache.cpp new file mode 100644 index 0000000000..8ba02033c6 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineCache.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +ComPtr D3D12PipelineCache::createRootSignatureFromKey( + ID3D12Device* d3dDevice, + const D3D12RootSignatureKey& key, + D3D12_RESOURCE_BINDING_TIER bindingTier, + Result* IGL_NULLABLE outResult) const { + + if (!d3dDevice) { + Result::setResult(outResult, Result::Code::InvalidOperation, "D3D12 device is null"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Creating root signature from reflection key:\n"); + if (key.hasPushConstants) { + IGL_D3D12_LOG_VERBOSE(" Push constants: b%u (%u DWORDs)\n", + key.pushConstantSlot, key.pushConstantSize); + } + IGL_D3D12_LOG_VERBOSE(" CBV slots: %zu, SRV slots: %zu, UAV slots: %zu, Sampler slots: %zu\n", + key.usedCBVSlots.size(), key.usedSRVSlots.size(), + key.usedUAVSlots.size(), key.usedSamplerSlots.size()); + + // Determine if we need bounded ranges (Tier 1 hardware) + const bool needsBoundedRanges = (bindingTier == D3D12_RESOURCE_BINDING_TIER_1); + const UINT srvBound = needsBoundedRanges ? 128 : UINT_MAX; + const UINT samplerBound = needsBoundedRanges ? 32 : UINT_MAX; + const UINT uavBound = needsBoundedRanges ? 
8 : UINT_MAX; + + // Build descriptor ranges dynamically - only create ranges for resource types the shader uses + // The ranges must remain stable (no reallocation) since root parameters will point to them + std::vector descriptorRanges; + descriptorRanges.reserve(4); // Maximum: CBV, SRV, Sampler, UAV + + // Track which descriptor range index corresponds to each resource type + size_t cbvRangeIndex = SIZE_MAX; + size_t srvRangeIndex = SIZE_MAX; + size_t samplerRangeIndex = SIZE_MAX; + size_t uavRangeIndex = SIZE_MAX; + + // CBV descriptor table (only if shader uses CBVs) + if (!key.usedCBVSlots.empty()) { + cbvRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxCBVSlot = key.maxCBVSlot; + UINT numCBVs = maxCBVSlot + 1; + + D3D12_DESCRIPTOR_RANGE cbvRange = {}; + cbvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + cbvRange.NumDescriptors = numCBVs; + cbvRange.BaseShaderRegister = 0; + cbvRange.RegisterSpace = 0; + cbvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(cbvRange); + } + + // SRV descriptor table (only if shader uses SRVs) + if (!key.usedSRVSlots.empty()) { + srvRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxSRVSlot = key.maxSRVSlot; + UINT numSRVs = maxSRVSlot + 1; + + D3D12_DESCRIPTOR_RANGE srvRange = {}; + srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + srvRange.NumDescriptors = numSRVs; + srvRange.BaseShaderRegister = 0; + srvRange.RegisterSpace = 0; + srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(srvRange); + } + + // Sampler descriptor table (only if shader uses samplers) + if (!key.usedSamplerSlots.empty()) { + samplerRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor 
tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxSamplerSlot = key.maxSamplerSlot; + UINT numSamplers = maxSamplerSlot + 1; + + D3D12_DESCRIPTOR_RANGE samplerRange = {}; + samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + samplerRange.NumDescriptors = numSamplers; + samplerRange.BaseShaderRegister = 0; + samplerRange.RegisterSpace = 0; + samplerRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(samplerRange); + } + + // UAV descriptor table (only if shader uses UAVs) + if (!key.usedUAVSlots.empty()) { + uavRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxUAVSlot = key.maxUAVSlot; + UINT numUAVs = maxUAVSlot + 1; + + D3D12_DESCRIPTOR_RANGE uavRange = {}; + uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + uavRange.NumDescriptors = numUAVs; + uavRange.BaseShaderRegister = 0; + uavRange.RegisterSpace = 0; + uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(uavRange); + } + + // Build root parameters dynamically based on shader reflection (Vulkan approach) + // Only include what the shader actually declares - no hardcoded assumptions + // Order: Push constants, CBV table, SRV table, Sampler table, UAV table + std::vector rootParams; + + // Track which root parameter index corresponds to each resource type + UINT pushConstantRootParamIndex = UINT_MAX; + UINT cbvTableRootParamIndex = UINT_MAX; + UINT srvTableRootParamIndex = UINT_MAX; + UINT samplerTableRootParamIndex = UINT_MAX; + UINT uavTableRootParamIndex = UINT_MAX; + + // Add push constants if shader uses them (always first if present) + if (key.hasPushConstants) { + pushConstantRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER pushConstParam = {}; + pushConstParam.ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + pushConstParam.Constants.ShaderRegister = key.pushConstantSlot; + pushConstParam.Constants.RegisterSpace = 0; + pushConstParam.Constants.Num32BitValues = key.pushConstantSize; + pushConstParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(pushConstParam); + } + + // Add CBV descriptor table if shader uses any CBV slots + if (!key.usedCBVSlots.empty() && cbvRangeIndex != SIZE_MAX) { + cbvTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER cbvTableParam = {}; + cbvTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + cbvTableParam.DescriptorTable.NumDescriptorRanges = 1; + cbvTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[cbvRangeIndex]; + cbvTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(cbvTableParam); + } + + // Add SRV descriptor table if shader uses any SRV slots + if (!key.usedSRVSlots.empty() && srvRangeIndex != SIZE_MAX) { + srvTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER srvTableParam = {}; + srvTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + srvTableParam.DescriptorTable.NumDescriptorRanges = 1; + srvTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[srvRangeIndex]; + srvTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(srvTableParam); + } + + // Add Sampler descriptor table if shader uses any sampler slots + if (!key.usedSamplerSlots.empty() && samplerRangeIndex != SIZE_MAX) { + samplerTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER samplerTableParam = {}; + samplerTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + samplerTableParam.DescriptorTable.NumDescriptorRanges = 1; + samplerTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[samplerRangeIndex]; + samplerTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + 
rootParams.push_back(samplerTableParam); + } + + // Add UAV descriptor table if shader uses any UAV slots + if (!key.usedUAVSlots.empty() && uavRangeIndex != SIZE_MAX) { + uavTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER uavTableParam = {}; + uavTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + uavTableParam.DescriptorTable.NumDescriptorRanges = 1; + uavTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[uavRangeIndex]; + uavTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(uavTableParam); + } + + // Create root signature desc + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = static_cast(rootParams.size()); + rootSigDesc.pParameters = rootParams.data(); + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = key.flags; + + IGL_D3D12_LOG_VERBOSE(" Root signature has %u parameters\n", rootSigDesc.NumParameters); + + // Use existing caching infrastructure + return getOrCreateRootSignature(d3dDevice, rootSigDesc, outResult); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineCache.h b/src/igl/d3d12/D3D12PipelineCache.h new file mode 100644 index 0000000000..7888e620f3 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineCache.h @@ -0,0 +1,471 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class D3D12PipelineCache { + public: + D3D12PipelineCache() = default; + + void clear(); + + private: + size_t hashRootSignature(const D3D12_ROOT_SIGNATURE_DESC& desc) const; + ComPtr getOrCreateRootSignature( + ID3D12Device* d3dDevice, + const D3D12_ROOT_SIGNATURE_DESC& desc, + Result* IGL_NULLABLE outResult) const; + + // Create root signature from reflection-based key (for dynamic binding) + ComPtr createRootSignatureFromKey( + ID3D12Device* d3dDevice, + const struct D3D12RootSignatureKey& key, + D3D12_RESOURCE_BINDING_TIER bindingTier, + Result* IGL_NULLABLE outResult) const; + + size_t hashRenderPipelineDesc(const RenderPipelineDesc& desc) const; + size_t hashComputePipelineDesc(const ComputePipelineDesc& desc) const; + + mutable std::unordered_map> graphicsPSOCache_; + mutable std::unordered_map> computePSOCache_; + mutable std::mutex psoCacheMutex_; + mutable size_t graphicsPSOCacheHits_ = 0; + mutable size_t graphicsPSOCacheMisses_ = 0; + mutable size_t computePSOCacheHits_ = 0; + mutable size_t computePSOCacheMisses_ = 0; + + mutable std::unordered_map> rootSignatureCache_; + mutable std::mutex rootSignatureCacheMutex_; + mutable size_t rootSignatureCacheHits_ = 0; + mutable size_t rootSignatureCacheMisses_ = 0; + + std::vector mipmapVSBytecode_; + std::vector mipmapPSBytecode_; + ComPtr mipmapRootSignature_; + bool mipmapShadersAvailable_ = false; + + friend class Device; +}; + +inline void D3D12PipelineCache::clear() { + { + std::lock_guard lock(psoCacheMutex_); + graphicsPSOCache_.clear(); + computePSOCache_.clear(); + graphicsPSOCacheHits_ = 0; + graphicsPSOCacheMisses_ = 0; + computePSOCacheHits_ = 0; + computePSOCacheMisses_ = 0; + } + { + std::lock_guard lock(rootSignatureCacheMutex_); + rootSignatureCache_.clear(); + rootSignatureCacheHits_ = 0; + rootSignatureCacheMisses_ = 0; + } + 
mipmapVSBytecode_.clear(); + mipmapPSBytecode_.clear(); + mipmapRootSignature_.Reset(); + mipmapShadersAvailable_ = false; +} + +inline size_t D3D12PipelineCache::hashRootSignature( + const D3D12_ROOT_SIGNATURE_DESC& desc) const { + size_t hash = 0; + + hashCombine(hash, static_cast(desc.Flags)); + hashCombine(hash, static_cast(desc.NumParameters)); + + for (UINT i = 0; i < desc.NumParameters; ++i) { + const auto& param = desc.pParameters[i]; + + hashCombine(hash, static_cast(param.ParameterType)); + hashCombine(hash, static_cast(param.ShaderVisibility)); + + switch (param.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: { + hashCombine(hash, + static_cast(param.DescriptorTable.NumDescriptorRanges)); + + for (UINT j = 0; j < param.DescriptorTable.NumDescriptorRanges; ++j) { + const auto& range = param.DescriptorTable.pDescriptorRanges[j]; + hashCombine(hash, static_cast(range.RangeType)); + hashCombine(hash, static_cast(range.NumDescriptors)); + hashCombine(hash, static_cast(range.BaseShaderRegister)); + hashCombine(hash, static_cast(range.RegisterSpace)); + hashCombine( + hash, + static_cast(range.OffsetInDescriptorsFromTableStart)); + } + break; + } + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: { + hashCombine(hash, static_cast(param.Constants.ShaderRegister)); + hashCombine(hash, static_cast(param.Constants.RegisterSpace)); + hashCombine(hash, static_cast(param.Constants.Num32BitValues)); + break; + } + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: { + hashCombine(hash, static_cast(param.Descriptor.ShaderRegister)); + hashCombine(hash, static_cast(param.Descriptor.RegisterSpace)); + break; + } + } + } + + hashCombine(hash, static_cast(desc.NumStaticSamplers)); + for (UINT i = 0; i < desc.NumStaticSamplers; ++i) { + const auto& sampler = desc.pStaticSamplers[i]; + hashCombine(hash, static_cast(sampler.Filter)); + hashCombine(hash, static_cast(sampler.AddressU)); + 
hashCombine(hash, static_cast(sampler.AddressV)); + hashCombine(hash, static_cast(sampler.AddressW)); + hashCombine(hash, static_cast(sampler.ComparisonFunc)); + hashCombine(hash, static_cast(sampler.ShaderRegister)); + hashCombine(hash, static_cast(sampler.RegisterSpace)); + hashCombine(hash, static_cast(sampler.ShaderVisibility)); + } + + return hash; +} + +inline ComPtr D3D12PipelineCache::getOrCreateRootSignature( + ID3D12Device* d3dDevice, + const D3D12_ROOT_SIGNATURE_DESC& desc, + Result* IGL_NULLABLE outResult) const { + const size_t hash = hashRootSignature(desc); + + { + std::lock_guard lock(rootSignatureCacheMutex_); + auto it = rootSignatureCache_.find(hash); + if (it != rootSignatureCache_.end()) { + rootSignatureCacheHits_++; + IGL_D3D12_LOG_VERBOSE( + " Root signature cache HIT (hash=0x%zx, hits=%zu, misses=%zu)\n", + hash, + rootSignatureCacheHits_, + rootSignatureCacheMisses_); + return it->second; + } + } + + rootSignatureCacheMisses_++; + IGL_D3D12_LOG_VERBOSE( + " Root signature cache MISS (hash=0x%zx, hits=%zu, misses=%zu)\n", + hash, + rootSignatureCacheHits_, + rootSignatureCacheMisses_); + + if (!d3dDevice) { + Result::setResult(outResult, + Result::Code::InvalidOperation, + "D3D12 device is null"); + return nullptr; + } + + ComPtr signature; + ComPtr error; + + // Query highest supported root signature version for this device. + D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData{}; + featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; + HRESULT featureHr = d3dDevice->CheckFeatureSupport( + D3D12_FEATURE_ROOT_SIGNATURE, &featureData, sizeof(featureData)); + D3D_ROOT_SIGNATURE_VERSION highestVersion = + SUCCEEDED(featureHr) ? featureData.HighestVersion : D3D_ROOT_SIGNATURE_VERSION_1_0; + + HRESULT hr = E_FAIL; + + if (highestVersion >= D3D_ROOT_SIGNATURE_VERSION_1_1) { + // Use versioned root signature (1.1) when available and preserve NumDescriptors + // as-is (Tier 2/3 unbounded ranges are expressed via UINT_MAX). 
+ std::vector params1; + std::vector> rangesPerParam; + params1.reserve(desc.NumParameters); + rangesPerParam.reserve(desc.NumParameters); + + for (UINT i = 0; i < desc.NumParameters; ++i) { + const D3D12_ROOT_PARAMETER& srcParam = desc.pParameters[i]; + D3D12_ROOT_PARAMETER1 dstParam{}; + dstParam.ParameterType = srcParam.ParameterType; + dstParam.ShaderVisibility = srcParam.ShaderVisibility; + + switch (srcParam.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + dstParam.Constants.ShaderRegister = srcParam.Constants.ShaderRegister; + dstParam.Constants.RegisterSpace = srcParam.Constants.RegisterSpace; + dstParam.Constants.Num32BitValues = srcParam.Constants.Num32BitValues; + break; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + dstParam.Descriptor.ShaderRegister = srcParam.Descriptor.ShaderRegister; + dstParam.Descriptor.RegisterSpace = srcParam.Descriptor.RegisterSpace; + dstParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + break; + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: { + const UINT numRanges = srcParam.DescriptorTable.NumDescriptorRanges; + const D3D12_DESCRIPTOR_RANGE* srcRanges = srcParam.DescriptorTable.pDescriptorRanges; + + if (numRanges > 0 && srcRanges) { + rangesPerParam.emplace_back(); + auto& dstRanges = rangesPerParam.back(); + dstRanges.resize(numRanges); + + for (UINT j = 0; j < numRanges; ++j) { + const D3D12_DESCRIPTOR_RANGE& srcRange = srcRanges[j]; + D3D12_DESCRIPTOR_RANGE1 dstRange{}; + dstRange.RangeType = srcRange.RangeType; + dstRange.NumDescriptors = srcRange.NumDescriptors; + dstRange.BaseShaderRegister = srcRange.BaseShaderRegister; + dstRange.RegisterSpace = srcRange.RegisterSpace; + // Mark descriptor ranges as DESCRIPTORS_VOLATILE to match the dynamic + // per-draw descriptor update pattern used by D3D12ResourcesBinder. 
+ // This avoids D3D12 WARNING/ERROR ID=646, which requires all descriptors + // in STATIC ranges to be initialized before binding the table. + dstRange.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; + dstRange.OffsetInDescriptorsFromTableStart = + srcRange.OffsetInDescriptorsFromTableStart; + dstRanges[j] = dstRange; + } + + dstParam.DescriptorTable.NumDescriptorRanges = numRanges; + dstParam.DescriptorTable.pDescriptorRanges = dstRanges.data(); + } else { + dstParam.DescriptorTable.NumDescriptorRanges = 0; + dstParam.DescriptorTable.pDescriptorRanges = nullptr; + } + break; + } + } + + params1.push_back(dstParam); + } + + D3D12_ROOT_SIGNATURE_DESC1 desc1{}; + desc1.NumParameters = static_cast(params1.size()); + desc1.pParameters = params1.data(); + desc1.NumStaticSamplers = desc.NumStaticSamplers; + desc1.pStaticSamplers = desc.pStaticSamplers; + desc1.Flags = desc.Flags; + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC versionedDesc{}; + versionedDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + versionedDesc.Desc_1_1 = desc1; + + IGL_D3D12_LOG_VERBOSE(" Serializing root signature (version 1.1)...\n"); + hr = D3D12SerializeVersionedRootSignature( + &versionedDesc, signature.GetAddressOf(), error.GetAddressOf()); + } else { + // Device only supports Root Signature 1.0. Clamp any unbounded descriptor ranges + // (NumDescriptors == UINT_MAX) to a large but finite conservative value so that + // the serialized root signature is portable across RS 1.0 implementations. + constexpr UINT kMaxDescriptorsFallback = 16384; // Sufficient for current heap sizes. 
+ + std::vector params; + std::vector> rangesPerParam; + params.reserve(desc.NumParameters); + rangesPerParam.reserve(desc.NumParameters); + + for (UINT i = 0; i < desc.NumParameters; ++i) { + const D3D12_ROOT_PARAMETER& srcParam = desc.pParameters[i]; + D3D12_ROOT_PARAMETER dstParam = srcParam; + + if (srcParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE && + srcParam.DescriptorTable.NumDescriptorRanges > 0 && + srcParam.DescriptorTable.pDescriptorRanges) { + const UINT numRanges = srcParam.DescriptorTable.NumDescriptorRanges; + const D3D12_DESCRIPTOR_RANGE* srcRanges = srcParam.DescriptorTable.pDescriptorRanges; + + rangesPerParam.emplace_back(); + auto& dstRanges = rangesPerParam.back(); + dstRanges.resize(numRanges); + + for (UINT j = 0; j < numRanges; ++j) { + dstRanges[j] = srcRanges[j]; + if (dstRanges[j].NumDescriptors == UINT_MAX) { + dstRanges[j].NumDescriptors = kMaxDescriptorsFallback; + } + } + + dstParam.DescriptorTable.NumDescriptorRanges = numRanges; + dstParam.DescriptorTable.pDescriptorRanges = dstRanges.data(); + } + + params.push_back(dstParam); + } + + D3D12_ROOT_SIGNATURE_DESC adjustedDesc{}; + adjustedDesc.NumParameters = static_cast(params.size()); + adjustedDesc.pParameters = params.data(); + adjustedDesc.NumStaticSamplers = desc.NumStaticSamplers; + adjustedDesc.pStaticSamplers = desc.pStaticSamplers; + adjustedDesc.Flags = desc.Flags; + + IGL_D3D12_LOG_VERBOSE(" Serializing root signature (version 1.0, bounded ranges)...\n"); + hr = D3D12SerializeRootSignature( + &adjustedDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.GetAddressOf(), + error.GetAddressOf()); + } + + if (FAILED(hr)) { + if (error.Get()) { + const char* errorMsg = + static_cast(error->GetBufferPointer()); + IGL_LOG_ERROR("Root signature serialization error: %s\n", errorMsg); + } + Result::setResult(outResult, + Result::Code::RuntimeError, + "Failed to serialize root signature"); + return nullptr; + } + + ComPtr rootSignature; + hr = 
d3dDevice->CreateRootSignature(0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(rootSignature.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR( + " CreateRootSignature FAILED: 0x%08X\n", + static_cast(hr)); + Result::setResult(outResult, + Result::Code::RuntimeError, + "Failed to create root signature"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Root signature created successfully\n"); + + { + std::lock_guard lock(rootSignatureCacheMutex_); + rootSignatureCache_[hash] = rootSignature; + } + + return rootSignature; +} + +inline size_t D3D12PipelineCache::hashRenderPipelineDesc( + const RenderPipelineDesc& desc) const { + size_t hash = 0; + + if (desc.shaderStages) { + auto* vertexModule = + static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = + static_cast(desc.shaderStages->getFragmentModule().get()); + + if (vertexModule) { + const auto& vsBytecode = vertexModule->getBytecode(); + hashCombine(hash, vsBytecode.size()); + size_t bytesToHash = std::min(256, vsBytecode.size()); + for (size_t i = 0; i < bytesToHash; i += 8) { + hashCombine(hash, static_cast(vsBytecode[i])); + } + } + + if (fragmentModule) { + const auto& psBytecode = fragmentModule->getBytecode(); + hashCombine(hash, psBytecode.size()); + size_t bytesToHash = std::min(256, psBytecode.size()); + for (size_t i = 0; i < bytesToHash; i += 8) { + hashCombine(hash, static_cast(psBytecode[i])); + } + } + } + + if (desc.vertexInputState) { + auto* d3d12VertexInput = + static_cast(desc.vertexInputState.get()); + const auto& vertexDesc = d3d12VertexInput->getDesc(); + hashCombine(hash, vertexDesc.numAttributes); + for (size_t i = 0; i < vertexDesc.numAttributes; ++i) { + hashCombine(hash, + static_cast(vertexDesc.attributes[i].format)); + hashCombine(hash, vertexDesc.attributes[i].offset); + hashCombine(hash, vertexDesc.attributes[i].bufferIndex); + hashCombine( + hash, + std::hash{}(vertexDesc.attributes[i].name)); + } + } + + 
hashCombine(hash, desc.targetDesc.colorAttachments.size()); + for (const auto& att : desc.targetDesc.colorAttachments) { + hashCombine(hash, static_cast(att.textureFormat)); + } + hashCombine(hash, + static_cast(desc.targetDesc.depthAttachmentFormat)); + hashCombine(hash, + static_cast(desc.targetDesc.stencilAttachmentFormat)); + + for (const auto& att : desc.targetDesc.colorAttachments) { + hashCombine(hash, att.blendEnabled ? 1 : 0); + hashCombine(hash, static_cast(att.srcRGBBlendFactor)); + hashCombine(hash, static_cast(att.dstRGBBlendFactor)); + hashCombine(hash, static_cast(att.rgbBlendOp)); + hashCombine(hash, static_cast(att.srcAlphaBlendFactor)); + hashCombine(hash, static_cast(att.dstAlphaBlendFactor)); + hashCombine(hash, static_cast(att.alphaBlendOp)); + hashCombine(hash, static_cast(att.colorWriteMask)); + } + + hashCombine(hash, static_cast(desc.cullMode)); + hashCombine(hash, static_cast(desc.frontFaceWinding)); + hashCombine(hash, static_cast(desc.polygonFillMode)); + + hashCombine(hash, static_cast(desc.topology)); + + hashCombine(hash, desc.sampleCount); + + return hash; +} + +inline size_t D3D12PipelineCache::hashComputePipelineDesc( + const ComputePipelineDesc& desc) const { + size_t hash = 0; + + if (desc.shaderStages) { + auto* computeModule = + static_cast(desc.shaderStages->getComputeModule().get()); + + if (computeModule) { + const auto& csBytecode = computeModule->getBytecode(); + hashCombine(hash, csBytecode.size()); + size_t bytesToHash = std::min(256, csBytecode.size()); + for (size_t i = 0; i < bytesToHash; i += 8) { + hashCombine(hash, static_cast(csBytecode[i])); + } + } + } + + for (char c : desc.debugName) { + hashCombine(hash, static_cast(c)); + } + + return hash; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PresentManager.cpp b/src/igl/d3d12/D3D12PresentManager.cpp new file mode 100644 index 0000000000..638bff867f --- /dev/null +++ b/src/igl/d3d12/D3D12PresentManager.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) 
Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +namespace igl::d3d12 { + +bool PresentManager::present() { + auto* swapChain = context_.getSwapChain(); + if (!swapChain) { + return true; // No swapchain, nothing to present + } + + auto* device = context_.getDevice(); + + // Check device status before presenting + if (!checkDeviceStatus("before Present")) { + return false; + } + + // Configure VSync via environment variable + UINT syncInterval = 1; + UINT presentFlags = 0; + { + char buf[8] = {}; + if (GetEnvironmentVariableA("IGL_D3D12_VSYNC", buf, sizeof(buf)) > 0) { + if (buf[0] == '0') { + syncInterval = 0; + if (context_.isTearingSupported()) { + presentFlags |= DXGI_PRESENT_ALLOW_TEARING; + } + } + } + } + + // Present + HRESULT presentHr = swapChain->Present(syncInterval, presentFlags); + if (FAILED(presentHr)) { + IGL_LOG_ERROR("PresentManager: Present failed: 0x%08X\n", static_cast(presentHr)); + + // Check if device was removed during Present + HRESULT deviceStatus = device->GetDeviceRemovedReason(); + if (FAILED(deviceStatus)) { + IGL_LOG_ERROR("PresentManager: DEVICE REMOVED during Present! 
Reason: 0x%08X\n", + static_cast(deviceStatus)); +#ifdef IGL_DEBUG + logInfoQueueMessages(device); + logDredInfo(device); +#endif + IGL_DEBUG_ASSERT(false); + } else { + IGL_LOG_ERROR("PresentManager: Present failed but device reports OK; check swapchain/window state\n"); + } + // Present failed - return false regardless of whether device was removed + return false; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("PresentManager: Present OK\n"); +#endif + + // Check device status after Present + if (!checkDeviceStatus("after Present")) { + return false; + } + + return true; +} + +bool PresentManager::checkDeviceStatus(const char* contextStr) { + auto* device = context_.getDevice(); + HRESULT deviceStatus = device->GetDeviceRemovedReason(); + + if (FAILED(deviceStatus)) { + IGL_LOG_ERROR("PresentManager: DEVICE REMOVED %s! Reason: 0x%08X\n", + contextStr, static_cast(deviceStatus)); +#ifdef IGL_DEBUG + logInfoQueueMessages(device); + logDredInfo(device); +#endif + IGL_DEBUG_ASSERT(false); + return false; + } + + return true; +} + +#ifdef IGL_DEBUG +void PresentManager::logInfoQueueMessages(ID3D12Device* device) { + igl::d3d12::ComPtr infoQueue; + if (FAILED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + return; + } + + UINT64 numMessages = infoQueue->GetNumStoredMessages(); + IGL_D3D12_LOG_VERBOSE("D3D12 Info Queue has %llu messages:\n", numMessages); + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + infoQueue->GetMessage(i, nullptr, &messageLength); + if (messageLength == 0) { + continue; + } + // Use RAII vector instead of malloc/free + std::vector messageBuffer(messageLength); + auto* message = reinterpret_cast(messageBuffer.data()); + if (SUCCEEDED(infoQueue->GetMessage(i, message, &messageLength))) { + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case D3D12_MESSAGE_SEVERITY_CORRUPTION: severityStr = "CORRUPTION"; break; + case D3D12_MESSAGE_SEVERITY_ERROR: severityStr = "ERROR"; break; + 
case D3D12_MESSAGE_SEVERITY_WARNING: severityStr = "WARNING"; break; + case D3D12_MESSAGE_SEVERITY_INFO: severityStr = "INFO"; break; + case D3D12_MESSAGE_SEVERITY_MESSAGE: severityStr = "MESSAGE"; break; + } + IGL_D3D12_LOG_VERBOSE(" [%s] %s\n", severityStr, message->pDescription); + } + // messageBuffer automatically freed at end of scope + } +} + +void PresentManager::logDredInfo(ID3D12Device* device) { +#if defined(__ID3D12DeviceRemovedExtendedData1_INTERFACE_DEFINED__) + igl::d3d12::ComPtr dred; + if (FAILED(device->QueryInterface(IID_PPV_ARGS(dred.GetAddressOf())))) { + IGL_D3D12_LOG_VERBOSE("DRED: ID3D12DeviceRemovedExtendedData1 not available.\n"); + return; + } + + D3D12_DRED_AUTO_BREADCRUMBS_OUTPUT1 breadcrumbs = {}; + if (SUCCEEDED(dred->GetAutoBreadcrumbsOutput1(&breadcrumbs)) && breadcrumbs.pHeadAutoBreadcrumbNode) { + IGL_LOG_ERROR("DRED AutoBreadcrumbs (most recent first):\n"); + const D3D12_AUTO_BREADCRUMB_NODE1* node = breadcrumbs.pHeadAutoBreadcrumbNode; + uint32_t nodeIndex = 0; + constexpr uint32_t kMaxNodesToPrint = 16; + while (node && nodeIndex < kMaxNodesToPrint) { + const char* listName = node->pCommandListDebugNameA ? node->pCommandListDebugNameA : ""; + const char* queueName = node->pCommandQueueDebugNameA ? node->pCommandQueueDebugNameA : ""; + IGL_LOG_ERROR(" Node #%u: CommandList=%p (%s) CommandQueue=%p (%s) Breadcrumbs=%u completed=%u\n", + nodeIndex, + node->pCommandList, + listName, + node->pCommandQueue, + queueName, + node->BreadcrumbCount, + node->pLastBreadcrumbValue ? *node->pLastBreadcrumbValue : 0); + if (node->pCommandHistory && node->BreadcrumbCount > 0) { + D3D12_AUTO_BREADCRUMB_OP lastOp = node->pCommandHistory[node->BreadcrumbCount - 1]; + IGL_LOG_ERROR(" Last command: %d (history count=%u)\n", static_cast(lastOp), node->BreadcrumbCount); + } + node = node->pNext; + ++nodeIndex; + } + if (node) { + IGL_LOG_ERROR(" ... 
additional breadcrumbs omitted ...\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("DRED: No auto breadcrumbs captured.\n"); + } + + D3D12_DRED_PAGE_FAULT_OUTPUT1 pageFault = {}; + if (SUCCEEDED(dred->GetPageFaultAllocationOutput1(&pageFault)) && pageFault.PageFaultVA != 0) { + IGL_LOG_ERROR("DRED PageFault: VA=0x%016llx\n", pageFault.PageFaultVA); + if (pageFault.pHeadExistingAllocationNode) { + const auto* alloc = pageFault.pHeadExistingAllocationNode; + IGL_LOG_ERROR(" Existing allocation: Object=%p Name=%s Type=%u\n", + alloc->pObject, + alloc->ObjectNameA ? alloc->ObjectNameA : "", + static_cast(alloc->AllocationType)); + } + if (pageFault.pHeadRecentFreedAllocationNode) { + const auto* freed = pageFault.pHeadRecentFreedAllocationNode; + IGL_LOG_ERROR(" Recently freed allocation: Object=%p Name=%s Type=%u\n", + freed->pObject, + freed->ObjectNameA ? freed->ObjectNameA : "", + static_cast(freed->AllocationType)); + } + } else { + IGL_D3D12_LOG_VERBOSE("DRED: No page fault data available.\n"); + } +#else + (void)device; + IGL_D3D12_LOG_VERBOSE("DRED: Extended data interfaces not available on this SDK.\n"); +#endif +} +#endif // IGL_DEBUG + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PresentManager.h b/src/igl/d3d12/D3D12PresentManager.h new file mode 100644 index 0000000000..5ea0c86c30 --- /dev/null +++ b/src/igl/d3d12/D3D12PresentManager.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +namespace igl::d3d12 { + +class D3D12Context; + +/** + * @brief Manages swapchain presentation with device removal detection + * + * Handles: + * - VSync configuration via environment variable + * - Present flags (tearing support) + * - Device removal detection before/after Present + * - DRED and Info Queue diagnostics on failure + */ +class PresentManager final { + public: + explicit PresentManager(D3D12Context& context) : context_(context) {} + + /** + * @brief Present the current frame with proper error handling + * + * Checks device status before and after Present, logs diagnostics on failure. + * Does not throw - sets device lost flag for application to check. + * + * @return true if present succeeded, false if device was removed or present failed + */ + bool present(); + + private: + /** + * @brief Check device status and log diagnostics if removed + */ + bool checkDeviceStatus(const char* context); + +#ifdef IGL_DEBUG + /** + * @brief Log Info Queue messages for debugging (debug builds only) + */ + void logInfoQueueMessages(ID3D12Device* device); + + /** + * @brief Log DRED breadcrumbs and page fault info (debug builds only) + */ + void logDredInfo(ID3D12Device* device); +#endif + + D3D12Context& context_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12ReflectionUtils.cpp b/src/igl/d3d12/D3D12ReflectionUtils.cpp new file mode 100644 index 0000000000..bb4554252d --- /dev/null +++ b/src/igl/d3d12/D3D12ReflectionUtils.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace igl::d3d12::ReflectionUtils { + +igl::UniformType mapUniformType(const D3D12_SHADER_TYPE_DESC& td) { + if ((td.Class == D3D_SVC_MATRIX_ROWS || td.Class == D3D_SVC_MATRIX_COLUMNS) && + td.Rows == 4 && td.Columns == 4) { + return igl::UniformType::Mat4x4; + } + if (td.Type == D3D_SVT_FLOAT) { + if (td.Class == D3D_SVC_SCALAR) return igl::UniformType::Float; + if (td.Class == D3D_SVC_VECTOR) { + switch (td.Columns) { + case 2: return igl::UniformType::Float2; + case 3: return igl::UniformType::Float3; + case 4: return igl::UniformType::Float4; + default: return igl::UniformType::Invalid; + } + } + } + return igl::UniformType::Invalid; +} + +} // namespace igl::d3d12::ReflectionUtils diff --git a/src/igl/d3d12/D3D12ReflectionUtils.h b/src/igl/d3d12/D3D12ReflectionUtils.h new file mode 100644 index 0000000000..ffc8061044 --- /dev/null +++ b/src/igl/d3d12/D3D12ReflectionUtils.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12::ReflectionUtils { + +/** + * Maps D3D12 shader type descriptor to IGL uniform type + * + * Supported types: + * - float (D3D_SVT_FLOAT + D3D_SVC_SCALAR) → UniformType::Float + * - float2/3/4 (D3D_SVT_FLOAT + D3D_SVC_VECTOR) → UniformType::Float2/3/4 + * - float4x4 (D3D_SVC_MATRIX_ROWS/COLUMNS, 4x4) → UniformType::Mat4x4 + * + * All other types (int, uint, bool, matrices other than 4x4, etc.) 
map to UniformType::Invalid + * + * @param td D3D12 shader type descriptor from reflection + * @return Corresponding IGL UniformType, or UniformType::Invalid for unsupported types + */ +igl::UniformType mapUniformType(const D3D12_SHADER_TYPE_DESC& td); + +} // namespace igl::d3d12::ReflectionUtils diff --git a/src/igl/d3d12/D3D12ResourcesBinder.cpp b/src/igl/d3d12/D3D12ResourcesBinder.cpp new file mode 100644 index 0000000000..6476d0b9e3 --- /dev/null +++ b/src/igl/d3d12/D3D12ResourcesBinder.cpp @@ -0,0 +1,929 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// D3D12 alignment requirement for constant buffer views +constexpr size_t kConstantBufferAlignment = 256; +constexpr size_t kMaxCBVSize = 65536; // 64 KB (D3D12 spec limit) + +// Compute pipeline hardcoded root parameter layout +// Note: Graphics pipelines use pure reflection-based layout queried from RenderPipelineState +// Compute pipelines still use this hardcoded layout (should be migrated to reflection) +constexpr uint32_t kComputeRootParam_PushConstants = 0; +constexpr uint32_t kComputeRootParam_UAVTable = 1; +constexpr uint32_t kComputeRootParam_SRVTable = 2; +constexpr uint32_t kComputeRootParam_CBVTable = 3; +constexpr uint32_t kComputeRootParam_SamplerTable = 4; + +} // namespace + +D3D12ResourcesBinder::D3D12ResourcesBinder(CommandBuffer& commandBuffer, bool isCompute) + : commandBuffer_(commandBuffer), isCompute_(isCompute) {} + +void D3D12ResourcesBinder::bindTexture(uint32_t index, ITexture* texture) { + if (index >= IGL_TEXTURE_SAMPLERS_MAX) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindTexture: index %u exceeds maximum %u\n", + index, + IGL_TEXTURE_SAMPLERS_MAX); + return; + } + + if (!texture) { + // Unbind 
texture at this slot + if (index < bindingsTextures_.count) { + bindingsTextures_.textures[index] = nullptr; + bindingsTextures_.handles[index] = {}; + // Update count to highest bound slot + 1 + while (bindingsTextures_.count > 0 && bindingsTextures_.textures[bindingsTextures_.count - 1] == nullptr) { + bindingsTextures_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_Textures; + return; + } + + auto* d3dTexture = static_cast(texture); + ID3D12Resource* resource = d3dTexture->getResource(); + + if (!resource) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindTexture: texture resource is null\n"); + return; + } + + // Transition texture to shader resource state + // Note: This must happen immediately, not deferred until updateBindings() + // Use pipeline-specific states for optimal barrier tracking: + // - Graphics: PIXEL_SHADER_RESOURCE (pixel shader read) + // - Compute: NON_PIXEL_SHADER_RESOURCE (compute/vertex/geometry shader read) + auto* commandList = commandBuffer_.getCommandList(); + const auto targetState = isCompute_ ? 
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE + : D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + d3dTexture->transitionAll(commandList, targetState); + + // Store texture pointer for descriptor creation in updateBindings() + bindingsTextures_.textures[index] = texture; + + // Mark textures dirty - descriptor will be created in updateBindings() + dirtyFlags_ |= DirtyFlagBits_Textures; + + // Update binding count + if (index >= bindingsTextures_.count) { + bindingsTextures_.count = index + 1; + } +} + +void D3D12ResourcesBinder::bindSamplerState(uint32_t index, ISamplerState* samplerState) { + if (index >= IGL_TEXTURE_SAMPLERS_MAX) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindSamplerState: index %u exceeds maximum %u\n", + index, + IGL_TEXTURE_SAMPLERS_MAX); + return; + } + + if (!samplerState) { + // Unbind sampler at this slot + if (index < bindingsSamplers_.count) { + bindingsSamplers_.samplers[index] = nullptr; + bindingsSamplers_.handles[index] = {}; + // Update count to highest bound slot + 1 + while (bindingsSamplers_.count > 0 && bindingsSamplers_.samplers[bindingsSamplers_.count - 1] == nullptr) { + bindingsSamplers_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_Samplers; + return; + } + + // Store sampler pointer for descriptor creation in updateBindings() + bindingsSamplers_.samplers[index] = samplerState; + + // Mark samplers dirty - descriptor will be created in updateBindings() + dirtyFlags_ |= DirtyFlagBits_Samplers; + + // Update binding count + if (index >= bindingsSamplers_.count) { + bindingsSamplers_.count = index + 1; + } +} + +void D3D12ResourcesBinder::bindBuffer(uint32_t index, + IBuffer* buffer, + size_t offset, + size_t size, + bool isUAV, + size_t elementStride) { + if (index >= IGL_BUFFER_BINDINGS_MAX) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindBuffer: index %u exceeds maximum %u\n", + index, + IGL_BUFFER_BINDINGS_MAX); + return; + } + + if (!buffer) { + // Unbind buffer/UAV at this slot + if (isUAV) { + if (index < 
bindingsUAVs_.count) { + bindingsUAVs_.buffers[index] = nullptr; + bindingsUAVs_.offsets[index] = 0; + bindingsUAVs_.elementStrides[index] = 0; + bindingsUAVs_.handles[index] = {}; + while (bindingsUAVs_.count > 0 && bindingsUAVs_.buffers[bindingsUAVs_.count - 1] == nullptr) { + bindingsUAVs_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_UAVs; + } else { + if (index < bindingsBuffers_.count) { + bindingsBuffers_.buffers[index] = nullptr; + bindingsBuffers_.addresses[index] = 0; + bindingsBuffers_.offsets[index] = 0; + bindingsBuffers_.sizes[index] = 0; + while (bindingsBuffers_.count > 0 && bindingsBuffers_.buffers[bindingsBuffers_.count - 1] == nullptr) { + bindingsBuffers_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_Buffers; + } + return; + } + + auto* d3dBuffer = static_cast(buffer); + ID3D12Resource* resource = d3dBuffer->getResource(); + + if (!resource) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindBuffer: buffer resource is null\n"); + return; + } + + if (isUAV) { + // Storage buffer (UAV) - store buffer pointer, offset, and element stride for descriptor creation + if (elementStride == 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindBuffer: UAV binding requires non-zero elementStride\n"); + return; + } + bindingsUAVs_.buffers[index] = buffer; + bindingsUAVs_.offsets[index] = offset; + bindingsUAVs_.elementStrides[index] = elementStride; + dirtyFlags_ |= DirtyFlagBits_UAVs; + if (index >= bindingsUAVs_.count) { + bindingsUAVs_.count = index + 1; + } + } else { + // Uniform buffer (CBV) - D3D12 requires 256-byte alignment for CBV addresses + // Compute base address (must be 256-byte aligned) + D3D12_GPU_VIRTUAL_ADDRESS baseAddress = resource->GetGPUVirtualAddress(); + D3D12_GPU_VIRTUAL_ADDRESS alignedAddress = (baseAddress + offset) & ~(kConstantBufferAlignment - 1); + + bindingsBuffers_.buffers[index] = buffer; + bindingsBuffers_.addresses[index] = alignedAddress; + bindingsBuffers_.offsets[index] = offset; + bindingsBuffers_.sizes[index] = size; + 
dirtyFlags_ |= DirtyFlagBits_Buffers; + if (index >= bindingsBuffers_.count) { + bindingsBuffers_.count = index + 1; + } + } +} + +bool D3D12ResourcesBinder::updateBindings(const RenderPipelineState* renderPipeline, Result* outResult) { + auto* commandList = commandBuffer_.getCommandList(); + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + + if (!commandList || !device) { + if (outResult) { + *outResult = Result{Result::Code::RuntimeError, "Invalid command list or device"}; + } + return false; + } + + bool success = true; + + // Update textures (SRV table) + if (dirtyFlags_ & DirtyFlagBits_Textures) { + if (!updateTextureBindings(commandList, device, renderPipeline, outResult)) { + success = false; + } + } + + // Update samplers (sampler table) + if (dirtyFlags_ & DirtyFlagBits_Samplers) { + if (!updateSamplerBindings(commandList, device, renderPipeline, outResult)) { + success = false; + } + } + + // Update buffers (CBV table) + if (dirtyFlags_ & DirtyFlagBits_Buffers) { + if (!updateBufferBindings(commandList, device, renderPipeline, outResult)) { + success = false; + } + } + + // Update UAVs (UAV table for compute) + if ((dirtyFlags_ & DirtyFlagBits_UAVs) && isCompute_) { + if (!updateUAVBindings(commandList, device, outResult)) { + success = false; + } + } + + // Clear dirty flags + dirtyFlags_ = 0; + + return success; +} + +void D3D12ResourcesBinder::reset() { + bindingsTextures_ = {}; + bindingsSamplers_ = {}; + bindingsBuffers_ = {}; + bindingsUAVs_ = {}; + dirtyFlags_ = DirtyFlagBits_Textures | DirtyFlagBits_Samplers | DirtyFlagBits_Buffers | + DirtyFlagBits_UAVs; +} + +bool D3D12ResourcesBinder::updateTextureBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const RenderPipelineState* renderPipeline, + Result* outResult) { + if (bindingsTextures_.count == 0) { + return true; // Nothing to bind + } + + auto& context = commandBuffer_.getContext(); + + // Determine how many descriptors to allocate based 
on pipeline's root signature + // For graphics: Use pipeline's declared SRV range (0 to maxSRVSlot inclusive) + // For compute: Use bindingsTextures_.count (legacy sparse allocation) + uint32_t descriptorRangeSize = bindingsTextures_.count; + + if (!isCompute_ && renderPipeline) { + // Graphics pipeline: Match root signature's SRV descriptor range exactly + const UINT pipelineSRVCount = renderPipeline->getSRVDescriptorCount(); + if (pipelineSRVCount > 0) { + descriptorRangeSize = pipelineSRVCount; + IGL_D3D12_LOG_VERBOSE("updateTextureBindings: Using pipeline SRV range size=%u (bound=%u)\n", + descriptorRangeSize, bindingsTextures_.count); + } + } + + // Allocate a contiguous range of descriptors for all textures on a single page + // This ensures we can bind them as a single descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = + commandBuffer_.allocateCbvSrvUavRange(descriptorRangeSize, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR( + "D3D12ResourcesBinder: Failed to allocate contiguous SRV range (%u descriptors): %s\n", + descriptorRangeSize, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + return false; + } + + // Create SRV descriptors for all texture slots from 0 to descriptorRangeSize-1. + // For unbound slots, emit a null SRV so that the descriptor table is fully + // initialized and matches the root signature descriptor range exactly. + for (uint32_t i = 0; i < descriptorRangeSize; ++i) { + const uint32_t descriptorIndex = baseDescriptorIndex + i; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = + context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = + context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Check if this slot is bound (may be null if beyond bindingsTextures_.count) + auto* texture = (i < bindingsTextures_.count) ? bindingsTextures_.textures[i] : nullptr; + if (!texture) { + // Create an explicit null SRV descriptor. 
D3D12 does not permit both + // the resource AND the descriptor pointer to be null, so we bind a + // well-formed descriptor with zeroed fields instead. This is treated as + // a null descriptor by the runtime. + D3D12_SHADER_RESOURCE_VIEW_DESC nullSrv = {}; + nullSrv.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + nullSrv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + nullSrv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + nullSrv.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(nullptr, &nullSrv, cpuHandle); + D3D12Context::trackResourceCreation("SRV", 0); + // Only cache handle if within bounds (avoid out-of-bounds write) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsTextures_.handles[i] = gpuHandle; + } + continue; + } + + auto* d3dTexture = static_cast(texture); + ID3D12Resource* resource = d3dTexture->getResource(); + if (!resource) { + D3D12_SHADER_RESOURCE_VIEW_DESC nullSrv = {}; + nullSrv.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + nullSrv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + nullSrv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + nullSrv.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(nullptr, &nullSrv, cpuHandle); + D3D12Context::trackResourceCreation("SRV", 0); + // Only cache handle if within bounds (avoid out-of-bounds write) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsTextures_.handles[i] = gpuHandle; + } + continue; + } + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = textureFormatToDXGIShaderResourceViewFormat(d3dTexture->getFormat()); + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + auto resourceDesc = resource->GetDesc(); + const bool isView = d3dTexture->isView(); + const uint32_t mostDetailedMip = isView ? d3dTexture->getMipLevelOffset() : 0; + const uint32_t mipLevels = + isView ? 
d3dTexture->getNumMipLevelsInView() : d3dTexture->getNumMipLevels(); + + if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srvDesc.Texture3D.MipLevels = mipLevels; + srvDesc.Texture3D.MostDetailedMip = mostDetailedMip; + } else if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + const auto textureType = d3dTexture->getType(); + const bool isArrayTexture = + (isView && d3dTexture->getNumArraySlicesInView() > 0) || + (!isView && resourceDesc.DepthOrArraySize > 1); + + // Prioritize cube textures so that cubemaps created as 2D arrays + // with 6 faces are exposed as TEXTURECUBE to shaders that declare + // TextureCube / samplerCube. + if (textureType == TextureType::Cube) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + srvDesc.TextureCube.MostDetailedMip = mostDetailedMip; + srvDesc.TextureCube.MipLevels = mipLevels; + } else if (textureType == TextureType::TwoDArray || isArrayTexture) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srvDesc.Texture2DArray.MostDetailedMip = mostDetailedMip; + srvDesc.Texture2DArray.MipLevels = mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = + isView ? d3dTexture->getArraySliceOffset() : 0; + srvDesc.Texture2DArray.ArraySize = + isView ? 
d3dTexture->getNumArraySlicesInView() + : resourceDesc.DepthOrArraySize; + } else { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = mostDetailedMip; + srvDesc.Texture2D.MipLevels = mipLevels; + } + } else { + IGL_LOG_ERROR("D3D12ResourcesBinder: Unsupported texture dimension %d\n", + resourceDesc.Dimension); + if (outResult) { + *outResult = + Result{Result::Code::Unsupported, "Unsupported texture dimension for SRV"}; + } + return false; + } + + device->CreateShaderResourceView(resource, &srvDesc, cpuHandle); + D3D12Context::trackResourceCreation("SRV", 0); + + // Cache the GPU handle (only if within bounds) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsTextures_.handles[i] = gpuHandle; + } + } + + // Bind the SRV table to the appropriate root parameter + // Use the first descriptor in the allocated range (baseDescriptorIndex) + D3D12_GPU_DESCRIPTOR_HANDLE tableBaseHandle = context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + + if (isCompute_) { + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_SRVTable, tableBaseHandle); + } else { + // Graphics pipeline: Query reflection-based root parameter index from pipeline + if (!renderPipeline) { + IGL_LOG_ERROR("updateTextureBindings: renderPipeline is NULL, cannot bind SRV table\n"); + } else { + const UINT srvTableIndex = renderPipeline->getSRVTableRootParameterIndex(); + IGL_D3D12_LOG_VERBOSE("updateTextureBindings: srvTableIndex=%u (UINT_MAX=%u)\n", srvTableIndex, UINT_MAX); + if (srvTableIndex != UINT_MAX) { + cmdList->SetGraphicsRootDescriptorTable(srvTableIndex, tableBaseHandle); + IGL_D3D12_LOG_VERBOSE("updateTextureBindings: Bound SRV table to root param %u (range size %u)\n", + srvTableIndex, descriptorRangeSize); + } else { + IGL_LOG_ERROR("updateTextureBindings: srvTableIndex is UINT_MAX, shader doesn't use SRVs?\n"); + } + } + } + + return true; +} + +bool D3D12ResourcesBinder::updateSamplerBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* 
device, + const RenderPipelineState* renderPipeline, + Result* outResult) { + if (bindingsSamplers_.count == 0) { + return true; // Nothing to bind + } + + auto& context = commandBuffer_.getContext(); + + // Determine how many descriptors to allocate based on pipeline's root signature + // For graphics: Use pipeline's declared sampler range (0 to maxSamplerSlot inclusive) + // For compute: Use bindingsSamplers_.count (legacy behavior) + uint32_t descriptorRangeSize = bindingsSamplers_.count; + + if (!isCompute_ && renderPipeline) { + // Graphics pipeline: Match root signature's sampler descriptor range exactly + const UINT pipelineSamplerCount = renderPipeline->getSamplerDescriptorCount(); + if (pipelineSamplerCount > 0) { + descriptorRangeSize = pipelineSamplerCount; + IGL_D3D12_LOG_VERBOSE("updateSamplerBindings: Using pipeline sampler range size=%u (bound=%u)\n", + descriptorRangeSize, bindingsSamplers_.count); + } + } + + // Get base sampler descriptor index for contiguous allocation + uint32_t baseSamplerIndex = commandBuffer_.getNextSamplerDescriptor(); + + // Create sampler descriptors for all slots from 0 to descriptorRangeSize-1 + // For unbound slots, create a default sampler to fill the table + for (uint32_t i = 0; i < descriptorRangeSize; ++i) { + const uint32_t descriptorIndex = baseSamplerIndex + i; + + // Get descriptor handles + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getSamplerCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getSamplerGpuHandle(descriptorIndex); + + // Check if this slot is bound (may be null if beyond bindingsSamplers_.count) + auto* samplerState = (i < bindingsSamplers_.count) ? 
bindingsSamplers_.samplers[i] : nullptr; + + // Create sampler descriptor + D3D12_SAMPLER_DESC samplerDesc = {}; + if (samplerState) { + if (auto* d3dSampler = dynamic_cast(samplerState)) { + samplerDesc = d3dSampler->getDesc(); + } else { + // Fallback for bound but invalid sampler + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.BorderColor[0] = 0.0f; + samplerDesc.BorderColor[1] = 0.0f; + samplerDesc.BorderColor[2] = 0.0f; + samplerDesc.BorderColor[3] = 0.0f; + samplerDesc.MinLOD = 0.0f; + samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + } + } else { + // Unbound slot: Create default sampler for unused descriptor table entries + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.BorderColor[0] = 0.0f; + samplerDesc.BorderColor[1] = 0.0f; + samplerDesc.BorderColor[2] = 0.0f; + samplerDesc.BorderColor[3] = 0.0f; + samplerDesc.MinLOD = 0.0f; + samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + } + + device->CreateSampler(&samplerDesc, cpuHandle); + D3D12Context::trackResourceCreation("Sampler", 0); + + // Cache the GPU handle (only if within bounds) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsSamplers_.handles[i] = gpuHandle; + } + } + + // Update sampler descriptor counter to reserve the allocated range + commandBuffer_.getNextSamplerDescriptor() = baseSamplerIndex + descriptorRangeSize; + + // Bind the sampler table to the appropriate root 
parameter + // Use the first descriptor in the allocated range + D3D12_GPU_DESCRIPTOR_HANDLE tableBaseHandle = context.getSamplerGpuHandle(baseSamplerIndex); + if (isCompute_) { + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_SamplerTable, tableBaseHandle); + } else { + // Graphics pipeline: Query reflection-based root parameter index from pipeline + if (!renderPipeline) { + IGL_LOG_ERROR("updateSamplerBindings: renderPipeline is NULL, cannot bind sampler table\n"); + } else { + const UINT samplerTableIndex = renderPipeline->getSamplerTableRootParameterIndex(); + IGL_D3D12_LOG_VERBOSE("updateSamplerBindings: samplerTableIndex=%u (UINT_MAX=%u)\n", samplerTableIndex, UINT_MAX); + if (samplerTableIndex != UINT_MAX) { + cmdList->SetGraphicsRootDescriptorTable(samplerTableIndex, tableBaseHandle); + IGL_D3D12_LOG_VERBOSE("updateSamplerBindings: Bound sampler table to root param %u (range size %u)\n", + samplerTableIndex, descriptorRangeSize); + } else { + IGL_LOG_ERROR("updateSamplerBindings: samplerTableIndex is UINT_MAX, shader doesn't use samplers?\n"); + } + } + } + + return true; +} + +bool D3D12ResourcesBinder::updateBufferBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const RenderPipelineState* renderPipeline, + Result* outResult) { + if (bindingsBuffers_.count == 0) { + return true; // Nothing to bind + } + + if (isCompute_) { + // Compute pipeline: all CBVs go through descriptor table (root parameter 3) + auto& context = commandBuffer_.getContext(); + + // Count bound CBVs and validate dense binding + uint32_t boundCbvCount = 0; + for (uint32_t i = 0; i < bindingsBuffers_.count; ++i) { + if (bindingsBuffers_.addresses[i] != 0) { + boundCbvCount++; + } + } + + if (boundCbvCount == 0) { + return true; // No CBVs to bind + } + + // CRITICAL VALIDATION: Enforce dense CBV binding for compute shaders + // ===================================================================== + // D3D12 descriptor tables bind contiguously starting from 
the base register. + // For compute CBVs, this means: + // - VALID: binding slots 0, 1, 2 (dense from b0) + // - INVALID: binding slots 0, 2 (gap at slot 1) + // - INVALID: binding slots 1, 2 (slot 0 not bound) + // + // This is FATAL validation - sparse bindings will return InvalidOperation error. + // Application code must ensure CBVs are bound densely from index 0 with no gaps. + // + // Rationale: When we call SetComputeRootDescriptorTable with N descriptors at base b0, + // D3D12 expects HLSL registers b0, b1, ..., b(N-1) to map 1:1 with descriptor table + // entries. Gaps would cause shader register mismatches and undefined behavior. + + if (bindingsBuffers_.addresses[0] == 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Compute CBV bindings are sparse (slot 0 not bound). " + "D3D12 requires dense bindings starting at index 0.\n"); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, + "Compute CBV bindings must be dense starting at slot 0"}; + } + return false; + } + + // Verify no gaps in binding range (all slots from 0 to boundCbvCount-1 must be bound) + for (uint32_t i = 1; i < boundCbvCount; ++i) { + if (bindingsBuffers_.addresses[i] == 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Sparse compute CBV binding detected at slot %u " + "(expected dense binding through slot %u)\n", i, boundCbvCount - 1); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, "Compute CBV bindings must be dense"}; + } + return false; + } + } + + // Allocate a contiguous range of descriptors for all CBVs on a single page + // This ensures we can bind them as a single descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(boundCbvCount, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Failed to allocate contiguous CBV range (%u descriptors): %s\n", + boundCbvCount, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + 
return false; + } + + // Create CBV descriptors for all bound buffers + uint32_t descriptorOffset = 0; + for (uint32_t i = 0; i < bindingsBuffers_.count; ++i) { + if (bindingsBuffers_.addresses[i] == 0) { + continue; // Skip unbound slots + } + + // Validate address alignment (D3D12 requires 256-byte alignment) + if (bindingsBuffers_.addresses[i] % kConstantBufferAlignment != 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u address 0x%llx is not 256-byte aligned\n", + i, bindingsBuffers_.addresses[i]); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, + "Constant buffer address must be 256-byte aligned"}; + } + return false; + } + + // Validate size + size_t size = bindingsBuffers_.sizes[i]; + if (size > kMaxCBVSize) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u size (%zu bytes) exceeds 64 KB limit\n", + i, size); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, + "Constant buffer size exceeds 64 KB D3D12 limit"}; + } + return false; + } + + // Align size to 256-byte boundary + const size_t alignedSize = (size + kConstantBufferAlignment - 1) & ~(kConstantBufferAlignment - 1); + + // Use contiguous descriptor index (baseDescriptorIndex + descriptorOffset) + const uint32_t descriptorIndex = baseDescriptorIndex + descriptorOffset; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = bindingsBuffers_.addresses[i]; + cbvDesc.SizeInBytes = static_cast(alignedSize); + + device->CreateConstantBufferView(&cbvDesc, cpuHandle); + descriptorOffset++; + } + + // Sanity check: descriptorOffset should match boundCbvCount after dense packing + IGL_DEBUG_ASSERT(descriptorOffset == boundCbvCount, + "CBV descriptor packing mismatch: allocated %u but created %u", + boundCbvCount, descriptorOffset); + + // Bind the CBV descriptor table to root parameter 3 + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = 
context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_CBVTable, gpuHandle); + } else { + // Graphics pipeline: Reflection-based CBV descriptor table binding + auto& context = commandBuffer_.getContext(); + + // Count bound CBVs + uint32_t boundCbvCount = 0; + for (uint32_t i = 0; i < bindingsBuffers_.count; ++i) { + if (bindingsBuffers_.addresses[i] != 0) { + boundCbvCount++; + } + } + + if (boundCbvCount == 0) { + return true; // No CBVs to bind + } + + // Determine how many descriptors to allocate based on pipeline's root signature + // Use pipeline's declared CBV range (0 to maxCBVSlot inclusive) to match root signature + uint32_t descriptorRangeSize = bindingsBuffers_.count; + + if (renderPipeline) { + const UINT pipelineCBVCount = renderPipeline->getCBVDescriptorCount(); + if (pipelineCBVCount > 0) { + descriptorRangeSize = pipelineCBVCount; + } + } + + // Allocate a contiguous range of descriptors from 0 to descriptorRangeSize-1 + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(descriptorRangeSize, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Failed to allocate CBV range (%u descriptors): %s\n", + descriptorRangeSize, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + return false; + } + + IGL_D3D12_LOG_VERBOSE("updateBufferBindings: Graphics CBV binding - range b0-b%u, %u descriptors\n", + descriptorRangeSize - 1, descriptorRangeSize); + + // Create CBV descriptors for all slots from 0 to descriptorRangeSize-1 + // For unbound slots, create null descriptors to match the root signature range + for (uint32_t slotIndex = 0; slotIndex < descriptorRangeSize; ++slotIndex) { + const uint32_t descriptorIndex = baseDescriptorIndex + slotIndex; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + // Check if this slot is bound (may be null if 
beyond bindingsBuffers_.count) + const bool isSlotBound = (slotIndex < bindingsBuffers_.count) && + (bindingsBuffers_.addresses[slotIndex] != 0); + + if (isSlotBound) { + // Bound slot: Create valid CBV descriptor + // Validate address alignment (D3D12 requires 256-byte alignment) + if (bindingsBuffers_.addresses[slotIndex] % kConstantBufferAlignment != 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u address 0x%llx is not 256-byte aligned\n", + slotIndex, bindingsBuffers_.addresses[slotIndex]); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, + "Constant buffer address must be 256-byte aligned"}; + } + return false; + } + + // Validate size + size_t size = bindingsBuffers_.sizes[slotIndex]; + if (size > kMaxCBVSize) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u size (%zu bytes) exceeds 64 KB limit\n", + slotIndex, size); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, + "Constant buffer size exceeds 64 KB D3D12 limit"}; + } + return false; + } + + // Align size to 256-byte boundary + const size_t alignedSize = (size + kConstantBufferAlignment - 1) & ~(kConstantBufferAlignment - 1); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = bindingsBuffers_.addresses[slotIndex]; + cbvDesc.SizeInBytes = static_cast(alignedSize); + + device->CreateConstantBufferView(&cbvDesc, cpuHandle); + IGL_D3D12_LOG_VERBOSE("D3D12ResourcesBinder: Created CBV descriptor for b%u (address=0x%llx, size=%u)\n", + slotIndex, cbvDesc.BufferLocation, cbvDesc.SizeInBytes); + } else { + // Unbound slot: Create NULL descriptor to fill the root signature descriptor range + D3D12_CONSTANT_BUFFER_VIEW_DESC nullCbvDesc = {}; + nullCbvDesc.BufferLocation = 0; // NULL CBV + nullCbvDesc.SizeInBytes = D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16; // Minimum valid size + + device->CreateConstantBufferView(&nullCbvDesc, cpuHandle); + IGL_D3D12_LOG_VERBOSE("D3D12ResourcesBinder: Created NULL CBV descriptor 
for b%u\n", slotIndex); + } + } + + // Query pipeline for reflection-based CBV table root parameter index + if (!renderPipeline) { + IGL_LOG_ERROR("updateBufferBindings: renderPipeline is NULL, cannot bind CBV table\n"); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, "renderPipeline is required for graphics CBV binding"}; + } + return false; + } + + const UINT cbvTableIndex = renderPipeline->getCBVTableRootParameterIndex(); + + if (cbvTableIndex != UINT_MAX) { + // Bind the CBV descriptor table to the reflection-based root parameter + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + cmdList->SetGraphicsRootDescriptorTable(cbvTableIndex, gpuHandle); + } + } + + return true; +} + +bool D3D12ResourcesBinder::updateUAVBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + Result* outResult) { + if (bindingsUAVs_.count == 0) { + return true; // Nothing to bind + } + + // Validate dense bindings + if (bindingsUAVs_.buffers[0] == nullptr) { + IGL_LOG_ERROR("D3D12ResourcesBinder: UAV bindings are sparse (slot 0 not bound). 
" + "D3D12 requires dense bindings starting at index 0.\n"); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, + "UAV bindings must be dense starting at slot 0"}; + } + return false; + } + + auto& context = commandBuffer_.getContext(); + + // Verify all UAVs are bound (dense binding requirement) + for (uint32_t i = 0; i < bindingsUAVs_.count; ++i) { + if (bindingsUAVs_.buffers[i] == nullptr) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Sparse UAV binding detected at slot %u\n", i); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, "UAV bindings must be dense"}; + } + return false; + } + } + + // Allocate a contiguous range of descriptors for all UAVs on a single page + // This ensures we can bind them as a single descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(bindingsUAVs_.count, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Failed to allocate contiguous UAV range (%u descriptors): %s\n", + bindingsUAVs_.count, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + return false; + } + + // Create UAV descriptors for all bound storage buffers + for (uint32_t i = 0; i < bindingsUAVs_.count; ++i) { + auto* buffer = bindingsUAVs_.buffers[i]; + auto* d3dBuffer = static_cast(buffer); + ID3D12Resource* resource = d3dBuffer->getResource(); + + const size_t offset = bindingsUAVs_.offsets[i]; + const size_t elementStride = bindingsUAVs_.elementStrides[i]; + const size_t bufferSize = d3dBuffer->getSizeInBytes(); + + // FATAL VALIDATION: UAV offset must be aligned to element stride + // This check immediately fails the entire updateBindings() call and returns InvalidOperation. + // Misaligned offsets would create invalid D3D12 UAV descriptors and cause device removal. 
+ if (offset % elementStride != 0) { + IGL_LOG_ERROR( + "D3D12ResourcesBinder: UAV offset %zu is not aligned to element stride %zu. " + "This is a FATAL error - updateBindings() will fail.\n", + offset, + elementStride); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, + "UAV offset must be aligned to element stride"}; + } + return false; + } + + // FATAL VALIDATION: UAV offset must be within buffer bounds + // This check immediately fails the entire updateBindings() call and returns ArgumentOutOfRange. + // Out-of-bounds offsets would access invalid memory and cause GPU faults. + if (offset > bufferSize) { + IGL_LOG_ERROR("D3D12ResourcesBinder: UAV offset %zu exceeds buffer size %zu. " + "This is a FATAL error - updateBindings() will fail.\n", + offset, + bufferSize); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, "UAV offset exceeds buffer size"}; + } + return false; + } + + const size_t remaining = bufferSize - offset; + // FATAL VALIDATION: At least one full element must fit in remaining buffer space + // This check immediately fails the entire updateBindings() call and returns ArgumentOutOfRange. + // Creating a UAV with zero elements or partial elements would be invalid. + if (remaining < elementStride) { + IGL_LOG_ERROR("D3D12ResourcesBinder: UAV remaining size %zu < element stride %zu. 
" + "This is a FATAL error - updateBindings() will fail.\n", + remaining, + elementStride); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, + "UAV remaining size less than element stride"}; + } + return false; + } + + // Use contiguous descriptor index (baseDescriptorIndex + i) + const uint32_t descriptorIndex = baseDescriptorIndex + i; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor for structured buffer + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; // Required for structured buffers + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = static_cast(offset / elementStride); + uavDesc.Buffer.NumElements = static_cast(remaining / elementStride); + uavDesc.Buffer.StructureByteStride = static_cast(elementStride); + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + device->CreateUnorderedAccessView(resource, nullptr, &uavDesc, cpuHandle); + D3D12Context::trackResourceCreation("UAV", 0); + + // Cache the GPU handle + bindingsUAVs_.handles[i] = gpuHandle; + } + + // Bind the UAV table to root parameter 1 (compute only) + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_UAVTable, bindingsUAVs_.handles[0]); + + return true; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12ResourcesBinder.h b/src/igl/d3d12/D3D12ResourcesBinder.h new file mode 100644 index 0000000000..f549eff9da --- /dev/null +++ b/src/igl/d3d12/D3D12ResourcesBinder.h @@ -0,0 +1,353 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class CommandBuffer; +class PipelineState; + +/** + * @brief Binding state for textures and their associated GPU descriptor handles + * + * Stores up to IGL_TEXTURE_SAMPLERS_MAX texture bindings (t0-t15 in HLSL). + * Each binding stores the texture pointer (for descriptor creation) and the + * resulting GPU descriptor handle (for root parameter binding). + */ +struct BindingsTextures { + ITexture* textures[IGL_TEXTURE_SAMPLERS_MAX] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE handles[IGL_TEXTURE_SAMPLERS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Binding state for samplers and their associated GPU descriptor handles + * + * Stores up to IGL_TEXTURE_SAMPLERS_MAX sampler bindings (s0-s15 in HLSL). + * Each binding stores the sampler state pointer (for descriptor creation) and the + * resulting GPU descriptor handle (for root parameter binding). + */ +struct BindingsSamplers { + ISamplerState* samplers[IGL_TEXTURE_SAMPLERS_MAX] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE handles[IGL_TEXTURE_SAMPLERS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Binding state for uniform buffers (constant buffers in D3D12) + * + * Stores up to IGL_BUFFER_BINDINGS_MAX buffer bindings (b0-b30 in HLSL). + * D3D12 has two binding methods: + * - Root CBV (direct GPU virtual address) - used for b0-b1 (legacy/frequent) + * - CBV descriptor table - used for b2+ (less frequent) + * + * This struct stores buffer pointers and GPU virtual addresses/sizes for all bindings. + * The actual binding method is determined by the pipeline root signature. 
+ */ +struct BindingsBuffers { + IBuffer* buffers[IGL_BUFFER_BINDINGS_MAX] = {}; + D3D12_GPU_VIRTUAL_ADDRESS addresses[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t offsets[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t sizes[IGL_BUFFER_BINDINGS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Binding state for unordered access views (UAVs) + * + * Stores up to IGL_BUFFER_BINDINGS_MAX UAV bindings (u0-u30 in HLSL). + * Used for storage buffers in compute shaders and writable resources. + * Each binding stores the buffer pointer, offset, element stride (for descriptor creation), + * and the resulting GPU descriptor handle (for root parameter binding). + */ +struct BindingsUAVs { + IBuffer* buffers[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t offsets[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t elementStrides[IGL_BUFFER_BINDINGS_MAX] = {}; // Byte stride per element for structured buffers + D3D12_GPU_DESCRIPTOR_HANDLE handles[IGL_BUFFER_BINDINGS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Centralized resource binding management for D3D12 command encoders + * + * D3D12ResourcesBinder is the single entry point for shader-visible descriptor binding + * (CBV/SRV/UAV/Sampler) used by command encoders. It consolidates descriptor allocation + * and resource binding logic that was previously fragmented across RenderCommandEncoder + * and ComputeCommandEncoder. + * + * Note: RTV/DSV descriptors are managed separately by DescriptorHeapManager and bound + * directly by encoders during render pass setup. + * + * ============================================================================ + * ARCHITECTURE: D3D12 Descriptor Management Overview + * ============================================================================ + * + * The D3D12 backend uses THREE distinct descriptor management strategies: + * + * 1. 
**Transient Descriptor Allocator** (Per-Frame Heaps) + * - Location: D3D12Context::FrameContext, CommandBuffer allocation methods + * - Purpose: Shader-visible descriptors (CBV/SRV/UAV/Samplers) for rendering + * - Lifecycle: Allocated during command encoding, reset at frame boundary + * - Strategy: Linear allocation with dynamic multi-page growth + * - Used for: SRVs (textures), UAVs (storage buffers), CBVs, Samplers + * - Access: ONLY through D3D12ResourcesBinder (internal detail) + * + * 2. **Persistent Descriptor Allocator** (DescriptorHeapManager) + * - Location: DescriptorHeapManager class + * - Purpose: CPU-visible descriptors (RTV/DSV) with explicit lifecycle + * - Lifecycle: Allocated at resource creation, freed at resource destruction + * - Strategy: Free-list allocation with double-free protection + * - Used for: Render target views, depth-stencil views + * - Access: Directly by Texture and Framebuffer classes + * + * 3. **Root Descriptor Optimization** (Inline Binding) + * - Location: D3D12ResourcesBinder::updateBufferBindings() + * - Purpose: Bypass descriptor heaps for frequently-updated constant buffers + * - Lifecycle: No descriptor created - binds GPU virtual address directly + * - Strategy: D3D12 root CBVs (graphics b0-b1 only) + * - Used for: Hot-path constant buffers in graphics pipeline + * - Access: ONLY through D3D12ResourcesBinder (internal optimization) + * + * **Design Rationale**: + * - Strategies 1 and 2 handle DIFFERENT descriptor types (shader-visible vs CPU-visible) + * and lifecycles (transient vs persistent), so they cannot be merged + * - Strategy 3 is a D3D12-specific optimization, not a separate "system" + * - D3D12ResourcesBinder abstracts these details, providing a unified binding interface + * + * ============================================================================ + * Key Responsibilities of D3D12ResourcesBinder + * ============================================================================ + * + * - Cache resource 
bindings locally until updateBindings() is called + * - Allocate descriptors from per-frame shader-visible heaps on-demand (Strategy 1) + * - Create SRV/UAV/CBV/Sampler descriptors in GPU-visible heaps + * - Decide when to use root CBVs vs descriptor tables (Strategy 3) + * - Track dirty state to minimize descriptor creation and root parameter updates + * - Support both graphics and compute pipeline bind points + * - Transition texture resources to appropriate shader-resource states (buffers must + * be created in the correct state and are not transitioned here) + * + * Design principles: + * - **Lazy update**: Bindings are cached locally and only applied to GPU on updateBindings() + * - **Dirty tracking**: Only update descriptor sets when resources change + * - **Pipeline awareness**: Different root signature layouts for graphics vs compute + * - **Per-frame isolation**: Uses per-frame descriptor heaps to prevent race conditions + * - **Implementation hiding**: External code should never directly access CommandBuffer + * descriptor allocation methods - always go through ResourcesBinder + * + * Thread-safety: This class is NOT thread-safe. Each encoder should own its own binder. 
+ * + * Dependencies: + * - T01: Correct descriptor binding patterns + * - T06: Shared helper utilities for descriptor creation + * - T16: Unified logging controls + * - T20: Consolidated descriptor management architecture + * + * Related to Vulkan ResourcesBinder pattern (src/igl/vulkan/ResourcesBinder.h) + */ +class D3D12ResourcesBinder final { + public: + /** + * @brief Initialize the resource binder for a command buffer + * + * @param commandBuffer Command buffer to bind resources to (provides context/device access) + * @param isCompute True for compute pipelines, false for graphics pipelines + */ + D3D12ResourcesBinder(CommandBuffer& commandBuffer, bool isCompute); + + /** + * @brief Bind a texture (shader resource view) to a specific slot + * + * Creates or updates an SRV descriptor in the per-frame CBV/SRV/UAV heap + * and caches the GPU handle. The binding is not applied to the command list + * until updateBindings() is called. + * + * @param index Texture slot (t0-t15 in HLSL, 0-based index) + * @param texture Texture to bind (nullptr to unbind) + */ + void bindTexture(uint32_t index, ITexture* texture); + + /** + * @brief Bind a sampler state to a specific slot + * + * Creates or updates a sampler descriptor in the per-frame sampler heap + * and caches the GPU handle. The binding is not applied to the command list + * until updateBindings() is called. + * + * @param index Sampler slot (s0-s15 in HLSL, 0-based index) + * @param samplerState Sampler state to bind (nullptr to unbind) + */ + void bindSamplerState(uint32_t index, ISamplerState* samplerState); + + /** + * @brief Bind a buffer (constant buffer or storage buffer) to a specific slot + * + * For uniform buffers (constant buffers): + * - Stores GPU virtual address for root CBV binding (b0-b1) + * - Or creates CBV descriptor for descriptor table binding (b2+) + * - **COMPUTE SHADERS**: CBV bindings MUST be dense starting from index 0 with no gaps. 
+ * For example, binding slots 0, 1, 2 is valid; binding 0, 2 (skipping 1) will fail. + * This constraint is enforced because descriptor tables bind contiguously from b0. + * + * For storage buffers: + * - Creates UAV descriptor in the per-frame CBV/SRV/UAV heap + * - Requires elementStride for structured buffer descriptor creation + * + * The binding is not applied to the command list until updateBindings() is called. + * + * @param index Buffer slot (b0-b30 for CBVs, u0-u30 for UAVs in HLSL) + * @param buffer Buffer to bind (nullptr to unbind) + * @param offset Offset in bytes into the buffer + * @param size Size in bytes to bind + * @param isUAV True to bind as UAV (storage buffer), false for CBV (uniform buffer) + * @param elementStride For UAVs: byte stride per element for structured buffers (required) + */ + void bindBuffer(uint32_t index, + IBuffer* buffer, + size_t offset, + size_t size, + bool isUAV = false, + size_t elementStride = 0); + + /** + * @brief Apply all pending bindings to the command list + * + * This method performs the actual GPU binding work: + * 1. Creates descriptors for any dirty bindings (textures/samplers/buffers/UAVs) + * 2. Sets root descriptor tables (SetGraphicsRootDescriptorTable/SetComputeRootDescriptorTable) + * 3. Sets root constants/root CBVs if applicable + * 4. Clears dirty flags + * + * This should be called before draw/dispatch commands to ensure all bindings are active. + * + * @param renderPipeline For graphics pipelines: current pipeline to query reflection-based root parameter indices. + * For compute pipelines: pass nullptr (uses hardcoded layout). + * @param outResult Optional result for error reporting (e.g., descriptor heap overflow). + * If nullptr, caller receives only success/fail boolean. If non-null, + * all failure paths populate both error code and diagnostic message. 
+ * @return true if bindings applied successfully, false on error + */ + [[nodiscard]] bool updateBindings(const class RenderPipelineState* renderPipeline = nullptr, + Result* outResult = nullptr); + + /** + * @brief Reset all bindings and dirty flags + * + * Called at the start of a new frame or when switching pipelines to ensure + * clean binding state. Does not affect the underlying descriptor heaps. + */ + void reset(); + + private: + /** + * @brief Bitwise flags for dirty resource types + * + * Used to track which resource types have been modified since the last + * updateBindings() call, allowing us to skip descriptor creation and + * root parameter updates for unchanged resources. + */ + enum DirtyFlagBits : uint8_t { + DirtyFlagBits_Textures = 1 << 0, + DirtyFlagBits_Samplers = 1 << 1, + DirtyFlagBits_Buffers = 1 << 2, + DirtyFlagBits_UAVs = 1 << 3, + }; + + /** + * @brief Update texture bindings (SRV descriptor table) + * + * Creates SRV descriptors for all bound textures in the per-frame heap + * and sets the root descriptor table parameter. + * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param renderPipeline Pipeline to query reflection-based root parameter indices (graphics only) + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateTextureBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const class RenderPipelineState* renderPipeline, + Result* outResult); + + /** + * @brief Update sampler bindings (sampler descriptor table) + * + * Creates sampler descriptors for all bound samplers in the per-frame heap + * and sets the root descriptor table parameter. 
+ * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param renderPipeline Pipeline to query reflection-based root parameter indices (graphics only) + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateSamplerBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const class RenderPipelineState* renderPipeline, + Result* outResult); + + /** + * @brief Update buffer bindings (CBV descriptor table) + * + * For graphics pipelines: + * - Creates CBV descriptor table for all bound CBVs + * - Queries pipeline for reflection-based root parameter index + * + * For compute pipelines: + * - Creates CBV descriptor table for all bindings (hardcoded root parameter) + * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param renderPipeline Pipeline to query reflection-based root parameter indices (graphics only) + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateBufferBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const class RenderPipelineState* renderPipeline, + Result* outResult); + + /** + * @brief Update UAV bindings (UAV descriptor table for compute shaders) + * + * Creates UAV descriptors for all bound storage buffers in the per-frame heap + * and sets the root descriptor table parameter. Only used for compute pipelines. 
+ * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateUAVBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + Result* outResult); + + CommandBuffer& commandBuffer_; + bool isCompute_ = false; + + // Cached binding state + BindingsTextures bindingsTextures_; + BindingsSamplers bindingsSamplers_; + BindingsBuffers bindingsBuffers_; + BindingsUAVs bindingsUAVs_; + + // Dirty tracking flags + uint32_t dirtyFlags_ = DirtyFlagBits_Textures | DirtyFlagBits_Samplers | + DirtyFlagBits_Buffers | DirtyFlagBits_UAVs; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12RootSignatureKey.h b/src/igl/d3d12/D3D12RootSignatureKey.h new file mode 100644 index 0000000000..9dcf918cc2 --- /dev/null +++ b/src/igl/d3d12/D3D12RootSignatureKey.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief Key structure for root signature cache lookup based on shader resource usage + * + * This structure captures the essential shader resource requirements needed to construct + * a compatible root signature. It enables Vulkan-style dynamic root signature selection + * where the root signature is chosen based on actual shader resource usage rather than + * being globally fixed. + * + * The key includes: + * - Push constant configuration (slot and size) + * - Resource slot usage (CBV/SRV/UAV/Sampler ranges) + * - Flags for shader visibility and optimization + * + * Root signatures with the same key are compatible and can be reused across pipelines. 
+ */ +struct D3D12RootSignatureKey { + // Push constants configuration + bool hasPushConstants = false; + UINT pushConstantSlot = UINT_MAX; // Which b# register + UINT pushConstantSize = 0; // Size in 32-bit values + + // Resource slot ranges (sorted for consistent hashing) + std::vector usedCBVSlots; + std::vector usedSRVSlots; + std::vector usedUAVSlots; + std::vector usedSamplerSlots; + + // Minimum / maximum slot indices (for determining descriptor table windows) + UINT minCBVSlot = 0; + UINT maxCBVSlot = 0; + UINT minSRVSlot = 0; + UINT maxSRVSlot = 0; + UINT minUAVSlot = 0; + UINT maxUAVSlot = 0; + UINT minSamplerSlot = 0; + UINT maxSamplerSlot = 0; + + // Root signature flags + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + /** + * @brief Construct key from vertex + fragment shader reflection + * + * Merges resource usage from both shaders to create a unified key. + * Handles push constant slot conflicts (prefers vertex shader if both use different slots). 
+ */ + static D3D12RootSignatureKey fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* vsReflection, + const ShaderModule::ShaderReflectionInfo* psReflection); + + /** + * @brief Construct key from compute shader reflection + */ + static D3D12RootSignatureKey fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* csReflection); + + bool operator==(const D3D12RootSignatureKey& other) const { + return hasPushConstants == other.hasPushConstants && + pushConstantSlot == other.pushConstantSlot && + pushConstantSize == other.pushConstantSize && + usedCBVSlots == other.usedCBVSlots && + usedSRVSlots == other.usedSRVSlots && + usedUAVSlots == other.usedUAVSlots && + usedSamplerSlots == other.usedSamplerSlots && + minCBVSlot == other.minCBVSlot && + maxCBVSlot == other.maxCBVSlot && + minSRVSlot == other.minSRVSlot && + maxSRVSlot == other.maxSRVSlot && + minUAVSlot == other.minUAVSlot && + maxUAVSlot == other.maxUAVSlot && + minSamplerSlot == other.minSamplerSlot && + maxSamplerSlot == other.maxSamplerSlot && + flags == other.flags; + } + + struct HashFunction { + size_t operator()(const D3D12RootSignatureKey& key) const { + size_t hash = 0; + + // Hash push constants + hashCombine(hash, key.hasPushConstants ? 
1 : 0); + hashCombine(hash, static_cast(key.pushConstantSlot)); + hashCombine(hash, static_cast(key.pushConstantSize)); + + // Hash resource slots + for (UINT slot : key.usedCBVSlots) { + hashCombine(hash, static_cast(slot)); + } + for (UINT slot : key.usedSRVSlots) { + hashCombine(hash, static_cast(slot)); + } + for (UINT slot : key.usedUAVSlots) { + hashCombine(hash, static_cast(slot)); + } + for (UINT slot : key.usedSamplerSlots) { + hashCombine(hash, static_cast(slot)); + } + + // Hash min/max slots + hashCombine(hash, static_cast(key.minCBVSlot)); + hashCombine(hash, static_cast(key.maxCBVSlot)); + hashCombine(hash, static_cast(key.minSRVSlot)); + hashCombine(hash, static_cast(key.maxSRVSlot)); + hashCombine(hash, static_cast(key.minUAVSlot)); + hashCombine(hash, static_cast(key.maxUAVSlot)); + hashCombine(hash, static_cast(key.minSamplerSlot)); + hashCombine(hash, static_cast(key.maxSamplerSlot)); + + // Hash flags + hashCombine(hash, static_cast(key.flags)); + + return hash; + } + }; + +private: + // Helper to merge two slot vectors and sort + static std::vector mergeAndSort(const std::vector& a, const std::vector& b) { + std::vector result = a; + result.insert(result.end(), b.begin(), b.end()); + std::sort(result.begin(), result.end()); + // Remove duplicates + result.erase(std::unique(result.begin(), result.end()), result.end()); + return result; + } +}; + +// Implementation of fromShaderReflection for graphics pipeline +inline D3D12RootSignatureKey D3D12RootSignatureKey::fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* vsReflection, + const ShaderModule::ShaderReflectionInfo* psReflection) { + D3D12RootSignatureKey key; + + // Merge push constants (prefer vertex shader if conflict) + if (vsReflection && vsReflection->hasPushConstants) { + key.hasPushConstants = true; + key.pushConstantSlot = vsReflection->pushConstantSlot; + key.pushConstantSize = vsReflection->pushConstantSize; + } else if (psReflection && psReflection->hasPushConstants) 
{ + key.hasPushConstants = true; + key.pushConstantSlot = psReflection->pushConstantSlot; + key.pushConstantSize = psReflection->pushConstantSize; + } + + // Merge resource slots + // IMPORTANT: Exclude push constant slot from CBV descriptor table + // Push constants use inline root constants (D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS), + // not a CBV descriptor. Including the push constant slot in usedCBVSlots would cause + // a root signature overlap error. + if (vsReflection && psReflection) { + key.usedCBVSlots = mergeAndSort(vsReflection->usedCBVSlots, psReflection->usedCBVSlots); + key.usedSRVSlots = mergeAndSort(vsReflection->usedSRVSlots, psReflection->usedSRVSlots); + key.usedUAVSlots = mergeAndSort(vsReflection->usedUAVSlots, psReflection->usedUAVSlots); + key.usedSamplerSlots = mergeAndSort(vsReflection->usedSamplerSlots, psReflection->usedSamplerSlots); + + key.maxCBVSlot = std::max(vsReflection->maxCBVSlot, psReflection->maxCBVSlot); + key.maxSRVSlot = std::max(vsReflection->maxSRVSlot, psReflection->maxSRVSlot); + key.maxUAVSlot = std::max(vsReflection->maxUAVSlot, psReflection->maxUAVSlot); + key.maxSamplerSlot = std::max(vsReflection->maxSamplerSlot, psReflection->maxSamplerSlot); + } else if (vsReflection) { + key.usedCBVSlots = vsReflection->usedCBVSlots; + key.usedSRVSlots = vsReflection->usedSRVSlots; + key.usedUAVSlots = vsReflection->usedUAVSlots; + key.usedSamplerSlots = vsReflection->usedSamplerSlots; + + std::sort(key.usedCBVSlots.begin(), key.usedCBVSlots.end()); + key.usedCBVSlots.erase(std::unique(key.usedCBVSlots.begin(), key.usedCBVSlots.end()), + key.usedCBVSlots.end()); + + std::sort(key.usedSRVSlots.begin(), key.usedSRVSlots.end()); + key.usedSRVSlots.erase(std::unique(key.usedSRVSlots.begin(), key.usedSRVSlots.end()), + key.usedSRVSlots.end()); + + std::sort(key.usedUAVSlots.begin(), key.usedUAVSlots.end()); + key.usedUAVSlots.erase(std::unique(key.usedUAVSlots.begin(), key.usedUAVSlots.end()), + key.usedUAVSlots.end()); + + 
std::sort(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()); + key.usedSamplerSlots.erase(std::unique(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()), + key.usedSamplerSlots.end()); + + key.maxCBVSlot = vsReflection->maxCBVSlot; + key.maxSRVSlot = vsReflection->maxSRVSlot; + key.maxUAVSlot = vsReflection->maxUAVSlot; + key.maxSamplerSlot = vsReflection->maxSamplerSlot; + } else if (psReflection) { + key.usedCBVSlots = psReflection->usedCBVSlots; + key.usedSRVSlots = psReflection->usedSRVSlots; + key.usedUAVSlots = psReflection->usedUAVSlots; + key.usedSamplerSlots = psReflection->usedSamplerSlots; + + std::sort(key.usedCBVSlots.begin(), key.usedCBVSlots.end()); + key.usedCBVSlots.erase(std::unique(key.usedCBVSlots.begin(), key.usedCBVSlots.end()), + key.usedCBVSlots.end()); + + std::sort(key.usedSRVSlots.begin(), key.usedSRVSlots.end()); + key.usedSRVSlots.erase(std::unique(key.usedSRVSlots.begin(), key.usedSRVSlots.end()), + key.usedSRVSlots.end()); + + std::sort(key.usedUAVSlots.begin(), key.usedUAVSlots.end()); + key.usedUAVSlots.erase(std::unique(key.usedUAVSlots.begin(), key.usedUAVSlots.end()), + key.usedUAVSlots.end()); + + std::sort(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()); + key.usedSamplerSlots.erase(std::unique(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()), + key.usedSamplerSlots.end()); + + key.maxCBVSlot = psReflection->maxCBVSlot; + key.maxSRVSlot = psReflection->maxSRVSlot; + key.maxUAVSlot = psReflection->maxUAVSlot; + key.maxSamplerSlot = psReflection->maxSamplerSlot; + } + + // Compute min slots (if any resources are present) + if (!key.usedCBVSlots.empty()) { + key.minCBVSlot = key.usedCBVSlots.front(); + } + if (!key.usedSRVSlots.empty()) { + key.minSRVSlot = key.usedSRVSlots.front(); + } + if (!key.usedUAVSlots.empty()) { + key.minUAVSlot = key.usedUAVSlots.front(); + } + if (!key.usedSamplerSlots.empty()) { + key.minSamplerSlot = key.usedSamplerSlots.front(); + } + + // Remove push constant slot 
from CBV slots (if present) + // Push constants are bound via root constants, not CBV descriptor table + if (key.hasPushConstants) { + key.usedCBVSlots.erase( + std::remove(key.usedCBVSlots.begin(), key.usedCBVSlots.end(), key.pushConstantSlot), + key.usedCBVSlots.end()); + } + + return key; +} + +// Implementation of fromShaderReflection for compute pipeline +inline D3D12RootSignatureKey D3D12RootSignatureKey::fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* csReflection) { + D3D12RootSignatureKey key; + + if (!csReflection) { + return key; + } + + // Copy push constants + key.hasPushConstants = csReflection->hasPushConstants; + key.pushConstantSlot = csReflection->pushConstantSlot; + key.pushConstantSize = csReflection->pushConstantSize; + + // Copy resource slots + key.usedCBVSlots = csReflection->usedCBVSlots; + key.usedSRVSlots = csReflection->usedSRVSlots; + key.usedUAVSlots = csReflection->usedUAVSlots; + key.usedSamplerSlots = csReflection->usedSamplerSlots; + + // Ensure resource slot lists are sorted and unique for stable hashing / min/max tracking + std::sort(key.usedCBVSlots.begin(), key.usedCBVSlots.end()); + key.usedCBVSlots.erase(std::unique(key.usedCBVSlots.begin(), key.usedCBVSlots.end()), + key.usedCBVSlots.end()); + + std::sort(key.usedSRVSlots.begin(), key.usedSRVSlots.end()); + key.usedSRVSlots.erase(std::unique(key.usedSRVSlots.begin(), key.usedSRVSlots.end()), + key.usedSRVSlots.end()); + + std::sort(key.usedUAVSlots.begin(), key.usedUAVSlots.end()); + key.usedUAVSlots.erase(std::unique(key.usedUAVSlots.begin(), key.usedUAVSlots.end()), + key.usedUAVSlots.end()); + + std::sort(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()); + key.usedSamplerSlots.erase(std::unique(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()), + key.usedSamplerSlots.end()); + + key.maxCBVSlot = csReflection->maxCBVSlot; + key.maxSRVSlot = csReflection->maxSRVSlot; + key.maxUAVSlot = csReflection->maxUAVSlot; + key.maxSamplerSlot = 
csReflection->maxSamplerSlot; + + // Remove push constant slot from CBV slots (if present) + // Push constants are bound via root constants, not CBV descriptor table + if (key.hasPushConstants) { + key.usedCBVSlots.erase( + std::remove(key.usedCBVSlots.begin(), key.usedCBVSlots.end(), key.pushConstantSlot), + key.usedCBVSlots.end()); + } + + // Compute min slots (if any resources are present) + if (!key.usedCBVSlots.empty()) { + key.minCBVSlot = key.usedCBVSlots.front(); + } + if (!key.usedSRVSlots.empty()) { + key.minSRVSlot = key.usedSRVSlots.front(); + } + if (!key.usedUAVSlots.empty()) { + key.minUAVSlot = key.usedUAVSlots.front(); + } + if (!key.usedSamplerSlots.empty()) { + key.minSamplerSlot = key.usedSamplerSlots.front(); + } + + // Compute shaders don't need input assembler + key.flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + return key; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12SamplerCache.h b/src/igl/d3d12/D3D12SamplerCache.h new file mode 100644 index 0000000000..faa0b610fa --- /dev/null +++ b/src/igl/d3d12/D3D12SamplerCache.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace igl::d3d12 { + +struct SamplerCacheStats { + size_t cacheHits = 0; + size_t cacheMisses = 0; + size_t activeSamplers = 0; + float hitRate = 0.0f; +}; + +class D3D12SamplerCache { + public: + D3D12SamplerCache() = default; + + [[nodiscard]] std::shared_ptr createSamplerState( + const SamplerStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + const size_t samplerHash = std::hash{}(desc); + + { + std::lock_guard lock(samplerCacheMutex_); + + auto it = samplerCache_.find(samplerHash); + if (it != samplerCache_.end()) { + std::shared_ptr existingSampler = it->second.lock(); + + if (existingSampler) { + samplerCacheHits_++; + const size_t totalRequests = + samplerCacheHits_ + samplerCacheMisses_; + IGL_D3D12_LOG_VERBOSE( + "D3D12SamplerCache::createSamplerState: Cache HIT " + "(hash=0x%zx, hits=%zu, misses=%zu, hit rate=%.1f%%)\n", + samplerHash, + samplerCacheHits_, + samplerCacheMisses_, + totalRequests > 0 + ? 
100.0 * samplerCacheHits_ / + static_cast(totalRequests) + : 0.0); + Result::setOk(outResult); + // Upcast shared_ptr -> shared_ptr + return existingSampler; + } else { + samplerCache_.erase(it); + } + } + } + + D3D12_SAMPLER_DESC samplerDesc = {}; + + auto toD3D12Address = [](SamplerAddressMode m) { + switch (m) { + case SamplerAddressMode::Repeat: + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case SamplerAddressMode::MirrorRepeat: + return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case SamplerAddressMode::Clamp: + return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + default: + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + } + }; + + auto toD3D12Compare = [](CompareFunction f) { + switch (f) { + case CompareFunction::Less: + return D3D12_COMPARISON_FUNC_LESS; + case CompareFunction::LessEqual: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CompareFunction::Greater: + return D3D12_COMPARISON_FUNC_GREATER; + case CompareFunction::GreaterEqual: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CompareFunction::Equal: + return D3D12_COMPARISON_FUNC_EQUAL; + case CompareFunction::NotEqual: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CompareFunction::AlwaysPass: + return D3D12_COMPARISON_FUNC_ALWAYS; + case CompareFunction::Never: + return D3D12_COMPARISON_FUNC_NEVER; + default: + return D3D12_COMPARISON_FUNC_NEVER; + } + }; + + const bool useComparison = desc.depthCompareEnabled; + + const bool minLinear = (desc.minFilter != SamplerMinMagFilter::Nearest); + const bool magLinear = (desc.magFilter != SamplerMinMagFilter::Nearest); + const bool mipLinear = (desc.mipFilter == SamplerMipFilter::Linear); + const bool anisotropic = (desc.maxAnisotropic > 1); + + if (anisotropic) { + samplerDesc.Filter = useComparison + ? 
D3D12_FILTER_COMPARISON_ANISOTROPIC + : D3D12_FILTER_ANISOTROPIC; + samplerDesc.MaxAnisotropy = + std::min(desc.maxAnisotropic, 16); + } else { + D3D12_FILTER filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + if (minLinear && magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + } else if (minLinear && magLinear && !mipLinear) { + filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + } else if (minLinear && !magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; + } else if (minLinear && !magLinear && !mipLinear) { + filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; + } else if (!minLinear && magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR; + } else if (!minLinear && magLinear && !mipLinear) { + filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT; + } else if (!minLinear && !magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; + } + + if (useComparison) { + filter = static_cast( + filter | D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT - + D3D12_FILTER_MIN_MAG_MIP_POINT); + } + samplerDesc.Filter = filter; + samplerDesc.MaxAnisotropy = 1; + } + + samplerDesc.AddressU = toD3D12Address(desc.addressModeU); + samplerDesc.AddressV = toD3D12Address(desc.addressModeV); + samplerDesc.AddressW = toD3D12Address(desc.addressModeW); + samplerDesc.MipLODBias = 0.0f; + // For comparison samplers, use the requested depth comparison function. + // For non-comparison samplers, set ComparisonFunc to NEVER so that the + // debug layer does not flag spurious D3D12_MESSAGE_ID 1361 warnings when + // Filter is not a comparison filter. The value is ignored in this case. + samplerDesc.ComparisonFunc = + useComparison ? 
toD3D12Compare(desc.depthCompareFunction) + : D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.BorderColor[0] = 0.0f; + samplerDesc.BorderColor[1] = 0.0f; + samplerDesc.BorderColor[2] = 0.0f; + samplerDesc.BorderColor[3] = 0.0f; + samplerDesc.MinLOD = static_cast(desc.mipLodMin); + samplerDesc.MaxLOD = static_cast(desc.mipLodMax); + + auto concreteSampler = std::make_shared(samplerDesc); + std::shared_ptr samplerState = + std::static_pointer_cast(concreteSampler); + + { + std::lock_guard lock(samplerCacheMutex_); + samplerCache_[samplerHash] = concreteSampler; + samplerCacheMisses_++; + IGL_D3D12_LOG_VERBOSE( + "D3D12SamplerCache::createSamplerState: Cache MISS " + "(hash=0x%zx, total misses=%zu)\n", + samplerHash, + samplerCacheMisses_); + } + + Result::setOk(outResult); + return samplerState; + } + + [[nodiscard]] SamplerCacheStats getStats() const { + std::lock_guard lock(samplerCacheMutex_); + + SamplerCacheStats stats; + stats.cacheHits = samplerCacheHits_; + stats.cacheMisses = samplerCacheMisses_; + + stats.activeSamplers = 0; + for (const auto& [hash, weakPtr] : samplerCache_) { + (void)hash; + if (!weakPtr.expired()) { + stats.activeSamplers++; + } + } + + const size_t totalRequests = stats.cacheHits + stats.cacheMisses; + if (totalRequests > 0) { + stats.hitRate = 100.0f * static_cast(stats.cacheHits) / + static_cast(totalRequests); + } + + return stats; + } + + void clear() { + std::lock_guard lock(samplerCacheMutex_); + samplerCache_.clear(); + samplerCacheHits_ = 0; + samplerCacheMisses_ = 0; + } + + private: + mutable std::unordered_map> samplerCache_; + mutable std::mutex samplerCacheMutex_; + mutable size_t samplerCacheHits_ = 0; + mutable size_t samplerCacheMisses_ = 0; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12StagingDevice.cpp b/src/igl/d3d12/D3D12StagingDevice.cpp new file mode 100644 index 0000000000..df76ef8bb4 --- /dev/null +++ b/src/igl/d3d12/D3D12StagingDevice.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (c) Meta Platforms, Inc. 
and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +D3D12StagingDevice::D3D12StagingDevice(ID3D12Device* device, + ID3D12Fence* fence, + UploadRingBuffer* uploadRingBuffer) + : device_(device), fence_(fence), uploadRingBuffer_(uploadRingBuffer) { + IGL_DEBUG_ASSERT(device_); + IGL_DEBUG_ASSERT(fence_); + + IGL_D3D12_LOG_VERBOSE("D3D12StagingDevice: Initialized (ring buffer: %s)\n", + uploadRingBuffer_ ? "yes" : "no"); +} + +D3D12StagingDevice::~D3D12StagingDevice() { + // Wait for all in-flight buffers to complete + if (fence_) { + for (const auto& entry : inFlightBuffers_) { + if (fence_->GetCompletedValue() < entry.fenceValue) { + FenceWaiter waiter(fence_, entry.fenceValue); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("D3D12StagingDevice::~D3D12StagingDevice() - Fence wait failed during cleanup: %s\n", + waitResult.message.c_str()); + } + } + } + } + + IGL_D3D12_LOG_VERBOSE("D3D12StagingDevice: Destroyed\n"); +} + +D3D12StagingDevice::StagingBuffer D3D12StagingDevice::allocateUpload(size_t size, + size_t alignment, + uint64_t fenceValue) { + // Try ring buffer first for small allocations + if (uploadRingBuffer_ && size <= kMaxRingBufferAllocation) { + auto ringAlloc = uploadRingBuffer_->allocate(size, alignment, fenceValue); + if (ringAlloc.valid) { + StagingBuffer result; + result.buffer = ringAlloc.buffer; + result.mappedPtr = ringAlloc.cpuAddress; + result.size = ringAlloc.size; + result.offset = ringAlloc.offset; + result.valid = true; + result.isFromRingBuffer = true; + return result; + } + } + + // Fall back to dedicated staging buffer + std::lock_guard lock(poolMutex_); + + // Reclaim completed buffers + reclaimCompletedBuffers(); + + igl::d3d12::ComPtr buffer; + + // Try to find a reusable buffer + if (!findReusableBuffer(size, false, 
&buffer)) { + // Create new buffer + Result result = createStagingBuffer(size, false, &buffer); + if (!result.isOk() || !buffer.Get()) { + return StagingBuffer{}; // Return invalid buffer + } + } + + // Map the buffer + void* mappedPtr = nullptr; + D3D12_RANGE readRange{0, 0}; // Not reading + HRESULT hr = buffer->Map(0, &readRange, &mappedPtr); + if (FAILED(hr) || !mappedPtr) { + IGL_LOG_ERROR("D3D12StagingDevice: Failed to map upload buffer\n"); + return StagingBuffer{}; + } + + StagingBuffer staging; + staging.buffer = buffer; + staging.mappedPtr = mappedPtr; + staging.size = size; + staging.offset = 0; + staging.valid = true; + staging.isFromRingBuffer = false; + + return staging; +} + +D3D12StagingDevice::StagingBuffer D3D12StagingDevice::allocateReadback(size_t size) { + std::lock_guard lock(poolMutex_); + + // Reclaim completed buffers + reclaimCompletedBuffers(); + + igl::d3d12::ComPtr buffer; + + // Try to find a reusable buffer + if (!findReusableBuffer(size, true, &buffer)) { + // Create new buffer + Result result = createStagingBuffer(size, true, &buffer); + if (!result.isOk() || !buffer.Get()) { + return StagingBuffer{}; // Return invalid buffer + } + } + + // Readback buffers are mapped on-demand when needed + StagingBuffer staging; + staging.buffer = buffer; + staging.mappedPtr = nullptr; + staging.size = size; + staging.offset = 0; + staging.valid = true; + staging.isFromRingBuffer = false; + + return staging; +} + +void D3D12StagingDevice::free(StagingBuffer buffer, uint64_t fenceValue) { + if (!buffer.valid) { + return; + } + + // Ring buffer allocations are handled automatically + if (buffer.isFromRingBuffer) { + return; + } + + std::lock_guard lock(poolMutex_); + + // Unmap if it was mapped + if (buffer.mappedPtr) { + buffer.buffer->Unmap(0, nullptr); + } + + // Add to in-flight list + BufferEntry entry; + entry.buffer = buffer.buffer; + entry.size = buffer.size; + entry.fenceValue = fenceValue; + + // Determine if it's a readback buffer + 
D3D12_HEAP_PROPERTIES heapProps; + buffer.buffer->GetHeapProperties(&heapProps, nullptr); + entry.isReadback = (heapProps.Type == D3D12_HEAP_TYPE_READBACK); + + inFlightBuffers_.push_back(std::move(entry)); +} + +void D3D12StagingDevice::reclaimCompletedBuffers() { + // Note: Internal helper called by allocate* methods with poolMutex_ already held + if (!fence_) { + return; + } + + const uint64_t completedValue = fence_->GetCompletedValue(); + + // Move completed buffers from in-flight to available + auto it = inFlightBuffers_.begin(); + while (it != inFlightBuffers_.end()) { + if (it->fenceValue <= completedValue) { + availableBuffers_.push_back({it->buffer, it->size, 0, it->isReadback}); + it = inFlightBuffers_.erase(it); + } else { + ++it; + } + } +} + +Result D3D12StagingDevice::createStagingBuffer( + size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer) { + D3D12_HEAP_PROPERTIES heapProps{}; + heapProps.Type = forReadback ? D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC desc{}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + D3D12_RESOURCE_STATES initialState = forReadback ? D3D12_RESOURCE_STATE_COPY_DEST + : D3D12_RESOURCE_STATE_GENERIC_READ; + + HRESULT hr = device_->CreateCommittedResource(&heapProps, + D3D12_HEAP_FLAG_NONE, + &desc, + initialState, + nullptr, + IID_PPV_ARGS(outBuffer->GetAddressOf())); + + if (FAILED(hr)) { + return Result{Result::Code::RuntimeError, "Failed to create staging buffer"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12StagingDevice: Created new %s buffer (size: %zu bytes)\n", + forReadback ? 
"readback" : "upload", + size); + + return Result{}; +} + +bool D3D12StagingDevice::findReusableBuffer(size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer) { + // Find a buffer that matches type and is large enough + for (auto it = availableBuffers_.begin(); it != availableBuffers_.end(); ++it) { + if (it->isReadback == forReadback && it->size >= size) { + // Prefer buffers that are close in size (within 2x) to avoid waste + if (it->size <= size * 2) { + *outBuffer = it->buffer; + availableBuffers_.erase(it); + return true; + } + } + } + + return false; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12StagingDevice.h b/src/igl/d3d12/D3D12StagingDevice.h new file mode 100644 index 0000000000..f2e8b04d57 --- /dev/null +++ b/src/igl/d3d12/D3D12StagingDevice.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class UploadRingBuffer; + +/** + * @brief Centralized management of staging buffers for upload/readback + * + * Provides pooled staging buffer allocation for upload and readback operations, + * eliminating per-operation staging buffer creation and improving reuse. + * + * Inspired by Vulkan's VulkanStagingDevice pattern. 
+ */ +class D3D12StagingDevice { +public: + /** + * @brief Staging buffer allocation + */ + struct StagingBuffer { + igl::d3d12::ComPtr buffer; + void* mappedPtr = nullptr; + size_t size = 0; + uint64_t offset = 0; // Offset within buffer (for ring buffer allocations) + bool valid = false; + bool isFromRingBuffer = false; // True if allocated from ring buffer + + StagingBuffer() = default; + }; + + /** + * @brief Initialize the staging device + * @param device D3D12 device for resource creation + * @param fence Fence for completion tracking + * @param uploadRingBuffer Optional existing upload ring buffer to integrate + */ + D3D12StagingDevice(ID3D12Device* device, + ID3D12Fence* fence, + UploadRingBuffer* uploadRingBuffer = nullptr); + + ~D3D12StagingDevice(); + + /** + * @brief Allocate a staging buffer for upload operations + * + * First attempts to use the upload ring buffer if available and size permits. + * Falls back to creating a dedicated staging buffer for large allocations. + * + * @param size Size in bytes + * @param alignment Required alignment (default 256 for constant buffers) + * @param fenceValue Fence value when this allocation will be used + * @return Staging buffer allocation + */ + [[nodiscard]] StagingBuffer allocateUpload(size_t size, + size_t alignment = 256, + uint64_t fenceValue = 0); + + /** + * @brief Allocate a staging buffer for readback operations + * + * Readback buffers are in READBACK heap (CPU-readable after GPU write). + * + * @param size Size in bytes + * @return Staging buffer allocation + */ + [[nodiscard]] StagingBuffer allocateReadback(size_t size); + + /** + * @brief Free a staging buffer + * + * Buffers allocated from ring buffer are automatically recycled. + * Dedicated buffers are pooled for reuse. 
+ * + * @param buffer Buffer to free + * @param fenceValue Fence value when GPU is done using this buffer + */ + void free(StagingBuffer buffer, uint64_t fenceValue); + +private: + /** + * @brief Reclaim completed staging buffers back to pool + * + * Internal method called during allocate* to recycle buffers. + * Must be called with poolMutex_ held. + */ + void reclaimCompletedBuffers(); + struct BufferEntry { + igl::d3d12::ComPtr buffer; + size_t size = 0; + uint64_t fenceValue = 0; // Fence value when this buffer was last used + bool isReadback = false; // True for READBACK heap, false for UPLOAD heap + }; + + ID3D12Device* device_ = nullptr; + ID3D12Fence* fence_ = nullptr; + UploadRingBuffer* uploadRingBuffer_ = nullptr; + + // Pool of available staging buffers + std::vector availableBuffers_; + + // Buffers in flight (waiting for GPU) + std::vector inFlightBuffers_; + + // Mutex for thread-safe pool access + std::mutex poolMutex_; + + // Maximum size to use ring buffer (larger allocations get dedicated buffers) + static constexpr size_t kMaxRingBufferAllocation = 1024 * 1024; // 1MB + + // Create a new staging buffer + [[nodiscard]] Result createStagingBuffer(size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer); + + // Find a reusable buffer from the pool + [[nodiscard]] bool findReusableBuffer(size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer); +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12StateTransition.h b/src/igl/d3d12/D3D12StateTransition.h new file mode 100644 index 0000000000..89dc980647 --- /dev/null +++ b/src/igl/d3d12/D3D12StateTransition.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +/** + * @brief D3D12 Resource State Transition Helper + * + * Provides conservative validation for D3D12 resource state transitions. + * Enforces write-to-write transitions through COMMON intermediate state. + * + * Conservative Policy (voluntary, not D3D12 spec requirement): + * - Any state with write bits -> any state with write bits: Use COMMON intermediate + * (e.g., RENDER_TARGET -> COMMON -> COPY_DEST) + * - All other transitions: Direct transition allowed + * + * Note: D3D12 spec allows direct write-to-write transitions with a single barrier. + * This helper uses COMMON intermediate as an extra-conservative policy. + * + * See: https://learn.microsoft.com/windows/win32/direct3d12/using-resource-barriers-to-synchronize-gpu-access-to-resources + */ +class D3D12StateTransition { + public: + /** + * @brief Check if a state contains any write bits + * + * Tests whether the state mask includes any write-capable bits. + * Used to enforce conservative "write-to-write requires COMMON" policy. 
+ */ + static bool isWriteState(D3D12_RESOURCE_STATES state) { + constexpr D3D12_RESOURCE_STATES kWriteMask = + D3D12_RESOURCE_STATE_RENDER_TARGET | + D3D12_RESOURCE_STATE_UNORDERED_ACCESS | + D3D12_RESOURCE_STATE_DEPTH_WRITE | + D3D12_RESOURCE_STATE_COPY_DEST | + D3D12_RESOURCE_STATE_RESOLVE_DEST | + D3D12_RESOURCE_STATE_STREAM_OUT | + D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE | + D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE | + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE; + return (state & kWriteMask) != 0; + } + + /** + * @brief Check if a direct state transition is allowed + * + * @return true if direct transition allowed, false if COMMON intermediate required + */ + static bool isLegalDirectTransition(D3D12_RESOURCE_STATES from, + D3D12_RESOURCE_STATES to) { + if (from == to) { + return true; + } + + // COMMON can transition to/from anything directly + if (from == D3D12_RESOURCE_STATE_COMMON || to == D3D12_RESOURCE_STATE_COMMON) { + return true; + } + + // Write-to-write requires COMMON intermediate + if (isWriteState(from) && isWriteState(to)) { + return false; + } + + return true; + } +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12Telemetry.h b/src/igl/d3d12/D3D12Telemetry.h new file mode 100644 index 0000000000..2b1c2ca61e --- /dev/null +++ b/src/igl/d3d12/D3D12Telemetry.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class D3D12Telemetry { + public: + void incrementDrawCount(size_t count) noexcept { + currentDrawCount_.fetch_add(count, std::memory_order_relaxed); + } + + void incrementShaderCompilationCount() noexcept { + shaderCompilationCount_.fetch_add(1, std::memory_order_relaxed); + } + + [[nodiscard]] size_t getDrawCount() const noexcept { + return currentDrawCount_.load(std::memory_order_relaxed); + } + + [[nodiscard]] size_t getShaderCompilationCount() const noexcept { + return shaderCompilationCount_.load(std::memory_order_relaxed); + } + + private: + std::atomic currentDrawCount_{0}; + std::atomic shaderCompilationCount_{0}; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DXCCompiler.cpp b/src/igl/d3d12/DXCCompiler.cpp new file mode 100644 index 0000000000..42352d1239 --- /dev/null +++ b/src/igl/d3d12/DXCCompiler.cpp @@ -0,0 +1,240 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +DXCCompiler::DXCCompiler() = default; +DXCCompiler::~DXCCompiler() = default; + +Result DXCCompiler::initialize() { + if (initialized_) { + return Result(); + } + + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Initializing DXC compiler...\n"); + + // Create DXC utils + HRESULT hr = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(utils_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create DxcUtils: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create DxcUtils"); + } + + // Create DXC compiler + hr = DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(compiler_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create DxcCompiler: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create DxcCompiler"); + } + + // Create default include handler + hr = utils_->CreateDefaultIncludeHandler(includeHandler_.GetAddressOf()); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create include handler: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create include handler"); + } + + // Create DXC validator for DXIL signing (optional but highly recommended) + hr = DxcCreateInstance(CLSID_DxcValidator, IID_PPV_ARGS(validator_.GetAddressOf())); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validator not available (0x%08X) - DXIL will be unsigned\n", static_cast(hr)); + // Not a fatal error - continue without validator + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validator initialized - DXIL signing available\n"); + } + + initialized_ = true; + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Initialization successful (Shader Model 6.0+ enabled)\n"); + + return Result(); +} + +Result DXCCompiler::compile( + const char* source, + size_t 
sourceLength, + const char* entryPoint, + const char* target, + const char* debugName, + uint32_t flags, + std::vector& outBytecode, + std::string& outErrors) { + + if (!initialized_) { + return Result(Result::Code::InvalidOperation, "DXC compiler not initialized"); + } + + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Compiling shader '%s' with target '%s' (%zu bytes source)\n", + debugName ? debugName : "unnamed", + target, + sourceLength); + + // Create source blob + igl::d3d12::ComPtr sourceBlob; + HRESULT hr = utils_->CreateBlob(source, static_cast(sourceLength), CP_UTF8, sourceBlob.GetAddressOf()); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create source blob: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create source blob"); + } + + // Convert strings to wide char for DXC API + std::wstring_convert> converter; + std::wstring wEntryPoint = converter.from_bytes(entryPoint); + std::wstring wTarget = converter.from_bytes(target); + + // Build compilation arguments + std::vector arguments; + + // Entry point + arguments.push_back(L"-E"); + arguments.push_back(wEntryPoint.c_str()); + + // Target profile + arguments.push_back(L"-T"); + arguments.push_back(wTarget.c_str()); + + // Debug info and optimization + if (flags & D3DCOMPILE_DEBUG) { + IGL_D3D12_LOG_VERBOSE(" DXC: Debug mode enabled\n"); + arguments.push_back(L"-Zi"); // Debug info + arguments.push_back(L"-Qembed_debug"); // Embed debug info in shader + arguments.push_back(L"-Od"); // Disable optimizations + } else { + IGL_D3D12_LOG_VERBOSE(" DXC: Release mode - maximum optimization\n"); + arguments.push_back(L"-O3"); // Maximum optimization + } + + // Skip optimization flag + if (flags & D3DCOMPILE_SKIP_OPTIMIZATION) { + arguments.push_back(L"-Od"); + } + + // Warnings as errors + if (flags & D3DCOMPILE_WARNINGS_ARE_ERRORS) { + IGL_D3D12_LOG_VERBOSE(" DXC: Treating warnings as errors\n"); + arguments.push_back(L"-WX"); + } + + // Compile + DxcBuffer sourceBuffer 
= {}; + sourceBuffer.Ptr = sourceBlob->GetBufferPointer(); + sourceBuffer.Size = sourceBlob->GetBufferSize(); + sourceBuffer.Encoding = CP_UTF8; + + igl::d3d12::ComPtr result; + hr = compiler_->Compile( + &sourceBuffer, + arguments.data(), + static_cast(arguments.size()), + includeHandler_.Get(), + IID_PPV_ARGS(result.GetAddressOf()) + ); + + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Compilation invocation failed: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "DXC compilation invocation failed"); + } + + // Check compilation status + HRESULT compileStatus; + result->GetStatus(&compileStatus); + + // Get errors/warnings + igl::d3d12::ComPtr errors; + igl::d3d12::ComPtr errorsName; + result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(errors.GetAddressOf()), errorsName.GetAddressOf()); + if (errors.Get() && errors->GetStringLength() > 0) { + outErrors = std::string(errors->GetStringPointer(), errors->GetStringLength()); + } + + if (FAILED(compileStatus)) { + IGL_LOG_ERROR("DXCCompiler: Shader compilation failed\n"); + if (!outErrors.empty()) { + IGL_LOG_ERROR("%s\n", outErrors.c_str()); + } + return Result(Result::Code::RuntimeError, "Shader compilation failed: " + outErrors); + } + + // Log warnings if any + if (!outErrors.empty()) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Compilation warnings:\n%s\n", outErrors.c_str()); + } + + // Get compiled bytecode (DXIL) + igl::d3d12::ComPtr bytecode; + igl::d3d12::ComPtr bytecodeName; + result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(bytecode.GetAddressOf()), bytecodeName.GetAddressOf()); + + if (!bytecode.Get()) { + IGL_LOG_ERROR("DXCCompiler: No bytecode produced\n"); + return Result(Result::Code::RuntimeError, "No bytecode produced"); + } + + // Validate and sign DXIL if validator is available + if (validator_.Get()) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Attempting DXIL validation and signing...\n"); + igl::d3d12::ComPtr validationResult; + hr = validator_->Validate(bytecode.Get(), 
DxcValidatorFlags_InPlaceEdit, validationResult.GetAddressOf()); + + if (SUCCEEDED(hr)) { + HRESULT validationStatus; + validationResult->GetStatus(&validationStatus); + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validation status: 0x%08X\n", static_cast(validationStatus)); + + if (SUCCEEDED(validationStatus)) { + // Get the validated (signed) bytecode - this replaces the original + igl::d3d12::ComPtr validatedBlob; + validationResult->GetResult(validatedBlob.GetAddressOf()); + + if (validatedBlob.Get()) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Got validated blob (%zu bytes)\n", validatedBlob->GetBufferSize()); + // Replace bytecode with validated version using move semantics + bytecode.Reset(); + bytecode = std::move(validatedBlob); + IGL_D3D12_LOG_VERBOSE("DXCCompiler: DXIL validated and signed successfully\n"); + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validation succeeded but no blob returned\n"); + } + } else { + // Validation failed - get error messages + igl::d3d12::ComPtr validationErrors; + validationResult->GetErrorBuffer(validationErrors.GetAddressOf()); + if (validationErrors.Get() && validationErrors->GetBufferSize() > 0) { + std::string errMsg(static_cast(validationErrors->GetBufferPointer()), + validationErrors->GetBufferSize()); + IGL_D3D12_LOG_VERBOSE("DXCCompiler: DXIL validation failed:\n%s\n", errMsg.c_str()); + } + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Using unsigned DXIL (may require experimental features)\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: DXIL validation skipped (validator error 0x%08X)\n", static_cast(hr)); + } + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Using unsigned DXIL (validator not available)\n"); + } + + // Copy bytecode to output (either signed or unsigned) + const uint8_t* data = static_cast(bytecode->GetBufferPointer()); + size_t size = bytecode->GetBufferSize(); + outBytecode.assign(data, data + size); + + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Compilation successful (%zu bytes DXIL bytecode)\n", size); + + 
return Result(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DXCCompiler.h b/src/igl/d3d12/DXCCompiler.h new file mode 100644 index 0000000000..49e77011a0 --- /dev/null +++ b/src/igl/d3d12/DXCCompiler.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief DXC (DirectX Shader Compiler) wrapper for modern HLSL compilation + * + * Replaces legacy FXC (D3DCompile) with DXC for: + * - Shader Model 6.0+ support + * - 10-20% better shader performance + * - Modern optimization passes + * - Future features (raytracing, mesh shaders, wave intrinsics) + */ +class DXCCompiler { + public: + DXCCompiler(); + ~DXCCompiler(); + + /** + * @brief Initialize DXC compiler (call once) + * @return Result indicating success or failure + */ + Result initialize(); + + /** + * @brief Check if DXC is available and initialized + */ + bool isInitialized() const { return initialized_; } + + /** + * @brief Compile HLSL source to DXIL bytecode (Shader Model 6.0+) + * + * @param source HLSL source code + * @param sourceLength Length of source code + * @param entryPoint Entry point function name (e.g., "main") + * @param target Shader target profile (e.g., "vs_6_0", "ps_6_0", "cs_6_0") + * @param debugName Debug name for error messages + * @param flags Compilation flags (D3DCOMPILE_* constants) + * @param outBytecode Output DXIL bytecode + * @param outErrors Output compilation errors/warnings + * @return Result indicating success or failure + */ + Result compile( + const char* source, + size_t sourceLength, + const char* entryPoint, + const char* target, + const char* debugName, + uint32_t flags, + std::vector& outBytecode, + std::string& outErrors + ); + + private: + igl::d3d12::ComPtr utils_; + igl::d3d12::ComPtr 
compiler_; + igl::d3d12::ComPtr includeHandler_; + igl::d3d12::ComPtr validator_; + bool initialized_ = false; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DepthStencilState.h b/src/igl/d3d12/DepthStencilState.h new file mode 100644 index 0000000000..d765465625 --- /dev/null +++ b/src/igl/d3d12/DepthStencilState.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class DepthStencilState final : public IDepthStencilState { + public: + explicit DepthStencilState(const DepthStencilStateDesc& desc) : desc_(desc) {} + ~DepthStencilState() override = default; + + const DepthStencilStateDesc& getDesc() const { return desc_; } + + private: + DepthStencilStateDesc desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DescriptorHeapManager.cpp b/src/igl/d3d12/DescriptorHeapManager.cpp new file mode 100644 index 0000000000..a0ea524477 --- /dev/null +++ b/src/igl/d3d12/DescriptorHeapManager.cpp @@ -0,0 +1,730 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include + +namespace igl::d3d12 { + +Result DescriptorHeapManager::initialize(ID3D12Device* device, const Sizes& sizes) { + if (!device) { + return Result(Result::Code::ArgumentInvalid, "Null device for DescriptorHeapManager"); + } + + // A-006: Copy requested sizes, then validate/clamp against device limits + sizes_ = sizes; + validateAndClampSizes(device); + + // Create shader-visible CBV/SRV/UAV heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + desc.NumDescriptors = sizes_.cbvSrvUav; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(cbvSrvUavHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create CBV/SRV/UAV heap " + "(size=%u descriptors)\n", sizes_.cbvSrvUav); + return Result(Result::Code::RuntimeError, "Failed to create CBV/SRV/UAV heap"); + } + cbvSrvUavDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + // Populate free list + freeCbvSrvUav_.reserve(sizes_.cbvSrvUav); + for (uint32_t i = 0; i < sizes_.cbvSrvUav; ++i) { + freeCbvSrvUav_.push_back(i); + } + } + + // Create shader-visible sampler heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + desc.NumDescriptors = sizes_.samplers; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(samplerHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create sampler heap " + "(size=%u descriptors, limit=2048)\n", sizes_.samplers); + return Result(Result::Code::RuntimeError, "Failed to create sampler heap"); + } + samplerDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + // Populate free list + 
freeSamplers_.reserve(sizes_.samplers); + for (uint32_t i = 0; i < sizes_.samplers; ++i) { + freeSamplers_.push_back(i); + } + } + + // Create CPU-visible RTV heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + desc.NumDescriptors = sizes_.rtvs; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(rtvHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create RTV heap " + "(size=%u descriptors)\n", sizes_.rtvs); + return Result(Result::Code::RuntimeError, "Failed to create RTV heap"); + } + rtvDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + // Populate free list + freeRtvs_.reserve(sizes_.rtvs); + for (uint32_t i = 0; i < sizes_.rtvs; ++i) { + freeRtvs_.push_back(i); + } + } + + // Create CPU-visible DSV heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + desc.NumDescriptors = sizes_.dsvs; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(dsvHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create DSV heap " + "(size=%u descriptors)\n", sizes_.dsvs); + return Result(Result::Code::RuntimeError, "Failed to create DSV heap"); + } + dsvDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + // Populate free list + freeDsvs_.reserve(sizes_.dsvs); + for (uint32_t i = 0; i < sizes_.dsvs; ++i) { + freeDsvs_.push_back(i); + } + } + + return Result(); +} + +uint32_t DescriptorHeapManager::allocateRTV() { + std::lock_guard lock(mutex_); + if (freeRtvs_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: RTV heap exhausted! 
" + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.rtvs); + return UINT32_MAX; + } + const uint32_t idx = freeRtvs_.back(); + freeRtvs_.pop_back(); + + return idx; +} + +uint32_t DescriptorHeapManager::allocateDSV() { + std::lock_guard lock(mutex_); + if (freeDsvs_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: DSV heap exhausted! " + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.dsvs); + return UINT32_MAX; + } + const uint32_t idx = freeDsvs_.back(); + freeDsvs_.pop_back(); + + return idx; +} + +void DescriptorHeapManager::freeRTV(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.rtvs) { + return; + } + + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - acceptable for RTV heap (typically ~256 descriptors) + if (std::find(freeRtvs_.begin(), freeRtvs_.end(), index) != freeRtvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - RTV index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of RTV descriptor - caller bug detected"); + return; // Prevent corruption even in release builds + } + + // Add to free list + freeRtvs_.push_back(index); +} + +void DescriptorHeapManager::freeDSV(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.dsvs) { + return; + } + + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - acceptable for DSV heap (typically ~128 descriptors) + if (std::find(freeDsvs_.begin(), freeDsvs_.end(), index) != freeDsvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - DSV index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of DSV descriptor - caller bug detected"); + return; // Prevent corruption even in release builds + } + + // Add to free list + freeDsvs_.push_back(index); +} + 
+uint32_t DescriptorHeapManager::allocateCbvSrvUav() { + std::lock_guard lock(mutex_); + if (freeCbvSrvUav_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: CBV/SRV/UAV heap exhausted! " + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.cbvSrvUav); + return UINT32_MAX; + } + const uint32_t idx = freeCbvSrvUav_.back(); + freeCbvSrvUav_.pop_back(); + + return idx; +} + +uint32_t DescriptorHeapManager::allocateSampler() { + std::lock_guard lock(mutex_); + if (freeSamplers_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: Sampler heap exhausted! " + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.samplers); + return UINT32_MAX; + } + const uint32_t idx = freeSamplers_.back(); + freeSamplers_.pop_back(); + + return idx; +} + +void DescriptorHeapManager::freeCbvSrvUav(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.cbvSrvUav) { + return; + } + +#if IGL_DEBUG + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - can be expensive for large heaps (~4096 descriptors). + // Only enabled in debug builds to avoid overhead in production. + if (std::find(freeCbvSrvUav_.begin(), freeCbvSrvUav_.end(), index) != freeCbvSrvUav_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - CBV/SRV/UAV index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of CBV/SRV/UAV descriptor - caller bug detected"); + return; // Prevent corruption even in debug builds + } +#endif + + // Add to free list + freeCbvSrvUav_.push_back(index); +} + +void DescriptorHeapManager::freeSampler(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.samplers) { + return; + } + +#if IGL_DEBUG + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - can be expensive for large heaps (~2048 descriptors). 
+ // Only enabled in debug builds to avoid overhead in production. + if (std::find(freeSamplers_.begin(), freeSamplers_.end(), index) != freeSamplers_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - Sampler index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of Sampler descriptor - caller bug detected"); + return; // Prevent corruption even in debug builds + } +#endif + + // Add to free list + freeSamplers_.push_back(index); +} + +// Explicit error checking with bool return, building on the basic index validation helpers. +bool DescriptorHeapManager::getRTVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: outHandle is null\n"); + return false; + } + + // Initialize to zero in case of error + *outHandle = {}; + + if (!rtvHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: RTV heap is null\n"); + IGL_DEBUG_ASSERT(false, "RTV heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get RTV handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.rtvs) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: Index %u exceeds heap size %u\n", + index, sizes_.rtvs); + IGL_DEBUG_ASSERT(false, "RTV descriptor index out of bounds"); + return false; + } + + // Check if descriptor has been freed (use-after-free detection) + { + std::lock_guard lock(mutex_); + if (std::find(freeRtvs_.begin(), freeRtvs_.end(), index) != freeRtvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed RTV descriptor"); + return false; + } + } + + *outHandle = rtvHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr 
+= index * rtvDescriptorSize_; + + // Validate final handle is non-null + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getRTVHandle returned null CPU descriptor handle"); + + return true; +} + +// Explicit error checking with bool return, building on descriptor validation helpers. +bool DescriptorHeapManager::getDSVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: outHandle is null\n"); + return false; + } + + // Initialize to zero in case of error + *outHandle = {}; + + if (!dsvHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: DSV heap is null\n"); + IGL_DEBUG_ASSERT(false, "DSV heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get DSV handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.dsvs) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: Index %u exceeds heap size %u\n", + index, sizes_.dsvs); + IGL_DEBUG_ASSERT(false, "DSV descriptor index out of bounds"); + return false; + } + + // Check if descriptor has been freed (use-after-free detection) + { + std::lock_guard lock(mutex_); + if (std::find(freeDsvs_.begin(), freeDsvs_.end(), index) != freeDsvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed DSV descriptor"); + return false; + } + } + + *outHandle = dsvHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * dsvDescriptorSize_; + + // Validate final handle is non-null + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getDSVHandle returned null CPU descriptor handle"); + + return true; +} + +// Bool-returning CBV/SRV/UAV handle getters +bool DescriptorHeapManager::getCbvSrvUavCpuHandle(uint32_t index, 
D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!cbvSrvUavHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: CBV/SRV/UAV heap is null\n"); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get CBV/SRV/UAV handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.cbvSrvUav) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.cbvSrvUav); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeCbvSrvUav_.begin(), freeCbvSrvUav_.end(), index) != freeCbvSrvUav_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed CBV/SRV/UAV descriptor"); + return false; + } + } +#endif + + *outHandle = cbvSrvUavHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * cbvSrvUavDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getCbvSrvUavCpuHandle returned null CPU descriptor handle"); + + return true; +} + +bool DescriptorHeapManager::getCbvSrvUavGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!cbvSrvUavHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: CBV/SRV/UAV heap is null\n"); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV heap is 
null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get CBV/SRV/UAV GPU handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.cbvSrvUav) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.cbvSrvUav); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeCbvSrvUav_.begin(), freeCbvSrvUav_.end(), index) != freeCbvSrvUav_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed CBV/SRV/UAV descriptor"); + return false; + } + } +#endif + + *outHandle = cbvSrvUavHeap_->GetGPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * cbvSrvUavDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getCbvSrvUavGpuHandle returned null GPU descriptor handle"); + + return true; +} + +bool DescriptorHeapManager::getSamplerCpuHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!samplerHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Sampler heap is null\n"); + IGL_DEBUG_ASSERT(false, "Sampler heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get Sampler handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.samplers) { + 
IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.samplers); + IGL_DEBUG_ASSERT(false, "Sampler descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeSamplers_.begin(), freeSamplers_.end(), index) != freeSamplers_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed Sampler descriptor"); + return false; + } + } +#endif + + *outHandle = samplerHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * samplerDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getSamplerCpuHandle returned null CPU descriptor handle"); + + return true; +} + +bool DescriptorHeapManager::getSamplerGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!samplerHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Sampler heap is null\n"); + IGL_DEBUG_ASSERT(false, "Sampler heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get Sampler GPU handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.samplers) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.samplers); + IGL_DEBUG_ASSERT(false, "Sampler descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeSamplers_.begin(), freeSamplers_.end(), index) != freeSamplers_.end()) { + 
IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed Sampler descriptor"); + return false; + } + } +#endif + + *outHandle = samplerHeap_->GetGPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * samplerDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getSamplerGpuHandle returned null GPU descriptor handle"); + + return true; +} + +// Descriptor handle validation helpers. +bool DescriptorHeapManager::isValidRTVIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.rtvs) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated (not in free list) + // This helps detect use-after-free bugs + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeRtvs_) { + if (freeIdx == index) { + return false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +bool DescriptorHeapManager::isValidDSVIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.dsvs) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeDsvs_) { + if (freeIdx == index) { + return false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +bool DescriptorHeapManager::isValidCbvSrvUavIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.cbvSrvUav) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeCbvSrvUav_) { + if (freeIdx == index) { + return 
false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +bool DescriptorHeapManager::isValidSamplerIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.samplers) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeSamplers_) { + if (freeIdx == index) { + return false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +void DescriptorHeapManager::logUsageStats() const { + std::lock_guard lock(mutex_); + IGL_D3D12_LOG_VERBOSE("=== Descriptor Heap Usage Statistics ===\n"); + + // CBV/SRV/UAV heap + const uint32_t cbvSrvUavUsed = sizes_.cbvSrvUav - static_cast(freeCbvSrvUav_.size()); + const float cbvSrvUavPercent = (cbvSrvUavUsed * 100.0f) / sizes_.cbvSrvUav; + IGL_D3D12_LOG_VERBOSE(" CBV/SRV/UAV: %u / %u (%.1f%% used)\n", + cbvSrvUavUsed, sizes_.cbvSrvUav, cbvSrvUavPercent); + + // Sampler heap + const uint32_t samplersUsed = sizes_.samplers - static_cast(freeSamplers_.size()); + const float samplersPercent = (samplersUsed * 100.0f) / sizes_.samplers; + IGL_D3D12_LOG_VERBOSE(" Samplers: %u / %u (%.1f%% used)\n", + samplersUsed, sizes_.samplers, samplersPercent); + + // RTV heap + const uint32_t rtvsUsed = sizes_.rtvs - static_cast(freeRtvs_.size()); + const float rtvsPercent = (rtvsUsed * 100.0f) / sizes_.rtvs; + IGL_D3D12_LOG_VERBOSE(" RTVs: %u / %u (%.1f%% used)\n", + rtvsUsed, sizes_.rtvs, rtvsPercent); + + // DSV heap + const uint32_t dsvsUsed = sizes_.dsvs - static_cast(freeDsvs_.size()); + const float dsvsPercent = (dsvsUsed * 100.0f) / sizes_.dsvs; + IGL_D3D12_LOG_VERBOSE(" DSVs: %u / %u (%.1f%% used)\n", + dsvsUsed, sizes_.dsvs, dsvsPercent); + + IGL_D3D12_LOG_VERBOSE("========================================\n"); +} + +// Explicit cleanup to 
release descriptor heaps before device destruction +void DescriptorHeapManager::cleanup() { + std::lock_guard lock(mutex_); + + // Release all descriptor heaps explicitly to prevent leaks + cbvSrvUavHeap_.Reset(); + samplerHeap_.Reset(); + rtvHeap_.Reset(); + dsvHeap_.Reset(); + + // Clear free lists + freeCbvSrvUav_.clear(); + freeSamplers_.clear(); + freeRtvs_.clear(); + freeDsvs_.clear(); +} + +void DescriptorHeapManager::validateAndClampSizes(ID3D12Device* device) { + // A-006: Validate descriptor heap sizes against D3D12 device limits + IGL_D3D12_LOG_VERBOSE("=== Descriptor Heap Size Validation ===\n"); + + // Query device options for resource binding tier (affects limits) + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, + &options, + sizeof(options)); + + if (SUCCEEDED(hr)) { + const char* tierName = "Unknown"; + switch (options.ResourceBindingTier) { + case D3D12_RESOURCE_BINDING_TIER_1: tierName = "Tier 1"; break; + case D3D12_RESOURCE_BINDING_TIER_2: tierName = "Tier 2"; break; + case D3D12_RESOURCE_BINDING_TIER_3: tierName = "Tier 3"; break; + } + IGL_D3D12_LOG_VERBOSE(" Resource Binding Tier: %s\n", tierName); + } + + // === SHADER-VISIBLE CBV/SRV/UAV HEAP === + // D3D12 spec: Max 1,000,000 descriptors for shader-visible heaps (FL 11.0+) + // Conservative limit: 1,000,000 (actual limit may be lower on some hardware) + constexpr uint32_t kMaxCbvSrvUavDescriptors = 1000000; + + if (sizes_.cbvSrvUav > kMaxCbvSrvUavDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested CBV/SRV/UAV heap size (%u) exceeds " + "D3D12 spec limit (%u)\n", + sizes_.cbvSrvUav, kMaxCbvSrvUavDescriptors); + IGL_LOG_ERROR(" Clamping to %u descriptors\n", kMaxCbvSrvUavDescriptors); + sizes_.cbvSrvUav = kMaxCbvSrvUavDescriptors; + } else { + IGL_D3D12_LOG_VERBOSE(" CBV/SRV/UAV heap size: %u (limit: %u) - OK\n", + sizes_.cbvSrvUav, kMaxCbvSrvUavDescriptors); + } + + // === SHADER-VISIBLE SAMPLER HEAP === + // D3D12 
spec: Max 2,048 descriptors (D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE) + constexpr uint32_t kMaxSamplerDescriptors = 2048; + + if (sizes_.samplers > kMaxSamplerDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested sampler heap size (%u) exceeds " + "D3D12 limit (%u)\n", + sizes_.samplers, kMaxSamplerDescriptors); + IGL_LOG_ERROR(" Clamping to %u descriptors\n", kMaxSamplerDescriptors); + sizes_.samplers = kMaxSamplerDescriptors; + } else { + IGL_D3D12_LOG_VERBOSE(" Sampler heap size: %u (limit: %u) - OK\n", + sizes_.samplers, kMaxSamplerDescriptors); + } + + // === CPU-VISIBLE RTV HEAP === + // D3D12 spec: Typically 64K+ descriptors (device-dependent) + // Conservative validation: Warn if exceeding 16K (reasonable limit) + constexpr uint32_t kMaxRtvDescriptors = 16384; + + if (sizes_.rtvs > kMaxRtvDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested RTV heap size (%u) is unusually large\n", + sizes_.rtvs); + IGL_LOG_ERROR(" Recommended maximum: %u descriptors\n", kMaxRtvDescriptors); + // Don't clamp - let CreateDescriptorHeap fail if truly excessive + } else { + IGL_D3D12_LOG_VERBOSE(" RTV heap size: %u (recommended max: %u) - OK\n", + sizes_.rtvs, kMaxRtvDescriptors); + } + + // === CPU-VISIBLE DSV HEAP === + // Similar limits to RTV heap + constexpr uint32_t kMaxDsvDescriptors = 16384; + + if (sizes_.dsvs > kMaxDsvDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested DSV heap size (%u) is unusually large\n", + sizes_.dsvs); + IGL_LOG_ERROR(" Recommended maximum: %u descriptors\n", kMaxDsvDescriptors); + // Don't clamp - let CreateDescriptorHeap fail if truly excessive + } else { + IGL_D3D12_LOG_VERBOSE(" DSV heap size: %u (recommended max: %u) - OK\n", + sizes_.dsvs, kMaxDsvDescriptors); + } + + IGL_D3D12_LOG_VERBOSE("========================================\n"); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DescriptorHeapManager.h b/src/igl/d3d12/DescriptorHeapManager.h new file mode 100644 index 0000000000..c985606214 --- /dev/null +++ 
b/src/igl/d3d12/DescriptorHeapManager.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief Persistent Descriptor Allocator for CPU-visible and long-lived descriptors + * + * ============================================================================ + * ARCHITECTURE: Strategy 2 - Persistent Descriptor Allocator + * ============================================================================ + * + * DescriptorHeapManager handles descriptors with EXPLICIT lifecycle management: + * - **PRIMARY USE**: CPU-visible descriptors (RTV/DSV) for Texture and Framebuffer + * - **SECONDARY USE**: Shader-visible descriptors for headless/unit test contexts + * + * **Key Differences from Per-Frame System (Strategy 1)**: + * - Lifecycle: Allocated at resource creation, freed at destruction (not per-frame reset) + * - Allocation: Free-list pattern (not linear) - supports arbitrary alloc/free + * - Safety: Double-free detection, mutex protection for thread-safety + * - Visibility: Creates both CPU-visible AND shader-visible heaps + * + * **When to Use This vs Per-Frame (D3D12ResourcesBinder)**: + * - Use DescriptorHeapManager for: RTV/DSV allocation for textures/framebuffers + * - Use DescriptorHeapManager for: Headless contexts without per-frame infrastructure + * - Do NOT use for: Transient SRV/UAV/CBV/Samplers during rendering + * - Do NOT use for: Descriptor table binding in encoders + * + * **Design Note**: This class creates shader-visible heaps (CBV/SRV/UAV, Samplers) + * for backward compatibility with headless contexts. 
In normal rendering contexts: + * - D3D12Context uses per-frame heaps (Strategy 1) for shader-visible descriptors + * - DescriptorHeapManager is only used for RTV/DSV allocation + * - Its shader-visible heaps serve as a fallback when per-frame heaps are unavailable + * (e.g., headless/unit-test contexts - see ComputeCommandEncoder.cpp:32-40) + * + * For architecture overview, see D3D12ResourcesBinder.h documentation. + * + * Thread-safety: This class IS thread-safe (uses mutex for allocation/free). + */ +class DescriptorHeapManager { + public: + // Descriptor heap sizes configuration. + // Default values match D3D12ContextConfig for consistency but can be customized at runtime. + struct Sizes { + uint32_t cbvSrvUav = 4096; // shader-visible (kept larger for unit tests/headless) + uint32_t samplers = 2048; // shader-visible (D3D12 spec limit) + uint32_t rtvs = 256; // CPU-visible (default from D3D12ContextConfig) + uint32_t dsvs = 128; // CPU-visible (default from D3D12ContextConfig) + + // Note: D3D12Context and HeadlessContext construct Sizes manually based on their + // specific needs (environment overrides, test requirements, etc.) rather than using + // a generic factory method. To customize, construct Sizes with desired values. 
+ }; + + DescriptorHeapManager() = default; + Result initialize(ID3D12Device* device, const Sizes& sizes = {}); + + // Shader-visible heaps for binding + ID3D12DescriptorHeap* getCbvSrvUavHeap() const { return cbvSrvUavHeap_.Get(); } + ID3D12DescriptorHeap* getSamplerHeap() const { return samplerHeap_.Get(); } + + // Allocate a CPU descriptor from RTV/DSV heaps + uint32_t allocateRTV(); + uint32_t allocateDSV(); + void freeRTV(uint32_t index); + void freeDSV(uint32_t index); + + // Allocate indices inside shader-visible heaps (for creating CBV/SRV/UAV or Samplers) + uint32_t allocateCbvSrvUav(); + uint32_t allocateSampler(); + void freeCbvSrvUav(uint32_t index); + void freeSampler(uint32_t index); + + // Get CPU/GPU descriptor handles with validation. + // Returns false on error (invalid index, null heap, use-after-free) and leaves outHandle zeroed. + // Returns true on success and writes the valid handle to outHandle. + [[nodiscard]] bool getRTVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getDSVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getCbvSrvUavCpuHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getCbvSrvUavGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getSamplerCpuHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getSamplerGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const; + + uint32_t getCbvSrvUavDescriptorSize() const { return cbvSrvUavDescriptorSize_; } + uint32_t getSamplerDescriptorSize() const { return samplerDescriptorSize_; } + uint32_t getRtvDescriptorSize() const { return rtvDescriptorSize_; } + uint32_t getDsvDescriptorSize() const { return dsvDescriptorSize_; } + + // Descriptor handle validation helpers for diagnostics/telemetry. 
+ // Note: These are NOT optimized for hot-path usage (O(N) free-list scans). + // For per-draw/dispatch validation, prefer the get*Handle methods which cache results. + [[nodiscard]] bool isValidRTVIndex(uint32_t index) const; + [[nodiscard]] bool isValidDSVIndex(uint32_t index) const; + [[nodiscard]] bool isValidCbvSrvUavIndex(uint32_t index) const; + [[nodiscard]] bool isValidSamplerIndex(uint32_t index) const; + + // Telemetry: Log current heap usage statistics + void logUsageStats() const; + + // Explicit cleanup of descriptor heaps to prevent leaks. + void cleanup(); + + private: + // Heaps + igl::d3d12::ComPtr cbvSrvUavHeap_; + igl::d3d12::ComPtr samplerHeap_; + igl::d3d12::ComPtr rtvHeap_; + igl::d3d12::ComPtr dsvHeap_; + + // Increments + UINT cbvSrvUavDescriptorSize_ = 0; + UINT samplerDescriptorSize_ = 0; + UINT rtvDescriptorSize_ = 0; + UINT dsvDescriptorSize_ = 0; + + // Free lists for CPU-only heaps + std::vector freeRtvs_; + std::vector freeDsvs_; + // Free lists for shader-visible heaps + std::vector freeCbvSrvUav_; + std::vector freeSamplers_; + + // Total sizes + Sizes sizes_{}; + + // Thread safety + mutable std::mutex mutex_; + + // A-006: Validate and clamp descriptor heap sizes to device limits + void validateAndClampSizes(ID3D12Device* device); +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Device.cpp b/src/igl/d3d12/Device.cpp new file mode 100644 index 0000000000..ec2d04b0ca --- /dev/null +++ b/src/igl/d3d12/Device.cpp @@ -0,0 +1,3648 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For IGL_DEBUG_ASSERT in waitForUploadFence. 
+#include +#include +#include +#include +#include +#include +#include // For std::call_once. +#include +#include +#include + +#pragma comment(lib, "d3dcompiler.lib") + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; + +// Capture D3D12 InfoQueue messages (warnings/errors) into an artifacts log when enabled. +void captureInfoQueueForDevice(ID3D12Device* device) { + const char* captureEnv = std::getenv("IGL_D3D12_CAPTURE_VALIDATION"); + if (!captureEnv || (captureEnv[0] != '1' && captureEnv[0] != 'T' && captureEnv[0] != 't' && + captureEnv[0] != 'Y' && captureEnv[0] != 'y')) { + return; + } + + if (!device) { + return; + } + + ComPtr infoQueue; + if (FAILED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + return; + } + + const UINT64 numMessages = infoQueue->GetNumStoredMessages(); + + namespace fs = std::filesystem; + + fs::path root; + if (const char* rootEnv = std::getenv("IGL_ARTIFACT_ROOT"); + rootEnv && *rootEnv != '\0') { + root = fs::path(rootEnv); + } else { + root = fs::current_path() / "artifacts"; + } + + fs::path logPath = root / "validation" / "D3D12_InfoQueue.log"; + std::error_code ec; + fs::create_directories(logPath.parent_path(), ec); + + std::ofstream out(logPath, std::ios::app); + if (!out) { + return; + } + + out << "=== D3D12 InfoQueue Dump ===\n"; + + if (numMessages == 0) { + out << "[INFO] No non-info D3D12 messages recorded for this device.\n"; + return; + } + + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + if (FAILED(infoQueue->GetMessage(i, nullptr, &messageLength)) || + messageLength == 0) { + continue; + } + + std::vector buffer(messageLength); + auto* message = + reinterpret_cast(buffer.data()); + if (FAILED(infoQueue->GetMessage(i, message, &messageLength))) { + continue; + } + + // Skip informational messages; capture warnings, errors, and corruption. 
+ if (message->Severity == D3D12_MESSAGE_SEVERITY_INFO || + message->Severity == D3D12_MESSAGE_SEVERITY_MESSAGE) { + continue; + } + + // Explicitly ignore well-understood performance-only clear warnings from + // the validation layer (IDs 820 and 821). These indicate that optimized + // clear values were not provided or do not match but do not affect + // correctness; they are tracked separately in the audit documentation. + if (message->ID == 820 || message->ID == 821 || message->ID == 677) { + continue; + } + + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case D3D12_MESSAGE_SEVERITY_CORRUPTION: + severityStr = "CORRUPTION"; + break; + case D3D12_MESSAGE_SEVERITY_ERROR: + severityStr = "ERROR"; + break; + case D3D12_MESSAGE_SEVERITY_WARNING: + severityStr = "WARNING"; + break; + default: + break; + } + + out << "[" << severityStr << "] ID=" << static_cast(message->ID) + << " : " << (message->pDescription ? message->pDescription : "") + << "\n"; + } + + infoQueue->ClearStoredMessages(); +} + +// Log D3D12 and DXGI InfoQueue messages to the runtime log to aid debugging. +// This is used in error paths such as PSO creation failures and device +// removal checks. It is intentionally tolerant of missing debug components +// (dxgidebug.dll, Graphics Tools, etc.) and will simply emit nothing if the +// queues are unavailable. +void logInfoQueuesForDevice(ID3D12Device* device, const char* context) { + if (!device) { + return; + } + + // First, log any messages from the D3D12 device's InfoQueue. 
+ ComPtr infoQueue; + if (SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + const UINT64 numMessages = infoQueue->GetNumStoredMessages(); + IGL_LOG_ERROR(" [%s] D3D12 Info Queue has %llu messages:\n", + context, + static_cast(numMessages)); + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + if (FAILED(infoQueue->GetMessage(i, nullptr, &messageLength)) || + messageLength == 0) { + continue; + } + + std::vector buffer(messageLength); + auto* message = reinterpret_cast(buffer.data()); + if (FAILED(infoQueue->GetMessage(i, message, &messageLength))) { + continue; + } + + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case D3D12_MESSAGE_SEVERITY_CORRUPTION: severityStr = "CORRUPTION"; break; + case D3D12_MESSAGE_SEVERITY_ERROR: severityStr = "ERROR"; break; + case D3D12_MESSAGE_SEVERITY_WARNING: severityStr = "WARNING"; break; + case D3D12_MESSAGE_SEVERITY_INFO: severityStr = "INFO"; break; + case D3D12_MESSAGE_SEVERITY_MESSAGE: severityStr = "MESSAGE"; break; + default: break; + } + IGL_LOG_ERROR(" [D3D12][%s] %s (ID=%u)\n", + severityStr, + message->pDescription ? message->pDescription : "", + static_cast(message->ID)); + } + infoQueue->ClearStoredMessages(); + } + + // Next, attempt to log messages from the global DXGI InfoQueue via + // dxgidebug.dll, if present. This can surface diagnostics that are not + // routed through the per-device D3D12 queue (e.g. swap-chain errors or + // certain shader validation issues). 
+ HMODULE dxgiDebugModule = LoadLibraryA("dxgidebug.dll"); + if (!dxgiDebugModule) { + return; + } + + using PFN_DXGIGetDebugInterface = HRESULT(WINAPI *)(REFIID, void**); + auto dxgiGetDebugInterface = + reinterpret_cast(GetProcAddress(dxgiDebugModule, "DXGIGetDebugInterface")); + + if (dxgiGetDebugInterface) { + ComPtr dxgiInfoQueue; + if (SUCCEEDED(dxgiGetDebugInterface(IID_PPV_ARGS(dxgiInfoQueue.GetAddressOf())))) { + const DXGI_DEBUG_ID producers[] = {DXGI_DEBUG_DXGI, DXGI_DEBUG_DX, DXGI_DEBUG_APP}; + const char* producerNames[] = {"DXGI", "DX", "APP"}; + for (size_t p = 0; p < std::size(producers); ++p) { + const DXGI_DEBUG_ID producer = producers[p]; + const UINT64 numMessages = dxgiInfoQueue->GetNumStoredMessages(producer); + if (numMessages == 0) { + continue; + } + IGL_LOG_ERROR(" [%s] DXGI InfoQueue (%s) has %llu messages:\n", + context, + producerNames[p], + static_cast(numMessages)); + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + if (FAILED(dxgiInfoQueue->GetMessage(producer, i, nullptr, &messageLength)) || + messageLength == 0) { + continue; + } + std::vector buffer(messageLength); + auto* message = + reinterpret_cast(buffer.data()); + if (FAILED(dxgiInfoQueue->GetMessage(producer, i, message, &messageLength))) { + continue; + } + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION: severityStr = "CORRUPTION"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR: severityStr = "ERROR"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING: severityStr = "WARNING"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO: severityStr = "INFO"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_MESSAGE: severityStr = "MESSAGE"; break; + default: break; + } + IGL_LOG_ERROR(" [DXGI/%s][%s] %s (ID=%u)\n", + producerNames[p], + severityStr, + message->pDescription ? 
message->pDescription : "", + static_cast(message->ID)); + } + dxgiInfoQueue->ClearStoredMessages(producer); + } + // Also clear any remaining global messages so subsequent calls only + // report new diagnostics. + dxgiInfoQueue->ClearStoredMessages(DXGI_DEBUG_ALL); + } + } + + FreeLibrary(dxgiDebugModule); +} + +// Use std::hash for deduplication (implemented in igl/SamplerState.cpp). +} // namespace + +// Helper: Calculate root signature cost in DWORDs +// Root signature limit: 64 DWORDs +// Cost formula (per Microsoft documentation): +// - Root constants: 1 DWORD per 32-bit value +// - Root descriptors (CBV/SRV/UAV): 2 DWORDs each +// - Descriptor tables: 1 DWORD each (regardless of table size) +// - Static samplers: 0 DWORDs (free) +// Reference: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits +static uint32_t calculateRootSignatureCost(const D3D12_ROOT_SIGNATURE_DESC& desc) { + uint32_t totalCost = 0; + + for (uint32_t i = 0; i < desc.NumParameters; ++i) { + const auto& param = desc.pParameters[i]; + + switch (param.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + totalCost += param.Constants.Num32BitValues; + IGL_D3D12_LOG_VERBOSE(" [%u] Root constants (b%u): %u DWORDs\n", + i, param.Constants.ShaderRegister, param.Constants.Num32BitValues); + break; + + case D3D12_ROOT_PARAMETER_TYPE_CBV: + totalCost += 2; + IGL_D3D12_LOG_VERBOSE(" [%u] Root CBV (b%u): 2 DWORDs\n", + i, param.Descriptor.ShaderRegister); + break; + + case D3D12_ROOT_PARAMETER_TYPE_SRV: + totalCost += 2; + IGL_D3D12_LOG_VERBOSE(" [%u] Root SRV (t%u): 2 DWORDs\n", + i, param.Descriptor.ShaderRegister); + break; + + case D3D12_ROOT_PARAMETER_TYPE_UAV: + totalCost += 2; + IGL_D3D12_LOG_VERBOSE(" [%u] Root UAV (u%u): 2 DWORDs\n", + i, param.Descriptor.ShaderRegister); + break; + + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + totalCost += 1; + const char* tableType = "Unknown"; + if (param.DescriptorTable.NumDescriptorRanges > 0) { + switch 
(param.DescriptorTable.pDescriptorRanges[0].RangeType) { + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: tableType = "CBV"; break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: tableType = "SRV"; break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: tableType = "UAV"; break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: tableType = "Sampler"; break; + } + } + IGL_D3D12_LOG_VERBOSE(" [%u] Descriptor table (%s): 1 DWORD\n", i, tableType); + break; + } + } + + if (desc.NumStaticSamplers > 0) { + IGL_D3D12_LOG_VERBOSE(" Static samplers: 0 DWORDs (free, count=%u)\n", desc.NumStaticSamplers); + } + + return totalCost; +} + +// Optional debug helper: validate that the shader input/output signatures are +// consistent with the input layout and render target configuration we build +// for a graphics PSO. This is intended purely for diagnostics and has no +// effect on runtime behaviour. +static void validateShaderBindingsAndLayout(const RenderPipelineDesc& desc, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC& psoDesc, + const std::vector& inputElements, + ID3D12ShaderReflection* IGL_NULLABLE vsRefl, + ID3D12ShaderReflection* IGL_NULLABLE psRefl) { + // Environment toggle: IGL_D3D12_VALIDATE_SHADER_BINDINGS=0 disables validation. + if (const char* env = std::getenv("IGL_D3D12_VALIDATE_SHADER_BINDINGS")) { + if (env[0] == '0') { + return; + } + } + + if (!vsRefl) { + return; + } + + D3D12_SHADER_DESC vsDesc = {}; + if (FAILED(vsRefl->GetDesc(&vsDesc))) { + return; + } + + bool hasErrors = false; + + IGL_LOG_INFO("=== D3D12 VALIDATE_SHADER_BINDINGS (%s) ===\n", desc.debugName.c_str()); + + // Helper for case-insensitive string compare. + auto iequals = [](const char* a, const char* b) -> bool { + if (!a || !b) { + return false; + } + while (*a && *b) { + const int ca = std::tolower(static_cast(*a)); + const int cb = std::tolower(static_cast(*b)); + if (ca != cb) { + return false; + } + ++a; + ++b; + } + return *a == '\0' && *b == '\0'; + }; + + // Validate vertex shader inputs against the input layout. 
+ IGL_LOG_INFO(" [VS] InputParameters=%u, InputLayout.Elements=%u\n", + vsDesc.InputParameters, + static_cast(inputElements.size())); + + for (UINT i = 0; i < vsDesc.InputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC paramDesc = {}; + if (FAILED(vsRefl->GetInputParameterDesc(i, ¶mDesc))) { + continue; + } + + // Skip system-value semantics (SV_*), which do not come from the input layout. + if (paramDesc.SemanticName && + paramDesc.SemanticName[0] == 'S' && paramDesc.SemanticName[1] == 'V' && + paramDesc.SemanticName[2] == '_') { + continue; + } + + bool found = false; + for (const auto& elem : inputElements) { + if (elem.SemanticName && + iequals(elem.SemanticName, paramDesc.SemanticName) && + elem.SemanticIndex == paramDesc.SemanticIndex) { + found = true; + break; + } + } + + if (!found) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] VS input '%s%u' has no matching input layout element " + "(debugName='%s').\n", + paramDesc.SemanticName ? paramDesc.SemanticName : "", + paramDesc.SemanticIndex, + desc.debugName.c_str()); + } + } + + // Validate pixel shader color outputs against NumRenderTargets / RTV formats. + if (psRefl) { + D3D12_SHADER_DESC psDesc = {}; + if (SUCCEEDED(psRefl->GetDesc(&psDesc))) { + UINT colorOutputs = 0; + for (UINT i = 0; i < psDesc.OutputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC paramDesc = {}; + if (FAILED(psRefl->GetOutputParameterDesc(i, ¶mDesc))) { + continue; + } + if (paramDesc.SemanticName && iequals(paramDesc.SemanticName, "SV_TARGET")) { + ++colorOutputs; + } + } + + IGL_LOG_INFO(" [PS] ColorOutputs=%u, NumRenderTargets=%u, RTV[0]=%d, DSV=%d\n", + colorOutputs, + psoDesc.NumRenderTargets, + psoDesc.NumRenderTargets > 0 ? 
static_cast(psoDesc.RTVFormats[0]) + : static_cast(DXGI_FORMAT_UNKNOWN), + static_cast(psoDesc.DSVFormat)); + + if (colorOutputs == 0 && psoDesc.NumRenderTargets > 0) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] PS writes no color outputs but PSO has " + "NumRenderTargets=%u (debugName='%s').\n", + psoDesc.NumRenderTargets, + desc.debugName.c_str()); + } else if (colorOutputs > 0 && psoDesc.NumRenderTargets == 0) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] PS writes %u color outputs but PSO has " + "NumRenderTargets=0 (debugName='%s').\n", + colorOutputs, + desc.debugName.c_str()); + } else if (colorOutputs > psoDesc.NumRenderTargets) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] PS writes %u color outputs but PSO only " + "declares %u render targets (debugName='%s').\n", + colorOutputs, + psoDesc.NumRenderTargets, + desc.debugName.c_str()); + } + } + } + + if (!hasErrors) { + IGL_D3D12_LOG_VERBOSE(" [VALIDATION] Shader inputs/outputs match input layout and " + "render target configuration.\n"); + } + + IGL_LOG_INFO("=== END D3D12 VALIDATE_SHADER_BINDINGS ===\n"); +} + +Device::Device(std::unique_ptr ctx) : ctx_(std::move(ctx)) { + platformDevice_ = std::make_unique(*this); + + // Validate device limits against actual device capabilities. + capabilities_.initialize(*ctx_); + + // Initialize upload infrastructure (allocator pool and upload helpers). + allocatorPool_.initialize(*ctx_, this); + + auto* device = ctx_->getDevice(); + if (device) { + // Pre-compile mipmap generation shaders at device initialization. + // This avoids runtime compilation overhead in Texture::generateMipmap(). 
+ { + // HLSL shader sources (identical to those in Texture.cpp) + static const char* kVS = R"( +struct VSOut { float4 pos: SV_POSITION; float2 uv: TEXCOORD0; }; +VSOut main(uint id: SV_VertexID) { + float2 p = float2((id << 1) & 2, id & 2); + VSOut o; o.pos = float4(p*float2(2,-2)+float2(-1,1), 0, 1); o.uv = p; return o; +} +)"; + static const char* kPS = R"( +Texture2D tex0 : register(t0); +SamplerState smp : register(s0); +float4 main(float4 pos:SV_POSITION, float2 uv:TEXCOORD0) : SV_TARGET { return tex0.SampleLevel(smp, uv, 0); } +)"; + + // Initialize DXC compiler + DXCCompiler dxcCompiler; + Result initResult = dxcCompiler.initialize(); + if (!initResult.isOk()) { + IGL_LOG_ERROR("Device::Device: Failed to initialize DXC for mipmap shader compilation: %s\n", + initResult.message.c_str()); + IGL_LOG_ERROR(" Mipmap generation will be unavailable\n"); + return; // Early exit - don't attempt compilation without DXC + } + + // Get shader model from context (minimum SM 6.0 for DXC) + D3D_SHADER_MODEL shaderModel = ctx_->getMaxShaderModel(); + std::string vsTarget = getShaderTarget(shaderModel, ShaderStage::Vertex); + std::string psTarget = getShaderTarget(shaderModel, ShaderStage::Fragment); + + // Compile vertex shader + std::string vsErrors; + Result vsResult = dxcCompiler.compile(kVS, + strlen(kVS), + "main", + vsTarget.c_str(), + "MipmapGenerationVS", + 0, + pipelineCache_.mipmapVSBytecode_, + vsErrors); + if (!vsResult.isOk()) { + IGL_LOG_ERROR("Device::Device: Failed to pre-compile mipmap VS: %s\n%s\n", + vsResult.message.c_str(), vsErrors.c_str()); + pipelineCache_.mipmapVSBytecode_.clear(); + return; // Early exit - can't proceed without VS + } + + // Compile pixel shader + std::string psErrors; + Result psResult = dxcCompiler.compile(kPS, + strlen(kPS), + "main", + psTarget.c_str(), + "MipmapGenerationPS", + 0, + pipelineCache_.mipmapPSBytecode_, + psErrors); + if (!psResult.isOk()) { + IGL_LOG_ERROR("Device::Device: Failed to pre-compile mipmap PS: 
%s\n%s\n", + psResult.message.c_str(), psErrors.c_str()); + pipelineCache_.mipmapPSBytecode_.clear(); + pipelineCache_.mipmapVSBytecode_.clear(); // Clear VS too for consistency + return; // Early exit - can't proceed without PS + } + + // Create root signature for mipmap generation + D3D12_DESCRIPTOR_RANGE ranges[2] = {}; + ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + ranges[0].NumDescriptors = 1; + ranges[0].BaseShaderRegister = 0; + ranges[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + ranges[1].NumDescriptors = 1; + ranges[1].BaseShaderRegister = 0; + ranges[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + D3D12_ROOT_PARAMETER params[2] = {}; + params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + params[0].DescriptorTable.NumDescriptorRanges = 1; + params[0].DescriptorTable.pDescriptorRanges = &ranges[0]; + params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + params[1].DescriptorTable.NumDescriptorRanges = 1; + params[1].DescriptorTable.pDescriptorRanges = &ranges[1]; + params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + D3D12_ROOT_SIGNATURE_DESC rsDesc = {}; + rsDesc.NumParameters = 2; + rsDesc.pParameters = params; + rsDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + igl::d3d12::ComPtr sig, err; + if (FAILED(D3D12SerializeRootSignature(&rsDesc, D3D_ROOT_SIGNATURE_VERSION_1, + sig.GetAddressOf(), err.GetAddressOf()))) { + IGL_LOG_ERROR("Device::Device: Failed to serialize mipmap root signature\n"); + if (err && err->GetBufferPointer()) { + IGL_LOG_ERROR(" D3D12 error: %s\n", static_cast(err->GetBufferPointer())); + } + pipelineCache_.mipmapVSBytecode_.clear(); + pipelineCache_.mipmapPSBytecode_.clear(); + return; + } + + if (FAILED(device->CreateRootSignature(0, + sig->GetBufferPointer(), + 
sig->GetBufferSize(), + IID_PPV_ARGS( + pipelineCache_.mipmapRootSignature_.GetAddressOf())))) { + IGL_LOG_ERROR("Device::Device: Failed to create mipmap root signature\n"); + pipelineCache_.mipmapVSBytecode_.clear(); + pipelineCache_.mipmapPSBytecode_.clear(); + return; + } + + // Success! Mark mipmap shaders as available + pipelineCache_.mipmapShadersAvailable_ = true; + IGL_D3D12_LOG_VERBOSE("Device::Device: Mipmap shaders pre-compiled successfully (%zu bytes VS, %zu bytes PS)\n", + pipelineCache_.mipmapVSBytecode_.size(), + pipelineCache_.mipmapPSBytecode_.size()); + } + } +} + +Device::~Device() { + // Capture D3D12 validation messages for this device if enabled via environment. + if (ctx_) { + captureInfoQueueForDevice(ctx_->getDevice()); + } + + // No shared event to clean up; events are per-call in waitForUploadFence. + + // Ensure upload-related resources are released before destroying the device. + // D3D12Context destructor handles main queue fence waits via waitForGPU(). + pipelineCache_.clear(); + samplerCache_.clear(); + allocatorPool_.clearOnDeviceDestruction(); + + // Clear bind group pools to release texture and buffer shared_ptrs that keep resources alive. + bindGroupTexturesPool_.clear(); + bindGroupBuffersPool_.clear(); +} + +// Check for device removal and report detailed error. +Result Device::checkDeviceRemoval() const { + auto* device = ctx_->getDevice(); + if (!device) { + // Device not initialized is an invalid operation, not success. 
+ IGL_DEBUG_ASSERT(false, "Device::checkDeviceRemoval() called before device initialization"); + return Result(Result::Code::InvalidOperation, "Device not initialized"); + } + + // Early return if device already marked as lost (return cached reason for diagnostics) + if (deviceLost_) { + return Result(Result::Code::RuntimeError, + std::string("Device previously lost: ") + deviceLostReason_); + } + + HRESULT hr = device->GetDeviceRemovedReason(); + if (FAILED(hr)) { + const char* reason = "Unknown"; + switch (hr) { + case DXGI_ERROR_DEVICE_HUNG: + reason = "DEVICE_HUNG (GPU not responding)"; + break; + case DXGI_ERROR_DEVICE_REMOVED: + reason = "DEVICE_REMOVED (Driver crash or hardware failure)"; + break; + case DXGI_ERROR_DEVICE_RESET: + reason = "DEVICE_RESET (Driver update or TDR)"; + break; + case DXGI_ERROR_DRIVER_INTERNAL_ERROR: + reason = "DRIVER_INTERNAL_ERROR (Driver bug)"; + break; + case DXGI_ERROR_INVALID_CALL: + reason = "INVALID_CALL (API misuse detected)"; + break; + default: + break; + } + + // Cache the reason and mark device as lost for diagnostics. + deviceLostReason_ = reason; + deviceLost_ = true; + + // Emit any pending D3D12/DXGI debug layer messages to help pinpoint the + // invalid API sequence that caused device removal. + logInfoQueuesForDevice(device, "Device::checkDeviceRemoval"); + + IGL_LOG_ERROR("D3D12 Device Removal Detected: %s (HRESULT=0x%08X)\n", reason, hr); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, std::string("D3D12 device removed: ") + reason); + } + + // On success (S_OK), device is healthy + return Result(); +} + +// Alignment validation methods. 
+ +bool Device::validateMSAAAlignment(const TextureDesc& desc, Result* IGL_NULLABLE outResult) const { + if (desc.numSamples <= 1) { + return true; // Not MSAA, no special alignment requirements + } + + // MSAA resources require 64KB alignment in D3D12 + // D3D12 CreateCommittedResource automatically handles this, but we validate dimensions + // to ensure resource won't exceed device limits + IGL_D3D12_LOG_VERBOSE("Device::validateMSAAAlignment: Validating MSAA texture (samples=%u, %ux%u)\n", + desc.numSamples, desc.width, desc.height); + + // Check if texture dimensions are reasonable for MSAA + // Large MSAA textures may fail due to memory constraints + const size_t pixelCount = static_cast(desc.width) * desc.height; + const size_t bytesPerPixel = 4; // Conservative estimate (RGBA8) + const size_t estimatedSize = pixelCount * bytesPerPixel * desc.numSamples; + + // Warn if MSAA texture is very large (> 256MB) + if (estimatedSize > 256 * 1024 * 1024) { + IGL_D3D12_LOG_VERBOSE("Device::validateMSAAAlignment: WARNING - Large MSAA texture detected (%zu MB). 
" + "May cause memory pressure.\n", estimatedSize / (1024 * 1024)); + } + + return true; +} + +bool Device::validateTextureAlignment(const D3D12_RESOURCE_DESC& resourceDesc, + uint32_t sampleCount, + Result* IGL_NULLABLE outResult) const { + // D3D12 texture alignment requirements: + // - MSAA textures (SampleDesc.Count > 1): 64KB alignment (automatic via CreateCommittedResource) + // - Regular textures: 64KB alignment (automatic via CreateCommittedResource) + // - Small textures (<= 64KB): May use 4KB alignment + + // This validation is informational - D3D12 handles alignment automatically + // We just verify parameters are within expected ranges + + if (sampleCount > 1) { + // MSAA texture - will use 64KB alignment + IGL_D3D12_LOG_VERBOSE("Device::validateTextureAlignment: MSAA texture will use 64KB alignment (samples=%u)\n", + sampleCount); + } + + // Validate resource dimensions don't exceed D3D12 limits + constexpr UINT64 kMaxTextureDimension2D = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; // 16384 + + if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + if (resourceDesc.Width > kMaxTextureDimension2D || resourceDesc.Height > kMaxTextureDimension2D) { + IGL_LOG_ERROR("Device::validateTextureAlignment: Texture dimensions (%llux%u) exceed D3D12 limit (%llu)\n", + resourceDesc.Width, resourceDesc.Height, kMaxTextureDimension2D); + Result::setResult(outResult, Result::Code::ArgumentInvalid, + "Texture dimensions exceed D3D12 maximum (16384x16384)"); + return false; + } + } + + return true; +} + +bool Device::validateBufferAlignment(size_t bufferSize, bool isUniform) const { + // D3D12 buffer alignment requirements: + // - Constant buffers: 256 bytes (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + // - Other buffers: No strict alignment requirement + + if (isUniform) { + // Uniform buffers must be 256-byte aligned + // This is already handled in createBuffer() by rounding up the size + if (bufferSize % BUFFER_ALIGNMENT != 0) { + 
IGL_D3D12_LOG_VERBOSE("Device::validateBufferAlignment: Uniform buffer size %zu will be rounded up to %zu\n", + bufferSize, (bufferSize + BUFFER_ALIGNMENT - 1) & ~(BUFFER_ALIGNMENT - 1)); + } + } + + return true; +} + +// BindGroups +Holder Device::createBindGroup( + const BindGroupTextureDesc& desc, + const IRenderPipelineState* IGL_NULLABLE /*compatiblePipeline*/, + Result* IGL_NULLABLE outResult) { + // Store bind group descriptor in pool for later use by encoder + BindGroupTextureDesc description(desc); + const auto handle = bindGroupTexturesPool_.create(std::move(description)); + Result::setResult(outResult, + handle.empty() ? Result(Result::Code::RuntimeError, "Cannot create bind group") + : Result()); + return {this, handle}; +} + +Holder Device::createBindGroup(const BindGroupBufferDesc& desc, + Result* IGL_NULLABLE outResult) { + // Store bind group descriptor in pool for later use by encoder + BindGroupBufferDesc description(desc); + const auto handle = bindGroupBuffersPool_.create(std::move(description)); + Result::setResult(outResult, + handle.empty() ? Result(Result::Code::RuntimeError, "Cannot create bind group") + : Result()); + return {this, handle}; +} + +void Device::destroy(BindGroupTextureHandle handle) { + if (handle.empty()) { + return; + } + bindGroupTexturesPool_.destroy(handle); +} + +void Device::destroy(BindGroupBufferHandle handle) { + if (handle.empty()) { + return; + } + bindGroupBuffersPool_.destroy(handle); +} + +void Device::destroy(SamplerHandle /*handle*/) { + // No-op: D3D12 backend doesn't use the SamplerHandle system. + // Samplers are created as shared_ptr and managed via ref-counting. + // Sampler descriptors are allocated transiently per command encoder at bind time, + // not persistently at sampler creation time, so there's nothing to deallocate here. 
+} + +// Command Queue +std::shared_ptr Device::createCommandQueue(const CommandQueueDesc& /*desc*/, + Result* IGL_NULLABLE + outResult) noexcept { + Result::setOk(outResult); + return std::make_shared(*this); +} + +// Resources +std::unique_ptr Device::createBuffer(const BufferDesc& desc, + Result* IGL_NULLABLE outResult) const noexcept { + // Single const_cast at the API boundary; all mutation happens in the non-const helper. + auto& self = const_cast(*this); + return self.createBufferImpl(desc, outResult); +} + +std::unique_ptr Device::createBufferImpl(const BufferDesc& desc, + Result* IGL_NULLABLE outResult) noexcept { + auto* device = ctx_->getDevice(); + if (!device) { + Result::setResult(outResult, Result::Code::RuntimeError, "D3D12 device is null"); + return nullptr; + } + + // Determine heap type and initial state based on storage + D3D12_HEAP_TYPE heapType; + D3D12_RESOURCE_STATES initialState; + + // CRITICAL: Storage buffers with UAV flags MUST use DEFAULT heap + // D3D12 does not allow UAV resources on UPLOAD heaps + const bool isStorageBuffer = (desc.type & BufferDesc::BufferTypeBits::Storage) != 0; + const bool forceDefaultHeap = isStorageBuffer; // Storage buffers need UAV, which requires DEFAULT heap + + if ((desc.storage == ResourceStorage::Shared || desc.storage == ResourceStorage::Managed) && !forceDefaultHeap) { + // CPU-writable upload heap (for non-storage buffers only) + heapType = D3D12_HEAP_TYPE_UPLOAD; + initialState = D3D12_RESOURCE_STATE_GENERIC_READ; + } else { + // GPU-only default heap (required for storage buffers with UAV) + heapType = D3D12_HEAP_TYPE_DEFAULT; + initialState = D3D12_RESOURCE_STATE_COMMON; + } + + // Create heap properties + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = heapType; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + // For uniform buffers, size must be aligned to 256 bytes 
(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + const bool isUniformBuffer = (desc.type & BufferDesc::BufferTypeBits::Uniform) != 0; + + // Validate buffer alignment requirements. + validateBufferAlignment(desc.length, isUniformBuffer); + + const UINT64 alignedSize = isUniformBuffer + ? AlignUp(desc.length, 256) // D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT + : desc.length; + + IGL_D3D12_LOG_VERBOSE("Device::createBuffer: type=%d, requested_size=%zu, aligned_size=%llu, isUniform=%d\n", + desc.type, desc.length, alignedSize, isUniformBuffer); + + // Create buffer description + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Alignment = 0; + bufferDesc.Width = alignedSize; + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + // Add UAV flag for storage buffers (used by compute shaders) + // isStorageBuffer already defined above for heap type determination + if (isStorageBuffer) { + bufferDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + IGL_D3D12_LOG_VERBOSE("Device::createBuffer: Storage buffer - adding UAV flag\n"); + } + + // Create the buffer resource + igl::d3d12::ComPtr buffer; + HRESULT hr = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + initialState, + nullptr, + IID_PPV_ARGS(buffer.GetAddressOf()) + ); + + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), "Failed to create buffer: HRESULT = 0x%08X", static_cast(hr)); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + // Debug: Log GPU address for uniform buffers + if (isUniformBuffer) { + D3D12_GPU_VIRTUAL_ADDRESS gpuAddr = buffer->GetGPUVirtualAddress(); + 
IGL_D3D12_LOG_VERBOSE("Device::createBuffer: Uniform buffer created, GPU address=0x%llx\n", gpuAddr); + } + + // Upload initial data if provided + D3D12_RESOURCE_STATES finalState = initialState; + + if (heapType == D3D12_HEAP_TYPE_UPLOAD) { + finalState = D3D12_RESOURCE_STATE_GENERIC_READ; + } + + if (desc.data) { + if (heapType == D3D12_HEAP_TYPE_UPLOAD) { + void* mappedData = nullptr; + D3D12_RANGE readRange = {0, 0}; + hr = buffer->Map(0, &readRange, &mappedData); + + if (SUCCEEDED(hr)) { + std::memcpy(mappedData, desc.data, desc.length); + buffer->Unmap(0, nullptr); + } + } else if (heapType == D3D12_HEAP_TYPE_DEFAULT) { + // DEFAULT heap: stage through an UPLOAD buffer and copy + IGL_D3D12_LOG_VERBOSE("Device::createBuffer: Staging initial data via UPLOAD heap for DEFAULT buffer\n"); + + // Create upload buffer + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + uploadHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + uploadHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + // Create upload buffer description WITHOUT UAV flag (UPLOAD heaps can't have UAV) + D3D12_RESOURCE_DESC uploadBufferDesc = bufferDesc; + uploadBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; // Remove UAV flag for upload buffer + + igl::d3d12::ComPtr uploadBuffer; + HRESULT upHr = device->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadBuffer.GetAddressOf())); + if (FAILED(upHr)) { + IGL_LOG_ERROR("Device::createBuffer: Failed to create upload buffer: 0x%08X\n", static_cast(upHr)); + } else { + // Map and copy data + void* mapped = nullptr; + D3D12_RANGE rr = {0, 0}; + if (SUCCEEDED(uploadBuffer->Map(0, &rr, &mapped)) && mapped) { + std::memcpy(mapped, desc.data, desc.length); + uploadBuffer->Unmap(0, nullptr); + + igl::d3d12::ComPtr allocator = getUploadCommandAllocator(); + if (!allocator.Get()) { + 
IGL_LOG_ERROR("Device::createBuffer: Failed to get command allocator from pool\n"); + } else { + igl::d3d12::ComPtr cmdList; + if (SUCCEEDED(device->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())))) { + // Transition default buffer to COPY_DEST + D3D12_RESOURCE_BARRIER toCopyDest = {}; + toCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCopyDest.Transition.pResource = buffer.Get(); + toCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCopyDest.Transition.StateBefore = initialState; // COMMON + toCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + cmdList->ResourceBarrier(1, &toCopyDest); + + // Copy upload -> default + cmdList->CopyBufferRegion(buffer.Get(), 0, uploadBuffer.Get(), 0, alignedSize); + + // Transition to a likely-read state based on buffer type + D3D12_RESOURCE_STATES targetState = D3D12_RESOURCE_STATE_GENERIC_READ; + if (desc.type & BufferDesc::BufferTypeBits::Vertex) { + targetState = D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } else if (desc.type & BufferDesc::BufferTypeBits::Uniform) { + targetState = D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } else if (desc.type & BufferDesc::BufferTypeBits::Index) { + targetState = D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + D3D12_RESOURCE_BARRIER toTarget = {}; + toTarget.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toTarget.Transition.pResource = buffer.Get(); + toTarget.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toTarget.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + toTarget.Transition.StateAfter = targetState; + cmdList->ResourceBarrier(1, &toTarget); + + cmdList->Close(); + ID3D12CommandList* lists[] = {cmdList.Get()}; + ctx_->getCommandQueue()->ExecuteCommandLists(1, lists); + + // Use async fence signaling instead of synchronous waitForGPU(). + // Get fence value that will signal when this upload completes. 
+ UINT64 uploadFenceValue = getNextUploadFenceValue(); + + // Signal upload fence after copy completes + HRESULT hrSignal = + ctx_->getCommandQueue()->Signal(allocatorPool_.getUploadFence(), uploadFenceValue); + if (FAILED(hrSignal)) { + IGL_LOG_ERROR("Device::createBuffer: Failed to signal upload fence: 0x%08X\n", hrSignal); + // Return allocator with 0 to avoid blocking the pool + returnUploadCommandAllocator(allocator, 0); + } else { + // Return allocator to pool with fence value (will be reused after the fence is signaled). + returnUploadCommandAllocator(allocator, uploadFenceValue); + + // Track staging buffer for async cleanup with the associated fence value. + trackUploadBuffer(std::move(uploadBuffer), uploadFenceValue); + } + + finalState = targetState; + } else { + IGL_LOG_ERROR("Device::createBuffer: Failed to create command list\n"); + // Return allocator with 0 to avoid blocking the pool + returnUploadCommandAllocator(allocator, 0); + } + } + } + } + } + } + + Result::setOk(outResult); + return std::make_unique(const_cast(*this), std::move(buffer), desc, finalState); +} + +std::shared_ptr Device::createDepthStencilState( + const DepthStencilStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + Result::setOk(outResult); + return std::make_shared(desc); +} + +std::unique_ptr Device::createShaderStages(const ShaderStagesDesc& desc, + Result* IGL_NULLABLE + outResult) const { + Result::setOk(outResult); + return std::make_unique(desc); +} + +std::shared_ptr Device::createSamplerState(const SamplerStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + return samplerCache_.createSamplerState(desc, outResult); +} + +std::shared_ptr Device::createTexture(const TextureDesc& desc, + Result* IGL_NULLABLE outResult) const noexcept { + auto* device = ctx_->getDevice(); + + // Check for exportability - D3D12 doesn't support exportable textures + if (desc.exportability == TextureDesc::TextureExportability::Exportable) { + Result::setResult(outResult, 
Result::Code::Unimplemented, + "D3D12 does not support exportable textures"); + return nullptr; + } + + // Convert IGL texture format to DXGI format + DXGI_FORMAT dxgiFormat = textureFormatToDXGIFormat(desc.format); + IGL_D3D12_LOG_VERBOSE("Device::createTexture: IGL format=%d -> DXGI format=%d\n", (int)desc.format, (int)dxgiFormat); + if (dxgiFormat == DXGI_FORMAT_UNKNOWN) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Unsupported texture format"); + return nullptr; + } + + // Create texture resource description + D3D12_RESOURCE_DESC resourceDesc = {}; + + // Set dimension based on texture type + if (desc.type == TextureType::ThreeD) { + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + resourceDesc.DepthOrArraySize = static_cast(desc.depth); + } else if (desc.type == TextureType::Cube) { + // Cube textures are 2D textures with 6 array slices per layer (one per face). + // For cube arrays: numLayers * 6 faces. + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.DepthOrArraySize = static_cast(desc.numLayers * 6); + IGL_D3D12_LOG_VERBOSE("Device::createTexture: Cube texture with %u layers -> %u array slices\n", + desc.numLayers, resourceDesc.DepthOrArraySize); + } else { + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.DepthOrArraySize = static_cast(desc.numLayers); + } + + const bool sampledUsage = + (desc.usage & TextureDesc::TextureUsageBits::Sampled) != 0; + const DXGI_FORMAT resourceFormat = + textureFormatToDXGIResourceFormat(desc.format, sampledUsage); + if (resourceFormat == DXGI_FORMAT_UNKNOWN) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Unsupported resource format"); + return nullptr; + } + resourceDesc.Alignment = 0; + resourceDesc.Width = desc.width; + resourceDesc.Height = desc.height; + resourceDesc.MipLevels = static_cast(desc.numMipLevels); + resourceDesc.Format = resourceFormat; + + // MSAA configuration + // D3D12 MSAA requirements: + // - 
Sample count must be 1, 2, 4, 8, or 16 (power of 2) + // - Quality level 0 is standard MSAA (higher quality levels are vendor-specific) + // - MSAA textures cannot have mipmaps (numMipLevels must be 1) + // - Not all formats support all sample counts - validation required + const uint32_t sampleCount = std::max(1u, desc.numSamples); + + // Validate MSAA alignment requirements before creating the resource. + if (sampleCount > 1) { + if (!validateMSAAAlignment(desc, outResult)) { + // Error already set by validation function + return nullptr; + } + } + + // Validate MSAA constraints. + if (sampleCount > 1) { + // MSAA textures cannot have mipmaps + if (desc.numMipLevels > 1) { + IGL_LOG_ERROR("Device::createTexture: MSAA textures cannot have mipmaps (numMipLevels=%u, numSamples=%u)\n", + desc.numMipLevels, sampleCount); + Result::setResult(outResult, Result::Code::ArgumentInvalid, + "MSAA textures cannot have mipmaps (numMipLevels must be 1)"); + return nullptr; + } + + // Validate that the requested MSAA sample count is supported for this format. + // NOTE: Applications should query DeviceFeatureLimits::MaxMultisampleCount proactively + // to avoid runtime errors. Use getMaxMSAASamplesForFormat() for format-specific queries. + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msqLevels = {}; + msqLevels.Format = dxgiFormat; + msqLevels.SampleCount = sampleCount; + msqLevels.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + + if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msqLevels, sizeof(msqLevels))) || + msqLevels.NumQualityLevels == 0) { + // Query maximum supported samples for better error messages. + const uint32_t maxSamples = getMaxMSAASamplesForFormat(desc.format); + + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Device::createTexture: Format %d does not support %u samples (max supported: %u). 
" + "Query DeviceFeatureLimits::MaxMultisampleCount before texture creation.", + static_cast(dxgiFormat), sampleCount, maxSamples); + IGL_LOG_ERROR("%s\n", errorMsg); + Result::setResult(outResult, Result::Code::Unsupported, errorMsg); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Device::createTexture: MSAA enabled - format=%d, samples=%u, quality levels=%u\n", + static_cast(dxgiFormat), sampleCount, msqLevels.NumQualityLevels); + } + + resourceDesc.SampleDesc.Count = sampleCount; + resourceDesc.SampleDesc.Quality = 0; // Standard MSAA quality (0 = default/standard) + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + // Set resource flags based on usage. + // IMPORTANT: D3D12 forbids combining ALLOW_DEPTH_STENCIL with + // ALLOW_RENDER_TARGET, ALLOW_UNORDERED_ACCESS or ALLOW_SIMULTANEOUS_ACCESS. + // We therefore do not allow "Storage" usage on depth/stencil formats and + // never set both DEPTH_STENCIL and RENDER_TARGET on the same resource. + const bool isDepthStencilFormat = + (desc.format >= TextureFormat::Z_UNorm16 && desc.format <= TextureFormat::S_UInt8); + + if (desc.usage & TextureDesc::TextureUsageBits::Sampled) { + // Shader resource - no special flags needed + } + + // Attachment usage becomes either a color render target or a depth/stencil + // target depending on the texture format. + if (desc.usage & TextureDesc::TextureUsageBits::Attachment) { + if (isDepthStencilFormat) { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + } else { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + } + + // Storage (unordered access) is only supported for non-depth/stencil + // formats. If requested on a depth/stencil texture, log and ignore it. + if (desc.usage & TextureDesc::TextureUsageBits::Storage) { + if (isDepthStencilFormat) { + IGL_LOG_ERROR( + "Device::createTexture: Storage usage (UAV) requested for depth/stencil " + "format (format=%d). 
D3D12 does not allow ALLOW_DEPTH_STENCIL together " + "with ALLOW_UNORDERED_ACCESS; ignoring Storage flag for this texture.\n", + static_cast(desc.format)); + } else { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + } + + // Create heap properties + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + // Determine initial state + D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON; + + // Prepare optimized clear value for depth/stencil only. + // For color render targets we deliberately avoid passing an optimized clear + // value to CreateCommittedResource, because RenderPass clear colors are + // often dynamic. Passing a fixed optimized clear color while clearing to + // arbitrary colors triggers D3D12 WARNING ID=820 + // (ClearRenderTargetView clear values do not match resource creation). + D3D12_CLEAR_VALUE clearValue = {}; + D3D12_CLEAR_VALUE* pClearValue = nullptr; + + if (resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { + clearValue.Format = dxgiFormat; + clearValue.DepthStencil.Depth = 1.0f; // Default far plane + clearValue.DepthStencil.Stencil = 0; + pClearValue = &clearValue; + } + + // Validate texture alignment before creating the resource. 
+ if (!validateTextureAlignment(resourceDesc, sampleCount, outResult)) { + // Error already set by validation function + return nullptr; + } + + // Create the texture resource + igl::d3d12::ComPtr resource; + HRESULT hr = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + initialState, + pClearValue, // Optimized clear value for render targets/depth-stencil + IID_PPV_ARGS(resource.GetAddressOf())); + + if (FAILED(hr)) { + char errorMsg[512]; + if (hr == DXGI_ERROR_DEVICE_REMOVED) { + HRESULT removedReason = device->GetDeviceRemovedReason(); + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create texture resource. Device removed! HRESULT: 0x%08X, Removed reason: 0x%08X", + static_cast(hr), static_cast(removedReason)); + } else { + snprintf(errorMsg, sizeof(errorMsg), "Failed to create texture resource. HRESULT: 0x%08X", static_cast(hr)); + } + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + // Create IGL texture from D3D12 resource. 
+ auto texture = Texture::createFromResource( + resource.Get(), desc.format, desc, device, ctx_->getCommandQueue(), initialState, + const_cast(this)); + Result::setOk(outResult); + return texture; +} + +std::shared_ptr Device::createTextureView(std::shared_ptr texture, + const TextureViewDesc& desc, + Result* IGL_NULLABLE + outResult) const noexcept { + if (!texture) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Parent texture is null"); + return nullptr; + } + + // Cast to D3D12 texture + auto d3d12Texture = std::static_pointer_cast(texture); + if (!d3d12Texture) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Texture is not a D3D12 texture"); + return nullptr; + } + + // Create the texture view + auto view = Texture::createTextureView(d3d12Texture, desc); + if (!view) { + Result::setResult(outResult, Result::Code::RuntimeError, "Failed to create texture view"); + return nullptr; + } + + Result::setOk(outResult); + return view; +} + +std::shared_ptr Device::createTimer(Result* IGL_NULLABLE outResult) const noexcept { + auto timer = std::make_shared(*this); + Result::setOk(outResult); + return timer; +} + +std::shared_ptr Device::createVertexInputState( + const VertexInputStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + Result::setOk(outResult); + return std::make_shared(desc); +} + +std::shared_ptr Device::createComputePipeline( + const ComputePipelineDesc& desc, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createComputePipeline() START - debugName='%s'\n", desc.debugName.c_str()); + + auto* device = ctx_->getDevice(); + if (!device) { + IGL_LOG_ERROR(" D3D12 device is null!\n"); + Result::setResult(outResult, Result::Code::InvalidOperation, "D3D12 device is null"); + return nullptr; + } + + if (!desc.shaderStages) { + IGL_LOG_ERROR(" Shader stages are required!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages are required"); + return nullptr; + } + + if 
(desc.shaderStages->getType() != ShaderStagesType::Compute) { + IGL_LOG_ERROR(" Shader stages must be compute type!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages must be compute type"); + return nullptr; + } + + // Get compute shader module + auto* computeModule = static_cast(desc.shaderStages->getComputeModule().get()); + if (!computeModule) { + IGL_LOG_ERROR(" Compute module is null!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Compute shader required"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Getting compute shader bytecode...\n"); + const auto& csBytecode = computeModule->getBytecode(); + IGL_D3D12_LOG_VERBOSE(" CS bytecode: %zu bytes\n", csBytecode.size()); + + // Create root signature for compute + // Root signature layout for compute: + // - Root parameter 0: Root Constants for b0 (Push Constants) + // - Root parameter 1: Descriptor table with unbounded UAVs (u0-uN) + // - Root parameter 2: Descriptor table with unbounded SRVs (t0-tN) + // - Root parameter 3: Descriptor table with unbounded CBVs (b1-bN) + // - Root parameter 4: Descriptor table with unbounded Samplers (s0-sN) + + // Query root signature capabilities to determine descriptor range bounds. + // Tier 1 devices require bounded descriptor ranges. + const D3D12_RESOURCE_BINDING_TIER bindingTier = ctx_->getResourceBindingTier(); + const bool needsBoundedRanges = (bindingTier == D3D12_RESOURCE_BINDING_TIER_1); + + // Conservative bounds for Tier 1 devices (based on actual usage in render sessions) + // These limits are sufficient for all current IGL usage patterns + const UINT uavBound = needsBoundedRanges ? 64 : UINT_MAX; + const UINT srvBound = needsBoundedRanges ? 128 : UINT_MAX; + const UINT cbvBound = needsBoundedRanges ? 64 : UINT_MAX; + const UINT samplerBound = needsBoundedRanges ? 
32 : UINT_MAX; // Samplers always bounded on Tier 1/2 + + if (needsBoundedRanges) { + IGL_D3D12_LOG_VERBOSE(" Using bounded descriptor ranges (Tier 1): UAV=%u, SRV=%u, CBV=%u, Sampler=%u\n", + uavBound, srvBound, cbvBound, samplerBound); + } else { + IGL_D3D12_LOG_VERBOSE(" Using unbounded descriptor ranges (Tier %u)\n", + bindingTier == D3D12_RESOURCE_BINDING_TIER_3 ? 3 : 2); + } + + // Descriptor range for UAVs (unordered access views - read/write buffers and textures). + D3D12_DESCRIPTOR_RANGE uavRange = {}; + uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + uavRange.NumDescriptors = uavBound; + uavRange.BaseShaderRegister = 0; // Starting at u0 + uavRange.RegisterSpace = 0; + uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Descriptor range for SRVs (shader resource views - read-only textures and buffers) + D3D12_DESCRIPTOR_RANGE srvRange = {}; + srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + srvRange.NumDescriptors = srvBound; + srvRange.BaseShaderRegister = 0; // Starting at t0 + srvRange.RegisterSpace = 0; + srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Descriptor range for CBVs (constant buffer views). + // Note: b0 will be used for root constants (push constants), so the CBV table starts at b1. + D3D12_DESCRIPTOR_RANGE cbvRange = {}; + cbvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + cbvRange.NumDescriptors = cbvBound; + cbvRange.BaseShaderRegister = 1; // Starting at b1 (b0 is root constants) + cbvRange.RegisterSpace = 0; + cbvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Descriptor range for Samplers. 
+ D3D12_DESCRIPTOR_RANGE samplerRange = {}; + samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + samplerRange.NumDescriptors = samplerBound; + samplerRange.BaseShaderRegister = 0; // Starting at s0 + samplerRange.RegisterSpace = 0; + samplerRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Root parameters + D3D12_ROOT_PARAMETER rootParams[5] = {}; + + // Parameter 0: Root Constants for b0 (Push Constants) + // Increased from 16 to 32 DWORDs (64→128 bytes) to match Vulkan + // Using 32-bit constants for push constants in compute shaders + rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParams[0].Constants.ShaderRegister = 0; // b0 + rootParams[0].Constants.RegisterSpace = 0; + rootParams[0].Constants.Num32BitValues = 32; // 32 DWORDs = 128 bytes (matches Vulkan) + rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 1: Descriptor table for UAVs + rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[1].DescriptorTable.NumDescriptorRanges = 1; + rootParams[1].DescriptorTable.pDescriptorRanges = &uavRange; + rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 2: Descriptor table for SRVs + rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[2].DescriptorTable.NumDescriptorRanges = 1; + rootParams[2].DescriptorTable.pDescriptorRanges = &srvRange; + rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 3: Descriptor table for CBVs (b1+) + // Note: b0 is now root constants, this table starts at b1 + rootParams[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[3].DescriptorTable.NumDescriptorRanges = 1; + rootParams[3].DescriptorTable.pDescriptorRanges = &cbvRange; + rootParams[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 4: Descriptor table for Samplers + rootParams[4].ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[4].DescriptorTable.NumDescriptorRanges = 1; + rootParams[4].DescriptorTable.pDescriptorRanges = &samplerRange; + rootParams[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = 5; + rootSigDesc.pParameters = rootParams; + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + // CRITICAL: Validate root signature cost (64 DWORD hardware limit). + IGL_D3D12_LOG_VERBOSE(" Validating compute root signature cost:\n"); + const uint32_t cost = calculateRootSignatureCost(rootSigDesc); + IGL_D3D12_LOG_VERBOSE(" Total cost: %u / 64 DWORDs (%.1f%%)\n", cost, 100.0f * cost / 64.0f); + + // Warning threshold at 50% (32 DWORDs) + if (cost > 32) { + IGL_D3D12_LOG_VERBOSE(" WARNING: Root signature cost exceeds 50%% of limit: %u / 64 DWORDs\n", cost); + } + + // Hard limit enforcement + IGL_DEBUG_ASSERT(cost <= 64, "Root signature exceeds 64 DWORD limit!"); + if (cost > 64) { + IGL_LOG_ERROR(" ROOT SIGNATURE COST OVERFLOW: %u DWORDs (limit: 64)\n", cost); + Result::setResult(outResult, Result::Code::ArgumentOutOfRange, + "Root signature cost exceeds 64 DWORD hardware limit"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Creating compute root signature with Root Constants (b0)/UAVs/SRVs/CBVs/Samplers\n"); + + // Get or create cached root signature. 
+ igl::d3d12::ComPtr rootSignature = + pipelineCache_.getOrCreateRootSignature(ctx_->getDevice(), rootSigDesc, outResult); + if (!rootSignature.Get()) { + return nullptr; + } + + // Create compute pipeline state + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature.Get(); + psoDesc.CS.pShaderBytecode = csBytecode.data(); + psoDesc.CS.BytecodeLength = csBytecode.size(); + psoDesc.NodeMask = 0; + psoDesc.CachedPSO.pCachedBlob = nullptr; + psoDesc.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + // PSO cache lookup (thread-safe with double-checked locking). + const size_t psoHash = pipelineCache_.hashComputePipelineDesc(desc); + igl::d3d12::ComPtr pipelineState; + + // First check: Lock for cache lookup + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.computePSOCache_.find(psoHash); + if (psoIt != pipelineCache_.computePSOCache_.end()) { + // Cache hit - reuse existing PSO + pipelineCache_.computePSOCacheHits_++; + pipelineState = psoIt->second; // Assignment creates a ref-counted copy + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE HIT] Hash=0x%zx, hits=%zu, misses=%zu, hit rate=%.1f%%\n", + psoHash, + pipelineCache_.computePSOCacheHits_, + pipelineCache_.computePSOCacheMisses_, + 100.0 * pipelineCache_.computePSOCacheHits_ / + (pipelineCache_.computePSOCacheHits_ + + pipelineCache_.computePSOCacheMisses_)); + IGL_D3D12_LOG_VERBOSE("Device::createComputePipeline() SUCCESS (CACHED) - PSO=%p, RootSig=%p\n", + pipelineState.Get(), rootSignature.Get()); + Result::setOk(outResult); + // Create a copy of the root signature for the returned object + igl::d3d12::ComPtr rootSigCopy = rootSignature; + return std::make_shared(desc, std::move(pipelineState), std::move(rootSigCopy)); + } + } + + // Cache miss - create new PSO outside lock (expensive operation) + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE MISS] Hash=0x%zx\n", psoHash); + + IGL_D3D12_LOG_VERBOSE(" Creating compute 
pipeline state...\n"); + HRESULT hr = device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR(" CreateComputePipelineState FAILED: 0x%08X\n", static_cast(hr)); + + // Dump D3D12 + DXGI debug messages, if available, to help identify the + // invalid PSO configuration (shader bytecode, root signature, etc.). + logInfoQueuesForDevice(device, "CreateComputePipelineState"); + + Result::setResult(outResult, Result::Code::RuntimeError, "Failed to create compute pipeline state"); + return nullptr; + } + + // E-011: Set debug name on compute PSO for better debugging in PIX/RenderDoc + if (desc.shaderStages && desc.shaderStages->getComputeModule()) { + const std::string& psoName = desc.shaderStages->getComputeModule()->info().debugName; + if (!psoName.empty()) { + // Convert to wide string for D3D12 SetName API + std::wstring wideName(psoName.begin(), psoName.end()); + pipelineState->SetName(wideName.c_str()); + IGL_D3D12_LOG_VERBOSE(" Set compute PSO debug name: %s\n", psoName.c_str()); + } + } + + // Second check: Lock for cache insertion with double-check. + // Another thread may have created the PSO while we were creating ours + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.computePSOCache_.find(psoHash); + if (psoIt != pipelineCache_.computePSOCache_.end()) { + // Another thread beat us to it - use their PSO + pipelineCache_.computePSOCacheHits_++; + pipelineState = psoIt->second; + IGL_D3D12_LOG_VERBOSE(" [PSO DOUBLE-CHECK HIT] Another thread created PSO, using theirs. 
Hash=0x%zx\n", psoHash); + } else { + // We're the first to complete - cache our PSO + pipelineCache_.computePSOCacheMisses_++; + pipelineCache_.computePSOCache_[psoHash] = pipelineState; + IGL_D3D12_LOG_VERBOSE(" [PSO CACHED] Hash=0x%zx, hits=%zu, misses=%zu\n", + psoHash, + pipelineCache_.computePSOCacheHits_, + pipelineCache_.computePSOCacheMisses_); + } + } + + IGL_D3D12_LOG_VERBOSE("Device::createComputePipeline() SUCCESS - PSO=%p, RootSig=%p (hash=0x%zx)\n", + pipelineState.Get(), rootSignature.Get(), psoHash); + Result::setOk(outResult); + return std::make_shared(desc, std::move(pipelineState), std::move(rootSignature)); +} + +std::shared_ptr Device::createRenderPipeline( + const RenderPipelineDesc& desc, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createRenderPipeline() START - debugName='%s'\n", desc.debugName.c_str()); + + auto* device = ctx_->getDevice(); + if (!device) { + IGL_LOG_ERROR(" D3D12 device is null!\n"); + Result::setResult(outResult, Result::Code::InvalidOperation, "D3D12 device is null"); + return nullptr; + } + + if (!desc.shaderStages) { + IGL_LOG_ERROR(" Shader stages are required!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages are required"); + return nullptr; + } + + // Get shader modules + auto* vertexModule = static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = static_cast(desc.shaderStages->getFragmentModule().get()); + + if (!vertexModule || !fragmentModule) { + IGL_LOG_ERROR(" Vertex or fragment module is null!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Vertex and fragment shaders required"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Getting shader bytecode...\n"); + // Get shader bytecode first + const auto& vsBytecode = vertexModule->getBytecode(); + const auto& psBytecode = fragmentModule->getBytecode(); + IGL_D3D12_LOG_VERBOSE(" VS bytecode: %zu bytes, PS bytecode: %zu bytes\n", vsBytecode.size(), 
psBytecode.size()); + + // Extract shader reflection info for dynamic root signature creation + const auto& vsReflectionInfo = vertexModule->getReflectionInfo(); + const auto& psReflectionInfo = fragmentModule->getReflectionInfo(); + + // Create root signature key from shader reflection + D3D12RootSignatureKey rootSigKey = D3D12RootSignatureKey::fromShaderReflection(&vsReflectionInfo, &psReflectionInfo); + + // Query resource binding tier for descriptor range bounds + const D3D12_RESOURCE_BINDING_TIER bindingTier = ctx_->getResourceBindingTier(); + IGL_D3D12_LOG_VERBOSE(" Resource binding tier: %u\n", bindingTier); + + // Create root signature dynamically based on shader requirements + igl::d3d12::ComPtr rootSignature = + pipelineCache_.createRootSignatureFromKey(ctx_->getDevice(), rootSigKey, bindingTier, outResult); + if (!rootSignature.Get()) { + return nullptr; + } + + // Create PSO - zero-initialize all fields + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature.Get(); + + // Shader bytecode + psoDesc.VS = {vsBytecode.data(), vsBytecode.size()}; + psoDesc.PS = {psBytecode.data(), psBytecode.size()}; + // Explicitly zero unused shader stages + psoDesc.DS = {nullptr, 0}; + psoDesc.HS = {nullptr, 0}; + psoDesc.GS = {nullptr, 0}; + + // Rasterizer state - configure based on pipeline descriptor + // Fill mode (solid vs wireframe) + psoDesc.RasterizerState.FillMode = (desc.polygonFillMode == PolygonFillMode::Line) + ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + + // Cull mode configuration + switch (desc.cullMode) { + case CullMode::Back: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + case CullMode::Front: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CullMode::Disabled: + default: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + } + + // Front face winding order + psoDesc.RasterizerState.FrontCounterClockwise = + (desc.frontFaceWinding == WindingMode::CounterClockwise) ? TRUE : FALSE; + + // Depth bias (polygon offset) - baseline values set in PSO + // Note: IGL doesn't currently expose depth bias in RenderPipelineDesc + // Applications can dynamically adjust depth bias via RenderCommandEncoder::setDepthBias() + // These PSO values serve as the baseline which can be dynamically overridden + psoDesc.RasterizerState.DepthBias = 0; // Integer depth bias (default: no bias) + psoDesc.RasterizerState.DepthBiasClamp = 0.0f; // Max depth bias value (default: no clamp) + psoDesc.RasterizerState.SlopeScaledDepthBias = 0.0f; // Slope-scaled bias for angled surfaces + + psoDesc.RasterizerState.DepthClipEnable = TRUE; // Enable depth clipping + psoDesc.RasterizerState.MultisampleEnable = (desc.sampleCount > 1) ? TRUE : FALSE; + psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; + psoDesc.RasterizerState.ForcedSampleCount = 0; + psoDesc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + // Blend state - configure per render target based on pipeline descriptor + psoDesc.BlendState.AlphaToCoverageEnable = FALSE; + const size_t numColorAttachments = desc.targetDesc.colorAttachments.size(); + psoDesc.BlendState.IndependentBlendEnable = numColorAttachments > 1 ? 
TRUE : FALSE; + + // Helper to convert IGL blend factor to D3D12 + auto toD3D12Blend = [](BlendFactor f) { + switch (f) { + case BlendFactor::Zero: return D3D12_BLEND_ZERO; + case BlendFactor::One: return D3D12_BLEND_ONE; + case BlendFactor::SrcColor: return D3D12_BLEND_SRC_COLOR; + case BlendFactor::OneMinusSrcColor: return D3D12_BLEND_INV_SRC_COLOR; + case BlendFactor::SrcAlpha: return D3D12_BLEND_SRC_ALPHA; + case BlendFactor::OneMinusSrcAlpha: return D3D12_BLEND_INV_SRC_ALPHA; + case BlendFactor::DstColor: return D3D12_BLEND_DEST_COLOR; + case BlendFactor::OneMinusDstColor: return D3D12_BLEND_INV_DEST_COLOR; + case BlendFactor::DstAlpha: return D3D12_BLEND_DEST_ALPHA; + case BlendFactor::OneMinusDstAlpha: return D3D12_BLEND_INV_DEST_ALPHA; + case BlendFactor::SrcAlphaSaturated: return D3D12_BLEND_SRC_ALPHA_SAT; + case BlendFactor::BlendColor: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendColor: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::BlendAlpha: return D3D12_BLEND_BLEND_FACTOR; // D3D12 uses same constant for RGB and Alpha + case BlendFactor::OneMinusBlendAlpha: return D3D12_BLEND_INV_BLEND_FACTOR; // D3D12 uses same constant for RGB and Alpha + case BlendFactor::Src1Color: return D3D12_BLEND_SRC1_COLOR; // Dual-source blending + case BlendFactor::OneMinusSrc1Color: return D3D12_BLEND_INV_SRC1_COLOR; // Dual-source blending + case BlendFactor::Src1Alpha: return D3D12_BLEND_SRC1_ALPHA; // Dual-source blending + case BlendFactor::OneMinusSrc1Alpha: return D3D12_BLEND_INV_SRC1_ALPHA; // Dual-source blending + default: return D3D12_BLEND_ONE; + } + }; + + auto toD3D12BlendOp = [](BlendOp op) { + switch (op) { + case BlendOp::Add: return D3D12_BLEND_OP_ADD; + case BlendOp::Subtract: return D3D12_BLEND_OP_SUBTRACT; + case BlendOp::ReverseSubtract: return D3D12_BLEND_OP_REV_SUBTRACT; + case BlendOp::Min: return D3D12_BLEND_OP_MIN; + case BlendOp::Max: return D3D12_BLEND_OP_MAX; + default: return D3D12_BLEND_OP_ADD; + } + }; + + for 
(UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + if (i < desc.targetDesc.colorAttachments.size()) { + const auto& att = desc.targetDesc.colorAttachments[i]; + psoDesc.BlendState.RenderTarget[i].BlendEnable = att.blendEnabled ? TRUE : FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = toD3D12Blend(att.srcRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlend = toD3D12Blend(att.dstRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOp = toD3D12BlendOp(att.rgbBlendOp); + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = toD3D12Blend(att.srcAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = toD3D12Blend(att.dstAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = toD3D12BlendOp(att.alphaBlendOp); + + // Convert IGL color write mask to D3D12 + UINT8 writeMask = 0; + if (att.colorWriteMask & igl::kColorWriteBitsRed) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_RED; + } + if (att.colorWriteMask & igl::kColorWriteBitsGreen) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + } + if (att.colorWriteMask & igl::kColorWriteBitsBlue) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + } + if (att.colorWriteMask & igl::kColorWriteBitsAlpha) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + } + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = writeMask; + + IGL_D3D12_LOG_VERBOSE(" PSO RenderTarget[%u]: BlendEnable=%d, SrcBlend=%d, DstBlend=%d, WriteMask=0x%02X\n", + i, att.blendEnabled, psoDesc.BlendState.RenderTarget[i].SrcBlend, + psoDesc.BlendState.RenderTarget[i].DestBlend, writeMask); + } else { + // Default blend state for unused render targets + psoDesc.BlendState.RenderTarget[i].BlendEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = D3D12_BLEND_ONE; + psoDesc.BlendState.RenderTarget[i].DestBlend = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; + 
psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + // Logic operations support (bitwise blend operations) + // Query hardware support for logic operations + // Note: LogicOp is currently disabled as IGL doesn't expose logic operation settings in RenderPipelineDesc + // To enable in the future: + // 1. Add LogicOp enum and logicOpEnabled/logicOp fields to RenderPipelineDesc::ColorAttachment + // 2. Query D3D12_FEATURE_D3D12_OPTIONS.OutputMergerLogicOp at device initialization + // 3. Set LogicOpEnable = TRUE and LogicOp = convertLogicOp(att.logicOp) when enabled + psoDesc.BlendState.RenderTarget[i].LogicOpEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + } + + // Helper to convert IGL stencil operation to D3D12 + auto toD3D12StencilOp = [](StencilOperation op) { + switch (op) { + case StencilOperation::Keep: return D3D12_STENCIL_OP_KEEP; + case StencilOperation::Zero: return D3D12_STENCIL_OP_ZERO; + case StencilOperation::Replace: return D3D12_STENCIL_OP_REPLACE; + case StencilOperation::IncrementClamp: return D3D12_STENCIL_OP_INCR_SAT; + case StencilOperation::DecrementClamp: return D3D12_STENCIL_OP_DECR_SAT; + case StencilOperation::Invert: return D3D12_STENCIL_OP_INVERT; + case StencilOperation::IncrementWrap: return D3D12_STENCIL_OP_INCR; + case StencilOperation::DecrementWrap: return D3D12_STENCIL_OP_DECR; + default: return D3D12_STENCIL_OP_KEEP; + } + }; + + // Helper to convert IGL compare function to D3D12 + auto toD3D12CompareFunc = [](CompareFunction func) { + switch (func) { + case CompareFunction::Never: return D3D12_COMPARISON_FUNC_NEVER; + case CompareFunction::Less: return D3D12_COMPARISON_FUNC_LESS; + case CompareFunction::Equal: return D3D12_COMPARISON_FUNC_EQUAL; + case CompareFunction::LessEqual: return 
D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CompareFunction::Greater: return D3D12_COMPARISON_FUNC_GREATER; + case CompareFunction::NotEqual: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CompareFunction::GreaterEqual: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CompareFunction::AlwaysPass: return D3D12_COMPARISON_FUNC_ALWAYS; + default: return D3D12_COMPARISON_FUNC_LESS; + } + }; + + // Depth stencil state - check if we have a depth or stencil attachment + const bool hasDepth = (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid); + const bool hasStencil = (desc.targetDesc.stencilAttachmentFormat != TextureFormat::Invalid); + + if (hasDepth) { + psoDesc.DepthStencilState.DepthEnable = TRUE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + // Use LESS_EQUAL to allow Z=0 to pass when depth buffer is cleared to 0 + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + } else { + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + } + + // Configure stencil state (can be used with or without depth) + if (hasStencil) { + // Note: In D3D12/IGL, stencil state is configured via DepthStencilState binding + // For now, we set up basic stencil configuration in the PSO + // Default: stencil disabled unless explicitly configured by DepthStencilState + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + psoDesc.DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + + // Front face stencil operations (defaults) + psoDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + 
psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + + // Back face stencil operations (defaults, same as front) + psoDesc.DepthStencilState.BackFace = psoDesc.DepthStencilState.FrontFace; + + IGL_D3D12_LOG_VERBOSE(" PSO Stencil configured: format=%d\n", (int)desc.targetDesc.stencilAttachmentFormat); + } else { + psoDesc.DepthStencilState.StencilEnable = FALSE; + } + + // Render target formats: support multiple render targets (MRT) + if (!desc.targetDesc.colorAttachments.empty()) { + const UINT n = static_cast(std::min(desc.targetDesc.colorAttachments.size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc.NumRenderTargets = n; + IGL_D3D12_LOG_VERBOSE(" PSO NumRenderTargets = %u (color attachments = %zu)\n", n, desc.targetDesc.colorAttachments.size()); + for (UINT i = 0; i < n; ++i) { + // CRITICAL: Extract value to avoid MSVC debug iterator bounds check in function call + const auto textureFormat = desc.targetDesc.colorAttachments[i].textureFormat; + psoDesc.RTVFormats[i] = textureFormatToDXGIFormat(textureFormat); + IGL_D3D12_LOG_VERBOSE(" PSO RTVFormats[%u] = %d (IGL format %d)\n", i, psoDesc.RTVFormats[i], textureFormat); + } + } else { + psoDesc.NumRenderTargets = 0; + IGL_D3D12_LOG_VERBOSE(" PSO NumRenderTargets = 0 (no color attachments)\n"); + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + psoDesc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; + } + } + if (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid) { + psoDesc.DSVFormat = textureFormatToDXGIFormat(desc.targetDesc.depthAttachmentFormat); + } else { + psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; + } + + // Sample settings + psoDesc.SampleMask = UINT_MAX; + psoDesc.SampleDesc.Count = 1; + psoDesc.SampleDesc.Quality = 0; // Must be 0 for Count=1 + + // Primitive topology - convert from IGL topology enum + if (desc.topology == igl::PrimitiveType::Point) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + 
IGL_D3D12_LOG_VERBOSE(" Setting PSO topology type to POINT\n"); + } else if (desc.topology == igl::PrimitiveType::Line || + desc.topology == igl::PrimitiveType::LineStrip) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + IGL_D3D12_LOG_VERBOSE(" Setting PSO topology type to LINE\n"); + } else { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + IGL_D3D12_LOG_VERBOSE(" Setting PSO topology type to TRIANGLE\n"); + } + psoDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; + + // Additional required fields + psoDesc.NodeMask = 0; // Single GPU operation + psoDesc.CachedPSO.pCachedBlob = nullptr; + psoDesc.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + // Input layout. + std::vector inputElements; + std::vector semanticNames; // Keep semantic name strings alive + + if (desc.vertexInputState) { + // Convert IGL vertex input state to D3D12 input layout + auto* d3d12VertexInput = static_cast(desc.vertexInputState.get()); + const auto& vertexDesc = d3d12VertexInput->getDesc(); + + // Pre-reserve space to prevent reallocation (which would invalidate c_str() pointers) + semanticNames.reserve(vertexDesc.numAttributes); + + IGL_D3D12_LOG_VERBOSE(" Processing vertex input state: %zu attributes\n", vertexDesc.numAttributes); + for (size_t i = 0; i < vertexDesc.numAttributes; ++i) { + const auto& attr = vertexDesc.attributes[i]; + IGL_D3D12_LOG_VERBOSE(" Attribute %zu: name='%s', format=%d, offset=%zu, bufferIndex=%u\n", + i, attr.name.c_str(), static_cast(attr.format), attr.offset, attr.bufferIndex); + + // Map IGL attribute names to D3D12 HLSL semantic names + // IMPORTANT: Semantic names must NOT end with numbers - use SemanticIndex field instead + std::string semanticName; + // Case-insensitive helpers + auto toLower = [](std::string s){ for (auto& c : s) c = static_cast(tolower(c)); return s; }; + const std::string nlow = toLower(attr.name); + auto startsWith = 
[&](const char* p){ return nlow.rfind(p, 0) == 0; }; + auto contains = [&](const char* p){ return nlow.find(p) != std::string::npos; }; + + if (startsWith("pos") || startsWith("position") || contains("position")) { + semanticName = "POSITION"; + } else if (startsWith("col") || startsWith("color")) { + semanticName = "COLOR"; + } else if (startsWith("st") || startsWith("uv") || startsWith("tex") || contains("texcoord") || startsWith("offset")) { + semanticName = "TEXCOORD"; + } else if (startsWith("norm") || startsWith("normal")) { + semanticName = "NORMAL"; + } else if (startsWith("tangent")) { + semanticName = "TANGENT"; + } else { + // Fallback: POSITION for first attribute, TEXCOORD for second, COLOR otherwise + if (i == 0) semanticName = "POSITION"; + else if (i == 1) semanticName = "TEXCOORD"; + else semanticName = "COLOR"; + } + semanticNames.push_back(semanticName); + IGL_D3D12_LOG_VERBOSE(" Mapped '%s' -> '%s'\n", attr.name.c_str(), semanticName.c_str()); + + D3D12_INPUT_ELEMENT_DESC element = {}; + element.SemanticName = semanticNames.back().c_str(); + element.SemanticIndex = 0; + element.AlignedByteOffset = static_cast(attr.offset); + element.InputSlot = attr.bufferIndex; + // Check if this buffer binding uses per-instance data + // Note: inputBindings array may be sparse (bufferIndex >= numInputBindings), so check bounds with MAX + const bool isInstanceData = (attr.bufferIndex < IGL_BUFFER_BINDINGS_MAX && + vertexDesc.inputBindings[attr.bufferIndex].sampleFunction == + VertexSampleFunction::Instance); + element.InputSlotClass = isInstanceData ? D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA + : D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + element.InstanceDataStepRate = isInstanceData ? 
1 : 0; + IGL_D3D12_LOG_VERBOSE(" bufferIndex=%u, isInstance=%d, sampleFunc=%d, InputSlotClass=%d, StepRate=%u\n", + attr.bufferIndex, isInstanceData, + (int)vertexDesc.inputBindings[attr.bufferIndex].sampleFunction, + (int)element.InputSlotClass, element.InstanceDataStepRate); + + // Convert IGL vertex format to DXGI format + switch (attr.format) { + case VertexAttributeFormat::Float1: + element.Format = DXGI_FORMAT_R32_FLOAT; + break; + case VertexAttributeFormat::Float2: + element.Format = DXGI_FORMAT_R32G32_FLOAT; + break; + case VertexAttributeFormat::Float3: + element.Format = DXGI_FORMAT_R32G32B32_FLOAT; + break; + case VertexAttributeFormat::Float4: + element.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + break; + case VertexAttributeFormat::Byte1: + element.Format = DXGI_FORMAT_R8_UINT; + break; + case VertexAttributeFormat::Byte2: + element.Format = DXGI_FORMAT_R8G8_UINT; + break; + case VertexAttributeFormat::Byte4: + element.Format = DXGI_FORMAT_R8G8B8A8_UINT; + break; + case VertexAttributeFormat::UByte4Norm: + element.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case VertexAttributeFormat::HalfFloat1: + element.Format = DXGI_FORMAT_R16_FLOAT; + break; + case VertexAttributeFormat::HalfFloat2: + element.Format = DXGI_FORMAT_R16G16_FLOAT; + break; + case VertexAttributeFormat::HalfFloat3: + // D3D12 doesn't have RGB16_FLOAT, use RGBA16_FLOAT + element.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case VertexAttributeFormat::HalfFloat4: + element.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case VertexAttributeFormat::Int1: + element.Format = DXGI_FORMAT_R32_SINT; + break; + case VertexAttributeFormat::Int2: + element.Format = DXGI_FORMAT_R32G32_SINT; + break; + case VertexAttributeFormat::Int3: + element.Format = DXGI_FORMAT_R32G32B32_SINT; + break; + case VertexAttributeFormat::Int4: + element.Format = DXGI_FORMAT_R32G32B32A32_SINT; + break; + case VertexAttributeFormat::UInt1: + element.Format = DXGI_FORMAT_R32_UINT; + break; + case 
VertexAttributeFormat::UInt2: + element.Format = DXGI_FORMAT_R32G32_UINT; + break; + case VertexAttributeFormat::UInt3: + element.Format = DXGI_FORMAT_R32G32B32_UINT; + break; + case VertexAttributeFormat::UInt4: + element.Format = DXGI_FORMAT_R32G32B32A32_UINT; + break; + case VertexAttributeFormat::Int_2_10_10_10_REV: + // Use an unsigned 10:10:10:2 format and decode SNORM manually in the shader. + element.Format = DXGI_FORMAT_R10G10B10A2_UINT; + break; + default: + element.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; // fallback + IGL_LOG_ERROR(" Unsupported vertex attribute format: %d (using fallback RGBA32_FLOAT)\n", static_cast(attr.format)); + break; + } + + inputElements.push_back(element); + } + } else { + // Default simple triangle layout: position (float3) + color (float4) + inputElements.resize(2); + inputElements[0] = {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, + D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; + inputElements[1] = {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, + D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; + } + psoDesc.InputLayout = {inputElements.data(), static_cast(inputElements.size())}; + + IGL_D3D12_LOG_VERBOSE(" Final input layout: %u elements\n", static_cast(inputElements.size())); + for (size_t i = 0; i < inputElements.size(); ++i) { + IGL_D3D12_LOG_VERBOSE(" [%zu]: %s (index %u), format %d, slot %u, offset %u\n", + i, inputElements[i].SemanticName, inputElements[i].SemanticIndex, + static_cast(inputElements[i].Format), + inputElements[i].InputSlot, inputElements[i].AlignedByteOffset); + } + + // Use shader reflection to verify input signature matches input layout + IGL_D3D12_LOG_VERBOSE(" Reflecting vertex shader to verify input signature...\n"); + igl::d3d12::ComPtr vsReflection; + HRESULT hr = D3DReflect(vsBytecode.data(), vsBytecode.size(), IID_PPV_ARGS(vsReflection.GetAddressOf())); + if (SUCCEEDED(hr)) { + D3D12_SHADER_DESC shaderDesc = {}; + vsReflection->GetDesc(&shaderDesc); + IGL_D3D12_LOG_VERBOSE(" Shader 
expects %u input parameters:\n", shaderDesc.InputParameters); + for (UINT i = 0; i < shaderDesc.InputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC paramDesc = {}; + vsReflection->GetInputParameterDesc(i, ¶mDesc); + IGL_D3D12_LOG_VERBOSE(" [%u]: %s%u (semantic index %u), mask 0x%02X\n", + i, paramDesc.SemanticName, paramDesc.SemanticIndex, + paramDesc.SemanticIndex, paramDesc.Mask); + } + } else { + IGL_D3D12_LOG_VERBOSE(" Shader reflection unavailable: 0x%08X (non-critical - pipeline will still be created)\n", static_cast(hr)); + } + + // PSO cache lookup (thread-safe with double-checked locking). + const size_t psoHash = pipelineCache_.hashRenderPipelineDesc(desc); + igl::d3d12::ComPtr pipelineState; + + // First check: Lock for cache lookup + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.graphicsPSOCache_.find(psoHash); + if (psoIt != pipelineCache_.graphicsPSOCache_.end()) { + // Cache hit - reuse existing PSO + pipelineCache_.graphicsPSOCacheHits_++; + pipelineState = psoIt->second; // Assignment creates a ref-counted copy + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE HIT] Hash=0x%zx, hits=%zu, misses=%zu, hit rate=%.1f%%\n", + psoHash, + pipelineCache_.graphicsPSOCacheHits_, + pipelineCache_.graphicsPSOCacheMisses_, + 100.0 * pipelineCache_.graphicsPSOCacheHits_ / + (pipelineCache_.graphicsPSOCacheHits_ + + pipelineCache_.graphicsPSOCacheMisses_)); + IGL_D3D12_LOG_VERBOSE("Device::createRenderPipeline() SUCCESS (CACHED) - PSO=%p, RootSig=%p\n", + pipelineState.Get(), rootSignature.Get()); + Result::setOk(outResult); + // Create a copy of the root signature for the returned object + igl::d3d12::ComPtr rootSigCopy = rootSignature; + auto renderPipeline = std::make_shared(desc, std::move(pipelineState), std::move(rootSigCopy)); + + // Compute root parameter layout from shader reflection key (same as cache miss path) + UINT paramIndex = 0; + + if (rootSigKey.hasPushConstants) { + 
renderPipeline->shaderReflection_.pushConstantRootParamIndex = paramIndex++; + } + + if (!rootSigKey.usedCBVSlots.empty()) { + renderPipeline->rootParamLayout_.cbvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.cbvDescriptorCount = rootSigKey.maxCBVSlot + 1; + } + + if (!rootSigKey.usedSRVSlots.empty()) { + renderPipeline->rootParamLayout_.srvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.srvDescriptorCount = rootSigKey.maxSRVSlot + 1; + } + + if (!rootSigKey.usedSamplerSlots.empty()) { + renderPipeline->rootParamLayout_.samplerTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.samplerDescriptorCount = rootSigKey.maxSamplerSlot + 1; + } + + if (!rootSigKey.usedUAVSlots.empty()) { + renderPipeline->rootParamLayout_.uavTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.uavDescriptorCount = rootSigKey.maxUAVSlot + 1; + } + + return renderPipeline; + } + } + + // Cache miss - create new PSO outside lock (expensive operation) + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE MISS] Hash=0x%zx\n", psoHash); + + IGL_D3D12_LOG_VERBOSE(" Creating pipeline state (this may take a moment)...\n"); + + // Optional: a more detailed validation pass (validateShaderBindingsAndLayout) can be + // re-enabled here if needed for diagnostics. It was previously wired to shader reflection + // and emitted verbose logs on every cache miss; for normal runs we rely on the D3D12 + // debug layer instead. + + hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + // Dump D3D12 + DXGI debug messages if available so that any invalid + // PSO configuration (bytecode/root signature/RT formats) is visible. + logInfoQueuesForDevice(device, "CreateGraphicsPipelineState"); + + char errorMsg[512]; + snprintf(errorMsg, + sizeof(errorMsg), + "Failed to create pipeline state. 
HRESULT: 0x%08X\n" + " VS size: %zu, PS size: %zu\n" + " Input elements: %u\n" + " NumRenderTargets: %u, RTV[0]: %d, DSV: %d\n" + " SampleDesc: Count=%u, Quality=%u\n" + " PrimitiveTopologyType: %d\n", + static_cast(hr), + psoDesc.VS.BytecodeLength, + psoDesc.PS.BytecodeLength, + psoDesc.InputLayout.NumElements, + psoDesc.NumRenderTargets, + static_cast(psoDesc.RTVFormats[0]), + static_cast(psoDesc.DSVFormat), + psoDesc.SampleDesc.Count, + psoDesc.SampleDesc.Quality, + static_cast(psoDesc.PrimitiveTopologyType)); + IGL_LOG_ERROR(errorMsg); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + // E-011: Set debug name on PSO for better debugging in PIX/RenderDoc + std::string psoName; + if (desc.shaderStages->getVertexModule()) { + psoName += desc.shaderStages->getVertexModule()->info().debugName; + } + if (desc.shaderStages->getFragmentModule()) { + if (!psoName.empty()) { + psoName += " + "; + } + psoName += desc.shaderStages->getFragmentModule()->info().debugName; + } + if (!psoName.empty()) { + // Convert to wide string for D3D12 SetName API + std::wstring wideName(psoName.begin(), psoName.end()); + pipelineState->SetName(wideName.c_str()); + IGL_D3D12_LOG_VERBOSE(" Set PSO debug name: %s\n", psoName.c_str()); + } + + // Second check: Lock for cache insertion with double-check. + // Another thread may have created the PSO while we were creating ours + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.graphicsPSOCache_.find(psoHash); + if (psoIt != pipelineCache_.graphicsPSOCache_.end()) { + // Another thread beat us to it - use their PSO + pipelineCache_.graphicsPSOCacheHits_++; + pipelineState = psoIt->second; + IGL_D3D12_LOG_VERBOSE(" [PSO DOUBLE-CHECK HIT] Another thread created PSO, using theirs. 
Hash=0x%zx\n", psoHash); + } else { + // We're the first to complete - cache our PSO + pipelineCache_.graphicsPSOCacheMisses_++; + pipelineCache_.graphicsPSOCache_[psoHash] = pipelineState; + IGL_D3D12_LOG_VERBOSE(" [PSO CACHED] Hash=0x%zx, hits=%zu, misses=%zu\n", + psoHash, + pipelineCache_.graphicsPSOCacheHits_, + pipelineCache_.graphicsPSOCacheMisses_); + } + } + + IGL_D3D12_LOG_VERBOSE("Device::createRenderPipeline() SUCCESS - PSO=%p, RootSig=%p (hash=0x%zx)\n", + pipelineState.Get(), rootSignature.Get(), psoHash); + + // Create the pipeline state object + auto renderPipeline = std::make_shared(desc, std::move(pipelineState), std::move(rootSignature)); + + // Compute root parameter layout from shader reflection key + // The layout order matches createRootSignatureFromKey(): + // 1. Push constants (if present) + // 2. CBV table (if shader uses CBVs) + // 3. SRV table (if shader uses SRVs) + // 4. Sampler table (if shader uses samplers) + // 5. UAV table (if shader uses UAVs) + UINT paramIndex = 0; + + if (rootSigKey.hasPushConstants) { + renderPipeline->shaderReflection_.pushConstantRootParamIndex = paramIndex++; + } + + if (!rootSigKey.usedCBVSlots.empty()) { + renderPipeline->rootParamLayout_.cbvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.cbvDescriptorCount = rootSigKey.maxCBVSlot + 1; + } + + if (!rootSigKey.usedSRVSlots.empty()) { + renderPipeline->rootParamLayout_.srvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.srvDescriptorCount = rootSigKey.maxSRVSlot + 1; + } + + if (!rootSigKey.usedSamplerSlots.empty()) { + renderPipeline->rootParamLayout_.samplerTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.samplerDescriptorCount = rootSigKey.maxSamplerSlot + 1; + } + + if (!rootSigKey.usedUAVSlots.empty()) { + renderPipeline->rootParamLayout_.uavTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.uavDescriptorCount = rootSigKey.maxUAVSlot + 1; + } + + Result::setOk(outResult); + return renderPipeline; +} + 
+// D3D12-specific: Create PSO variant with substituted render target formats +// This is called by RenderPipelineState::getPipelineState() for Vulkan-style dynamic PSO selection +igl::d3d12::ComPtr Device::createPipelineStateVariant( + const RenderPipelineDesc& desc, + ID3D12RootSignature* rootSignature, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createPipelineStateVariant() - Creating PSO variant for framebuffer formats\n"); + + auto* device = ctx_->getDevice(); + if (!device || !rootSignature) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Invalid device or root signature"); + return nullptr; + } + + if (!desc.shaderStages) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages required"); + return nullptr; + } + + // Get shader bytecode + auto* vertexModule = static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = static_cast(desc.shaderStages->getFragmentModule().get()); + + if (!vertexModule || !fragmentModule) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Vertex and fragment shaders required"); + return nullptr; + } + + const auto& vsBytecode = vertexModule->getBytecode(); + const auto& psBytecode = fragmentModule->getBytecode(); + + // Build D3D12_GRAPHICS_PIPELINE_STATE_DESC from RenderPipelineDesc + // This mirrors the logic in createRenderPipeline() but without caching + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature; + + // Shader bytecode + psoDesc.VS = {vsBytecode.data(), vsBytecode.size()}; + psoDesc.PS = {psBytecode.data(), psBytecode.size()}; + psoDesc.DS = {nullptr, 0}; + psoDesc.HS = {nullptr, 0}; + psoDesc.GS = {nullptr, 0}; + + // Rasterizer state + psoDesc.RasterizerState.FillMode = (desc.polygonFillMode == PolygonFillMode::Line) + ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + + switch (desc.cullMode) { + case CullMode::Back: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + case CullMode::Front: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CullMode::Disabled: + default: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + } + + psoDesc.RasterizerState.FrontCounterClockwise = + (desc.frontFaceWinding == WindingMode::CounterClockwise) ? TRUE : FALSE; + psoDesc.RasterizerState.DepthBias = 0; + psoDesc.RasterizerState.DepthBiasClamp = 0.0f; + psoDesc.RasterizerState.SlopeScaledDepthBias = 0.0f; + psoDesc.RasterizerState.DepthClipEnable = TRUE; + psoDesc.RasterizerState.MultisampleEnable = (desc.sampleCount > 1) ? TRUE : FALSE; + psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; + psoDesc.RasterizerState.ForcedSampleCount = 0; + psoDesc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + // Blend state + psoDesc.BlendState.AlphaToCoverageEnable = FALSE; + const size_t numColorAttachments = desc.targetDesc.colorAttachments.size(); + psoDesc.BlendState.IndependentBlendEnable = numColorAttachments > 1 ? 
TRUE : FALSE; + + auto toD3D12Blend = [](BlendFactor f) { + switch (f) { + case BlendFactor::Zero: return D3D12_BLEND_ZERO; + case BlendFactor::One: return D3D12_BLEND_ONE; + case BlendFactor::SrcColor: return D3D12_BLEND_SRC_COLOR; + case BlendFactor::OneMinusSrcColor: return D3D12_BLEND_INV_SRC_COLOR; + case BlendFactor::SrcAlpha: return D3D12_BLEND_SRC_ALPHA; + case BlendFactor::OneMinusSrcAlpha: return D3D12_BLEND_INV_SRC_ALPHA; + case BlendFactor::DstColor: return D3D12_BLEND_DEST_COLOR; + case BlendFactor::OneMinusDstColor: return D3D12_BLEND_INV_DEST_COLOR; + case BlendFactor::DstAlpha: return D3D12_BLEND_DEST_ALPHA; + case BlendFactor::OneMinusDstAlpha: return D3D12_BLEND_INV_DEST_ALPHA; + case BlendFactor::SrcAlphaSaturated: return D3D12_BLEND_SRC_ALPHA_SAT; + case BlendFactor::BlendColor: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendColor: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::BlendAlpha: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendAlpha: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::Src1Color: return D3D12_BLEND_SRC1_COLOR; + case BlendFactor::OneMinusSrc1Color: return D3D12_BLEND_INV_SRC1_COLOR; + case BlendFactor::Src1Alpha: return D3D12_BLEND_SRC1_ALPHA; + case BlendFactor::OneMinusSrc1Alpha: return D3D12_BLEND_INV_SRC1_ALPHA; + default: return D3D12_BLEND_ONE; + } + }; + + auto toD3D12BlendOp = [](BlendOp op) { + switch (op) { + case BlendOp::Add: return D3D12_BLEND_OP_ADD; + case BlendOp::Subtract: return D3D12_BLEND_OP_SUBTRACT; + case BlendOp::ReverseSubtract: return D3D12_BLEND_OP_REV_SUBTRACT; + case BlendOp::Min: return D3D12_BLEND_OP_MIN; + case BlendOp::Max: return D3D12_BLEND_OP_MAX; + default: return D3D12_BLEND_OP_ADD; + } + }; + + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + if (i < desc.targetDesc.colorAttachments.size()) { + const auto& att = desc.targetDesc.colorAttachments[i]; + psoDesc.BlendState.RenderTarget[i].BlendEnable = 
att.blendEnabled ? TRUE : FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = toD3D12Blend(att.srcRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlend = toD3D12Blend(att.dstRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOp = toD3D12BlendOp(att.rgbBlendOp); + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = toD3D12Blend(att.srcAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = toD3D12Blend(att.dstAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = toD3D12BlendOp(att.alphaBlendOp); + + UINT8 writeMask = 0; + if (att.colorWriteMask & igl::kColorWriteBitsRed) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_RED; + } + if (att.colorWriteMask & igl::kColorWriteBitsGreen) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + } + if (att.colorWriteMask & igl::kColorWriteBitsBlue) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + } + if (att.colorWriteMask & igl::kColorWriteBitsAlpha) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + } + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = writeMask; + } else { + psoDesc.BlendState.RenderTarget[i].BlendEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = D3D12_BLEND_ONE; + psoDesc.BlendState.RenderTarget[i].DestBlend = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; + psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + psoDesc.BlendState.RenderTarget[i].LogicOpEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + } + + // Depth-stencil state + const bool hasDepth = (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid); + const bool hasStencil = (desc.targetDesc.stencilAttachmentFormat != TextureFormat::Invalid); + + 
if (hasDepth) { + psoDesc.DepthStencilState.DepthEnable = TRUE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + } else { + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + } + + if (hasStencil) { + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + psoDesc.DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + psoDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + psoDesc.DepthStencilState.BackFace = psoDesc.DepthStencilState.FrontFace; + } else { + psoDesc.DepthStencilState.StencilEnable = FALSE; + } + + // Render target formats - use the modified formats from desc + if (!desc.targetDesc.colorAttachments.empty()) { + const UINT n = static_cast(std::min(desc.targetDesc.colorAttachments.size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc.NumRenderTargets = n; + for (UINT i = 0; i < n; ++i) { + const auto textureFormat = desc.targetDesc.colorAttachments[i].textureFormat; + psoDesc.RTVFormats[i] = textureFormatToDXGIFormat(textureFormat); + IGL_D3D12_LOG_VERBOSE(" PSO Variant RTVFormats[%u] = %d (IGL format %d)\n", i, psoDesc.RTVFormats[i], textureFormat); + } + } else { + psoDesc.NumRenderTargets = 0; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + psoDesc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; + } + } + + if (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid) { + psoDesc.DSVFormat = 
textureFormatToDXGIFormat(desc.targetDesc.depthAttachmentFormat); + } else { + psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; + } + + // Sample settings + psoDesc.SampleMask = UINT_MAX; + psoDesc.SampleDesc.Count = 1; + psoDesc.SampleDesc.Quality = 0; + + // Primitive topology + if (desc.topology == igl::PrimitiveType::Point) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + } else if (desc.topology == igl::PrimitiveType::Line || + desc.topology == igl::PrimitiveType::LineStrip) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + } else { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + } + psoDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; + + psoDesc.NodeMask = 0; + psoDesc.CachedPSO.pCachedBlob = nullptr; + psoDesc.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + // Input layout + std::vector inputElements; + std::vector semanticNames; + + if (desc.vertexInputState) { + auto* d3d12VertexInput = static_cast(desc.vertexInputState.get()); + const auto& vertexDesc = d3d12VertexInput->getDesc(); + semanticNames.reserve(vertexDesc.numAttributes); + + for (size_t i = 0; i < vertexDesc.numAttributes; ++i) { + const auto& attr = vertexDesc.attributes[i]; + std::string semanticName; + auto toLower = [](std::string s){ for (auto& c : s) c = static_cast(tolower(c)); return s; }; + const std::string nlow = toLower(attr.name); + auto startsWith = [&](const char* p){ return nlow.rfind(p, 0) == 0; }; + auto contains = [&](const char* p){ return nlow.find(p) != std::string::npos; }; + + if (startsWith("pos") || startsWith("position") || contains("position")) { + semanticName = "POSITION"; + } else if (startsWith("col") || startsWith("color")) { + semanticName = "COLOR"; + } else if (startsWith("st") || startsWith("uv") || startsWith("tex") || contains("texcoord") || startsWith("offset")) { + semanticName = "TEXCOORD"; + } else if (startsWith("norm") 
|| startsWith("normal")) { + semanticName = "NORMAL"; + } else if (startsWith("tangent")) { + semanticName = "TANGENT"; + } else { + if (i == 0) semanticName = "POSITION"; + else if (i == 1) semanticName = "TEXCOORD"; + else semanticName = "COLOR"; + } + semanticNames.push_back(semanticName); + + D3D12_INPUT_ELEMENT_DESC element = {}; + element.SemanticName = semanticNames.back().c_str(); + element.SemanticIndex = 0; + element.AlignedByteOffset = static_cast(attr.offset); + element.InputSlot = attr.bufferIndex; + + bool isPerInstance = false; + if (attr.bufferIndex < vertexDesc.numInputBindings) { + isPerInstance = (vertexDesc.inputBindings[attr.bufferIndex].sampleFunction == igl::VertexSampleFunction::Instance); + } + element.InputSlotClass = isPerInstance ? D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA : D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + element.InstanceDataStepRate = isPerInstance ? 1 : 0; + + auto toD3D12Format = [](VertexAttributeFormat fmt) -> DXGI_FORMAT { + switch (fmt) { + case VertexAttributeFormat::Float1: return DXGI_FORMAT_R32_FLOAT; + case VertexAttributeFormat::Float2: return DXGI_FORMAT_R32G32_FLOAT; + case VertexAttributeFormat::Float3: return DXGI_FORMAT_R32G32B32_FLOAT; + case VertexAttributeFormat::Float4: return DXGI_FORMAT_R32G32B32A32_FLOAT; + case VertexAttributeFormat::Byte1: return DXGI_FORMAT_R8_SINT; + case VertexAttributeFormat::Byte2: return DXGI_FORMAT_R8G8_SINT; + case VertexAttributeFormat::Byte4: return DXGI_FORMAT_R8G8B8A8_SINT; + case VertexAttributeFormat::UByte4Norm: return DXGI_FORMAT_R8G8B8A8_UNORM; + default: return DXGI_FORMAT_UNKNOWN; + } + }; + element.Format = toD3D12Format(attr.format); + inputElements.push_back(element); + } + } + psoDesc.InputLayout = {inputElements.data(), static_cast(inputElements.size())}; + + // Create the pipeline state + igl::d3d12::ComPtr pipelineState; + HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + 
logInfoQueuesForDevice(device, "CreateGraphicsPipelineState (variant)"); + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create PSO variant. HRESULT: 0x%08X, RTV[0]: %d, DSV: %d", + static_cast(hr), + static_cast(psoDesc.RTVFormats[0]), + static_cast(psoDesc.DSVFormat)); + IGL_LOG_ERROR(errorMsg); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Device::createPipelineStateVariant() SUCCESS - PSO=%p\n", pipelineState.Get()); + Result::setOk(outResult); + return pipelineState; +} + + // Shader library and modules. +std::unique_ptr Device::createShaderLibrary(const ShaderLibraryDesc& desc, + Result* IGL_NULLABLE + outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createShaderLibrary() - moduleInfo count=%zu, debugName='%s'\n", + desc.moduleInfo.size(), desc.debugName.c_str()); + + if (desc.moduleInfo.empty()) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "ShaderLibrary requires at least one module"); + return nullptr; + } + + if (!desc.input.isValid()) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Invalid shader library input"); + return nullptr; + } + + std::vector> modules; + modules.reserve(desc.moduleInfo.size()); + + if (desc.input.type == ShaderInputType::Binary) { + // Binary input: share the same bytecode across all modules (Metal-style) + IGL_D3D12_LOG_VERBOSE(" Using binary input (%zu bytes) for all modules\n", desc.input.length); + std::vector bytecode(desc.input.length); + std::memcpy(bytecode.data(), desc.input.data, desc.input.length); + + for (const auto& info : desc.moduleInfo) { + // Create a copy of the bytecode for each module + std::vector moduleBytecode = bytecode; + modules.push_back(std::make_shared(info, std::move(moduleBytecode))); + } + } else if (desc.input.type == ShaderInputType::String) { + // String input: compile each module separately with its own entry point + if (!desc.input.source || 
!*desc.input.source) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader library source is empty"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Compiling %zu modules from string input\n", desc.moduleInfo.size()); + + for (const auto& info : desc.moduleInfo) { + // Create a ShaderModuleDesc for this specific module + ShaderModuleDesc moduleDesc; + moduleDesc.info = info; + moduleDesc.input.type = ShaderInputType::String; + moduleDesc.input.source = desc.input.source; + moduleDesc.input.options = desc.input.options; + moduleDesc.debugName = desc.debugName + "_" + info.entryPoint; + + Result moduleResult; + auto module = createShaderModule(moduleDesc, &moduleResult); + if (!moduleResult.isOk()) { + IGL_LOG_ERROR(" Failed to compile module '%s': %s\n", + info.entryPoint.c_str(), moduleResult.message.c_str()); + Result::setResult(outResult, std::move(moduleResult)); + return nullptr; + } + modules.push_back(std::move(module)); + } + } else { + Result::setResult(outResult, Result::Code::Unsupported, "Unsupported shader library input type"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Device::createShaderLibrary() SUCCESS - created %zu modules\n", modules.size()); + Result::setOk(outResult); + return std::make_unique(std::move(modules)); +} + +// Helper function: Compile HLSL shader using legacy FXC compiler (Shader Model 5.1) +// This is a fallback when DXC is unavailable or fails +namespace { +Result compileShaderFXC( + const char* source, + size_t sourceLength, + const char* entryPoint, + const char* target, + const char* debugName, + UINT compileFlags, + std::vector& outBytecode, + std::string& outErrors) { + + IGL_D3D12_LOG_VERBOSE("FXC: Compiling shader '%s' with target '%s' (%zu bytes source)\n", + debugName ? 
debugName : "unnamed", + target, + sourceLength); + + igl::d3d12::ComPtr bytecode; + igl::d3d12::ComPtr errors; + + // D3DCompile is the legacy FXC compiler API + // It's always available on Windows 10+ (via d3dcompiler_47.dll) + HRESULT hr = D3DCompile( + source, + sourceLength, + debugName, // Source name (for error messages) + nullptr, // Defines + D3D_COMPILE_STANDARD_FILE_INCLUDE, + entryPoint, + target, + compileFlags, + 0, // Effect flags (not used for shaders) + bytecode.GetAddressOf(), + errors.GetAddressOf() + ); + + if (FAILED(hr)) { + std::string errorMsg = "FXC compilation failed"; + if (errors.Get() && errors->GetBufferSize() > 0) { + outErrors = std::string( + static_cast(errors->GetBufferPointer()), + errors->GetBufferSize() + ); + errorMsg += ": " + outErrors; + IGL_LOG_ERROR("FXC: %s\n", outErrors.c_str()); + } + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Log warnings if any + if (errors.Get() && errors->GetBufferSize() > 0) { + outErrors = std::string( + static_cast(errors->GetBufferPointer()), + errors->GetBufferSize() + ); + IGL_D3D12_LOG_VERBOSE("FXC: Compilation warnings:\n%s\n", outErrors.c_str()); + } + + // Copy bytecode to output + const uint8_t* data = static_cast(bytecode->GetBufferPointer()); + size_t size = bytecode->GetBufferSize(); + outBytecode.assign(data, data + size); + + IGL_D3D12_LOG_VERBOSE("FXC: Compilation successful (%zu bytes bytecode)\n", size); + + return Result(); +} +} // anonymous namespace + +// Note: getShaderTarget() helper moved to Common.h for shared use. 
+ +std::shared_ptr Device::createShaderModule(const ShaderModuleDesc& desc, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createShaderModule() - stage=%d, entryPoint='%s', debugName='%s'\n", + static_cast(desc.info.stage), desc.info.entryPoint.c_str(), desc.debugName.c_str()); + + if (!desc.input.isValid()) { + IGL_LOG_ERROR(" Invalid shader input!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Invalid shader input"); + return nullptr; + } + + std::vector bytecode; + + if (desc.input.type == ShaderInputType::Binary) { + // Binary input - copy bytecode directly + IGL_D3D12_LOG_VERBOSE(" Using binary input (%zu bytes)\n", desc.input.length); + bytecode.resize(desc.input.length); + std::memcpy(bytecode.data(), desc.input.data, desc.input.length); + } else if (desc.input.type == ShaderInputType::String) { + // String input - compile HLSL at runtime using DXC (DirectX Shader Compiler) + // For string input, use desc.input.source (not data) and calculate length + if (!desc.input.source) { + IGL_LOG_ERROR(" Shader source is null!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader source is null"); + return nullptr; + } + + const size_t sourceLength = strlen(desc.input.source); + IGL_D3D12_LOG_VERBOSE(" Compiling HLSL from string (%zu bytes) using DXC...\n", sourceLength); + + // Initialize DXC compiler thread-safely using std::call_once. 
+ static DXCCompiler dxcCompiler; + static std::once_flag dxcInitFlag; + static bool dxcAvailable = false; + + std::call_once(dxcInitFlag, []() { + Result initResult = dxcCompiler.initialize(); + dxcAvailable = initResult.isOk(); + + if (dxcAvailable) { + IGL_D3D12_LOG_VERBOSE(" DXC compiler initialized successfully (Shader Model 6.0+ support)\n"); + } else { + IGL_D3D12_LOG_VERBOSE(" DXC compiler initialization failed: %s\n", initResult.message.c_str()); + IGL_D3D12_LOG_VERBOSE(" Falling back to FXC (Shader Model 5.1)\n"); + } + }); + + // Determine shader target based on stage + // Use SM 6.0 for DXC, SM 5.1 for FXC fallback + const char* targetDXC = nullptr; + const char* targetFXC = nullptr; + switch (desc.info.stage) { + case ShaderStage::Vertex: + targetDXC = "vs_6_0"; + targetFXC = "vs_5_1"; + break; + case ShaderStage::Fragment: + targetDXC = "ps_6_0"; + targetFXC = "ps_5_1"; + break; + case ShaderStage::Compute: + targetDXC = "cs_6_0"; + targetFXC = "cs_5_1"; + break; + default: + IGL_LOG_ERROR(" Unsupported shader stage!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Unsupported shader stage"); + return nullptr; + } + + // Compile flags (DXC uses D3DCOMPILE_* flags) + UINT compileFlags = D3DCOMPILE_ENABLE_STRICTNESS; + + // Enable shader debugging features + #ifdef _DEBUG + compileFlags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; + IGL_D3D12_LOG_VERBOSE(" DEBUG BUILD: Enabling shader debug info and disabling optimizations\n"); + #else + // In release builds, still enable debug info for PIX captures unless explicitly disabled + const char* disableDebugInfo = std::getenv("IGL_D3D12_DISABLE_SHADER_DEBUG"); + if (!disableDebugInfo || std::string(disableDebugInfo) != "1") { + compileFlags |= D3DCOMPILE_DEBUG; + IGL_D3D12_LOG_VERBOSE(" RELEASE BUILD: Enabling shader debug info (disable with IGL_D3D12_DISABLE_SHADER_DEBUG=1)\n"); + } + #endif + + // Optional: Enable warnings as errors for stricter validation + const char* 
warningsAsErrors = std::getenv("IGL_D3D12_SHADER_WARNINGS_AS_ERRORS"); + if (warningsAsErrors && std::string(warningsAsErrors) == "1") { + compileFlags |= D3DCOMPILE_WARNINGS_ARE_ERRORS; + IGL_D3D12_LOG_VERBOSE(" Treating shader warnings as errors\n"); + } + + // Try DXC first if available, fallback to FXC if DXC fails or unavailable + std::string errors; + Result compileResult; + bool compiledWithDXC = false; + + if (dxcAvailable) { + // Try DXC compilation (Shader Model 6.0) + IGL_D3D12_LOG_VERBOSE(" Attempting DXC compilation (Shader Model 6.0)...\n"); + compileResult = dxcCompiler.compile( + desc.input.source, + sourceLength, + desc.info.entryPoint.c_str(), + targetDXC, + desc.debugName.c_str(), + compileFlags, + bytecode, + errors + ); + + if (compileResult.isOk()) { + IGL_D3D12_LOG_VERBOSE(" DXC shader compiled successfully (%zu bytes DXIL bytecode)\n", bytecode.size()); + compiledWithDXC = true; + } else { + IGL_D3D12_LOG_VERBOSE(" DXC compilation failed: %s\n", compileResult.message.c_str()); + if (!errors.empty()) { + IGL_D3D12_LOG_VERBOSE(" DXC errors: %s\n", errors.c_str()); + } + IGL_D3D12_LOG_VERBOSE(" Falling back to FXC (Shader Model 5.1)...\n"); + } + } + + // Use FXC if DXC is unavailable or failed + if (!compiledWithDXC) { + errors.clear(); + compileResult = compileShaderFXC( + desc.input.source, + sourceLength, + desc.info.entryPoint.c_str(), + targetFXC, + desc.debugName.c_str(), + compileFlags, + bytecode, + errors + ); + + if (!compileResult.isOk()) { + // Both DXC and FXC failed - report error + std::string errorMsg; + const char* stageStr = ""; + switch (desc.info.stage) { + case ShaderStage::Vertex: stageStr = "VERTEX"; break; + case ShaderStage::Fragment: stageStr = "FRAGMENT/PIXEL"; break; + case ShaderStage::Compute: stageStr = "COMPUTE"; break; + default: stageStr = "UNKNOWN"; break; + } + + errorMsg = "Shader compilation FAILED (both DXC and FXC)\n"; + errorMsg += " Stage: " + std::string(stageStr) + "\n"; + errorMsg += " Entry Point: 
" + desc.info.entryPoint + "\n"; + errorMsg += " Target (FXC): " + std::string(targetFXC) + "\n"; + errorMsg += " Debug Name: " + desc.debugName + "\n"; + + if (!errors.empty()) { + errorMsg += "\n=== FXC COMPILER ERRORS ===\n"; + errorMsg += errors; + errorMsg += "\n===========================\n"; + } else { + errorMsg += " Error: " + compileResult.message + "\n"; + } + + IGL_LOG_ERROR("%s", errorMsg.c_str()); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg.c_str()); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" FXC shader compiled successfully (%zu bytes bytecode)\n", bytecode.size()); + } + } else { + Result::setResult(outResult, Result::Code::Unsupported, "Unsupported shader input type"); + return nullptr; + } + + // Create shader module with bytecode + auto module = std::make_shared(desc.info, std::move(bytecode)); + + // Create shader reflection from DXIL bytecode. + // This allows runtime queries of shader resources, bindings, and constant buffers. + IGL_D3D12_LOG_VERBOSE(" Attempting to create shader reflection (bytecode size=%zu)...\n", + module->getBytecode().size()); + if (!module->getBytecode().empty()) { + // Create IDxcUtils for reflection + igl::d3d12::ComPtr dxcUtils; + IGL_D3D12_LOG_VERBOSE(" Creating IDxcUtils for reflection...\n"); + HRESULT hr = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(dxcUtils.GetAddressOf())); + IGL_D3D12_LOG_VERBOSE(" DxcCreateInstance result: 0x%08X\n", hr); + + if (SUCCEEDED(hr)) { + // Prepare buffer for reflection + DxcBuffer reflectionBuffer = {}; + reflectionBuffer.Ptr = module->getBytecode().data(); + reflectionBuffer.Size = module->getBytecode().size(); + reflectionBuffer.Encoding = 0; + + // Create reflection interface + igl::d3d12::ComPtr reflection; + hr = dxcUtils->CreateReflection(&reflectionBuffer, IID_PPV_ARGS(reflection.GetAddressOf())); + + if (SUCCEEDED(hr)) { + module->setReflection(reflection); + IGL_D3D12_LOG_VERBOSE(" Shader reflection created successfully (DXIL 
reflection)\n"); + + // Emit a concise reflection dump by default to help diagnose + // resource-binding issues. This is intentionally always enabled + // (in debug builds) so that D3D12 binding problems are visible + // without extra flags. + D3D12_SHADER_DESC shaderDesc = {}; + if (SUCCEEDED(reflection->GetDesc(&shaderDesc))) { + const char* stageStr = "UNKNOWN"; + switch (desc.info.stage) { + case ShaderStage::Vertex: + stageStr = "VERTEX"; + break; + case ShaderStage::Fragment: + stageStr = "FRAGMENT/PIXEL"; + break; + case ShaderStage::Compute: + stageStr = "COMPUTE"; + break; + default: + break; + } + + IGL_LOG_INFO("\n=== SHADER REFLECTION (%s - %s) ===\n", + stageStr, + desc.info.entryPoint.c_str()); + IGL_LOG_INFO(" Bound Resources: %u\n", shaderDesc.BoundResources); + for (UINT i = 0; i < shaderDesc.BoundResources; ++i) { + D3D12_SHADER_INPUT_BIND_DESC bindDesc = {}; + if (SUCCEEDED(reflection->GetResourceBindingDesc(i, &bindDesc))) { + const char* typeStr = "Unknown"; + const char* registerPrefix = "?"; + switch (bindDesc.Type) { + case D3D_SIT_CBUFFER: + typeStr = "ConstantBuffer"; + registerPrefix = "b"; + break; + case D3D_SIT_TBUFFER: + typeStr = "TextureBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_TEXTURE: + typeStr = "Texture"; + registerPrefix = "t"; + break; + case D3D_SIT_SAMPLER: + typeStr = "Sampler"; + registerPrefix = "s"; + break; + case D3D_SIT_UAV_RWTYPED: + typeStr = "RWTexture"; + registerPrefix = "u"; + break; + case D3D_SIT_STRUCTURED: + typeStr = "StructuredBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_UAV_RWSTRUCTURED: + typeStr = "RWStructuredBuffer"; + registerPrefix = "u"; + break; + case D3D_SIT_BYTEADDRESS: + typeStr = "ByteAddressBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_UAV_RWBYTEADDRESS: + typeStr = "RWByteAddressBuffer"; + registerPrefix = "u"; + break; + default: + break; + } + + IGL_LOG_INFO(" [%u] %s '%s' at %s%u (space %u)\n", + i, + typeStr, + bindDesc.Name, + registerPrefix, + 
bindDesc.BindPoint, + bindDesc.Space); + } + } + + IGL_LOG_INFO(" Constant Buffers: %u\n", shaderDesc.ConstantBuffers); + for (UINT i = 0; i < shaderDesc.ConstantBuffers; ++i) { + ID3D12ShaderReflectionConstantBuffer* cb = + reflection->GetConstantBufferByIndex(i); + D3D12_SHADER_BUFFER_DESC cbDesc = {}; + if (cb && SUCCEEDED(cb->GetDesc(&cbDesc))) { + IGL_LOG_INFO(" [%u] %s: %u bytes, %u variables\n", + i, + cbDesc.Name, + cbDesc.Size, + cbDesc.Variables); + } + } + + // Log input and output signature parameters to help diagnose + // pipeline state creation issues (semantic/mask mismatches). + IGL_LOG_INFO(" Input Parameters: %u\n", shaderDesc.InputParameters); + for (UINT i = 0; i < shaderDesc.InputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC p = {}; + if (SUCCEEDED(reflection->GetInputParameterDesc(i, &p))) { + IGL_LOG_INFO(" [In %u] %s%u: reg=%u, mask=0x%02X\n", + i, + p.SemanticName ? p.SemanticName : "", + p.SemanticIndex, + p.Register, + p.Mask); + } + } + + IGL_LOG_INFO(" Output Parameters: %u\n", shaderDesc.OutputParameters); + for (UINT i = 0; i < shaderDesc.OutputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC p = {}; + if (SUCCEEDED(reflection->GetOutputParameterDesc(i, &p))) { + IGL_LOG_INFO(" [Out %u] %s%u: reg=%u, mask=0x%02X\n", + i, + p.SemanticName ? 
p.SemanticName : "", + p.SemanticIndex, + p.Register, + p.Mask); + } + } + IGL_LOG_INFO("================================\n\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE(" Failed to create DXIL reflection: 0x%08X, trying D3DReflect for DXBC bytecode...\n", hr); + + // Fallback to D3DReflect for DXBC bytecode (FXC-compiled shaders) + hr = D3DReflect(module->getBytecode().data(), module->getBytecode().size(), + IID_PPV_ARGS(reflection.GetAddressOf())); + + if (SUCCEEDED(hr)) { + module->setReflection(reflection); + IGL_D3D12_LOG_VERBOSE(" Shader reflection created successfully (DXBC reflection)\n"); + + // Emit reflection dump for DXBC shaders as well + D3D12_SHADER_DESC shaderDesc = {}; + if (SUCCEEDED(reflection->GetDesc(&shaderDesc))) { + const char* stageStr = "UNKNOWN"; + switch (desc.info.stage) { + case ShaderStage::Vertex: + stageStr = "VERTEX"; + break; + case ShaderStage::Fragment: + stageStr = "FRAGMENT/PIXEL"; + break; + case ShaderStage::Compute: + stageStr = "COMPUTE"; + break; + default: + break; + } + + IGL_LOG_INFO("\n=== SHADER REFLECTION (%s - %s) [DXBC] ===\n", + stageStr, + desc.info.entryPoint.c_str()); + IGL_LOG_INFO(" Bound Resources: %u\n", shaderDesc.BoundResources); + for (UINT i = 0; i < shaderDesc.BoundResources; ++i) { + D3D12_SHADER_INPUT_BIND_DESC bindDesc = {}; + if (SUCCEEDED(reflection->GetResourceBindingDesc(i, &bindDesc))) { + const char* typeStr = "Unknown"; + const char* registerPrefix = "?"; + switch (bindDesc.Type) { + case D3D_SIT_CBUFFER: + typeStr = "ConstantBuffer"; + registerPrefix = "b"; + break; + case D3D_SIT_TBUFFER: + typeStr = "TextureBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_TEXTURE: + typeStr = "Texture"; + registerPrefix = "t"; + break; + case D3D_SIT_SAMPLER: + typeStr = "Sampler"; + registerPrefix = "s"; + break; + case D3D_SIT_UAV_RWTYPED: + typeStr = "RWTexture/UAV"; + registerPrefix = "u"; + break; + default: + break; + } + + IGL_LOG_INFO(" [%u] %s '%s' at %s%u (space %u)\n", + i, + typeStr, + 
bindDesc.Name, + registerPrefix, + bindDesc.BindPoint, + bindDesc.Space); + } + } + IGL_LOG_INFO("================================\n\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE(" Failed to create reflection with both DXC and D3DReflect: 0x%08X (non-fatal)\n", hr); + } + } + } else { + IGL_D3D12_LOG_VERBOSE(" Failed to create DXC utils for reflection: 0x%08X (non-fatal)\n", hr); + } + } + + Result::setOk(outResult); + return module; +} + +// Framebuffer +std::shared_ptr Device::createFramebuffer(const FramebufferDesc& desc, + Result* IGL_NULLABLE outResult) { + Result::setOk(outResult); + return std::make_shared(desc); +} + +// Capabilities +const IPlatformDevice& Device::getPlatformDevice() const noexcept { + return *platformDevice_; +} + +bool Device::hasFeature(DeviceFeatures feature) const { + IGL_D3D12_LOG_VERBOSE("[D3D12] hasFeature query: %d\n", static_cast(feature)); + switch (feature) { + // Expected true in tests (non-OpenGL branch) + case DeviceFeatures::CopyBuffer: + case DeviceFeatures::DrawInstanced: + case DeviceFeatures::DrawFirstIndexFirstVertex: // D3D12 DrawIndexedInstanced supports first index/vertex + case DeviceFeatures::SRGB: + case DeviceFeatures::SRGBSwapchain: + case DeviceFeatures::UniformBlocks: + case DeviceFeatures::StandardDerivative: // ddx/ddy available in HLSL + case DeviceFeatures::TextureFloat: + case DeviceFeatures::TextureHalfFloat: + case DeviceFeatures::ReadWriteFramebuffer: + case DeviceFeatures::TextureNotPot: + case DeviceFeatures::ShaderTextureLod: + case DeviceFeatures::ExplicitBinding: + case DeviceFeatures::MapBufferRange: // UPLOAD/READBACK buffers support mapping + case DeviceFeatures::ShaderLibrary: // Support shader libraries in D3D12 + case DeviceFeatures::Texture3D: // D3D12 supports 3D textures (DIMENSION_TEXTURE3D). + case DeviceFeatures::TexturePartialMipChain: // D3D12 supports partial mip chains via custom SRVs. + case DeviceFeatures::TextureViews: // D3D12 supports createTextureView() via shared resources. 
+ return true; + case DeviceFeatures::MultipleRenderTargets: + return true; // D3D12 supports up to 8 simultaneous render targets. + case DeviceFeatures::Compute: + return true; // Compute shaders now supported with compute pipeline and dispatch + case DeviceFeatures::Texture2DArray: + IGL_D3D12_LOG_VERBOSE("[D3D12] hasFeature(Texture2DArray) returning TRUE\n"); + return true; // D3D12 supports 2D texture arrays via DepthOrArraySize in D3D12_RESOURCE_DESC + case DeviceFeatures::PushConstants: + return true; // Implemented via root constants at parameter 0 (shader register b2) + case DeviceFeatures::SRGBWriteControl: + case DeviceFeatures::TextureArrayExt: + case DeviceFeatures::TextureExternalImage: + case DeviceFeatures::Multiview: + case DeviceFeatures::BindBytes: // Not supported - use uniform buffers instead + case DeviceFeatures::BindUniform: + case DeviceFeatures::BufferRing: + case DeviceFeatures::BufferNoCopy: + case DeviceFeatures::BufferDeviceAddress: + case DeviceFeatures::ShaderTextureLodExt: + case DeviceFeatures::StandardDerivativeExt: + case DeviceFeatures::SamplerMinMaxLod: + case DeviceFeatures::DrawIndexedIndirect: + case DeviceFeatures::ExplicitBindingExt: + case DeviceFeatures::TextureFormatRG: + case DeviceFeatures::ValidationLayersEnabled: + case DeviceFeatures::ExternalMemoryObjects: + return false; + default: + return false; + } +} + +bool Device::hasRequirement(DeviceRequirement /*requirement*/) const { + return false; +} + +bool Device::getFeatureLimits(DeviceFeatureLimits featureLimits, size_t& result) const { + // Compile-time validation: IGL constant must not exceed D3D12 API limit + static_assert(IGL_VERTEX_ATTRIBUTES_MAX <= D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + "IGL_VERTEX_ATTRIBUTES_MAX exceeds D3D12 vertex input limit"); + + switch (featureLimits) { + case DeviceFeatureLimits::BufferAlignment: + // D3D12 buffer alignment requirements vary by buffer type: + // - Constant buffers: 256 bytes 
(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + // - Storage buffers: 4 bytes (see ShaderStorageBufferOffsetAlignment) + // - Vertex/index buffers: 4 bytes (DWORD alignment) + // This returns the most restrictive alignment (constant buffers). + // See: https://learn.microsoft.com/en-us/windows/win32/direct3d12/constants + result = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; // 256 bytes + return true; + + case DeviceFeatureLimits::BufferNoCopyAlignment: + // D3D12 doesn't support no-copy buffers in the same way as Metal + result = 0; + return true; + + case DeviceFeatureLimits::MaxBindBytesBytes: + // bind-bytes (like Metal setVertexBytes) not supported on D3D12 + result = 0; + return true; + + case DeviceFeatureLimits::MaxCubeMapDimension: + // D3D12 cube map dimension limits (Feature Level 11_0+: 16384) + result = 16384; // D3D12_REQ_TEXTURECUBE_DIMENSION + return true; + + case DeviceFeatureLimits::MaxFragmentUniformVectors: + // D3D12 allows 64KB constant buffers, each vec4 is 16 bytes + // 64KB / 16 bytes = 4096 vec4s + result = 4096; + return true; + + case DeviceFeatureLimits::MaxMultisampleCount: { + // Query the maximum MSAA sample count supported by the device. + // Test common sample counts (1, 2, 4, 8, 16) for RGBA8 (most widely supported format). + // This provides a conservative estimate; actual support varies by format. + // Applications should use getMaxMSAASamplesForFormat() for format-specific queries. 
+ auto* device = ctx_->getDevice(); + if (!device) { + result = 1; // No MSAA support if device unavailable + return false; + } + + // Use RGBA8 as reference format (most widely supported) + const DXGI_FORMAT referenceFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + + // Test sample counts in descending order: 16, 8, 4, 2, 1 + const uint32_t testCounts[] = {16, 8, 4, 2, 1}; + + for (uint32_t sampleCount : testCounts) { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msqLevels = {}; + msqLevels.Format = referenceFormat; + msqLevels.SampleCount = sampleCount; + msqLevels.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + + HRESULT hr = device->CheckFeatureSupport( + D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &msqLevels, + sizeof(msqLevels)); + + if (SUCCEEDED(hr) && msqLevels.NumQualityLevels > 0) { + result = sampleCount; + return true; + } + } + + // Fallback to 1x (no MSAA) + result = 1; + return true; + } + + case DeviceFeatureLimits::MaxPushConstantBytes: + // D3D12 root constants: each root constant is 4 bytes (DWORD) + // D3D12 root signature limit is 64 DWORDs total, but not all for constants + // Conservative limit: 256 bytes (64 DWORDs) + result = 256; + return true; + + case DeviceFeatureLimits::MaxTextureDimension1D2D: + // D3D12 Feature Level 11_0+: 16384 for 1D and 2D textures + // Feature Level 12+: still 16384 + result = 16384; // D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + return true; + + case DeviceFeatureLimits::MaxStorageBufferBytes: + // D3D12 structured buffer max size: 128MB (2^27 bytes) + // UAV structured buffer limit + result = 128 * 1024 * 1024; // 128 MB + return true; + + case DeviceFeatureLimits::MaxUniformBufferBytes: + // D3D12 constant buffer size limit: 64KB (65536 bytes) + result = 64 * 1024; // D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16 + return true; + + case DeviceFeatureLimits::MaxVertexUniformVectors: + // Same as fragment uniform vectors for D3D12 + // 64KB / 16 bytes per vec4 = 4096 vec4s + result = 4096; + return true; + + case 
DeviceFeatureLimits::PushConstantsAlignment: + // Root constants are aligned to DWORD (4 bytes) + result = 4; + return true; + + case DeviceFeatureLimits::ShaderStorageBufferOffsetAlignment: + // D3D12 storage buffer (UAV/structured buffer) alignment. + // D3D12 structured buffers require 4-byte (DWORD) alignment, unlike constant buffers (256 bytes) + // This matches Vulkan's typical minStorageBufferOffsetAlignment (often 16-64 bytes, device-dependent) + // See: https://learn.microsoft.com/en-us/windows/win32/direct3d12/alignment + result = 4; + return true; + + case DeviceFeatureLimits::MaxTextureDimension3D: + // D3D12 3D texture dimension limits (Feature Level 11_0+: 2048) + // Feature Level 10_0+: 2048 + result = 2048; // D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + // D3D12 compute shader thread group limits + result = D3D12_CS_THREAD_GROUP_MAX_X; // 1024 + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + // D3D12 compute shader thread group limits + result = D3D12_CS_THREAD_GROUP_MAX_Y; // 1024 + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + // D3D12 compute shader thread group limits + result = D3D12_CS_THREAD_GROUP_MAX_Z; // 64 + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + // D3D12 max threads per thread group + result = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; // 1024 + return true; + + case DeviceFeatureLimits::MaxVertexInputAttributes: + // D3D12 max vertex input slots (32 per D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) + result = D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; // 32 + IGL_DEBUG_ASSERT(IGL_VERTEX_ATTRIBUTES_MAX <= result, + "IGL_VERTEX_ATTRIBUTES_MAX exceeds D3D12 reported limit"); + return true; + + case DeviceFeatureLimits::MaxColorAttachments: + // D3D12 max simultaneous render targets + result = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; // 8 + return true; + + // Descriptor heap size limits 
chosen for cross-platform compatibility. + case DeviceFeatureLimits::MaxDescriptorHeapCbvSrvUav: + // D3D12 shader-visible CBV/SRV/UAV descriptor heap size + // Hardware limit: 1,000,000+ descriptors + // Current implementation uses 4096 descriptors (see DescriptorHeapManager::Sizes) + // This reports the configured limit, not the hardware maximum + result = 4096; + return true; + + case DeviceFeatureLimits::MaxDescriptorHeapSamplers: + // D3D12 shader-visible sampler descriptor heap size + // Hardware limit: 2048 descriptors (D3D12 spec limit for sampler heaps) + // Current implementation uses 2048 descriptors (see DescriptorHeapManager::Sizes) + result = 2048; + return true; + + case DeviceFeatureLimits::MaxDescriptorHeapRtvs: + // D3D12 CPU-visible RTV descriptor heap size + // Hardware limit: 16,384 descriptors + // Current implementation uses 256 descriptors (see DescriptorHeapManager::Sizes) + result = 256; + return true; + + case DeviceFeatureLimits::MaxDescriptorHeapDsvs: + // D3D12 CPU-visible DSV descriptor heap size + // Hardware limit: 16,384 descriptors + // Current implementation uses 128 descriptors (see DescriptorHeapManager::Sizes) + result = 128; + return true; + } + + // Should never reach here - all cases handled + result = 0; + return false; +} + + +ICapabilities::TextureFormatCapabilities Device::getTextureFormatCapabilities(TextureFormat format) const { + using CapBits = ICapabilities::TextureFormatCapabilityBits; + uint8_t caps = 0; + + // Depth formats: guarantee they are sampleable in shaders for tests + switch (format) { + case TextureFormat::Z_UNorm16: + case TextureFormat::Z_UNorm24: + case TextureFormat::Z_UNorm32: + case TextureFormat::S8_UInt_Z24_UNorm: + case TextureFormat::S8_UInt_Z32_UNorm: + caps |= CapBits::Sampled; + return caps; + default: + break; + } + + // D3D12 does not support 3-channel RGB formats natively - they are mapped to RGBA formats + // However, 3-channel formats cannot be used as render targets because: + // 1. 
RGB_F16/RGB_F32 map to RGBA equivalents, but D3D12 expects RGBA data layout for RT + // 2. Rendering to these formats would require alpha channel handling that IGL doesn't expose + // 3. Other backends (OpenGL, Metal) also don't support RGB formats as render targets + // See also: OpenGL's DeviceFeatureSet.cpp line 1271 "RGB floating point textures are NOT renderable" + const bool isThreeChannelRgbFormat = + format == TextureFormat::RGB_F16 || + format == TextureFormat::RGB_F32; + + auto* dev = ctx_->getDevice(); + if (!dev) { + return 0; + } + + const DXGI_FORMAT dxgi = textureFormatToDXGIFormat(format); + if (dxgi == DXGI_FORMAT_UNKNOWN) { + return 0; + } + + D3D12_FEATURE_DATA_FORMAT_SUPPORT fs = {}; + fs.Format = dxgi; + if (FAILED(dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &fs, sizeof(fs)))) { + return 0; + } + + const auto s1 = fs.Support1; + const auto s2 = fs.Support2; + + const auto props = TextureFormatProperties::fromTextureFormat(format); + + // Enhanced D3D12 format capability mapping. 
+ // Map D3D12_FORMAT_SUPPORT1 flags to IGL capabilities + + // Sampled: Can be used with texture sampling instructions + if (s1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) { + caps |= CapBits::Sampled; + } + + // SampledFiltered: Supports linear filtering (only for non-integer color formats) + // Also check D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON for depth formats + if (props.hasColor() && !props.isInteger()) { + if (s1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) { + caps |= CapBits::SampledFiltered; + } + } else if (props.hasDepth() || props.hasStencil()) { + // Depth formats: check for comparison filtering support + if (s1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON) { + caps |= CapBits::SampledFiltered; + } + } + + // Attachment: Can be used as render target or depth/stencil attachment + // Also consider D3D12_FORMAT_SUPPORT1_BLENDABLE and D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET + // Don't report Attachment capability for 3-channel RGB formats even if D3D12 reports the + // underlying RGBA format as renderable - using them as render targets causes device removal + if (!isThreeChannelRgbFormat) { + if ((s1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || (s1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL)) { + caps |= CapBits::Attachment; + } + } + + // Storage: Can be used with unordered access (UAV) + // Check for typed UAV load/store, or atomic operations + // Enhanced UAV capability detection. 
+ const bool hasUAVTypedOps = (s2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD) && + (s2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE); + const bool hasUAVAtomicOps = (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX); + + if (hasFeature(DeviceFeatures::Compute) && (hasUAVTypedOps || hasUAVAtomicOps)) { + caps |= CapBits::Storage; + } + + // SampledAttachment: Can be both sampled and used as attachment + if ((caps & CapBits::Sampled) && (caps & CapBits::Attachment)) { + caps |= CapBits::SampledAttachment; + } + +#if IGL_DEBUG || defined(IGL_FORCE_ENABLE_LOGS) + // Debug logging for unmapped D3D12 capabilities. + // This helps identify format capabilities that D3D12 supports but IGL doesn't expose + uint32_t unmappedS1 = 0; + uint32_t unmappedS2 = 0; + + // Check unmapped D3D12_FORMAT_SUPPORT1 flags + const uint32_t mappedS1 = D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | + D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | + D3D12_FORMAT_SUPPORT1_RENDER_TARGET | + D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL | + D3D12_FORMAT_SUPPORT1_BLENDABLE | + D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET; + unmappedS1 = s1 & ~mappedS1; + + // Check unmapped D3D12_FORMAT_SUPPORT2 flags + const uint32_t mappedS2 = D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD | + D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX; + unmappedS2 = s2 & ~mappedS2; + + if (unmappedS1 != 0 || unmappedS2 != 0) { + 
IGL_D3D12_LOG_VERBOSE("Format %d (DXGI %d) has unmapped D3D12 capabilities:\n", + static_cast(format), static_cast(dxgi)); + if (unmappedS1 != 0) { + IGL_D3D12_LOG_VERBOSE(" Support1 unmapped flags: 0x%08X\n", unmappedS1); + // Log specific unmapped flags that might be useful + // Note: Some flags may not be defined in older Windows SDK versions + const uint32_t MIP_AUTOGEN = 0x800; // D3D12_FORMAT_SUPPORT1_MIP_AUTOGEN + const uint32_t MULTISAMPLE_RESOLVE = 0x40; // D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE + const uint32_t MULTISAMPLE_LOAD = 0x100000; // D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD + + if (unmappedS1 & MIP_AUTOGEN) { + IGL_D3D12_LOG_VERBOSE(" - MIP_AUTOGEN (0x800)\n"); + } + if (unmappedS1 & MULTISAMPLE_RESOLVE) { + IGL_D3D12_LOG_VERBOSE(" - MULTISAMPLE_RESOLVE (0x40)\n"); + } + if (unmappedS1 & MULTISAMPLE_LOAD) { + IGL_D3D12_LOG_VERBOSE(" - MULTISAMPLE_LOAD (0x100000)\n"); + } + } + if (unmappedS2 != 0) { + IGL_D3D12_LOG_VERBOSE(" Support2 unmapped flags: 0x%08X\n", unmappedS2); + const uint32_t OUTPUT_MERGER_LOGIC_OP = 0x2; // D3D12_FORMAT_SUPPORT2_OUTPUT_MERGER_LOGIC_OP + if (unmappedS2 & OUTPUT_MERGER_LOGIC_OP) { + IGL_D3D12_LOG_VERBOSE(" - OUTPUT_MERGER_LOGIC_OP (0x2)\n"); + } + } + } +#endif + + return caps; +} + +ShaderVersion Device::getShaderVersion() const { + // Report HLSL SM 6.0 if DXC is available; otherwise SM 5.0 (D3DCompile fallback) + bool dxcAvailable = false; +#if IGL_PLATFORM_WINDOWS + HMODULE h = GetModuleHandleA("dxcompiler.dll"); + if (!h) { + h = LoadLibraryA("dxcompiler.dll"); + } + if (h) { + FARPROC proc = GetProcAddress(h, "DxcCreateInstance"); + dxcAvailable = (proc != nullptr); + } +#endif + if (dxcAvailable) { + return ShaderVersion{ShaderFamily::Hlsl, 6, 0, 0}; + } + return ShaderVersion{ShaderFamily::Hlsl, 5, 0, 0}; +} + +BackendVersion Device::getBackendVersion() const { + // Query highest supported feature level to report backend version + auto* dev = ctx_->getDevice(); + if (!dev) { + return 
BackendVersion{BackendFlavor::D3D12, 0, 0}; + } + + static const D3D_FEATURE_LEVEL kLevels[] = { + D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + }; + D3D12_FEATURE_DATA_FEATURE_LEVELS fls = {}; + fls.NumFeatureLevels = static_cast(sizeof(kLevels) / sizeof(kLevels[0])); + fls.pFeatureLevelsRequested = kLevels; + fls.MaxSupportedFeatureLevel = D3D_FEATURE_LEVEL_11_0; + + if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &fls, sizeof(fls)))) { + switch (fls.MaxSupportedFeatureLevel) { + case D3D_FEATURE_LEVEL_12_2: + return BackendVersion{BackendFlavor::D3D12, 12, 2}; + case D3D_FEATURE_LEVEL_12_1: + return BackendVersion{BackendFlavor::D3D12, 12, 1}; + case D3D_FEATURE_LEVEL_12_0: + return BackendVersion{BackendFlavor::D3D12, 12, 0}; + case D3D_FEATURE_LEVEL_11_1: + return BackendVersion{BackendFlavor::D3D12, 11, 1}; + case D3D_FEATURE_LEVEL_11_0: + default: + return BackendVersion{BackendFlavor::D3D12, 11, 0}; + } + } + + // Fallback if CheckFeatureSupport fails + return BackendVersion{BackendFlavor::D3D12, 11, 0}; +} + +BackendType Device::getBackendType() const { + return BackendType::D3D12; +} + +// Get sampler cache statistics for telemetry and debugging. +SamplerCacheStats Device::getSamplerCacheStats() const { + return samplerCache_.getStats(); +} + +// Query maximum MSAA sample count for a specific format. 
+uint32_t Device::getMaxMSAASamplesForFormat(TextureFormat format) const { + auto* device = ctx_->getDevice(); + if (!device) { + return 1; + } + + // Convert IGL format to DXGI format + const DXGI_FORMAT dxgiFormat = textureFormatToDXGIFormat(format); + if (dxgiFormat == DXGI_FORMAT_UNKNOWN) { + IGL_LOG_ERROR("Device::getMaxMSAASamplesForFormat: Unknown format %d\n", static_cast(format)); + return 1; + } + + // Test sample counts in descending order: 16, 8, 4, 2, 1 + const uint32_t testCounts[] = {16, 8, 4, 2, 1}; + + for (uint32_t sampleCount : testCounts) { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msqLevels = {}; + msqLevels.Format = dxgiFormat; + msqLevels.SampleCount = sampleCount; + msqLevels.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + + HRESULT hr = device->CheckFeatureSupport( + D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &msqLevels, + sizeof(msqLevels)); + + if (SUCCEEDED(hr) && msqLevels.NumQualityLevels > 0) { + return sampleCount; + } + } + + return 1; // No MSAA support +} + +void Device::processCompletedUploads() { + allocatorPool_.processCompletedUploads(); +} + +Result Device::waitForUploadFence(UINT64 fenceValue) const { + return allocatorPool_.waitForUploadFence(*this, fenceValue); +} + +void Device::trackUploadBuffer(igl::d3d12::ComPtr buffer, UINT64 fenceValue) { + allocatorPool_.trackUploadBuffer(std::move(buffer), fenceValue); +} + +igl::d3d12::ComPtr Device::getUploadCommandAllocator() { + return allocatorPool_.getUploadCommandAllocator(*ctx_); +} + +void Device::returnUploadCommandAllocator(igl::d3d12::ComPtr allocator, + UINT64 fenceValue) { + allocatorPool_.returnUploadCommandAllocator(std::move(allocator), fenceValue); +} + +size_t Device::getCurrentDrawCount() const { + return telemetry_.getDrawCount(); +} + +size_t Device::getShaderCompilationCount() const { + return telemetry_.getShaderCompilationCount(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Device.h b/src/igl/d3d12/Device.h new file mode 100644 
index 0000000000..19e468ba32 --- /dev/null +++ b/src/igl/d3d12/Device.h @@ -0,0 +1,259 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For IFenceProvider interface. + +namespace igl::d3d12 { + +class PlatformDevice; +class UploadRingBuffer; +class SamplerState; // Forward declaration for sampler cache +class D3D12StagingDevice; // Forward declaration. + +/// @brief Implements the igl::IDevice interface for DirectX 12 +class Device final : public IDevice, public IFenceProvider { + public: + explicit Device(std::unique_ptr ctx); + ~Device() override; + + // BindGroups + [[nodiscard]] Holder createBindGroup( + const BindGroupTextureDesc& desc, + const IRenderPipelineState* IGL_NULLABLE compatiblePipeline, + Result* IGL_NULLABLE outResult) override; + [[nodiscard]] Holder createBindGroup( + const BindGroupBufferDesc& desc, + Result* IGL_NULLABLE outResult) override; + void destroy(BindGroupTextureHandle handle) override; + void destroy(BindGroupBufferHandle handle) override; + void destroy(SamplerHandle handle) override; + + // Command Queue + [[nodiscard]] std::shared_ptr createCommandQueue( + const CommandQueueDesc& desc, + Result* IGL_NULLABLE outResult) noexcept override; + + // Resources + [[nodiscard]] std::unique_ptr createBuffer(const BufferDesc& desc, + Result* IGL_NULLABLE + outResult) const noexcept override; + + // Non-const helper for createBuffer; handles upload operations that mutate internal state. 
+ [[nodiscard]] std::unique_ptr createBufferImpl(const BufferDesc& desc, + Result* IGL_NULLABLE outResult) noexcept; + + [[nodiscard]] std::shared_ptr createDepthStencilState( + const DepthStencilStateDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::unique_ptr createShaderStages( + const ShaderStagesDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createSamplerState( + const SamplerStateDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createTexture(const TextureDesc& desc, + Result* IGL_NULLABLE + outResult) const noexcept override; + + [[nodiscard]] std::shared_ptr createTextureView( + std::shared_ptr texture, + const TextureViewDesc& desc, + Result* IGL_NULLABLE outResult) const noexcept override; + + [[nodiscard]] std::shared_ptr createTimer( + Result* IGL_NULLABLE outResult) const noexcept override; + + [[nodiscard]] std::shared_ptr createVertexInputState( + const VertexInputStateDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + // Pipelines + [[nodiscard]] std::shared_ptr createComputePipeline( + const ComputePipelineDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createRenderPipeline( + const RenderPipelineDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + // D3D12-specific: Create PSO variant with substituted formats (for dynamic PSO selection) + // Called by RenderPipelineState::getPipelineState() to create format variants + [[nodiscard]] igl::d3d12::ComPtr createPipelineStateVariant( + const RenderPipelineDesc& desc, + ID3D12RootSignature* rootSignature, + Result* IGL_NULLABLE outResult) const; + + // Shader library and modules + [[nodiscard]] std::unique_ptr createShaderLibrary( + const ShaderLibraryDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createShaderModule( + const ShaderModuleDesc& desc, + 
Result* IGL_NULLABLE outResult) const override; + + // Framebuffer + [[nodiscard]] std::shared_ptr createFramebuffer( + const FramebufferDesc& desc, + Result* IGL_NULLABLE outResult) override; + + // Capabilities + [[nodiscard]] const IPlatformDevice& getPlatformDevice() const noexcept override; + + [[nodiscard]] bool hasFeature(DeviceFeatures feature) const override; + [[nodiscard]] bool hasRequirement(DeviceRequirement requirement) const override; + [[nodiscard]] bool getFeatureLimits(DeviceFeatureLimits featureLimits, + size_t& result) const override; + [[nodiscard]] TextureFormatCapabilities getTextureFormatCapabilities( + TextureFormat format) const override; + [[nodiscard]] ShaderVersion getShaderVersion() const override; + [[nodiscard]] BackendVersion getBackendVersion() const override; + + [[nodiscard]] BackendType getBackendType() const override; + + [[nodiscard]] size_t getCurrentDrawCount() const override; + [[nodiscard]] size_t getShaderCompilationCount() const override; + + void incrementDrawCount(size_t n) { telemetry_.incrementDrawCount(n); } + + D3D12Context& getD3D12Context() { + return *ctx_; + } + [[nodiscard]] const D3D12Context& getD3D12Context() const { + return *ctx_; + } + + // Bind group accessors for RenderCommandEncoder + [[nodiscard]] const BindGroupTextureDesc* getBindGroupTextureDesc(BindGroupTextureHandle handle) const { + return bindGroupTexturesPool_.get(handle); + } + [[nodiscard]] const BindGroupBufferDesc* getBindGroupBufferDesc(BindGroupBufferHandle handle) const { + return bindGroupBuffersPool_.get(handle); + } + + // Device capabilities accessors. 
+ [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS& getDeviceOptions() const { + return capabilities_.getOptions(); + } + [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS1& getDeviceOptions1() const { + return capabilities_.getOptions1(); + } + [[nodiscard]] D3D12_RESOURCE_BINDING_TIER getResourceBindingTier() const { + return capabilities_.getResourceBindingTier(); + } + + void processCompletedUploads(); + void trackUploadBuffer(igl::d3d12::ComPtr buffer, UINT64 fenceValue); + + // Command allocator pool access for upload operations. + igl::d3d12::ComPtr getUploadCommandAllocator(); + void returnUploadCommandAllocator(igl::d3d12::ComPtr allocator, + UINT64 fenceValue); + ID3D12Fence* getUploadFence() const { return allocatorPool_.getUploadFence(); } + UINT64 getNextUploadFenceValue() { return allocatorPool_.getNextUploadFenceValue(); } + Result waitForUploadFence(UINT64 fenceValue) const; + + // IFenceProvider implementation (shared fence timeline). + uint64_t getNextFenceValue() override { return getNextUploadFenceValue(); } + + // Upload ring buffer access. + UploadRingBuffer* getUploadRingBuffer() const { return allocatorPool_.getUploadRingBuffer(); } + + // Check for device removal and return error Result if detected. + [[nodiscard]] Result checkDeviceRemoval() const; + + // Query if device has been lost. + [[nodiscard]] bool isDeviceLost() const { return deviceLost_; } + + // Sampler cache statistics. + [[nodiscard]] SamplerCacheStats getSamplerCacheStats() const; + + // Query maximum MSAA sample count for a specific format. + // Returns 1 if the format does not support MSAA. + [[nodiscard]] uint32_t getMaxMSAASamplesForFormat(TextureFormat format) const; + + private: + // Alignment validation helpers. 
+ bool validateMSAAAlignment(const TextureDesc& desc, Result* IGL_NULLABLE outResult) const; + bool validateTextureAlignment(const D3D12_RESOURCE_DESC& resourceDesc, + uint32_t sampleCount, Result* IGL_NULLABLE outResult) const; + bool validateBufferAlignment(size_t bufferSize, bool isUniform) const; + + // Alignment constants. + static constexpr size_t MSAA_ALIGNMENT = 65536; // 64KB for MSAA textures + static constexpr size_t BUFFER_ALIGNMENT = 256; // 256 bytes for constant buffers + static constexpr size_t DEFAULT_TEXTURE_ALIGNMENT = 65536; // 64KB default for textures + + D3D12DeviceCapabilities capabilities_; + + std::unique_ptr ctx_; + std::unique_ptr platformDevice_; + D3D12Telemetry telemetry_; + + // Bind group pools + Pool bindGroupTexturesPool_; + Pool bindGroupBuffersPool_; + + // Upload tracking state (non-mutable, mutated only from non-const paths). + // Modified by createBufferImpl, Buffer::upload, Texture::upload via non-const Device references + // and synchronized via pendingUploadsMutex_ for thread-safe access. + D3D12AllocatorPool allocatorPool_; + D3D12PipelineCache pipelineCache_; + D3D12SamplerCache samplerCache_; + + // Device lost flag and reason for fatal error handling (atomic for thread-safe access). + mutable std::atomic deviceLost_{false}; + mutable std::string deviceLostReason_; // Cached reason for diagnostics + + public: + // Shared staging infrastructure for upload/readback operations. + // Used by Buffer, Texture, Framebuffer, CommandBuffer for centralized resource management. + [[nodiscard]] D3D12ImmediateCommands* getImmediateCommands() const { + return allocatorPool_.getImmediateCommands(); + } + [[nodiscard]] D3D12StagingDevice* getStagingDevice() const { + return allocatorPool_.getStagingDevice(); + } + + // Access pre-compiled mipmap shaders. 
+ [[nodiscard]] bool areMipmapShadersAvailable() const { + return pipelineCache_.mipmapShadersAvailable_; + } + [[nodiscard]] const std::vector& getMipmapVSBytecode() const { + return pipelineCache_.mipmapVSBytecode_; + } + [[nodiscard]] const std::vector& getMipmapPSBytecode() const { + return pipelineCache_.mipmapPSBytecode_; + } + [[nodiscard]] ID3D12RootSignature* getMipmapRootSignature() const { + return pipelineCache_.mipmapRootSignature_.Get(); + } +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Framebuffer.cpp b/src/igl/d3d12/Framebuffer.cpp new file mode 100644 index 0000000000..ba71b007ca --- /dev/null +++ b/src/igl/d3d12/Framebuffer.cpp @@ -0,0 +1,781 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +Framebuffer::Framebuffer(const FramebufferDesc& desc) : desc_(desc) {} + +Framebuffer::~Framebuffer() { + // FenceWaiter RAII handles event cleanup automatically +} + +std::vector Framebuffer::getColorAttachmentIndices() const { + std::vector indices; + for (size_t i = 0; i < IGL_COLOR_ATTACHMENTS_MAX; ++i) { + if (desc_.colorAttachments[i].texture) { + indices.push_back(i); + } + } + return indices; +} + +std::shared_ptr Framebuffer::getColorAttachment(size_t index) const { + if (index < IGL_COLOR_ATTACHMENTS_MAX) { + return desc_.colorAttachments[index].texture; + } + return nullptr; +} + +std::shared_ptr Framebuffer::getResolveColorAttachment(size_t index) const { + if (index < IGL_COLOR_ATTACHMENTS_MAX) { + return desc_.colorAttachments[index].resolveTexture; + } + return nullptr; +} + +std::shared_ptr Framebuffer::getDepthAttachment() const { + 
return desc_.depthAttachment.texture; +} + +std::shared_ptr Framebuffer::getResolveDepthAttachment() const { + return desc_.depthAttachment.resolveTexture; +} + +std::shared_ptr Framebuffer::getStencilAttachment() const { + return desc_.stencilAttachment.texture; +} + +FramebufferMode Framebuffer::getMode() const { + return desc_.mode; +} + +bool Framebuffer::isSwapchainBound() const { + return false; +} + +void Framebuffer::copyBytesColorAttachment(ICommandQueue& cmdQueue, + size_t index, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const { + if (!pixelBytes || index >= IGL_COLOR_ATTACHMENTS_MAX) { + return; + } + + auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto& ctx = iglDevice.getD3D12Context(); + auto* device = ctx.getDevice(); + if (!device) { + return; + } + + // Get shared infrastructure used for readback. + auto* immediateCommands = iglDevice.getImmediateCommands(); + auto* stagingDevice = iglDevice.getStagingDevice(); + if (!immediateCommands || !stagingDevice) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Shared infrastructure not available\n"); + return; + } + + auto srcTex = std::static_pointer_cast(desc_.colorAttachments[index].texture); + if (!srcTex) { + return; + } + + ID3D12Resource* srcRes = srcTex->getResource(); + if (!srcRes) { + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t copyLayer = (srcTex->getType() == TextureType::Cube) ? 
range.face : range.layer; + const uint32_t subresourceIndex = srcTex->calcSubresourceIndex(mipLevel, copyLayer); + + const auto texDims = srcTex->getDimensions(); + const uint32_t mipWidth = std::max(1u, texDims.width >> mipLevel); + const uint32_t mipHeight = std::max(1u, texDims.height >> mipLevel); + + const UINT64 frameFenceValue = ctx.getFenceValue(); + + auto& cache = readbackCache_[index]; + + const auto fmtProps = TextureFormatProperties::fromTextureFormat(srcTex->getFormat()); + const size_t bytesPerPixel = std::max(fmtProps.bytesPerBlock, 1); + const size_t fullRowBytes = static_cast(mipWidth) * bytesPerPixel; + + bool cacheUpToDate = cache.cacheValid && + cache.cachedFrameFenceValue == frameFenceValue && + cache.cachedMipLevel == mipLevel && + cache.cachedLayer == copyLayer && + cache.cachedWidth == mipWidth && + cache.cachedHeight == mipHeight && + cache.cachedBytesPerPixel == bytesPerPixel; + + if (!cacheUpToDate) { + const auto refreshStart = std::chrono::high_resolution_clock::now(); + D3D12_RESOURCE_DESC srcDesc = srcRes->GetDesc(); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint{}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + device->GetCopyableFootprints( + &srcDesc, subresourceIndex, 1, 0, &footprint, &numRows, &rowSizeInBytes, &totalBytes); + + if (totalBytes == 0) { + return; + } + + // Use D3D12StagingDevice for readback buffer allocation. + auto stagingBuffer = stagingDevice->allocateReadback(totalBytes); + if (!stagingBuffer.valid || !stagingBuffer.buffer.Get()) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to allocate readback buffer\n"); + cache.cacheValid = false; + return; + } + + // Use D3D12ImmediateCommands for the copy operation. 
+ Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + cache.cacheValid = false; + return; + } + + const auto previousState = srcTex->getSubresourceState(mipLevel, copyLayer); + srcTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, copyLayer); + + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = stagingBuffer.buffer.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = srcRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = subresourceIndex; + + D3D12_BOX srcBox{}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.back = 1; + cmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); + + srcTex->transitionTo(cmdList, previousState, mipLevel, copyLayer); + + // Submit and wait using the shared fence. 
+ uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + cache.cacheValid = false; + return; + } + + // Map and read the readback buffer + void* mapped = nullptr; + D3D12_RANGE readRange{0, totalBytes}; + if (FAILED(stagingBuffer.buffer->Map(0, &readRange, &mapped))) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to map readback buffer\n"); + stagingDevice->free(stagingBuffer, fenceValue); + cache.cacheValid = false; + return; + } + + const uint8_t* srcPtr = static_cast(mapped) + footprint.Offset; + const size_t srcRowPitch = footprint.Footprint.RowPitch; + const size_t copyRowBytes = fullRowBytes; + + cache.cachedRowPitch = static_cast(fullRowBytes); + cache.cachedData.resize(static_cast(cache.cachedRowPitch) * + static_cast(mipHeight)); + + // Direct copy with vertical flip only; no channel swap needed. + // DXGI_FORMAT_R8G8B8A8_UNORM has R,G,B,A byte order matching IGL expectations. + for (uint32_t row = 0; row < mipHeight; ++row) { + const uint8_t* s = srcPtr + static_cast(row) * srcRowPitch; + uint8_t* d = + cache.cachedData.data() + + static_cast(mipHeight - 1 - row) * static_cast(cache.cachedRowPitch); + + std::memcpy(d, s, copyRowBytes); + } + + stagingBuffer.buffer->Unmap(0, nullptr); + + // Free the staging buffer back to the pool. 
+ stagingDevice->free(stagingBuffer, fenceValue); + + cache.cachedWidth = mipWidth; + cache.cachedHeight = mipHeight; + cache.cachedBytesPerPixel = bytesPerPixel; + cache.cachedMipLevel = mipLevel; + cache.cachedLayer = copyLayer; + cache.cachedFrameFenceValue = frameFenceValue; + cache.cacheValid = true; + + const auto refreshEnd = std::chrono::high_resolution_clock::now(); + const double refreshMs = + std::chrono::duration(refreshEnd - refreshStart).count(); + IGL_D3D12_LOG_VERBOSE("copyBytesColorAttachment: refreshed subresource (mip=%u, layer=%u) in %.2f ms (%ux%u)\n", + mipLevel, + copyLayer, + refreshMs, + mipWidth, + mipHeight); + } + + if (!cache.cacheValid) { + return; + } + + if (range.width == 0 || range.height == 0 || + range.x + range.width > cache.cachedWidth || + range.y + range.height > cache.cachedHeight) { + return; + } + + const size_t copyRowBytes = + static_cast(range.width) * cache.cachedBytesPerPixel; + const size_t dstRowPitch = bytesPerRow ? bytesPerRow : copyRowBytes; + uint8_t* dstPtr = static_cast(pixelBytes); + + for (uint32_t destRow = 0; destRow < range.height; ++destRow) { + const uint32_t gpuRow = range.y + (range.height - 1 - destRow); + if (gpuRow >= cache.cachedHeight) { + return; + } + const uint32_t cachedRow = cache.cachedHeight - 1 - gpuRow; + const uint8_t* src = + cache.cachedData.data() + + static_cast(cachedRow) * static_cast(cache.cachedRowPitch) + + static_cast(range.x) * cache.cachedBytesPerPixel; + std::memcpy(dstPtr + static_cast(destRow) * dstRowPitch, src, copyRowBytes); + } +} + +void Framebuffer::copyBytesDepthAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const { + // Depth attachment readback. 
+ if (!pixelBytes) { + return; + } + + auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto& ctx = iglDevice.getD3D12Context(); + auto* device = ctx.getDevice(); + if (!device) { + return; + } + + // Get shared staging infrastructure. + auto* immediateCommands = iglDevice.getImmediateCommands(); + auto* stagingDevice = iglDevice.getStagingDevice(); + if (!immediateCommands || !stagingDevice) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Shared infrastructure not available\n"); + return; + } + + auto depthTex = std::static_pointer_cast(desc_.depthAttachment.texture); + if (!depthTex) { + return; + } + + ID3D12Resource* depthRes = depthTex->getResource(); + if (!depthRes) { + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t copyLayer = (depthTex->getType() == TextureType::Cube) ? range.face : range.layer; + const uint32_t subresourceIndex = depthTex->calcSubresourceIndex(mipLevel, copyLayer); + + const auto texDims = depthTex->getDimensions(); + const uint32_t mipWidth = std::max(1u, texDims.width >> mipLevel); + const uint32_t mipHeight = std::max(1u, texDims.height >> mipLevel); + + // Get footprint for the depth resource + D3D12_RESOURCE_DESC depthDesc = depthRes->GetDesc(); + + // Validate and log depth format to clarify raw-bits vs converted-float behavior. + const DXGI_FORMAT depthFormat = depthDesc.Format; + const bool isD32Float = (depthFormat == DXGI_FORMAT_D32_FLOAT || + depthFormat == DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + + if (!isD32Float) { + IGL_D3D12_LOG_VERBOSE("Framebuffer::copyBytesDepthAttachment - Format 0x%X is not D32_FLOAT; " + "returning raw GPU bits (not normalized [0,1] floats). 
" + "For UNORM formats, caller must convert manually.\n", + static_cast(depthFormat)); + } + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint{}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + device->GetCopyableFootprints( + &depthDesc, subresourceIndex, 1, 0, &footprint, &numRows, &rowSizeInBytes, &totalBytes); + + if (totalBytes == 0) { + return; + } + + // Allocate readback buffer from the staging device. + auto stagingBuffer = stagingDevice->allocateReadback(totalBytes); + if (!stagingBuffer.valid || !stagingBuffer.buffer.Get()) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to allocate readback buffer\n"); + return; + } + + // Begin immediate command recording. + Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Transition depth texture to copy source + const auto previousState = depthTex->getSubresourceState(mipLevel, copyLayer); + depthTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, copyLayer); + + // Set up copy locations + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = stagingBuffer.buffer.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = depthRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = subresourceIndex; + + D3D12_BOX srcBox{}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.back = 1; + + // Copy depth data + cmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); + + // Transition back to previous state + depthTex->transitionTo(cmdList, previousState, mipLevel, copyLayer); + + // 
Submit and wait using the shared fence. + uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Map readback buffer and copy data + void* mapped = nullptr; + D3D12_RANGE readRange{0, totalBytes}; + if (FAILED(stagingBuffer.buffer->Map(0, &readRange, &mapped))) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to map readback buffer\n"); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + // Validate range bounds before copying. + if (range.width == 0 || range.height == 0 || + range.x + range.width > mipWidth || + range.y + range.height > mipHeight) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Invalid range: [%u,%u %ux%u] exceeds mip size %ux%u\n", + range.x, range.y, range.width, range.height, mipWidth, mipHeight); + stagingBuffer.buffer->Unmap(0, nullptr); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + const uint8_t* srcPtr = static_cast(mapped) + footprint.Offset; + const size_t srcRowPitch = footprint.Footprint.RowPitch; + + // Depth readback contract: callers (tests) provide a float-per-pixel buffer. + // Use 4 bytes per destination pixel regardless of the underlying DXGI format, + // and only copy that many bytes from the GPU data to avoid overrunning the + // caller's buffer (e.g., for combined depth-stencil formats like D32_S8). + // + // LIMITATION: This implementation assumes a "raw bits" contract - it copies + // the native GPU representation without format conversion. This is correct for + // D32_FLOAT (which is already IEEE 754 float), but for normalized integer depth + // formats (D16_UNORM, D24_UNORM_S8_UINT), the copied data is raw bits, not + // converted [0,1] floats. 
Callers expecting normalized depth values from non-float + // formats will receive unconverted data. Future work should add format detection + // and explicit UNORM-to-float conversion for broader compatibility. + constexpr size_t kDstBytesPerPixel = sizeof(float); + + // Derive the native bytes-per-pixel for the copied subresource using the + // rowSizeInBytes returned by GetCopyableFootprints when possible. + size_t nativeBytesPerPixel = 0; + if (mipWidth > 0 && rowSizeInBytes > 0) { + nativeBytesPerPixel = static_cast(rowSizeInBytes) / static_cast(mipWidth); + } + + const size_t copyRowBytes = static_cast(range.width) * kDstBytesPerPixel; + const size_t dstRowPitch = bytesPerRow ? bytesPerRow : copyRowBytes; + uint8_t* dstPtr = static_cast(pixelBytes); + + for (uint32_t destRow = 0; destRow < range.height; ++destRow) { + const uint32_t gpuRow = range.y + (range.height - 1 - destRow); + if (gpuRow >= mipHeight) { + break; + } + const uint32_t srcRow = mipHeight - 1 - gpuRow; + const uint8_t* src = + srcPtr + static_cast(srcRow) * srcRowPitch + + static_cast(range.x) * (nativeBytesPerPixel > 0 ? nativeBytesPerPixel : kDstBytesPerPixel); + + std::memcpy(dstPtr + static_cast(destRow) * dstRowPitch, src, copyRowBytes); + } + + stagingBuffer.buffer->Unmap(0, nullptr); + + // Free staging buffer back to the pool. + stagingDevice->free(stagingBuffer, fenceValue); +} + +void Framebuffer::copyBytesStencilAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const { + // Stencil attachment readback. + if (!pixelBytes) { + return; + } + + auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto& ctx = iglDevice.getD3D12Context(); + auto* device = ctx.getDevice(); + if (!device) { + return; + } + + // Get shared infrastructure. 
+ auto* immediateCommands = iglDevice.getImmediateCommands(); + auto* stagingDevice = iglDevice.getStagingDevice(); + if (!immediateCommands || !stagingDevice) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Shared infrastructure not available\n"); + return; + } + + auto stencilTex = std::static_pointer_cast(desc_.stencilAttachment.texture); + if (!stencilTex) { + return; + } + + ID3D12Resource* stencilRes = stencilTex->getResource(); + if (!stencilRes) { + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t copyLayer = (stencilTex->getType() == TextureType::Cube) ? range.face : range.layer; + + // Detect stencil format and select the appropriate plane slice. + D3D12_RESOURCE_DESC stencilDesc = stencilRes->GetDesc(); + const DXGI_FORMAT stencilFormat = stencilDesc.Format; + + // Determine plane slice based on format: + // - Planar depth-stencil formats: stencil is in plane 1 + // - Pure stencil formats: plane 0 + UINT planeSlice = 0; // Default for non-planar + + if (stencilFormat == DXGI_FORMAT_D24_UNORM_S8_UINT || + stencilFormat == DXGI_FORMAT_D32_FLOAT_S8X24_UINT || + stencilFormat == DXGI_FORMAT_R24G8_TYPELESS || + stencilFormat == DXGI_FORMAT_R32G8X24_TYPELESS) { + // Planar depth-stencil: Plane 0 = depth, Plane 1 = stencil + planeSlice = 1; + } else if (stencilFormat == DXGI_FORMAT_R8_TYPELESS) { + // Pure stencil formats: Plane 0 + planeSlice = 0; + } else { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Unsupported stencil format 0x%X; " + "assuming plane 0. 
May fail for planar formats.\n", + static_cast(stencilFormat)); + IGL_DEBUG_ASSERT(false, "Unsupported stencil format - add to known format list"); + planeSlice = 0; + } + + const UINT numMipLevels = stencilTex->getNumMipLevels(); + const UINT numLayers = stencilTex->getNumLayers(); + const uint32_t subresourceIndex = D3D12CalcSubresource(mipLevel, copyLayer, planeSlice, numMipLevels, numLayers); + + const auto texDims = stencilTex->getDimensions(); + const uint32_t mipWidth = std::max(1u, texDims.width >> mipLevel); + const uint32_t mipHeight = std::max(1u, texDims.height >> mipLevel); + + // Get footprint for the stencil plane (reuse stencilDesc from above) + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint{}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + device->GetCopyableFootprints( + &stencilDesc, subresourceIndex, 1, 0, &footprint, &numRows, &rowSizeInBytes, &totalBytes); + + if (totalBytes == 0) { + return; + } + + // Allocate readback buffer from the staging device. + auto stagingBuffer = stagingDevice->allocateReadback(totalBytes); + if (!stagingBuffer.valid || !stagingBuffer.buffer.Get()) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to allocate readback buffer\n"); + return; + } + + // Begin immediate command recording. 
+ Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Transition stencil texture to copy source + const auto previousState = stencilTex->getSubresourceState(mipLevel, copyLayer); + stencilTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, copyLayer); + + // Set up copy locations for stencil plane + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = stagingBuffer.buffer.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = stencilRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = subresourceIndex; + + D3D12_BOX srcBox{}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.back = 1; + + // Copy stencil data + cmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); + + // Transition back to previous state + stencilTex->transitionTo(cmdList, previousState, mipLevel, copyLayer); + + // Submit and wait using the shared fence. 
+ uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Map readback buffer and copy data + void* mapped = nullptr; + D3D12_RANGE readRange{0, totalBytes}; + if (FAILED(stagingBuffer.buffer->Map(0, &readRange, &mapped))) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to map readback buffer\n"); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + // Validate range bounds before copying. + if (range.width == 0 || range.height == 0 || + range.x + range.width > mipWidth || + range.y + range.height > mipHeight) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Invalid range: [%u,%u %ux%u] exceeds mip size %ux%u\n", + range.x, range.y, range.width, range.height, mipWidth, mipHeight); + stagingBuffer.buffer->Unmap(0, nullptr); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + const uint8_t* srcPtr = static_cast(mapped) + footprint.Offset; + const size_t srcRowPitch = footprint.Footprint.RowPitch; + + // Stencil is always 8-bit (1 byte per pixel) + const size_t bytesPerPixel = 1; + + // Copy with vertical flip (D3D12 textures are top-down, IGL expects bottom-up) + const size_t copyRowBytes = static_cast(range.width) * bytesPerPixel; + const size_t dstRowPitch = bytesPerRow ? 
bytesPerRow : copyRowBytes; + uint8_t* dstPtr = static_cast(pixelBytes); + + for (uint32_t destRow = 0; destRow < range.height; ++destRow) { + const uint32_t gpuRow = range.y + (range.height - 1 - destRow); + if (gpuRow >= mipHeight) { + break; + } + const uint32_t srcRow = mipHeight - 1 - gpuRow; + const uint8_t* src = srcPtr + static_cast(srcRow) * srcRowPitch + + static_cast(range.x) * bytesPerPixel; + std::memcpy(dstPtr + static_cast(destRow) * dstRowPitch, src, copyRowBytes); + } + + stagingBuffer.buffer->Unmap(0, nullptr); + + // Free staging buffer back to the pool. + stagingDevice->free(stagingBuffer, fenceValue); +} + +void Framebuffer::copyTextureColorAttachment(ICommandQueue& cmdQueue, + size_t index, + std::shared_ptr destTexture, + const TextureRangeDesc& range) const { + // Bounds check for index parameter + if (index >= IGL_COLOR_ATTACHMENTS_MAX) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment: index %zu out of bounds (max %u)\n", + index, IGL_COLOR_ATTACHMENTS_MAX); + return; + } + + // Get device and shared infrastructure directly (avoid transient CommandBuffer). 
+ auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Invalid command queue\n"); + IGL_DEBUG_ASSERT(false, "D3D12 Framebuffer used with non-D3D12 command queue"); + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto* immediateCommands = iglDevice.getImmediateCommands(); + if (!immediateCommands) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Immediate commands not available\n"); + IGL_DEBUG_ASSERT(false, "D3D12ImmediateCommands not initialized"); + return; + } + + auto srcTex = std::static_pointer_cast(desc_.colorAttachments[index].texture); + auto dstTex = std::static_pointer_cast(destTexture); + if (!srcTex || !dstTex) { + return; + } + ID3D12Resource* srcRes = srcTex->getResource(); + ID3D12Resource* dstRes = dstTex->getResource(); + if (!srcRes || !dstRes) { + return; + } + Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t layer = range.layer; + const auto srcPrevState = srcTex->getSubresourceState(mipLevel, layer); + srcTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, layer); + dstTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_DEST, mipLevel, layer); + + // Calculate proper subresource indices for array textures and cubemaps + // D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize) + const UINT srcMipLevels = srcTex->getNumMipLevels(); + const UINT dstMipLevels = dstTex->getNumMipLevels(); + const UINT srcArraySize = srcTex->getNumLayers(); + const UINT dstArraySize = dstTex->getNumLayers(); + + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = dstRes; + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + 
dstLoc.SubresourceIndex = D3D12CalcSubresource(mipLevel, layer, 0, dstMipLevels, dstArraySize); + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = srcRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = D3D12CalcSubresource(mipLevel, layer, 0, srcMipLevels, srcArraySize); + + D3D12_BOX srcBox{}; + srcBox.left = range.x; + srcBox.top = range.y; + srcBox.front = 0; + srcBox.right = range.x + range.width; + srcBox.bottom = range.y + range.height; + srcBox.back = 1; + cmdList->CopyTextureRegion(&dstLoc, range.x, range.y, 0, &srcLoc, &srcBox); + + // Transition dest to shader resource for sampling. Source back to its previous state. + srcTex->transitionTo(cmdList, srcPrevState, mipLevel, layer); + dstTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, mipLevel, layer); + + // Submit and wait using the shared fence (replaces manual CreateEvent/WaitForSingleObject). + uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + return; + } +} + +void Framebuffer::updateDrawable(std::shared_ptr texture) { + desc_.colorAttachments[0].texture = std::move(texture); +} + +void Framebuffer::updateDrawable(SurfaceTextures surfaceTextures) { + desc_.colorAttachments[0].texture = std::move(surfaceTextures.color); + desc_.depthAttachment.texture = surfaceTextures.depth; + // Depth and stencil typically share the same texture + desc_.stencilAttachment.texture = std::move(surfaceTextures.depth); +} + +void Framebuffer::updateResolveAttachment(std::shared_ptr texture) { + desc_.colorAttachments[0].resolveTexture = std::move(texture); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Framebuffer.h b/src/igl/d3d12/Framebuffer.h new file mode 100644 index 0000000000..a01649658e --- /dev/null +++ b/src/igl/d3d12/Framebuffer.h @@ -0,0 +1,72 @@ 
+/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class Framebuffer final : public IFramebuffer { + public: + Framebuffer(const FramebufferDesc& desc); + ~Framebuffer() override; + + std::vector getColorAttachmentIndices() const override; + std::shared_ptr getColorAttachment(size_t index) const override; + std::shared_ptr getResolveColorAttachment(size_t index) const override; + std::shared_ptr getDepthAttachment() const override; + std::shared_ptr getResolveDepthAttachment() const override; + std::shared_ptr getStencilAttachment() const override; + FramebufferMode getMode() const override; + bool isSwapchainBound() const override; + + void copyBytesColorAttachment(ICommandQueue& cmdQueue, + size_t index, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const override; + void copyBytesDepthAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const override; + void copyBytesStencilAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const override; + void copyTextureColorAttachment(ICommandQueue& cmdQueue, + size_t index, + std::shared_ptr destTexture, + const TextureRangeDesc& range) const override; + void updateDrawable(std::shared_ptr texture) override; + void updateDrawable(SurfaceTextures surfaceTextures) override; + void updateResolveAttachment(std::shared_ptr texture) override; + + private: + // Simplified readback resources (removed per-attachment allocator/fence; use shared infrastructure). 
+ struct ReadbackResources { + // Cached data for repeated reads from same region + std::vector cachedData; + uint32_t cachedWidth = 0; + uint32_t cachedHeight = 0; + uint32_t cachedMipLevel = 0; + uint32_t cachedLayer = 0; + uint64_t cachedRowPitch = 0; + size_t cachedBytesPerPixel = 0; + UINT64 cachedFrameFenceValue = std::numeric_limits::max(); + bool cacheValid = false; + }; + + mutable std::array readbackCache_{}; + FramebufferDesc desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/HeadlessContext.cpp b/src/igl/d3d12/HeadlessContext.cpp new file mode 100644 index 0000000000..824b9cf267 --- /dev/null +++ b/src/igl/d3d12/HeadlessContext.cpp @@ -0,0 +1,387 @@ +/* + * Minimal headless D3D12 context for unit tests (no swapchain / no HWND). + */ + +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +HeadlessD3D12Context::~HeadlessD3D12Context() = default; + +Result HeadlessD3D12Context::initializeHeadless(uint32_t width, uint32_t height, + const D3D12ContextConfig& config) { + width_ = width; + height_ = height; + + // Store and validate configuration. + config_ = config; + config_.validate(); + + // Headless mode: No swapchain, so use kMaxFramesInFlight as buffer count (T43) + swapchainBufferCount_ = kMaxFramesInFlight; + renderTargets_.resize(swapchainBufferCount_); + frameContexts_.resize(swapchainBufferCount_); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Initialized with %u frame buffers (no swapchain)\n", + swapchainBufferCount_); + + // Initialize DXGI factory flags and debug configuration (mirrors windowed D3D12Context). 
+ auto getEnvBool = [](const char* name, bool defaultValue) -> bool { + const char* value = std::getenv(name); + if (!value) { + return defaultValue; + } + return (std::string(value) == "1") || (std::string(value) == "true"); + }; + + bool enableDebugLayer = getEnvBool("IGL_D3D12_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableGPUValidation = getEnvBool("IGL_D3D12_GPU_VALIDATION", false); + bool enableDRED = getEnvBool("IGL_D3D12_DRED", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableDXGIDebug = getEnvBool("IGL_DXGI_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + + IGL_D3D12_LOG_VERBOSE("=== Headless D3D12 Debug Configuration ===\n"); + IGL_D3D12_LOG_VERBOSE(" Debug Layer: %s\n", enableDebugLayer ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" GPU Validation: %s\n", enableGPUValidation ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DRED: %s\n", enableDRED ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DXGI Debug: %s\n", enableDXGIDebug ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE("=========================================\n"); + + UINT dxgiFactoryFlags = 0; + + // Enable debug layer (and GPU-based validation) if configured. 
+ if (enableDebugLayer) { + igl::d3d12::ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(debugController.GetAddressOf())))) { + debugController->EnableDebugLayer(); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Debug layer ENABLED\n"); + + if (enableDXGIDebug) { + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: DXGI debug layer ENABLED\n"); + } + + if (enableGPUValidation) { + igl::d3d12::ComPtr debugController1; + if (SUCCEEDED(debugController->QueryInterface(IID_PPV_ARGS(debugController1.GetAddressOf())))) { + debugController1->SetEnableGPUBasedValidation(TRUE); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: GPU-Based Validation ENABLED\n"); + } else { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to enable GPU-Based Validation (requires ID3D12Debug1)\n"); + } + } + } else { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to get D3D12 debug interface - Graphics Tools may not be installed\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Debug layer DISABLED\n"); + } + + // Enable DRED if configured (Device Removed Extended Data for better crash diagnostics). 
+ if (enableDRED) { + igl::d3d12::ComPtr dredSettings1; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(dredSettings1.GetAddressOf())))) { + dredSettings1->SetAutoBreadcrumbsEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetPageFaultEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetBreadcrumbContextEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: DRED 1.2 fully configured\n"); + } else { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to configure DRED (requires Windows 10 19041+)\n"); + } + } + + // Enable experimental features for headless contexts (unit tests) + // This allows unsigned DXIL shaders to run + // NOTE: This is ONLY called in headless mode (unit tests), NOT in windowed render sessions + { + UUID experimentalFeatures[] = {D3D12ExperimentalShaderModels}; + HRESULT hr = D3D12EnableExperimentalFeatures(1, experimentalFeatures, nullptr, nullptr); + if (SUCCEEDED(hr)) { + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Experimental shader models enabled (allows unsigned DXIL)\n"); + } else { + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Failed to enable experimental features (0x%08X) - signed DXIL required\n", static_cast(hr)); + } + } + + // Create DXGI factory with debug flag in debug builds. 
+ HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(dxgiFactory_.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create DXGI factory"); + } + + // Helper function to try creating device with progressive feature level fallback (A-004) + auto tryCreateDeviceWithFallback = + [](IDXGIAdapter1* adapter, D3D_FEATURE_LEVEL& outFeatureLevel) -> igl::d3d12::ComPtr { + const D3D_FEATURE_LEVEL featureLevels[] = { + D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + }; + + igl::d3d12::ComPtr device; + for (D3D_FEATURE_LEVEL fl : featureLevels) { + HRESULT hr = D3D12CreateDevice(adapter, fl, IID_PPV_ARGS(device.GetAddressOf())); + if (SUCCEEDED(hr)) { + outFeatureLevel = fl; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Device created with Feature Level %d.%d\n", + (fl >> 12) & 0xF, (fl >> 8) & 0xF); + return device; + } + } + outFeatureLevel = static_cast(0); + return nullptr; + }; + + auto featureLevelToString = [](D3D_FEATURE_LEVEL level) -> const char* { + switch (level) { + case D3D_FEATURE_LEVEL_12_2: return "12.2"; + case D3D_FEATURE_LEVEL_12_1: return "12.1"; + case D3D_FEATURE_LEVEL_12_0: return "12.0"; + case D3D_FEATURE_LEVEL_11_1: return "11.1"; + case D3D_FEATURE_LEVEL_11_0: return "11.0"; + default: return "Unknown"; + } + }; + + igl::d3d12::ComPtr factory6; + (void)dxgiFactory_->QueryInterface(IID_PPV_ARGS(factory6.GetAddressOf())); + + bool created = false; + D3D_FEATURE_LEVEL selectedFeatureLevel = D3D_FEATURE_LEVEL_11_0; + + if (factory6.Get()) { + for (UINT i = 0;; ++i) { + igl::d3d12::ComPtr adapter; + if (FAILED(factory6->EnumAdapterByGpuPreference(i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + IID_PPV_ARGS(adapter.GetAddressOf())))) { + break; + } + DXGI_ADAPTER_DESC1 desc{}; + adapter->GetDesc1(&desc); + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + D3D_FEATURE_LEVEL featureLevel = static_cast(0); + 
auto device = tryCreateDeviceWithFallback(adapter.Get(), featureLevel); + if (device.Get() != nullptr) { + device_ = device; + created = true; + selectedFeatureLevel = featureLevel; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Selected HW adapter (FL %s)\n", + featureLevelToString(featureLevel)); + break; + } + } + } + if (!created) { + for (UINT i = 0;; ++i) { + igl::d3d12::ComPtr adapter; + if (dxgiFactory_->EnumAdapters1(i, adapter.GetAddressOf()) == DXGI_ERROR_NOT_FOUND) { + break; + } + DXGI_ADAPTER_DESC1 desc{}; + adapter->GetDesc1(&desc); + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + D3D_FEATURE_LEVEL featureLevel = static_cast(0); + auto device = tryCreateDeviceWithFallback(adapter.Get(), featureLevel); + if (device.Get() != nullptr) { + device_ = device; + created = true; + selectedFeatureLevel = featureLevel; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Selected HW adapter via EnumAdapters1 (FL %s)\n", + featureLevelToString(featureLevel)); + break; + } + } + } + if (!created) { + igl::d3d12::ComPtr warp; + if (SUCCEEDED(dxgiFactory_->EnumWarpAdapter(IID_PPV_ARGS(warp.GetAddressOf())))) { + igl::d3d12::ComPtr warp1; + warp->QueryInterface(IID_PPV_ARGS(warp1.GetAddressOf())); + if (warp1.Get()) { + D3D_FEATURE_LEVEL featureLevel = static_cast(0); + auto device = tryCreateDeviceWithFallback(warp1.Get(), featureLevel); + if (device.Get() != nullptr) { + device_ = device; + created = true; + selectedFeatureLevel = featureLevel; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Using WARP adapter (FL %s)\n", + featureLevelToString(featureLevel)); + } + } + } + } + if (!created) { + return Result(Result::Code::RuntimeError, "Failed to create any D3D12 device"); + } + + // Store selected feature level (A-004) + selectedFeatureLevel_ = selectedFeatureLevel; + +#ifdef _DEBUG + { + igl::d3d12::ComPtr infoQueue; + if (SUCCEEDED(device_->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + 
infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, FALSE);
+ infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, FALSE);
+ infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, FALSE);
+ }
+ }
+#endif
+
+ // Create command queue
+ D3D12_COMMAND_QUEUE_DESC queueDesc = {};
+ queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+ queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
+ hr = device_->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(commandQueue_.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create command queue");
+ }
+
+ // Create per-frame descriptor heaps (consistent with windowed D3D12Context)
+ // Allow override via env vars for headless tests
+ UINT cbvSrvUavHeapSize = 1024; // default matching Microsoft MiniEngine
+ {
+ char buf[32] = {};
+ const DWORD n = GetEnvironmentVariableA("IGL_D3D12_CBV_SRV_UAV_HEAP_SIZE", buf, sizeof(buf));
+ if (n > 0) {
+ cbvSrvUavHeapSize = std::max<UINT>(256, static_cast<UINT>(strtoul(buf, nullptr, 10)));
+ }
+ }
+
+ UINT samplerHeapSize = kMaxSamplers; // Match D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE (2048)
+ {
+ char buf[32] = {};
+ const DWORD n = GetEnvironmentVariableA("IGL_D3D12_SAMPLER_HEAP_SIZE", buf, sizeof(buf));
+ if (n > 0) {
+ samplerHeapSize = std::max<UINT>(16, static_cast<UINT>(strtoul(buf, nullptr, 10)));
+ }
+ }
+
+ // Cache descriptor sizes
+ cbvSrvUavDescriptorSize_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+ samplerDescriptorSize_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Creating per-frame descriptor heaps (CBV/SRV/UAV=%u, Samplers=%u)...\n",
+ cbvSrvUavHeapSize, samplerHeapSize);
+
+ // Create per-frame shader-visible descriptor heaps and an initial page for each frame. 
+
+ for (UINT i = 0; i < swapchainBufferCount_; i++) {
+ // CBV/SRV/UAV heap per frame - create initial page
+ igl::d3d12::ComPtr<ID3D12DescriptorHeap> initialHeap;
+ D3D12_DESCRIPTOR_HEAP_DESC desc = {};
+ desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+ desc.NumDescriptors = cbvSrvUavHeapSize;
+ desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+ desc.NodeMask = 0;
+ hr = device_->CreateDescriptorHeap(&desc, IID_PPV_ARGS(initialHeap.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create per-frame CBV/SRV/UAV heap for frame " + std::to_string(i));
+ }
+
+ // Initialize page vector with first page
+ frameContexts_[i].cbvSrvUavHeapPages.clear();
+ frameContexts_[i].cbvSrvUavHeapPages.emplace_back(initialHeap, cbvSrvUavHeapSize);
+ frameContexts_[i].currentCbvSrvUavPageIndex = 0;
+
+ IGL_D3D12_LOG_VERBOSE(" Frame %u: Created CBV/SRV/UAV heap page (%u descriptors)\n", i, cbvSrvUavHeapSize);
+
+ // Sampler heap per frame
+ desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+ desc.NumDescriptors = samplerHeapSize;
+ desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+ desc.NodeMask = 0;
+ hr = device_->CreateDescriptorHeap(&desc, IID_PPV_ARGS(frameContexts_[i].samplerHeap.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create per-frame Sampler heap for frame " + std::to_string(i));
+ }
+ IGL_D3D12_LOG_VERBOSE(" Frame %u: Created Sampler heap (%u descriptors)\n", i, samplerHeapSize);
+ }
+
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Per-frame descriptor heaps created successfully\n");
+
+ // Create per-frame command allocators (following Microsoft's D3D12HelloFrameBuffering pattern)
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Creating per-frame command allocators...\n");
+ for (UINT i = 0; i < swapchainBufferCount_; i++) {
+ hr = device_->CreateCommandAllocator(
+ D3D12_COMMAND_LIST_TYPE_DIRECT,
+ IID_PPV_ARGS(frameContexts_[i].allocator.GetAddressOf()));
+ if (FAILED(hr)) {
+ return 
Result(Result::Code::RuntimeError, "Failed to create command allocator for frame " + std::to_string(i));
+ }
+ IGL_D3D12_LOG_VERBOSE(" Frame %u: Created command allocator\n", i);
+ }
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Per-frame command allocators created successfully\n");
+
+ // Fence for GPU synchronization
+ hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence_.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create fence");
+ }
+
+ // Create descriptor heap manager with the same sizes for consistency.
+ {
+ DescriptorHeapManager::Sizes sz{};
+ sz.cbvSrvUav = cbvSrvUavHeapSize;
+ sz.samplers = samplerHeapSize;
+ sz.rtvs = 64;
+ sz.dsvs = 32;
+ descriptorHeaps_ = std::make_unique<DescriptorHeapManager>();
+ const Result r = descriptorHeaps_->initialize(device_.Get(), sz);
+ if (!r.isOk()) {
+ IGL_LOG_ERROR("HeadlessD3D12Context: Failed to initialize descriptor heap manager: %s\n",
+ r.message.c_str());
+ // Non-fatal: continue without a dedicated manager.
+ descriptorHeaps_.reset();
+ }
+ // Expose manager to base context for consumers that only see D3D12Context
+ heapMgr_ = descriptorHeaps_.get();
+ }
+
+ // Create command signatures for indirect drawing. 
+ IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Creating command signatures...\n"); + Result commandSigResult = createCommandSignatures(); + if (!commandSigResult.isOk()) { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to create command signatures: %s\n", + commandSigResult.message.c_str()); + return commandSigResult; + } + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Command signatures created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Initialization complete\n"); + return Result(); +} + +} // namespace igl::d3d12 + diff --git a/src/igl/d3d12/HeadlessContext.h b/src/igl/d3d12/HeadlessContext.h new file mode 100644 index 0000000000..ae385523cb --- /dev/null +++ b/src/igl/d3d12/HeadlessContext.h @@ -0,0 +1,33 @@ +/* + * Minimal headless D3D12 context for unit tests (no swapchain / no HWND). + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class HeadlessD3D12Context final : public D3D12Context { + public: + HeadlessD3D12Context() = default; + ~HeadlessD3D12Context(); + + // Initialize a headless context with default dimensions used only for fallback viewports + // Accepts optional D3D12ContextConfig for configurable sizes. + // NOTE: Headless mode currently uses environment variable overrides and internal defaults + // for descriptor heap sizes. Config parameter is stored for base-class consistency and + // future extension but is not fully wired to all heap creation paths yet. 
+ Result initializeHeadless(uint32_t width = 256, uint32_t height = 256, + const D3D12ContextConfig& config = D3D12ContextConfig::defaultConfig()); + + // Access to descriptor heap manager for tests (may be null on failure) + [[nodiscard]] DescriptorHeapManager* getDescriptorHeapManager() const { return descriptorHeaps_.get(); } + + private: + std::unique_ptr descriptorHeaps_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/PlatformDevice.cpp b/src/igl/d3d12/PlatformDevice.cpp new file mode 100644 index 0000000000..9a4927baae --- /dev/null +++ b/src/igl/d3d12/PlatformDevice.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +namespace igl::d3d12 { + +PlatformDevice::PlatformDevice(Device& device) : device_(device) {} + +std::shared_ptr PlatformDevice::createTextureFromNativeDepth(uint32_t width, + uint32_t height, + Result* outResult) { + auto& ctx = device_.getD3D12Context(); + + // Create depth texture with D3D12 + TextureDesc depthDesc = TextureDesc::new2D(TextureFormat::Z_UNorm32, + width, + height, + TextureDesc::TextureUsageBits::Attachment, + "Swapchain Depth Texture"); + + // Allocate new depth texture if null or mismatches in size + if (!nativeDepthTexture_ || width != nativeDepthTexture_->getDimensions().width || + height != nativeDepthTexture_->getDimensions().height) { + nativeDepthTexture_ = device_.createTexture(depthDesc, outResult); + } + + Result::setResult(outResult, Result::Code::Ok); + return nativeDepthTexture_; +} + +std::shared_ptr PlatformDevice::createTextureFromNativeDrawable(Result* outResult) { + IGL_D3D12_LOG_VERBOSE("PlatformDevice::createTextureFromNativeDrawable() called\n"); + auto& ctx = device_.getD3D12Context(); + + // Get current back buffer from swapchain + uint32_t backBufferIndex = 
ctx.getCurrentBackBufferIndex(); + ID3D12Resource* backBuffer = ctx.getCurrentBackBuffer(); + + IGL_D3D12_LOG_VERBOSE(" backBufferIndex=%u, backBuffer=%p\n", backBufferIndex, backBuffer); + + if (!backBuffer) { + IGL_LOG_ERROR(" No back buffer available!\n"); + Result::setResult(outResult, Result::Code::RuntimeError, "No back buffer available"); + return nullptr; + } + + // Get back buffer description + D3D12_RESOURCE_DESC desc = backBuffer->GetDesc(); + const auto width = static_cast(desc.Width); + const auto height = static_cast(desc.Height); + + // Determine texture format based on DXGI format + // IMPORTANT: Use dxgiFormatToTextureFormat() to get the CORRECT IGL format + // from the actual D3D12 resource format. Do NOT hardcode RGBA_SRGB! + igl::TextureFormat iglFormat = dxgiFormatToTextureFormat(desc.Format); + if (iglFormat == igl::TextureFormat::Invalid) { + IGL_LOG_ERROR(" Unsupported DXGI format: %d\n", desc.Format); + Result::setResult(outResult, Result::Code::RuntimeError, "Unsupported swapchain DXGI format"); + return nullptr; + } + + // Ensure we have enough cached textures for swapchain images + while (nativeDrawableTextures_.size() <= backBufferIndex) { + nativeDrawableTextures_.push_back(nullptr); + } + + // Allocate new drawable texture if null or mismatches + if (!nativeDrawableTextures_[backBufferIndex] || + width != nativeDrawableTextures_[backBufferIndex]->getDimensions().width || + height != nativeDrawableTextures_[backBufferIndex]->getDimensions().height) { + + TextureDesc textureDesc; + textureDesc.type = TextureType::TwoD; + textureDesc.format = iglFormat; + textureDesc.width = width; + textureDesc.height = height; + textureDesc.depth = 1; + textureDesc.numLayers = 1; + textureDesc.numSamples = 1; + textureDesc.numMipLevels = 1; + textureDesc.usage = TextureDesc::TextureUsageBits::Attachment; + textureDesc.debugName = "Swapchain Back Buffer"; + + nativeDrawableTextures_[backBufferIndex] = Texture::createFromResource( + backBuffer, + 
iglFormat, + textureDesc, + ctx.getDevice(), + ctx.getCommandQueue(), + D3D12_RESOURCE_STATE_PRESENT); + + if (!nativeDrawableTextures_[backBufferIndex]) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create texture from back buffer"); + return nullptr; + } + } + + Result::setResult(outResult, Result::Code::Ok); + return nativeDrawableTextures_[backBufferIndex]; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/PlatformDevice.h b/src/igl/d3d12/PlatformDevice.h new file mode 100644 index 0000000000..b3a3dd6b72 --- /dev/null +++ b/src/igl/d3d12/PlatformDevice.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class Device; + +/// @brief Implements the igl::IPlatformDevice interface for D3D12 +class PlatformDevice : public IPlatformDevice { + public: + static constexpr igl::PlatformDeviceType kType = igl::PlatformDeviceType::D3D12; + + explicit PlatformDevice(Device& device); + ~PlatformDevice() override = default; + + /// Creates a Depth Texture from the D3D12 swapchain + /// @param width Width of the depth texture + /// @param height Height of the depth texture + /// @param outResult optional result + /// @return pointer to generated Texture or nullptr + std::shared_ptr createTextureFromNativeDepth(uint32_t width, + uint32_t height, + Result* outResult); + + /// Creates a texture from the D3D12 swapchain back buffer + /// @param outResult optional result + /// @return pointer to generated Texture or nullptr + std::shared_ptr createTextureFromNativeDrawable(Result* outResult); + + /// Clear the cached textures + void clear() { + nativeDrawableTextures_.clear(); + nativeDepthTexture_ = nullptr; + } + + protected: + [[nodiscard]] bool isType(PlatformDeviceType t) const noexcept override { + return t == 
kType; + } + + private: + Device& device_; + std::vector> nativeDrawableTextures_; + std::shared_ptr nativeDepthTexture_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/README.md b/src/igl/d3d12/README.md new file mode 100644 index 0000000000..3c39ab9d95 --- /dev/null +++ b/src/igl/d3d12/README.md @@ -0,0 +1,89 @@ +# IGL DirectX 12 Backend + +This directory contains the DirectX 12 backend implementation for IGL (Intermediate Graphics Library). + +## Status + +**Current Phase:** Phase 0 - CMake Setup Complete +**Next Phase:** Phase 1 - Stub Infrastructure + +## Architecture + +The D3D12 backend follows the same architectural pattern as the Vulkan backend, leveraging the 87% similarity between the two APIs. + +### Core Components (To Be Implemented) + +``` +d3d12/ +├── Common.h/cpp - Common types, constants, utilities +├── D3D12Headers.h - D3D12/DXGI includes wrapper +├── Device.h/cpp - ID3D12Device wrapper +├── CommandQueue.h/cpp - ID3D12CommandQueue wrapper +├── CommandBuffer.h/cpp - ID3D12GraphicsCommandList wrapper +├── RenderCommandEncoder.h/cpp - Render command encoding +├── ComputeCommandEncoder.h/cpp - Compute command encoding +├── D3D12Context.h/cpp - Core D3D12 state management +├── RenderPipelineState.h/cpp - ID3D12PipelineState wrapper +├── ComputePipelineState.h/cpp - Compute pipeline state +├── Buffer.h/cpp - ID3D12Resource (buffers) +├── Texture.h/cpp - ID3D12Resource (textures) +├── Sampler.h/cpp - D3D12_SAMPLER_DESC +├── Framebuffer.h/cpp - RTV + DSV collection +├── ShaderModule.h/cpp - DXIL/DXBC bytecode +├── ShaderStages.h/cpp - Shader stage management +├── DescriptorHeapPool.h/cpp - Descriptor heap management +├── RootSignature.h/cpp - Root signature cache +├── DXGISwapchain.h/cpp - DXGI swapchain wrapper +└── D3D12Helpers.h/cpp - Utility functions +``` + +## Build Instructions + +### Prerequisites + +- Windows 10 1909+ or Windows 11 +- Visual Studio 2019 or later +- Windows SDK (10.0.19041.0 or later) +- DirectX Shader Compiler (DXC) 
- included with Windows SDK 10.0.20348.0+ + +### CMake Configuration + +```bash +cmake -DIGL_WITH_D3D12=ON -DIGL_WITH_VULKAN=OFF -DIGL_WITH_OPENGL=OFF .. +``` + +Or with other backends enabled: + +```bash +cmake -DIGL_WITH_D3D12=ON .. +``` + +### Build + +```bash +cmake --build . --config Release +``` + +## Implementation Plan + +See [DIRECTX12_MIGRATION_PLAN.md](../../../DIRECTX12_MIGRATION_PLAN.md) for the complete migration plan. + +### Progress + +- [x] Phase 0: CMake Setup + - [x] DirectX 12 Agility SDK headers + - [x] CMake configuration +- [ ] Phase 1: Stub Infrastructure (13 stub classes) +- [ ] Phase 2: EmptySession (Clear screen) +- [ ] Phase 3: TinyMeshSession (Triangle rendering) +- [ ] Phase 4: three-cubes (Full demo) + +## References + +- [DirectX 12 Programming Guide](https://learn.microsoft.com/en-us/windows/win32/direct3d12/directx-12-programming-guide) +- [DirectX Shader Compiler](https://github.com/microsoft/DirectXShaderCompiler) +- [DirectX-Headers](https://github.com/microsoft/DirectX-Headers) + +## License + +Licensed under the MIT License. See [LICENSE](../../../LICENSE.md) for details. diff --git a/src/igl/d3d12/RenderCommandEncoder.cpp b/src/igl/d3d12/RenderCommandEncoder.cpp new file mode 100644 index 0000000000..fef1c19489 --- /dev/null +++ b/src/igl/d3d12/RenderCommandEncoder.cpp @@ -0,0 +1,1827 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +RenderCommandEncoder::RenderCommandEncoder(CommandBuffer& commandBuffer, + const std::shared_ptr& framebuffer) + : IRenderCommandEncoder(nullptr), + commandBuffer_(commandBuffer), + commandList_(commandBuffer.getCommandList()), + resourcesBinder_(commandBuffer, false /* isCompute */), + framebuffer_(framebuffer) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::RenderCommandEncoder() - Lightweight initialization\n"); +} + +void RenderCommandEncoder::begin(const RenderPassDesc& renderPass) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::begin() - command list is closed or null\n"); + return; + } + // Enforce single-call semantics: begin() allocates descriptors and cannot be safely called twice. + IGL_DEBUG_ASSERT(!hasBegun_, "begin() called multiple times - this will cause resource leaks"); + hasBegun_ = true; + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::begin() - START\n"); + auto& context = commandBuffer_.getContext(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Got context\n"); + + // Set descriptor heaps for this command list. + // Must use per-frame heaps from D3D12Context, not DescriptorHeapManager. + // Per-frame heaps are isolated per frame to prevent descriptor conflicts. + DescriptorHeapManager* heapMgr = context.getDescriptorHeapManager(); + + // Use active heap from frame context, not the legacy accessor. + // This ensures we bind the currently active page, not hardcoded page 0. 
+ auto& frameCtx = context.getFrameContexts()[context.getCurrentFrameIndex()]; + cbvSrvUavHeap_ = frameCtx.activeCbvSrvUavHeap.Get(); + samplerHeap_ = frameCtx.samplerHeap.Get(); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Using active per-frame heap from FrameContext\n"); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: CBV/SRV/UAV heap (active) = %p\n", cbvSrvUavHeap_); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Sampler heap = %p\n", samplerHeap_); + + // Bind active heap (may be page 0 or a later page). + ID3D12DescriptorHeap* heaps[] = {cbvSrvUavHeap_, samplerHeap_}; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Setting descriptor heaps...\n"); + commandList_->SetDescriptorHeaps(2, heaps); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Descriptor heaps set\n"); + + // Create RTV from framebuffer if provided; otherwise fallback to swapchain RTV + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Setting up RTV...\n"); + D3D12_CPU_DESCRIPTOR_HANDLE rtv = {}; + std::vector rtvs; + rtvIndices_.clear(); + bool usedOffscreenRTV = false; + // Note: heapMgr already retrieved above for setting descriptor heaps + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: DescriptorHeapManager = %p\n", heapMgr); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Checking framebuffer_=%p\n", framebuffer_.get()); + // Only create offscreen RTV if we have DescriptorHeapManager AND it's not a swapchain texture + // Swapchain textures should use context.getCurrentRTV() directly + if (framebuffer_ && framebuffer_->getColorAttachment(0) && heapMgr) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Has framebuffer with color attachment AND DescriptorHeapManager\n"); + ID3D12Device* device = context.getDevice(); + if (device) { + // Create RTVs for each color attachment + const size_t count = std::min(framebuffer_->getColorAttachmentIndices().size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT count = %zu (indices.size=%zu)\n", count, 
framebuffer_->getColorAttachmentIndices().size()); + for (size_t i = 0; i < count; ++i) { + auto tex = std::static_pointer_cast(framebuffer_->getColorAttachment(i)); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT loop i=%zu, tex=%p, resource=%p\n", i, tex.get(), tex ? tex->getResource() : nullptr); + if (!tex || !tex->getResource()) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT loop i=%zu SKIPPED (null tex or resource)\n", i); + continue; + } + const bool hasAttachmentDesc = (i < renderPass.colorAttachments.size()); + // CRITICAL: Extract values before using in expressions to avoid MSVC debug iterator checks + const uint32_t mipLevel = hasAttachmentDesc ? renderPass.colorAttachments[i].mipLevel : 0; + const uint32_t attachmentLayer = hasAttachmentDesc ? renderPass.colorAttachments[i].layer : 0; + const uint32_t attachmentFace = hasAttachmentDesc ? renderPass.colorAttachments[i].face : 0; + // Allocate RTV + uint32_t rtvIdx = heapMgr->allocateRTV(); + if (rtvIdx == UINT32_MAX) { + IGL_LOG_ERROR("RenderCommandEncoder: Failed to allocate RTV descriptor (heap exhausted)\n"); + continue; + } + // Check return value from getHandle. + D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle; + if (!heapMgr->getRTVHandle(rtvIdx, &rtvHandle)) { + IGL_LOG_ERROR("RenderCommandEncoder: Failed to get RTV handle for index %u\n", rtvIdx); + heapMgr->freeRTV(rtvIdx); + continue; + } + rtvIndices_.push_back(rtvIdx); + // Create RTV view - use the resource's actual format to avoid SRGB/UNORM mismatches + D3D12_RESOURCE_DESC resourceDesc = tex->getResource()->GetDesc(); + D3D12_RENDER_TARGET_VIEW_DESC rdesc = {}; + rdesc.Format = resourceDesc.Format; // Use actual D3D12 resource format, not IGL format + + // Determine if this is a texture array or texture view. + // Cube textures are stored as 2D array resources (6 slices per cube). + const bool isView = tex->isView(); + const bool isCubeTexture = (tex->getType() == TextureType::Cube); + const uint32_t arraySliceOffset = isView ? 
tex->getArraySliceOffset() : 0; + const uint32_t totalArraySlices = + isView ? tex->getNumArraySlicesInView() : resourceDesc.DepthOrArraySize; + const bool isArrayTexture = !isCubeTexture && + ((isView && tex->getNumArraySlicesInView() > 0) || + (!isView && resourceDesc.DepthOrArraySize > 1)); + uint32_t targetArraySlice = attachmentLayer; + if (isCubeTexture) { + // Cube textures map faces onto 2D array slices. See Texture Subresources (D3D12). + const uint32_t clampedFace = std::min(attachmentFace, 5u); + const uint32_t cubesInView = (totalArraySlices + 5u) / 6u; + const uint32_t clampedCubeIndex = + std::min(attachmentLayer, (cubesInView == 0u) ? 0u : (cubesInView - 1u)); + const uint32_t baseSlice = arraySliceOffset + clampedCubeIndex * 6u; + const uint32_t maxSlice = + (totalArraySlices > 0u) ? (arraySliceOffset + totalArraySlices - 1u) + : arraySliceOffset; + targetArraySlice = std::min(baseSlice + clampedFace, maxSlice); + } + + // Set view dimension based on sample count (MSAA support) and array type + if (resourceDesc.SampleDesc.Count > 1) { + // MSAA texture + if (isCubeTexture) { + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + rdesc.Texture2DMSArray.FirstArraySlice = targetArraySlice; + rdesc.Texture2DMSArray.ArraySize = 1; + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Creating MSAA cube RTV with %u samples, face %u, cube index %u (array slice %u)\n", + resourceDesc.SampleDesc.Count, + attachmentFace, + attachmentLayer, + rdesc.Texture2DMSArray.FirstArraySlice); + } else if (isArrayTexture) { + // MSAA texture array - use TEXTURE2DMSARRAY view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + if (isView) { + rdesc.Texture2DMSArray.FirstArraySlice = tex->getArraySliceOffset(); + rdesc.Texture2DMSArray.ArraySize = tex->getNumArraySlicesInView(); + } else { + rdesc.Texture2DMSArray.FirstArraySlice = attachmentLayer; + rdesc.Texture2DMSArray.ArraySize = 1; // Render to single layer + } + 
IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating MSAA array RTV with %u samples, layer %u\n", + resourceDesc.SampleDesc.Count, rdesc.Texture2DMSArray.FirstArraySlice); + } else { + // MSAA non-array texture - use TEXTURE2DMS view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating MSAA RTV with %u samples\n", resourceDesc.SampleDesc.Count); + } + } else { + // Non-MSAA texture + if (isCubeTexture) { + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + rdesc.Texture2DArray.MipSlice = mipLevel; + rdesc.Texture2DArray.PlaneSlice = 0; + rdesc.Texture2DArray.FirstArraySlice = targetArraySlice; + rdesc.Texture2DArray.ArraySize = 1; + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Creating cube RTV, mip %u, face %u, cube index %u (array slice %u)\n", + mipLevel, + attachmentFace, + attachmentLayer, + rdesc.Texture2DArray.FirstArraySlice); + } else if (isArrayTexture) { + // Texture array - use TEXTURE2DARRAY view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + // CRITICAL: Extract value before assignment to avoid MSVC debug iterator bounds check + const uint32_t mipSliceArray = (i < renderPass.colorAttachments.size()) ? 
renderPass.colorAttachments[i].mipLevel : 0; + rdesc.Texture2DArray.MipSlice = mipSliceArray; + rdesc.Texture2DArray.PlaneSlice = 0; + if (isView) { + rdesc.Texture2DArray.FirstArraySlice = tex->getArraySliceOffset(); + rdesc.Texture2DArray.ArraySize = tex->getNumArraySlicesInView(); + } else { + rdesc.Texture2DArray.FirstArraySlice = attachmentLayer; + rdesc.Texture2DArray.ArraySize = 1; // Render to single layer + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating array RTV, mip %u, layer %u\n", + rdesc.Texture2DArray.MipSlice, rdesc.Texture2DArray.FirstArraySlice); + } else { + // Non-array texture - use standard TEXTURE2D view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + // CRITICAL: Extract value before assignment to avoid MSVC debug iterator bounds check + const uint32_t mipSlice2D = (i < renderPass.colorAttachments.size()) ? renderPass.colorAttachments[i].mipLevel : 0; + rdesc.Texture2D.MipSlice = mipSlice2D; + rdesc.Texture2D.PlaneSlice = 0; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating RTV, mip %u\n", rdesc.Texture2D.MipSlice); + } + } + // Pre-creation validation. + IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(tex->getResource() != nullptr, "Texture resource is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(rtvHandle.ptr != 0, "RTV descriptor handle is invalid"); + + device->CreateRenderTargetView(tex->getResource(), &rdesc, rtvHandle); + + // Transition to RENDER_TARGET + // IMPORTANT: For multi-frame rendering, offscreen targets may have been transitioned to + // PIXEL_SHADER_RESOURCE in the previous frame's endEncoding(). We MUST transition them + // back to RENDER_TARGET at the start of each render pass. + // The transitionTo() function checks current state and only transitions if needed. + const uint32_t transitionSlice = + isCubeTexture ? 
targetArraySlice : attachmentLayer; + tex->transitionTo( + commandList_, D3D12_RESOURCE_STATE_RENDER_TARGET, mipLevel, transitionSlice); + + // Clear if requested + if (hasAttachmentDesc && renderPass.colorAttachments[i].loadAction == LoadAction::Clear) { + const auto& clearColor = renderPass.colorAttachments[i].clearColor; + const float color[] = {clearColor.r, clearColor.g, clearColor.b, clearColor.a}; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Clearing MRT attachment %zu with color (%.2f, %.2f, %.2f, %.2f)\n", + i, color[0], color[1], color[2], color[3]); + commandList_->ClearRenderTargetView(rtvHandle, color, 0, nullptr); + } else { + // CRITICAL: Must extract value completely outside ternary to avoid MSVC debug iterator check + int loadActionDbg = -1; + if (i < renderPass.colorAttachments.size()) { + loadActionDbg = (int)renderPass.colorAttachments[i].loadAction; + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: NOT clearing MRT attachment %zu (loadAction=%d, hasAttachment=%d)\n", + i, loadActionDbg, i < renderPass.colorAttachments.size()); + } + rtvs.push_back(rtvHandle); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT Created RTV #%zu, total RTVs now=%zu\n", i, rtvs.size()); + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT Total RTVs created: %zu\n", rtvs.size()); + if (!rtvs.empty()) { + rtv = rtvs[0]; + usedOffscreenRTV = true; + } + } + } + if (!usedOffscreenRTV) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Using swapchain back buffer\n"); + auto* backBuffer = context.getCurrentBackBuffer(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Got back buffer=%p\n", backBuffer); + if (!backBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder: No back buffer available\n"); + return; + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Transitioning back buffer to RENDER_TARGET\n"); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + 
barrier.Transition.pResource = backBuffer; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + commandList_->ResourceBarrier(1, &barrier); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Resource barrier executed\n"); + + if (!renderPass.colorAttachments.empty() && + renderPass.colorAttachments[0].loadAction == LoadAction::Clear) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Clearing render target\n"); + const auto& cc = renderPass.colorAttachments[0].clearColor; + const float col[] = {cc.r, cc.g, cc.b, cc.a}; + D3D12_CPU_DESCRIPTOR_HANDLE swapRtv = context.getCurrentRTV(); + commandList_->ClearRenderTargetView(swapRtv, col, 0, nullptr); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Clear complete\n"); + } + rtv = context.getCurrentRTV(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Got RTV handle\n"); + } + + // Create/Bind depth-stencil view if we have a framebuffer with a depth attachment + const bool hasDepth = (framebuffer_ && framebuffer_->getDepthAttachment()); + if (hasDepth) { + auto depthTex = std::static_pointer_cast(framebuffer_->getDepthAttachment()); + ID3D12Device* device = context.getDevice(); + if (device && depthTex && depthTex->getResource()) { + if (heapMgr) { + dsvIndex_ = heapMgr->allocateDSV(); + // Check return value from getHandle. 
+ if (!heapMgr->getDSVHandle(dsvIndex_, &dsvHandle_)) { + IGL_LOG_ERROR("RenderCommandEncoder: Failed to get DSV handle for index %u\n", dsvIndex_); + heapMgr->freeDSV(dsvIndex_); + dsvIndex_ = UINT32_MAX; + return; + } + } else { + // Fallback: transient heap + igl::d3d12::ComPtr tmpHeap; + D3D12_DESCRIPTOR_HEAP_DESC dsvHeapDesc = {}; + dsvHeapDesc.NumDescriptors = 1; + dsvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + dsvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + device->CreateDescriptorHeap(&dsvHeapDesc, IID_PPV_ARGS(tmpHeap.GetAddressOf())); + dsvHandle_ = tmpHeap->GetCPUDescriptorHandleForHeapStart(); + } + + // Create DSV description + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + dsvDesc.Format = textureFormatToDXGIFormat(depthTex->getFormat()); + dsvDesc.Flags = D3D12_DSV_FLAG_NONE; + + // Set view dimension based on sample count (MSAA support) + D3D12_RESOURCE_DESC depthResourceDesc = depthTex->getResource()->GetDesc(); + if (depthResourceDesc.SampleDesc.Count > 1) { + // MSAA depth texture - use TEXTURE2DMS view dimension + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating MSAA DSV with %u samples\n", depthResourceDesc.SampleDesc.Count); + } else { + // Non-MSAA depth texture - use standard TEXTURE2D view dimension + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Texture2D.MipSlice = renderPass.depthAttachment.mipLevel; + } + + // Transition the entire depth resource to DEPTH_WRITE before clearing. + // Some render paths (e.g. mipmapped depth) may have touched multiple + // subresources; using transitionAll ensures the clear sees a valid state + // for every subresource referenced by this DSV. + depthTex->transitionAll(commandList_, D3D12_RESOURCE_STATE_DEPTH_WRITE); + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateDepthStencilView"); + IGL_DEBUG_ASSERT(depthTex->getResource() != nullptr, "Depth texture resource is null"); + IGL_DEBUG_ASSERT(dsvHandle_.ptr != 0, "DSV descriptor handle is invalid"); + + device->CreateDepthStencilView(depthTex->getResource(), &dsvDesc, dsvHandle_); + + // Clear depth and/or stencil if requested + const bool clearDepth = (renderPass.depthAttachment.loadAction == LoadAction::Clear); + const bool clearStencil = (renderPass.stencilAttachment.loadAction == LoadAction::Clear); + + if (clearDepth || clearStencil) { + D3D12_CLEAR_FLAGS clearFlags = static_cast(0); + if (clearDepth) { + clearFlags = static_cast(clearFlags | D3D12_CLEAR_FLAG_DEPTH); + } + if (clearStencil) { + clearFlags = static_cast(clearFlags | D3D12_CLEAR_FLAG_STENCIL); + } + + const float depthClearValue = renderPass.depthAttachment.clearDepth; + const UINT8 stencilClearValue = static_cast(renderPass.stencilAttachment.clearStencil); + + commandList_->ClearDepthStencilView(dsvHandle_, clearFlags, depthClearValue, stencilClearValue, 0, nullptr); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Cleared depth-stencil (depth=%d, stencil=%d, depthVal=%.2f, stencilVal=%u)\n", + clearDepth, clearStencil, depthClearValue, stencilClearValue); + } + + // Bind RTV + DSV (or DSV-only for depth-only rendering) + if (!rtvs.empty()) { + // Multi-render target or offscreen rendering with color+depth + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with %zu RTVs + DSV\n", rtvs.size()); + commandList_->OMSetRenderTargets(static_cast(rtvs.size()), rtvs.data(), FALSE, &dsvHandle_); + } else if (usedOffscreenRTV) { + // Single offscreen render target with depth + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with 1 RTV + DSV\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, &dsvHandle_); + } else if (!framebuffer_->getColorAttachment(0)) { + // Depth-only rendering (no color attachments) - shadow 
mapping scenario + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Depth-only rendering - OMSetRenderTargets with 0 RTVs + DSV\n"); + commandList_->OMSetRenderTargets(0, nullptr, FALSE, &dsvHandle_); + } else { + // Swapchain backbuffer with depth + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with swapchain RTV + DSV\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, &dsvHandle_); + } + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Binding RTV without DSV (no resource)\n"); + if (!rtvs.empty()) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with %zu RTVs, no DSV\n", rtvs.size()); + commandList_->OMSetRenderTargets(static_cast(rtvs.size()), rtvs.data(), FALSE, nullptr); + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with 1 RTV, no DSV\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, nullptr); + } + } + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Binding RTV without DSV (no hasDepth)\n"); + if (!rtvs.empty()) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with %zu RTVs, no DSV (no hasDepth)\n", rtvs.size()); + commandList_->OMSetRenderTargets(static_cast(rtvs.size()), rtvs.data(), FALSE, nullptr); + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with 1 RTV, no DSV (no hasDepth)\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, nullptr); + } + } + + // Set a default full-screen viewport/scissor if caller forgets. Prefer framebuffer attachments. + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Setting default viewport...\n"); + if (framebuffer_) { + // Prefer color attachment if present; otherwise fall back to depth attachment. 
+ auto colorTex = std::static_pointer_cast(framebuffer_->getColorAttachment(0)); + auto depthTex = std::static_pointer_cast(framebuffer_->getDepthAttachment()); + + Dimensions dims{}; + if (colorTex && colorTex->getResource()) { + dims = colorTex->getDimensions(); + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Using framebuffer color attachment dimensions: %ux%u\n", + dims.width, + dims.height); + } else if (depthTex && depthTex->getResource()) { + dims = depthTex->getDimensions(); + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Using framebuffer depth attachment dimensions: %ux%u\n", + dims.width, + dims.height); + } + + if (dims.width > 0 && dims.height > 0) { + D3D12_VIEWPORT vp{}; + vp.TopLeftX = 0; + vp.TopLeftY = 0; + vp.Width = static_cast(dims.width); + vp.Height = static_cast(dims.height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + commandList_->RSSetViewports(1, &vp); + + D3D12_RECT sc{}; + sc.left = 0; + sc.top = 0; + sc.right = static_cast(dims.width); + sc.bottom = static_cast(dims.height); + commandList_->RSSetScissorRects(1, &sc); + + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Set default viewport/scissor to %ux%u\n", + dims.width, + dims.height); + } else { + IGL_LOG_ERROR( + "RenderCommandEncoder: Framebuffer has no valid color or depth attachment dimensions; " + "falling back to back buffer viewport.\n"); + auto* backBufferRes = context.getCurrentBackBuffer(); + if (backBufferRes) { + D3D12_RESOURCE_DESC bbDesc = backBufferRes->GetDesc(); + D3D12_VIEWPORT vp{}; + vp.TopLeftX = 0; + vp.TopLeftY = 0; + vp.Width = static_cast(bbDesc.Width); + vp.Height = static_cast(bbDesc.Height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + commandList_->RSSetViewports(1, &vp); + + D3D12_RECT scissor{}; + scissor.left = 0; + scissor.top = 0; + scissor.right = static_cast(bbDesc.Width); + scissor.bottom = static_cast(bbDesc.Height); + commandList_->RSSetScissorRects(1, &scissor); + + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Fallback 
viewport/scissor to back buffer %llux%u\n", + bbDesc.Width, + bbDesc.Height); + } else { + IGL_LOG_ERROR("RenderCommandEncoder: No back buffer available!\n"); + } + } + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Using back buffer\n"); + auto* backBufferRes = context.getCurrentBackBuffer(); + if (backBufferRes) { + D3D12_RESOURCE_DESC bbDesc = backBufferRes->GetDesc(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Back buffer dimensions: %llux%u\n", bbDesc.Width, bbDesc.Height); + D3D12_VIEWPORT vp = {}; vp.TopLeftX=0; vp.TopLeftY=0; vp.Width=(float)bbDesc.Width; vp.Height=(float)bbDesc.Height; vp.MinDepth=0; vp.MaxDepth=1; + commandList_->RSSetViewports(1, &vp); + D3D12_RECT scissor = {}; scissor.left=0; scissor.top=0; scissor.right=(LONG)bbDesc.Width; scissor.bottom=(LONG)bbDesc.Height; commandList_->RSSetScissorRects(1, &scissor); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Set default viewport/scissor to back buffer %llux%u\n", bbDesc.Width, bbDesc.Height); + } else { + IGL_LOG_ERROR("RenderCommandEncoder: No back buffer available!\n"); + } + } + + // Capture actual framebuffer formats for dynamic PSO selection (Vulkan-style pattern) + // This enables PSO variants to be selected at draw time based on framebuffer formats + dynamicState_ = D3D12RenderPipelineDynamicState(); // Reset to UNKNOWN + + // Capture RTV formats from actual framebuffer resources + // IMPORTANT: Use Texture::getFormat() not resource format - texture views may have different formats + if (framebuffer_) { + const size_t numColorAttachments = std::min( + framebuffer_->getColorAttachmentIndices().size(), + D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + for (size_t i = 0; i < numColorAttachments; ++i) { + auto colorTex = std::static_pointer_cast( + framebuffer_->getColorAttachment(i)); + if (colorTex) { + // Use getFormat() which returns the view format, not the resource format + dynamicState_.rtvFormats[i] = textureFormatToDXGIFormat(colorTex->getFormat()); + } + } + + // Capture 
DSV format + auto depthTex = std::static_pointer_cast( + framebuffer_->getDepthAttachment()); + if (depthTex) { + // Use getFormat() which returns the view format, not the resource format + dynamicState_.dsvFormat = textureFormatToDXGIFormat(depthTex->getFormat()); + } + } else { + // Fallback: swapchain back buffer + auto* backBuffer = context.getCurrentBackBuffer(); + if (backBuffer) { + D3D12_RESOURCE_DESC bbDesc = backBuffer->GetDesc(); + dynamicState_.rtvFormats[0] = bbDesc.Format; + } + } + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::begin() - Captured framebuffer formats: RTV[0]=%d, DSV=%d\n", + static_cast(dynamicState_.rtvFormats[0]), + static_cast(dynamicState_.dsvFormat)); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::begin() - Complete!\n"); +} + +void RenderCommandEncoder::endEncoding() { + auto& context2 = commandBuffer_.getContext(); + + // ========== MSAA RESOLVE OPERATION ========== + // Resolve MSAA textures to non-MSAA textures before transitioning resources + // This must happen AFTER rendering but BEFORE the final state transitions + if (framebuffer_) { + // Resolve color attachments + const auto indices = framebuffer_->getColorAttachmentIndices(); + for (size_t i : indices) { + auto msaaAttachment = std::static_pointer_cast(framebuffer_->getColorAttachment(i)); + auto resolveAttachment = std::static_pointer_cast(framebuffer_->getResolveColorAttachment(i)); + + // Check if both MSAA source and resolve target exist + if (msaaAttachment && resolveAttachment && + msaaAttachment->getResource() && resolveAttachment->getResource()) { + + // Verify MSAA source has samples > 1 and resolve target has samples == 1 + D3D12_RESOURCE_DESC msaaDesc = msaaAttachment->getResource()->GetDesc(); + D3D12_RESOURCE_DESC resolveDesc = resolveAttachment->getResource()->GetDesc(); + + if (msaaDesc.SampleDesc.Count > 1 && resolveDesc.SampleDesc.Count == 1) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - Resolving MSAA color attachment %zu (%u samples 
-> 1 sample)\n", + i, msaaDesc.SampleDesc.Count); + + // Transition MSAA texture to RESOLVE_SOURCE state + msaaAttachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + + // Transition resolve texture to RESOLVE_DEST state + resolveAttachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_RESOLVE_DEST); + + // Perform resolve operation: converts multi-sample texture to single-sample + // This averages all samples in the MSAA texture and writes to the resolve texture + commandList_->ResolveSubresource( + resolveAttachment->getResource(), // pDstResource (non-MSAA) + 0, // DstSubresource (mip 0, layer 0) + msaaAttachment->getResource(), // pSrcResource (MSAA) + 0, // SrcSubresource (mip 0, layer 0) + msaaDesc.Format // Format (must be compatible) + ); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - MSAA color resolve completed for attachment %zu\n", i); + + // Transition resolve texture to PIXEL_SHADER_RESOURCE for subsequent use + resolveAttachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + + // Resolve depth attachment if present + auto msaaDepth = std::static_pointer_cast(framebuffer_->getDepthAttachment()); + auto resolveDepth = std::static_pointer_cast(framebuffer_->getResolveDepthAttachment()); + + if (msaaDepth && resolveDepth && + msaaDepth->getResource() && resolveDepth->getResource()) { + + D3D12_RESOURCE_DESC msaaDesc = msaaDepth->getResource()->GetDesc(); + D3D12_RESOURCE_DESC resolveDesc = resolveDepth->getResource()->GetDesc(); + + if (msaaDesc.SampleDesc.Count > 1 && resolveDesc.SampleDesc.Count == 1) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - Resolving MSAA depth attachment (%u samples -> 1 sample)\n", + msaaDesc.SampleDesc.Count); + + // Transition depth textures to appropriate resolve states + msaaDepth->transitionAll(commandList_, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + resolveDepth->transitionAll(commandList_, 
D3D12_RESOURCE_STATE_RESOLVE_DEST); + + // Resolve depth buffer + commandList_->ResolveSubresource( + resolveDepth->getResource(), + 0, + msaaDepth->getResource(), + 0, + msaaDesc.Format + ); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - MSAA depth resolve completed\n"); + + // Transition resolved depth to shader resource for sampling + resolveDepth->transitionAll(commandList_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + // ========== END MSAA RESOLVE OPERATION ========== + + // For offscreen framebuffers (MRT targets), transition all attachments to PIXEL_SHADER_RESOURCE + // so they can be sampled in subsequent passes + if (framebuffer_ && framebuffer_->getColorAttachment(0)) { + auto swapColor = std::static_pointer_cast(framebuffer_->getColorAttachment(0)); + + // Check if this is the swapchain backbuffer + const bool isSwapchainTarget = (swapColor && swapColor->getResource() == context2.getCurrentBackBuffer()); + + if (isSwapchainTarget) { + // Swapchain framebuffer: transition to PRESENT + swapColor->transitionAll(commandList_, D3D12_RESOURCE_STATE_PRESENT); + } else { + // Offscreen framebuffer (e.g., MRT targets): transition all color attachments to PIXEL_SHADER_RESOURCE + // This allows the render targets to be sampled in subsequent rendering passes (multi-frame support) + const auto indices = framebuffer_->getColorAttachmentIndices(); + for (size_t i : indices) { + auto attachment = std::static_pointer_cast(framebuffer_->getColorAttachment(i)); + if (attachment && attachment->getResource()) { + attachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + } else { + // No framebuffer provided - using swapchain directly + auto* backBuffer = context2.getCurrentBackBuffer(); + if (backBuffer) { + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = backBuffer; + 
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + commandList_->ResourceBarrier(1, &barrier); + } + } + + // G-001: Flush any remaining barriers before ending encoding + flushBarriers(); + + // Return RTV/DSV indices to the descriptor heap manager if used + if (auto* mgr = context2.getDescriptorHeapManager()) { + if (!rtvIndices_.empty()) { + for (auto idx : rtvIndices_) { + mgr->freeRTV(idx); + } + rtvIndices_.clear(); + } + if (dsvIndex_ != UINT32_MAX) { + mgr->freeDSV(dsvIndex_); + dsvIndex_ = UINT32_MAX; + } + } +} + +void RenderCommandEncoder::bindViewport(const Viewport& viewport) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindViewport called on closed command list\n"); + return; + } + IGL_D3D12_LOG_VERBOSE("bindViewport called: x=%.1f, y=%.1f, w=%.1f, h=%.1f\n", + viewport.x, viewport.y, viewport.width, viewport.height); + D3D12_VIEWPORT vp = {}; + vp.TopLeftX = viewport.x; + vp.TopLeftY = viewport.y; + vp.Width = viewport.width; + vp.Height = viewport.height; + vp.MinDepth = viewport.minDepth; + vp.MaxDepth = viewport.maxDepth; + commandList_->RSSetViewports(1, &vp); +} + +void RenderCommandEncoder::bindScissorRect(const ScissorRect& rect) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindScissorRect called on closed command list\n"); + return; + } + D3D12_RECT scissor = {}; + scissor.left = static_cast(rect.x); + scissor.top = static_cast(rect.y); + scissor.right = static_cast(rect.x + rect.width); + scissor.bottom = static_cast(rect.y + rect.height); + commandList_->RSSetScissorRects(1, &scissor); +} + +void RenderCommandEncoder::bindRenderPipelineState( + const std::shared_ptr& pipelineState) { + if (!commandBuffer_.isRecording() || !commandList_) { + 
IGL_LOG_ERROR("RenderCommandEncoder::bindRenderPipelineState called on closed command list\n"); + return; + } + if (!pipelineState) { + IGL_LOG_ERROR("bindRenderPipelineState: pipelineState is null!\n"); + return; + } + + auto* d3dPipelineState = static_cast(pipelineState.get()); + + // Cache pipeline state for dynamic PSO variant selection at draw time + currentRenderPipelineState_ = d3dPipelineState; + + // Get PSO variant matching actual framebuffer formats (Vulkan-style dynamic selection) + auto* pso = d3dPipelineState->getPipelineState(dynamicState_, commandBuffer_.getDevice()); + auto* rootSig = d3dPipelineState->getRootSignature(); + + if (!pso) { + IGL_LOG_ERROR("bindRenderPipelineState: PSO is null!\n"); + return; + } + if (!rootSig) { + IGL_LOG_ERROR("bindRenderPipelineState: Root signature is null!\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("bindRenderPipelineState: PSO=%p, RootSig=%p\n", pso, rootSig); + + commandList_->SetPipelineState(pso); + commandList_->SetGraphicsRootSignature(rootSig); + + // Set primitive topology from the pipeline state + D3D_PRIMITIVE_TOPOLOGY topology = d3dPipelineState->getPrimitiveTopology(); + IGL_D3D12_LOG_VERBOSE("bindRenderPipelineState: Setting topology=%d\n", (int)topology); + commandList_->IASetPrimitiveTopology(topology); + + // Cache vertex stride from pipeline (used when binding vertex buffers) + currentVertexStride_ = d3dPipelineState->getVertexStride(); + // Fill per-slot strides + for (size_t s = 0; s < IGL_BUFFER_BINDINGS_MAX; ++s) { + vertexStrides_[s] = d3dPipelineState->getVertexStride(s); + } +} + +void RenderCommandEncoder::bindDepthStencilState( + const std::shared_ptr& /*depthStencilState*/) {} + + +void RenderCommandEncoder::bindVertexBuffer(uint32_t index, + IBuffer& buffer, + size_t bufferOffset) { + IGL_D3D12_LOG_VERBOSE("bindVertexBuffer called: index=%u\n", index); + if (index >= IGL_BUFFER_BINDINGS_MAX) { + IGL_LOG_ERROR("bindVertexBuffer: index %u exceeds max %u\n", index, 
IGL_BUFFER_BINDINGS_MAX); + return; + } + + auto* d3dBuffer = static_cast(&buffer); + cachedVertexBuffers_[index].bufferLocation = d3dBuffer->gpuAddress(bufferOffset); + cachedVertexBuffers_[index].sizeInBytes = static_cast(d3dBuffer->getSizeInBytes() - bufferOffset); + cachedVertexBuffers_[index].bound = true; +} + +void RenderCommandEncoder::bindIndexBuffer(IBuffer& buffer, + IndexFormat format, + size_t bufferOffset) { + IGL_D3D12_LOG_VERBOSE("bindIndexBuffer called\n"); + auto* d3dBuffer = static_cast(&buffer); + cachedIndexBuffer_.bufferLocation = d3dBuffer->gpuAddress(bufferOffset); + cachedIndexBuffer_.sizeInBytes = static_cast(d3dBuffer->getSizeInBytes() - bufferOffset); + // D3D12 only supports 16-bit and 32-bit index formats (not 8-bit) + cachedIndexBuffer_.format = (format == IndexFormat::UInt16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; + cachedIndexBuffer_.bound = true; +} + +void RenderCommandEncoder::bindBytes(size_t /*index*/, + uint8_t /*target*/, + const void* /*data*/, + size_t /*length*/) { + // D3D12 backend does not support bindBytes + // Applications should use uniform buffers (bindBuffer) instead + // This is a no-op to maintain compatibility with cross-platform code + IGL_DEBUG_ASSERT_NOT_IMPLEMENTED(); + IGL_LOG_INFO_ONCE("bindBytes is not supported in D3D12 backend. 
Use bindBuffer with uniform buffers instead.\n"); +} +void RenderCommandEncoder::bindPushConstants(const void* data, + size_t length, + size_t offset) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindPushConstants called on closed command list\n"); + return; + } + if (!data || length == 0) { + return; + } + + // Query pipeline for dynamic root parameter index (eliminates hardcoded assumptions) + if (!currentRenderPipelineState_) { + IGL_LOG_ERROR("bindPushConstants called without bound pipeline state\n"); + return; + } + + if (!currentRenderPipelineState_->hasPushConstants()) { + IGL_LOG_ERROR("bindPushConstants called but pipeline has no push constants\n"); + return; + } + + const UINT rootParamIndex = currentRenderPipelineState_->getPushConstantRootParameterIndex(); + + // Offset and length are in bytes; convert to 32-bit units. + const UINT offset32 = static_cast(offset / sizeof(uint32_t)); + const UINT num32 = static_cast((length + sizeof(uint32_t) - 1) / sizeof(uint32_t)); + + // D3D12 permits up to 64 bytes (16 DWORDs) of root constants; enforce this + // conservatively to avoid exceeding the root signature declaration. + if (offset32 + num32 > 16) { + IGL_LOG_ERROR("bindPushConstants: push constant range (%u dwords at offset %u) exceeds 16 dword limit\n", + num32, + offset32); + return; + } + + commandList_->SetGraphicsRoot32BitConstants(rootParamIndex, num32, data, offset32); +} +void RenderCommandEncoder::bindSamplerState(size_t index, + uint8_t /*target*/, + ISamplerState* samplerState) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindSamplerState called on closed command list\n"); + return; + } + // Delegate to D3D12ResourcesBinder for centralized descriptor management. 
+ resourcesBinder_.bindSamplerState(static_cast(index), samplerState); + + // Clear bindBindGroup cache to switch from bindBindGroup path to bindSamplerState path + // This ensures draw() will call resourcesBinder_.updateBindings() instead of using cached handles + cachedTextureCount_ = 0; + cachedSamplerCount_ = 0; + usedBindGroup_ = false; +} +void RenderCommandEncoder::bindTexture(size_t index, + uint8_t /*target*/, + ITexture* texture) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindTexture called on closed command list\n"); + return; + } + // Delegate to single-argument version + bindTexture(index, texture); +} + +void RenderCommandEncoder::bindTexture(size_t index, ITexture* texture) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindTexture called on closed command list\n"); + return; + } + // Delegate to D3D12ResourcesBinder for centralized descriptor management. + resourcesBinder_.bindTexture(static_cast(index), texture); + + // Clear bindBindGroup cache to switch from bindBindGroup path to bindTexture path + // This ensures draw() will call resourcesBinder_.updateBindings() instead of using cached handles + cachedTextureCount_ = 0; + cachedSamplerCount_ = 0; + usedBindGroup_ = false; +} +void RenderCommandEncoder::bindUniform(const UniformDesc& /*uniformDesc*/, const void* /*data*/) {} + +void RenderCommandEncoder::draw(size_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t baseInstance) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::draw called on closed command list\n"); + return; + } + // G-001: Flush any pending barriers before draw call + flushBarriers(); + + // Apply all resource bindings (textures, samplers, buffers) before draw. 
+ // Even when bindBindGroup() is used for CBV tables, textures/samplers may still be + // managed by D3D12ResourcesBinder, so always update bindings here. + { + Result bindResult; + if (!resourcesBinder_.updateBindings(currentRenderPipelineState_, &bindResult)) { + IGL_LOG_ERROR("draw: Failed to update resource bindings: %s\n", bindResult.message.c_str()); + return; + } + } + + // D3D12 requires ALL root parameters to be bound before drawing + // Hybrid render root signature layout (see Device::createRenderPipeline): + // - Root parameter 0: Root 32-bit constants for b2 (push constants) + // - Root parameter 1: Root CBV for b0 (legacy bindBuffer) + // - Root parameter 2: Root CBV for b1 (legacy bindBuffer) + // - Root parameter 3: CBV descriptor table for b3-b15 (bindBindGroup buffer table) + // - Root parameter 4: SRV descriptor table for t0-tN + // - Root parameter 5: Sampler descriptor table for s0-tN + // - Root parameter 6: UAV descriptor table for u0-uN (storage buffers) + + // Bind descriptor tables using dynamic root parameter indices from pipeline reflection + // The indices are computed based on which resources the shader actually uses + + // Apply vertex buffers. If the bound pipeline has no vertex input state + // (no attributes/bindings), skip IASetVertexBuffers entirely so that + // fullscreen / skybox style passes using SV_VertexID do not trigger + // validation errors when a previous pass left a vertex buffer bound. 
+ bool pipelineHasVertexInput = (currentVertexStride_ != 0); + if (!pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (vertexStrides_[i] != 0) { + pipelineHasVertexInput = true; + break; + } + } + } + + if (pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (!cachedVertexBuffers_[i].bound) { + continue; + } + UINT stride = vertexStrides_[i]; + if (stride == 0) { + if (currentVertexStride_ == 0) { + IGL_LOG_INFO_ONCE( + "Vertex buffer bound to slot %u but pipeline reports no stride; " + "using conservative fallback stride of 32 bytes\n", + i); + stride = 32; + } else { + stride = currentVertexStride_; + } + } + D3D12_VERTEX_BUFFER_VIEW vbView = {}; + vbView.BufferLocation = cachedVertexBuffers_[i].bufferLocation; + vbView.SizeInBytes = cachedVertexBuffers_[i].sizeInBytes; + vbView.StrideInBytes = stride; + IGL_D3D12_LOG_VERBOSE( + "draw: VB[%u] = GPU 0x%llx, size=%u, stride=%u\n", + i, + vbView.BufferLocation, + vbView.SizeInBytes, + vbView.StrideInBytes); + commandList_->IASetVertexBuffers(i, 1, &vbView); + } + } else { + // No vertex input expected for this pipeline; skip IASetVertexBuffers + // even if a previous pass bound a vertex buffer. 
+ IGL_D3D12_LOG_VERBOSE( + "draw: Pipeline has no vertex input layout; skipping IASetVertexBuffers for this draw\n"); + } + + commandBuffer_.incrementDrawCount(); + + IGL_D3D12_LOG_VERBOSE("draw: DrawInstanced(vertexCount=%zu, instanceCount=%u, firstVertex=%u, baseInstance=%u)\n", vertexCount, instanceCount, firstVertex, baseInstance); + commandList_->DrawInstanced(static_cast(vertexCount), + instanceCount, + firstVertex, + baseInstance); +} + +void RenderCommandEncoder::drawIndexed(size_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t baseInstance) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::drawIndexed called on closed command list\n"); + return; + } + // G-001: Flush any pending barriers before draw call + flushBarriers(); + + // Apply all resource bindings (textures, samplers, buffers) before draw. + // Even when bindBindGroup() is used for CBV tables, textures/samplers may still be + // managed by D3D12ResourcesBinder, so always update bindings here. 
+ { + Result bindResult; + if (!resourcesBinder_.updateBindings(currentRenderPipelineState_, &bindResult)) { + IGL_LOG_ERROR("drawIndexed: Failed to update resource bindings: %s\n", bindResult.message.c_str()); + return; + } + } + + // D3D12 requires ALL root parameters to be bound before drawing + // Hybrid render root signature layout (see Device::createRenderPipeline): + // - Root parameter 0: Root 32-bit constants for b2 (push constants) + // - Root parameter 1: Root CBV for b0 (legacy bindBuffer) + // - Root parameter 2: Root CBV for b1 (legacy bindBuffer) + // - Root parameter 3: CBV descriptor table for b3-b15 (bindBindGroup buffer table) + // - Root parameter 4: SRV descriptor table for t0-tN + // - Root parameter 5: Sampler descriptor table for s0-tN + // - Root parameter 6: UAV descriptor table for u0-uN (storage buffers) + + // Bind descriptor tables using dynamic root parameter indices from pipeline reflection + + // Descriptor tables (CBV/SRV/Sampler/UAV) are bound by D3D12ResourcesBinder::updateBindings() + // based on the current pipeline's reflection. No additional descriptor table binding is + // required here. + + // Apply cached vertex buffer bindings now that pipeline state is bound. + // If the current pipeline has no vertex input layout (no attributes or + // bindings), skip IASetVertexBuffers so that draws using SV_VertexID do + // not rely on stale vertex buffer state from previous passes. 
+ bool pipelineHasVertexInput = (currentVertexStride_ != 0); + if (!pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (vertexStrides_[i] != 0) { + pipelineHasVertexInput = true; + break; + } + } + } + + if (pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (!cachedVertexBuffers_[i].bound) { + continue; + } + UINT stride = vertexStrides_[i]; + if (stride == 0) { + if (currentVertexStride_ == 0) { + IGL_LOG_INFO_ONCE( + "Vertex buffer bound to slot %u but pipeline reports no stride; " + "using conservative fallback stride of 32 bytes\n", + i); + stride = 32; + } else { + stride = currentVertexStride_; + } + } + D3D12_VERTEX_BUFFER_VIEW vbView = {}; + vbView.BufferLocation = cachedVertexBuffers_[i].bufferLocation; + vbView.SizeInBytes = cachedVertexBuffers_[i].sizeInBytes; + vbView.StrideInBytes = stride; + commandList_->IASetVertexBuffers(i, 1, &vbView); + } + } else { + IGL_D3D12_LOG_VERBOSE( + "drawIndexed: Pipeline has no vertex input layout; skipping IASetVertexBuffers for this draw\n"); + } + + // Apply cached index buffer binding + if (cachedIndexBuffer_.bound) { + D3D12_INDEX_BUFFER_VIEW ibView = {}; + ibView.BufferLocation = cachedIndexBuffer_.bufferLocation; + ibView.SizeInBytes = cachedIndexBuffer_.sizeInBytes; + ibView.Format = cachedIndexBuffer_.format; + commandList_->IASetIndexBuffer(&ibView); + } + + // Track per-command-buffer draw count; CommandQueue aggregates into device on submit + commandBuffer_.incrementDrawCount(); + + commandList_->DrawIndexedInstanced(static_cast(indexCount), + instanceCount, + firstIndex, + vertexOffset, + baseInstance); + +#if IGL_DEBUG + static const bool kLogDrawErrors = []() { + const char* env = std::getenv("IGL_D3D12_LOG_DRAW_ERRORS"); + return env && (env[0] == '1'); + }(); + if (kLogDrawErrors) { + auto* device = commandBuffer_.getContext().getDevice(); + if (device) { + igl::d3d12::ComPtr infoQueue; + if 
(SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + const UINT64 messageCount = infoQueue->GetNumStoredMessages(); + for (UINT64 i = 0; i < messageCount; ++i) { + SIZE_T length = 0; + if (FAILED(infoQueue->GetMessage(i, nullptr, &length)) || length == 0) { + continue; + } + auto* message = static_cast(malloc(length)); + if (message && SUCCEEDED(infoQueue->GetMessage(i, message, &length))) { + IGL_LOG_ERROR("[D3D12 Debug] %s\n", message->pDescription ? message->pDescription : ""); + } + free(message); + } + infoQueue->ClearStoredMessages(); + } + } + } +#endif +} + +void RenderCommandEncoder::drawMeshTasks(const Dimensions& /*threadgroupsPerGrid*/, + const Dimensions& /*threadsPerTaskThreadgroup*/, + const Dimensions& /*threadsPerMeshThreadgroup*/) { + IGL_LOG_ERROR("RenderCommandEncoder::drawMeshTasks is not implemented on D3D12\n"); +} +void RenderCommandEncoder::multiDrawIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: command list is closed or null\n"); + return; + } + + // Get D3D12 buffer resource + auto* d3dBuffer = static_cast(&indirectBuffer); + if (!d3dBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: indirectBuffer is null\n"); + return; + } + + ID3D12Resource* argBuffer = d3dBuffer->getResource(); + if (!argBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: argBuffer resource is null\n"); + return; + } + + // Get command signature from D3D12Context + auto& ctx = commandBuffer_.getContext(); + ID3D12CommandSignature* signature = ctx.getDrawIndirectSignature(); + if (!signature) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: command signature is null\n"); + return; + } + + // Use default stride if not provided (sizeof D3D12_DRAW_ARGUMENTS = 16 bytes) + const UINT actualStride = stride ? 
stride : sizeof(D3D12_DRAW_ARGUMENTS); + + // ExecuteIndirect for multi-draw + // Parameters: signature, maxCommandCount, argumentBuffer, argumentBufferOffset, countBuffer, countBufferOffset + commandList_->ExecuteIndirect( + signature, + drawCount, + argBuffer, + static_cast(indirectBufferOffset), + nullptr, // No count buffer (exact draw count specified) + 0); + + // Track draw call count + commandBuffer_.incrementDrawCount(drawCount); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::multiDrawIndirect: Executed %u indirect draws (stride: %u)\n", + drawCount, actualStride); +} +void RenderCommandEncoder::multiDrawIndexedIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: command list is closed or null\n"); + return; + } + + // Get D3D12 buffer resource + auto* d3dBuffer = static_cast(&indirectBuffer); + if (!d3dBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: indirectBuffer is null\n"); + return; + } + + ID3D12Resource* argBuffer = d3dBuffer->getResource(); + if (!argBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: argBuffer resource is null\n"); + return; + } + + // Get command signature from D3D12Context + auto& ctx = commandBuffer_.getContext(); + ID3D12CommandSignature* signature = ctx.getDrawIndexedIndirectSignature(); + if (!signature) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: command signature is null\n"); + return; + } + + // Use default stride if not provided (sizeof D3D12_DRAW_INDEXED_ARGUMENTS = 20 bytes) + const UINT actualStride = stride ? 
stride : sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); + + // ExecuteIndirect for multi-draw indexed + // Parameters: signature, maxCommandCount, argumentBuffer, argumentBufferOffset, countBuffer, countBufferOffset + commandList_->ExecuteIndirect( + signature, + drawCount, + argBuffer, + static_cast(indirectBufferOffset), + nullptr, // No count buffer (exact draw count specified) + 0); + + // Track draw call count + commandBuffer_.incrementDrawCount(drawCount); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::multiDrawIndexedIndirect: Executed %u indirect indexed draws (stride: %u)\n", + drawCount, actualStride); +} + +void RenderCommandEncoder::setStencilReferenceValue(uint32_t value) { + if (!commandBuffer_.isRecording() || !commandList_) { + return; + } + // Set stencil reference value for stencil testing + commandList_->OMSetStencilRef(value); + IGL_D3D12_LOG_VERBOSE("setStencilReferenceValue: Set stencil ref to %u\n", value); +} + +void RenderCommandEncoder::setBlendColor(const Color& color) { + if (!commandBuffer_.isRecording() || !commandList_) { + return; + } + // Set blend factor constants for BlendFactor::BlendColor operations + // D3D12 uses RGBA float array, matching IGL Color structure + const float blendFactor[4] = {color.r, color.g, color.b, color.a}; + commandList_->OMSetBlendFactor(blendFactor); + IGL_D3D12_LOG_VERBOSE("setBlendColor: Set blend factor to (%.2f, %.2f, %.2f, %.2f)\n", + color.r, color.g, color.b, color.a); +} + +void RenderCommandEncoder::setDepthBias(float /*depthBias*/, float /*slopeScale*/, float /*clamp*/) { + // Note: Depth bias is configured in the pipeline state (RasterizerState) + // D3D12 does not support dynamic depth bias changes during rendering + // This would require rebuilding the PSO with different depth bias values +} + +void RenderCommandEncoder::pushDebugGroupLabel(const char* label, const Color& /*color*/) const { + if (!commandBuffer_.isRecording() || !commandList_ || !label) { + return; + } + const size_t len = 
strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList_->BeginEvent( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void RenderCommandEncoder::insertDebugEventLabel(const char* label, const Color& /*color*/) const { + if (!commandBuffer_.isRecording() || !commandList_ || !label) { + return; + } + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList_->SetMarker( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void RenderCommandEncoder::popDebugGroupLabel() const { + if (!commandBuffer_.isRecording() || !commandList_) { + return; + } + commandList_->EndEvent(); +} + +void RenderCommandEncoder::bindBuffer(uint32_t index, + uint8_t /*target*/, + IBuffer* buffer, + size_t bufferOffset, + size_t bufferSize) { + // D3D12 does not differentiate shader stages for buffer binding in this path. + bindBuffer(index, buffer, bufferOffset, bufferSize); +} + +void RenderCommandEncoder::bindBuffer(uint32_t index, + IBuffer* buffer, + size_t offset, + size_t bufferSize) { + IGL_D3D12_LOG_VERBOSE("bindBuffer START: index=%u\n", index); + if (!buffer) { + IGL_D3D12_LOG_VERBOSE("bindBuffer: null buffer, returning\n"); + return; + } + + auto* d3dBuffer = static_cast(buffer); + + // Check if this is a storage buffer - needs SRV binding for shader reads + const bool isStorageBuffer = (d3dBuffer->getBufferType() & BufferDesc::BufferTypeBits::Storage) != 0; + + if (isStorageBuffer) { + // Storage buffer - create SRV for ByteAddressBuffer reads in pixel shader + IGL_D3D12_LOG_VERBOSE("bindBuffer: Storage buffer detected at index %u - creating SRV for pixel shader read\n", index); + + // For raw (ByteAddressBuffer) SRVs we treat the buffer as a sequence of 4-byte units. + // This matches HLSL ByteAddressBuffer / RWByteAddressBuffer semantics. 
+ if ((offset & 3) != 0) { + IGL_LOG_ERROR("bindBuffer: Storage buffer offset %zu is not 4-byte aligned (required for DXGI_FORMAT_R32_TYPELESS). " + "Raw buffer SRV FirstElement will be rounded down, which may cause incorrect data access.\n", offset); + // Continue but log warning - FirstElement below uses integer division + } + + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + if (!device || cbvSrvUavHeap_ == nullptr) { + IGL_LOG_ERROR("bindBuffer: Missing device or per-frame CBV/SRV/UAV heap\n"); + return; + } + + // Allocate descriptor slot from command buffer's shared counter + // Uses Result-based allocation with dynamic heap growth. + uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBuffer: Failed to allocate descriptor: %s\n", allocResult.message.c_str()); + return; + } + IGL_D3D12_LOG_VERBOSE("bindBuffer: Allocated SRV descriptor slot %u for buffer at t%u\n", descriptorIndex, index); + + // Create SRV descriptor for ByteAddressBuffer (raw view) + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; // Raw buffer (ByteAddressBuffer) + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + // FirstElement/NumElements expressed in 32-bit units (4 bytes) + srvDesc.Buffer.FirstElement = static_cast(offset) / 4; // Offset in 32-bit elements + // NumElements must be (totalSize - offset) to avoid exceeding buffer bounds + srvDesc.Buffer.NumElements = + static_cast((buffer->getSizeInBytes() - offset) / 4); // Size in 32-bit elements + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; // Raw buffer access + + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + // Pre-creation 
validation. + IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(d3dBuffer->getResource() != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "SRV descriptor handle is invalid"); + + device->CreateShaderResourceView(d3dBuffer->getResource(), &srvDesc, cpuHandle); + + IGL_D3D12_LOG_VERBOSE("bindBuffer: Created SRV at descriptor slot %u (FirstElement=%llu, NumElements=%u)\n", + descriptorIndex, srvDesc.Buffer.FirstElement, srvDesc.Buffer.NumElements); + + // Cache GPU handle for descriptor table binding in draw calls. + // SRVs are bound to root parameter 4 (render root signature SRV table). + cachedTextureGpuHandles_[index] = gpuHandle; + cachedTextureCount_ = std::max(cachedTextureCount_, static_cast(index + 1)); + + // For pipelines that declare SRVs but do not use the generic texture binding path + // (e.g., ComputeSession visualization using ByteAddressBuffer at t0), bind the SRV + // descriptor table directly to the SRV root parameter. This does not conflict with + // D3D12ResourcesBinder because updateTextureBindings() is a no-op when no textures + // are bound via bindTexture(). 
+ if (currentRenderPipelineState_ && commandList_) { + const UINT srvTableIndex = + currentRenderPipelineState_->getSRVTableRootParameterIndex(); + if (srvTableIndex != UINT_MAX) { + commandList_->SetGraphicsRootDescriptorTable(srvTableIndex, gpuHandle); + IGL_D3D12_LOG_VERBOSE( + "bindBuffer: Bound storage buffer SRV at slot %u (t%u) to SRV table root param %u " + "(GPU handle 0x%llx)\n", + index, + index, + srvTableIndex, + gpuHandle.ptr); + } + } + + IGL_D3D12_LOG_VERBOSE("bindBuffer: Storage buffer SRV binding complete\n"); + + // CRITICAL: Track the Buffer OBJECT (not just resource) to keep it alive until GPU finishes + // This prevents the Buffer destructor from releasing the resource while GPU commands reference it + // Use weak_from_this().lock() instead of shared_from_this() to avoid exception + std::shared_ptr sharedBuffer = d3dBuffer->weak_from_this().lock(); + if (sharedBuffer) { + static_cast(commandBuffer_).trackTransientBuffer(std::move(sharedBuffer)); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Tracking Buffer object (shared_ptr) for lifetime management\n"); + } else { + // Buffer not managed by shared_ptr (e.g., persistent buffer from member variable) + // Fall back to tracking just the resource (AddRef on ID3D12Resource) + static_cast(commandBuffer_).trackTransientResource(d3dBuffer->getResource()); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Buffer not shared_ptr-managed, tracking resource only\n"); + } + } else { + // Constant buffer (CBV) - delegate to resourcesBinder for reflection-based binding + IGL_D3D12_LOG_VERBOSE("bindBuffer: Constant buffer at index %u - delegating to resourcesBinder\n", index); + + // D3D12 requires constant buffer addresses to be 256-byte aligned + if ((offset & 255) != 0) { + IGL_LOG_ERROR("bindBuffer: ERROR - CBV offset %zu is not 256-byte aligned (required by D3D12). " + "Constant buffers must be created at aligned offsets. 
Ignoring bind request.\n", offset); + return; + } + + // CRITICAL: Track the Buffer OBJECT (not just resource) to keep it alive until GPU finishes + std::shared_ptr sharedBuffer = d3dBuffer->weak_from_this().lock(); + if (sharedBuffer) { + static_cast(commandBuffer_).trackTransientBuffer(std::move(sharedBuffer)); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Tracking Buffer object (shared_ptr) for lifetime management\n"); + } else { + static_cast(commandBuffer_).trackTransientResource(d3dBuffer->getResource()); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Buffer not shared_ptr-managed, tracking resource only\n"); + } + + // Use bufferSize if provided; otherwise, bind the remaining bytes from offset. + // This matches the cross-backend contract: bufferSize == 0 means "remaining size". + const size_t fullSize = buffer->getSizeInBytes(); + const size_t size = (bufferSize != 0 && bufferSize <= fullSize) + ? bufferSize + : (offset < fullSize ? (fullSize - offset) : 0); + + // Delegate to resourcesBinder which caches the binding and marks dirty flag + // The actual binding will happen in resourcesBinder_.updateBindings() + resourcesBinder_.bindBuffer(index, buffer, offset, size, false, 0); + } + + IGL_D3D12_LOG_VERBOSE("bindBuffer END\n"); +} +void RenderCommandEncoder::bindBindGroup(BindGroupTextureHandle handle) { + IGL_D3D12_LOG_VERBOSE("bindBindGroup(texture): handle valid=%d\n", !handle.empty()); + + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("bindBindGroup(texture): command list is closed or null\n"); + return; + } + + // Get the bind group descriptor from the device + auto& device = commandBuffer_.getDevice(); + const auto* desc = device.getBindGroupTextureDesc(handle); + if (!desc) { + IGL_LOG_ERROR("bindBindGroup(texture): Invalid handle or descriptor not found\n"); + return; + } + + // Delegate actual descriptor allocation and binding to D3D12ResourcesBinder + // so that bindBindGroup(texture) behaves like a grouped bindTexture/bindSamplerState. 
+ for (uint32_t i = 0; i < IGL_TEXTURE_SAMPLERS_MAX; ++i) { + if (desc->textures[i]) { + resourcesBinder_.bindTexture(i, desc->textures[i].get()); + } + } + for (uint32_t i = 0; i < IGL_TEXTURE_SAMPLERS_MAX; ++i) { + if (desc->samplers[i]) { + resourcesBinder_.bindSamplerState(i, desc->samplers[i].get()); + } + } +} + +void RenderCommandEncoder::bindBindGroup(BindGroupBufferHandle handle, + uint32_t numDynamicOffsets, + const uint32_t* dynamicOffsets) { + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): handle valid=%d, dynCount=%u\n", !handle.empty(), numDynamicOffsets); + + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("bindBindGroup(buffer): command list is closed or null\n"); + return; + } + + auto& device = commandBuffer_.getDevice(); + const auto* desc = device.getBindGroupBufferDesc(handle); + if (!desc) { + IGL_LOG_ERROR("bindBindGroup(buffer): Invalid handle or descriptor not found\n"); + return; + } + + // NEW PATH: + // Delegate all buffer bindings to D3D12ResourcesBinder so that CBVs/SRVs/UAVs are created and + // bound through a single, reflection-aware path. BindGroup slots map directly to shader + // registers (bN / tN / uN), just like Vulkan bindings. + { + uint32_t dynIdx = 0; + for (uint32_t slot = 0; slot < IGL_UNIFORM_BLOCKS_BINDING_MAX; ++slot) { + auto& bufferHandle = desc->buffers[slot]; + size_t baseOffset = desc->offset[slot]; + size_t size = desc->size[slot]; + + if ((desc->isDynamicBufferMask & (1u << slot)) != 0) { + if (dynIdx < numDynamicOffsets && dynamicOffsets) { + baseOffset = dynamicOffsets[dynIdx++]; + } + } + + if (!bufferHandle) { + // Unbind any previous buffer/UAV at this slot. 
+ resourcesBinder_.bindBuffer(slot, nullptr, 0, 0, false, 0); + continue; + } + + auto* buf = static_cast(bufferHandle.get()); + const bool isUniform = + (buf->getBufferType() & BufferDesc::BufferTypeBits::Uniform) != 0; + const bool isStorage = + (buf->getBufferType() & BufferDesc::BufferTypeBits::Storage) != 0; + + // Track resource so its lifetime is tied to the command buffer. + commandBuffer_.trackTransientResource(buf->getResource()); + + if (isUniform) { + // For CBVs, size == 0 means "remaining bytes from offset". Respect explicit sizes when provided. + resourcesBinder_.bindBuffer(slot, buf, baseOffset, size, false, 0); + } else if (isStorage) { + // Storage buffer: delegate to UAV/SRV binding path in D3D12ResourcesBinder. + // Use the buffer's storage element stride when available; default to 4 bytes. + size_t elementStride = buf->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + resourcesBinder_.bindBuffer(slot, buf, baseOffset, size, true, elementStride); + } else { + IGL_LOG_ERROR( + "bindBindGroup(buffer): Buffer at slot %u is neither Uniform nor Storage\n", slot); + } + } + usedBindGroup_ = true; + return; + } + + auto* cmd = commandList_; + if (!cmd) { + IGL_LOG_ERROR("bindBindGroup(buffer): null command list\n"); + return; + } + + // CRITICAL: D3D12 descriptor tables MUST be contiguous in the descriptor heap. + // SetGraphicsRootDescriptorTable passes a GPU handle to the START of a contiguous block. + // D3D12 accesses descriptors using: baseHandle + tableOffset. 
+ // + // Example: If BindGroupBufferDesc has buffers at slots 3-6: + // - We need descriptors at heap indices [base+0] through [base+6] + // - Slots 0-2 get NULL CBVs, slots 3-6 get real CBVs + // - SetGraphicsRootDescriptorTable receives handle to heap[base+0] + // - Shader accessing b3 reads from heap[base+3] + // + // First pass: Determine highest slot index to calculate total descriptor count + uint32_t maxSlotUsed = 0; + for (uint32_t slot = 0; slot < IGL_UNIFORM_BLOCKS_BINDING_MAX; ++slot) { + if (desc->buffers[slot]) { + auto* buf = static_cast(desc->buffers[slot].get()); + const bool isUniform = (buf->getBufferType() & BufferDesc::BufferTypeBits::Uniform) != 0; + if (isUniform) { + maxSlotUsed = slot; + } + } + } + + if (maxSlotUsed == 0 && !desc->buffers[0]) { + // No uniform buffers to bind + return; + } + + cbvTableCount_ = maxSlotUsed + 1; + + // Allocate a CONTIGUOUS block of descriptors for the entire descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(static_cast(cbvTableCount_), &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBindGroup(buffer): Failed to allocate contiguous CBV descriptor range (%zu descriptors): %s\n", + cbvTableCount_, allocResult.message.c_str()); + return; + } + + auto& context = commandBuffer_.getContext(); + auto* d3d12Device = context.getDevice(); + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Allocated contiguous CBV descriptor block: base=%u, count=%zu\n", + baseDescriptorIndex, cbvTableCount_); + + // Second pass: Create CBV descriptors in the contiguous block + uint32_t dynIdx = 0; + for (uint32_t slot = 0; slot < cbvTableCount_; ++slot) { + // Calculate descriptor index within the contiguous block + uint32_t descriptorIndex = baseDescriptorIndex + slot; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + if (!desc->buffers[slot]) { + // Create NULL CBV for empty slots + 
D3D12_CONSTANT_BUFFER_VIEW_DESC nullCbvDesc = {}; + nullCbvDesc.BufferLocation = 0; + nullCbvDesc.SizeInBytes = 256; // Minimum CBV alignment + d3d12Device->CreateConstantBufferView(&nullCbvDesc, cpuHandle); + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Created NULL CBV at heap[%u] for empty slot %u\n", + descriptorIndex, slot); + continue; + } + + auto* buf = static_cast(desc->buffers[slot].get()); + const bool isUniform = (buf->getBufferType() & BufferDesc::BufferTypeBits::Uniform) != 0; + const bool isStorage = (buf->getBufferType() & BufferDesc::BufferTypeBits::Storage) != 0; + + // Track buffer resource to prevent it from being deleted while GPU address is cached + commandBuffer_.trackTransientResource(buf->getResource()); + + size_t baseOffset = desc->offset[slot]; + if ((desc->isDynamicBufferMask & (1u << slot)) != 0) { + if (dynIdx < numDynamicOffsets && dynamicOffsets) { + baseOffset = dynamicOffsets[dynIdx++]; + } + } + + if (isUniform) { + // 256B alignment required for CBVs + const size_t aligned = (baseOffset + 255) & ~size_t(255); + D3D12_GPU_VIRTUAL_ADDRESS addr = buf->gpuAddress(aligned); + + if (slot < IGL_BUFFER_BINDINGS_MAX) { + // Respect requested buffer size and enforce the 64 KB limit. + // If size[slot] is 0, use remaining buffer size from offset + size_t requestedSize = desc->size[slot]; + if (requestedSize == 0) { + requestedSize = buf->getSizeInBytes() - aligned; + } + + // D3D12 spec: Constant buffers must be ≤ 64 KB + constexpr size_t kMaxCBVSize = 65536; // 64 KB + if (requestedSize > kMaxCBVSize) { + IGL_LOG_ERROR("bindBindGroup(buffer): Constant buffer size (%zu bytes) exceeds D3D12 64 KB limit at slot %u\n", + requestedSize, slot); + continue; // Skip this binding + } + + // Create CBV descriptor in the contiguous block + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = addr; + cbvDesc.SizeInBytes = static_cast((requestedSize + 255) & ~255); // Must be 256-byte aligned + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(d3d12Device != nullptr, "Device is null before CreateConstantBufferView"); + IGL_DEBUG_ASSERT(addr != 0, "Buffer GPU address is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "CBV descriptor handle is invalid"); + IGL_DEBUG_ASSERT(cbvDesc.SizeInBytes <= kMaxCBVSize, "CBV size exceeds 64 KB after alignment"); + + d3d12Device->CreateConstantBufferView(&cbvDesc, cpuHandle); + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Created CBV at heap[%u] for slot %u (addr=0x%llx, size=%u)\n", + descriptorIndex, slot, addr, cbvDesc.SizeInBytes); + } else { + IGL_LOG_ERROR("bindBindGroup(buffer): BindGroupBufferDesc slot %u exceeds maximum (%u)\n", slot, IGL_BUFFER_BINDINGS_MAX); + } + } else if (isStorage) { + // Implement storage buffer binding via UAV/SRV descriptors. + auto& storageContext = commandBuffer_.getContext(); + auto* d3dDevice = storageContext.getDevice(); + ID3D12Resource* resource = buf->getResource(); + + // Determine if buffer is read-write (UAV) or read-only (SRV) + // D3D12 storage buffers with UAV flag are read-write by default + // Private/Shared storage indicates read-write access, Managed indicates read-only + const bool isReadWrite = (buf->storage() == ResourceStorage::Private || + buf->storage() == ResourceStorage::Shared); + + if (isReadWrite) { + // Create UAV for read-write storage buffer + // Uses Result-based allocation with dynamic heap growth. + uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBindGroup(buffer): Failed to allocate UAV descriptor: %s\n", allocResult.message.c_str()); + continue; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor for structured buffer + // Use the storage stride from BufferDesc when available; default to 4 bytes otherwise. 
+ size_t elementStride = buf->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + + // Validate baseOffset doesn't exceed buffer size + const size_t bufferSizeBytes = buf->getSizeInBytes(); + if (baseOffset > bufferSizeBytes) { + IGL_LOG_ERROR("bindBindGroup(buffer): baseOffset %zu exceeds buffer size %zu; skipping UAV binding\n", + baseOffset, bufferSizeBytes); + continue; + } + + if (baseOffset % elementStride != 0) { + IGL_LOG_ERROR("bindBindGroup(buffer): Storage buffer baseOffset %zu is not aligned to " + "element stride (%zu bytes). UAV FirstElement will be truncated (offset/stride).\n", + baseOffset, elementStride); + } + + const size_t remaining = bufferSizeBytes - baseOffset; + + // Check for undersized buffer (would create empty or partial view) + if (remaining < elementStride) { + IGL_LOG_ERROR("bindBindGroup(buffer): Remaining buffer size %zu is less than element stride %zu; " + "UAV will have NumElements=0 (empty view). Check buffer size and offset.\n", + remaining, elementStride); + // Continue to create the descriptor, but it will be empty (NumElements=0) + } + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = static_cast(baseOffset / elementStride); + // CRITICAL: NumElements must be (size - offset) / stride, not total size / stride + uavDesc.Buffer.NumElements = static_cast(remaining / elementStride); + uavDesc.Buffer.StructureByteStride = static_cast(elementStride); + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(d3dDevice != nullptr, "Device is null before CreateUnorderedAccessView"); + IGL_DEBUG_ASSERT(resource != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "UAV descriptor handle is invalid"); + + d3dDevice->CreateUnorderedAccessView(resource, nullptr, &uavDesc, cpuHandle); + + // Bind UAV descriptor table using dynamic root parameter index from pipeline + const UINT uavTableIndex = currentRenderPipelineState_->getUAVTableRootParameterIndex(); + if (uavTableIndex != UINT_MAX) { + commandList_->SetGraphicsRootDescriptorTable(uavTableIndex, gpuHandle); + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): bound read-write storage buffer at slot %u (UAV u%u, root param %u, GPU handle 0x%llx)\n", + slot, slot, uavTableIndex, gpuHandle.ptr); + } else { + IGL_LOG_ERROR("bindBindGroup(buffer): Pipeline has no UAV table root parameter for storage buffer binding\n"); + } + } else { + // Create SRV for read-only storage buffer + // Uses Result-based allocation with dynamic heap growth. 
+ uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBindGroup(buffer): Failed to allocate SRV descriptor: %s\n", allocResult.message.c_str()); + continue; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create SRV descriptor for structured buffer + size_t elementStride = buf->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + + // Validate baseOffset doesn't exceed buffer size + const size_t bufferSizeBytes = buf->getSizeInBytes(); + if (baseOffset > bufferSizeBytes) { + IGL_LOG_ERROR("bindBindGroup(buffer): baseOffset %zu exceeds buffer size %zu; skipping SRV binding\n", + baseOffset, bufferSizeBytes); + continue; + } + + if (baseOffset % elementStride != 0) { + IGL_LOG_ERROR("bindBindGroup(buffer): Storage buffer baseOffset %zu is not aligned to " + "element stride (%zu bytes). SRV FirstElement will be truncated (offset/stride).\n", + baseOffset, elementStride); + } + + const size_t remaining = bufferSizeBytes - baseOffset; + + // Check for undersized buffer (would create empty or partial view) + if (remaining < elementStride) { + IGL_LOG_ERROR("bindBindGroup(buffer): Remaining buffer size %zu is less than element stride %zu; " + "SRV will have NumElements=0 (empty view). 
Check buffer size and offset.\n", + remaining, elementStride); + // Continue to create the descriptor, but it will be empty (NumElements=0) + } + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Buffer.FirstElement = static_cast(baseOffset / elementStride); + // CRITICAL: NumElements must be (size - offset) / stride, not total size / stride + srvDesc.Buffer.NumElements = static_cast(remaining / elementStride); + srvDesc.Buffer.StructureByteStride = static_cast(elementStride); + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + // Pre-creation validation. + IGL_DEBUG_ASSERT(d3dDevice != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(resource != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "SRV descriptor handle is invalid"); + + d3dDevice->CreateShaderResourceView(resource, &srvDesc, cpuHandle); + + // Bind SRV descriptor table (graphics root parameter 4: SRV table) + // Note: This shares the texture SRV table; storage buffers and textures will be bound together. + // PRECEDENCE: Storage buffer SRVs bound here will override any previous texture SRVs bound via + // Bind SRV descriptor table using dynamic root parameter index from pipeline + // This may rebind the SRV table that was previously set by bindBindGroup(texture). The last + // SetGraphicsRootDescriptorTable call wins - storage buffer bindings take precedence. 
+ const UINT srvTableIndex = currentRenderPipelineState_->getSRVTableRootParameterIndex(); + if (srvTableIndex != UINT_MAX) { + commandList_->SetGraphicsRootDescriptorTable(srvTableIndex, gpuHandle); + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): bound read-only storage buffer at slot %u (SRV t%u, root param %u, GPU handle 0x%llx)\n", + slot, slot, srvTableIndex, gpuHandle.ptr); + } else { + IGL_LOG_ERROR("bindBindGroup(buffer): Pipeline has no SRV table root parameter for storage buffer binding\n"); + } + } + } + } + + // Store the GPU handle of the FIRST descriptor in the contiguous block. + // SetGraphicsRootDescriptorTable will use this handle, and D3D12 will access + // subsequent descriptors using: baseHandle + tableOffset. + D3D12_GPU_DESCRIPTOR_HANDLE baseGpuHandle = context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + cachedCbvTableGpuHandles_[0] = baseGpuHandle; + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Stored base GPU handle 0x%llx for CBV table (spans heap[%u] to heap[%u])\n", + baseGpuHandle.ptr, baseDescriptorIndex, baseDescriptorIndex + cbvTableCount_ - 1); + + // Mark that bindBindGroup was used (vs storage buffer SRV or binder paths). 
+ usedBindGroup_ = true; +} + +// G-001: Barrier batching implementation +void RenderCommandEncoder::flushBarriers() { + if (pendingBarriers_.empty()) { + return; + } + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::flushBarriers called on closed command list; clearing pending barriers\n"); + pendingBarriers_.clear(); + return; + } + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Flushing %zu batched resource barriers\n", + pendingBarriers_.size()); + + // Submit all pending barriers in a single API call + commandList_->ResourceBarrier(static_cast(pendingBarriers_.size()), + pendingBarriers_.data()); + + // Clear the pending barrier queue + pendingBarriers_.clear(); +} + +void RenderCommandEncoder::queueBarrier(const D3D12_RESOURCE_BARRIER& barrier) { + pendingBarriers_.push_back(barrier); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Queued barrier (total pending: %zu)\n", + pendingBarriers_.size()); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/RenderCommandEncoder.h b/src/igl/d3d12/RenderCommandEncoder.h new file mode 100644 index 0000000000..3965098dea --- /dev/null +++ b/src/igl/d3d12/RenderCommandEncoder.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class CommandBuffer; + +/** + * @brief D3D12 implementation of render command encoder + * + * IMPORTANT BINDING PRECEDENCE NOTES: + * ==================================== + * This encoder supports multiple ways to bind shader resources (textures, buffers, samplers). + * Some binding methods share the same D3D12 root parameters, which means the LAST binding wins: + * + * 1. 
SRV Table (Root Parameter 4): + * - Textures bound via bindTexture() or D3D12ResourcesBinder + * - Storage buffers (read-only) bound via bindBindGroup(BindGroupBufferHandle) + * - If you bind BOTH textures and storage buffer SRVs, the last binding before draw() wins + * - Application code must coordinate which binding method to use per draw call + * + * 2. Sampler Table (Root Parameter 5): + * - Samplers bound via bindSamplerState() or D3D12ResourcesBinder + * + * 3. CBV Table (Root Parameter 3): + * - Constant buffers b3-b15 bound via bindBindGroup(BindGroupBufferHandle) + * + * See individual binding method documentation for details. + */ +class RenderCommandEncoder final : public IRenderCommandEncoder { + public: + RenderCommandEncoder(CommandBuffer& commandBuffer, + const std::shared_ptr& framebuffer); + ~RenderCommandEncoder() override = default; + + // Initialize encoder and setup render targets + // IMPORTANT: Must be called exactly once after construction by CommandBuffer::createRenderCommandEncoder. + // Calling multiple times will result in resource leaks and undefined behavior. + // Debug builds will assert if called more than once. 
+ void begin(const RenderPassDesc& renderPass); + + void endEncoding() override; + + void bindViewport(const Viewport& viewport) override; + void bindScissorRect(const ScissorRect& rect) override; + void bindRenderPipelineState(const std::shared_ptr& pipelineState) override; + void bindDepthStencilState(const std::shared_ptr& depthStencilState) override; + + void bindVertexBuffer(uint32_t index, IBuffer& buffer, size_t bufferOffset = 0) override; + void bindIndexBuffer(IBuffer& buffer, IndexFormat format, size_t bufferOffset = 0) override; + + void bindBytes(size_t index, uint8_t target, const void* data, size_t length) override; + void bindPushConstants(const void* data, size_t length, size_t offset = 0) override; + void bindSamplerState(size_t index, uint8_t target, ISamplerState* samplerState) override; + void bindTexture(size_t index, uint8_t target, ITexture* texture) override; + void bindTexture(size_t index, ITexture* texture) override; + void bindUniform(const UniformDesc& uniformDesc, const void* data) override; + + void draw(size_t vertexCount, + uint32_t instanceCount = 1, + uint32_t firstVertex = 0, + uint32_t baseInstance = 0) override; + void drawIndexed(size_t indexCount, + uint32_t instanceCount = 1, + uint32_t firstIndex = 0, + int32_t vertexOffset = 0, + uint32_t baseInstance = 0) override; + void drawMeshTasks(const Dimensions& threadgroupsPerGrid, + const Dimensions& threadsPerTaskThreadgroup, + const Dimensions& threadsPerMeshThreadgroup) override; + void multiDrawIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride = 0) override; + void multiDrawIndexedIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride = 0) override; + + void setStencilReferenceValue(uint32_t value) override; + void setBlendColor(const Color& color) override; + void setDepthBias(float depthBias, float slopeScale, float clamp) override; + + // ICommandEncoder interface + 
void pushDebugGroupLabel(const char* label, const Color& color) const override; + void insertDebugEventLabel(const char* label, const Color& color) const override; + void popDebugGroupLabel() const override; + + // Additional IRenderCommandEncoder interface + void bindBuffer(uint32_t index, + uint8_t target, + IBuffer* buffer, + size_t bufferOffset = 0, + size_t bufferSize = 0) override; + void bindBuffer(uint32_t index, IBuffer* buffer, size_t offset, size_t bufferSize) override; + void bindBindGroup(BindGroupTextureHandle handle) override; + void bindBindGroup(BindGroupBufferHandle handle, + uint32_t numDynamicOffsets, + const uint32_t* dynamicOffsets) override; + + private: + CommandBuffer& commandBuffer_; + ID3D12GraphicsCommandList* commandList_; + + // Centralized resource binding management. + D3D12ResourcesBinder resourcesBinder_; + + // Guard against multiple begin() calls. + // begin() allocates RTV/DSV descriptors and sets up state that should only happen once + bool hasBegun_ = false; + + // Cache current vertex stride from bound pipeline's input layout + UINT currentVertexStride_ = 0; + // Optional per-slot strides fetched from pipeline + UINT vertexStrides_[IGL_BUFFER_BINDINGS_MAX] = {}; + + // Offscreen RTV/DSV support + std::shared_ptr framebuffer_; + // If DescriptorHeapManager is available, we borrow indices from its heaps. + // Otherwise, we fall back to small ad-hoc heaps (constructor local scope). 
+ std::vector rtvIndices_; + uint32_t dsvIndex_ = UINT32_MAX; + D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle_{}; + + // Per-frame descriptor heaps (set in constructor from D3D12Context) + // CRITICAL: These MUST be per-frame isolated heaps, NOT shared DescriptorHeapManager heaps + ID3D12DescriptorHeap* cbvSrvUavHeap_ = nullptr; + ID3D12DescriptorHeap* samplerHeap_ = nullptr; + + // Cached descriptor table GPU handles + // These are set by bindTexture/bindSamplerState and used in drawIndexed + // to avoid invalidation by multiple SetDescriptorHeaps calls + // IMPORTANT: Bindings must be DENSE and start at slot 0 for each table. + // SetGraphicsRootDescriptorTable always uses cachedTextureGpuHandles_[0] as the base, + // so binding only higher slots (e.g., slot 1 without slot 0) will fail. + D3D12_GPU_DESCRIPTOR_HANDLE cachedTextureGpuHandle_{}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSamplerGpuHandle_{}; + // Support up to IGL_TEXTURE_SAMPLERS_MAX textures/samplers (t0-t15, s0-s15) + D3D12_GPU_DESCRIPTOR_HANDLE cachedTextureGpuHandles_[IGL_TEXTURE_SAMPLERS_MAX] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSamplerGpuHandles_[IGL_TEXTURE_SAMPLERS_MAX] = {}; + size_t cachedTextureCount_ = 0; + size_t cachedSamplerCount_ = 0; + + // Track whether bindBindGroup was explicitly called (vs storage buffer SRV or binder paths) + // This decouples bindBindGroup usage from cachedTextureCount_/cachedSamplerCount_ + bool usedBindGroup_ = false; + + // Cached vertex buffer bindings + // Store binding info and apply in draw calls after pipeline state is bound + struct CachedVertexBuffer { + D3D12_GPU_VIRTUAL_ADDRESS bufferLocation = 0; + UINT sizeInBytes = 0; + bool bound = false; + }; + CachedVertexBuffer cachedVertexBuffers_[IGL_BUFFER_BINDINGS_MAX] = {}; + + // Cached index buffer binding + struct CachedIndexBuffer { + D3D12_GPU_VIRTUAL_ADDRESS bufferLocation = 0; + UINT sizeInBytes = 0; + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + bool bound = false; + }; + CachedIndexBuffer 
cachedIndexBuffer_ = {}; + + // Track which constant buffer root parameters have been bound + // D3D12 requires all root parameters to be set before drawing + // Root parameter 1 = b0 (UniformsPerFrame) - root descriptor + // Root parameter 2 = b1 (UniformsPerObject) - root descriptor + D3D12_GPU_VIRTUAL_ADDRESS cachedConstantBuffers_[2] = {0, 0}; // b0, b1 + bool constantBufferBound_[2] = {false, false}; + + // Cached CBV descriptor table for b2-b15 (root parameter 3) + // Supports up to 14 additional uniform buffers via descriptor table + D3D12_GPU_DESCRIPTOR_HANDLE cachedCbvTableGpuHandles_[IGL_BUFFER_BINDINGS_MAX] = {}; + bool cbvTableBound_[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t cbvTableCount_ = 0; + + // G-001: Barrier batching infrastructure + // Accumulates resource barriers and flushes them before draw/dispatch calls + // This reduces D3D12 API overhead and allows driver optimization + std::vector pendingBarriers_; + + // Flushes all pending barriers to the command list + void flushBarriers(); + + // Queue a barrier for batched submission + void queueBarrier(const D3D12_RESOURCE_BARRIER& barrier); + + // Dynamic PSO selection (Vulkan-style pattern) + // Stores actual framebuffer formats captured in begin() + // Used to select correct PSO variant at draw time + D3D12RenderPipelineDynamicState dynamicState_; + + // Cached render pipeline state for dynamic PSO variant selection + const RenderPipelineState* currentRenderPipelineState_ = nullptr; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/RenderPipelineState.cpp b/src/igl/d3d12/RenderPipelineState.cpp new file mode 100644 index 0000000000..8927277118 --- /dev/null +++ b/src/igl/d3d12/RenderPipelineState.cpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +RenderPipelineState::RenderPipelineState(const RenderPipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature) + : IRenderPipelineState(desc), + pipelineState_(std::move(pipelineState)), + rootSignature_(std::move(rootSignature)) { + // Set D3D12 object names for PIX debugging + const std::string& debugName = desc.debugName.toString(); + if (pipelineState_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + pipelineState_->SetName((L"PSO_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set PIX debug name 'PSO_%s'\n", debugName.c_str()); + } + if (rootSignature_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + rootSignature_->SetName((L"RootSig_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set PIX root signature name 'RootSig_%s'\n", debugName.c_str()); + } + + // Extract shader reflection info for future dynamic binding support + if (desc.shaderStages) { + auto* vertexModule = static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = static_cast(desc.shaderStages->getFragmentModule().get()); + + // Prefer vertex shader for push constants if both define them + if (vertexModule) { + const auto& vsReflection = vertexModule->getReflectionInfo(); + if (vsReflection.hasPushConstants) { + shaderReflection_.hasPushConstants = true; + shaderReflection_.pushConstantSlot = vsReflection.pushConstantSlot; + shaderReflection_.pushConstantSize = vsReflection.pushConstantSize; + shaderReflection_.pushConstantRootParamIndex = 0; // Push constants are always root parameter 0 + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: VS push constants at b%u (%u DWORDs, root param %u)\n", + vsReflection.pushConstantSlot, vsReflection.pushConstantSize, + 
shaderReflection_.pushConstantRootParamIndex); + } + } + + // Use fragment shader push constants if vertex shader doesn't have them + if (!shaderReflection_.hasPushConstants && fragmentModule) { + const auto& psReflection = fragmentModule->getReflectionInfo(); + if (psReflection.hasPushConstants) { + shaderReflection_.hasPushConstants = true; + shaderReflection_.pushConstantSlot = psReflection.pushConstantSlot; + shaderReflection_.pushConstantSize = psReflection.pushConstantSize; + shaderReflection_.pushConstantRootParamIndex = 0; // Push constants are always root parameter 0 + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: PS push constants at b%u (%u DWORDs, root param %u)\n", + psReflection.pushConstantSlot, psReflection.pushConstantSize, + shaderReflection_.pushConstantRootParamIndex); + } + } + } + + // Convert IGL primitive topology to D3D12 primitive topology + switch (desc.topology) { + case PrimitiveType::Point: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to POINTLIST\n"); + break; + case PrimitiveType::Line: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_LINELIST; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to LINELIST\n"); + break; + case PrimitiveType::LineStrip: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to LINESTRIP\n"); + break; + case PrimitiveType::Triangle: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to TRIANGLELIST\n"); + break; + case PrimitiveType::TriangleStrip: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to TRIANGLESTRIP\n"); + break; + } + + // Cache the vertex stride from the vertex input state binding (slot 0) if available + const auto& vis = desc.vertexInputState; + if (vis) { + // Try backend downcast to extract VertexInputStateDesc + 
if (auto* d3dVis = dynamic_cast(vis.get())) { + const auto& d = d3dVis->getDesc(); + if (d.numInputBindings > 0) { + vertexStride_ = static_cast(d.inputBindings[0].stride); + // Cache per-slot strides + for (size_t s = 0; s < d.numInputBindings && s < IGL_BUFFER_BINDINGS_MAX; ++s) { + vertexStrides_[s] = static_cast(d.inputBindings[s].stride); + } + // If attributes reference slots beyond numInputBindings or strides are zero, + // derive reasonable defaults so sessions that bind to slot 1 still work. + size_t maxSlot = 0; + for (size_t i = 0; i < d.numAttributes; ++i) { + if (d.attributes[i].bufferIndex > maxSlot) { + maxSlot = d.attributes[i].bufferIndex; + } + } + // Helper to compute a minimal stride per slot from attributes (max end offset among attrs in that slot) + auto computeStrideForSlot = [&](size_t slot) -> uint32_t { + size_t maxEnd = 0; + for (size_t i = 0; i < d.numAttributes; ++i) { + const auto& a = d.attributes[i]; + if (a.bufferIndex != slot) continue; + size_t compSize = 0; + switch (a.format) { + case VertexAttributeFormat::Float1: compSize = 4; break; + case VertexAttributeFormat::Float2: compSize = 8; break; + case VertexAttributeFormat::Float3: compSize = 12; break; + case VertexAttributeFormat::Float4: compSize = 16; break; + case VertexAttributeFormat::Byte1: compSize = 1; break; + case VertexAttributeFormat::Byte2: compSize = 2; break; + case VertexAttributeFormat::Byte4: compSize = 4; break; + case VertexAttributeFormat::UByte4Norm: compSize = 4; break; + default: compSize = 0; break; + } + maxEnd = std::max(maxEnd, a.offset + compSize); + } + // Fallback to slot0 stride if present + if (maxEnd == 0 && d.numInputBindings > 0) { + return static_cast(d.inputBindings[0].stride); + } + return static_cast(maxEnd); + }; + for (size_t s = 0; s <= maxSlot && s < IGL_BUFFER_BINDINGS_MAX; ++s) { + if (vertexStrides_[s] == 0) { + vertexStrides_[s] = computeStrideForSlot(s); + } + } + if (vertexStride_ == 0) { + vertexStride_ = vertexStrides_[0]; + } 
+ } + } + } +} + +std::shared_ptr RenderPipelineState::renderPipelineReflection() { + if (reflection_) { + return reflection_; + } + + struct ReflectionImpl final : public IRenderPipelineReflection { + std::vector ubs; + std::vector samplers; + std::vector textures; + const std::vector& allUniformBuffers() const override { return ubs; } + const std::vector& allSamplers() const override { return samplers; } + const std::vector& allTextures() const override { return textures; } + }; + + auto out = std::make_shared(); + + auto reflectShader = [&](const std::shared_ptr& mod, ShaderStage stage) { + if (!mod) return; + auto* d3dMod = dynamic_cast(mod.get()); + if (!d3dMod) return; + const auto& bc = d3dMod->getBytecode(); + if (bc.empty()) return; + igl::d3d12::ComPtr refl; + if (FAILED(D3DReflect(bc.data(), bc.size(), IID_PPV_ARGS(refl.GetAddressOf())))) return; + D3D12_SHADER_DESC sd{}; + if (FAILED(refl->GetDesc(&sd))) return; + + // Constant buffers + for (UINT i = 0; i < sd.ConstantBuffers; ++i) { + auto* cb = refl->GetConstantBufferByIndex(i); + D3D12_SHADER_BUFFER_DESC cbd{}; if (FAILED(cb->GetDesc(&cbd))) continue; + int bufferIndex = -1; + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (SUCCEEDED(refl->GetResourceBindingDesc(r, &bind))) { + if (bind.Type == D3D_SIT_CBUFFER && std::string(bind.Name) == std::string(cbd.Name)) { + bufferIndex = static_cast(bind.BindPoint); + break; + } + } + } + BufferArgDesc ub; + ub.name = igl::genNameHandle(cbd.Name ? 
cbd.Name : ""); + ub.bufferAlignment = 256; + ub.bufferDataSize = cbd.Size; + ub.bufferIndex = bufferIndex; + ub.shaderStage = stage; + ub.isUniformBlock = true; + for (UINT v = 0; v < cbd.Variables; ++v) { + auto* var = cb->GetVariableByIndex(v); + D3D12_SHADER_VARIABLE_DESC vd{}; if (FAILED(var->GetDesc(&vd))) continue; + auto* t = var->GetType(); if (!t) continue; + D3D12_SHADER_TYPE_DESC td{}; if (FAILED(t->GetDesc(&td))) continue; + BufferArgDesc::BufferMemberDesc m; + m.name = igl::genNameHandle(vd.Name ? vd.Name : ""); + m.type = ReflectionUtils::mapUniformType(td); + m.offset = vd.StartOffset; + m.arrayLength = td.Elements ? td.Elements : 1; + ub.members.push_back(std::move(m)); + } + out->ubs.push_back(std::move(ub)); + } + + // Textures and samplers + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (FAILED(refl->GetResourceBindingDesc(r, &bind))) continue; + if (bind.Type == D3D_SIT_TEXTURE) { + TextureArgDesc t; t.name = bind.Name ? bind.Name : ""; t.type = TextureType::TwoD; t.textureIndex = bind.BindPoint; t.shaderStage = stage; out->textures.push_back(std::move(t)); + } else if (bind.Type == D3D_SIT_SAMPLER) { + SamplerArgDesc s; s.name = bind.Name ? 
bind.Name : ""; s.samplerIndex = bind.BindPoint; s.shaderStage = stage; out->samplers.push_back(std::move(s)); + } + } + }; + + if (auto stages = getRenderPipelineDesc().shaderStages) { + reflectShader(stages->getVertexModule(), ShaderStage::Vertex); + reflectShader(stages->getFragmentModule(), ShaderStage::Fragment); + } + + reflection_ = out; + return reflection_; +} + +void RenderPipelineState::setRenderPipelineReflection( + const IRenderPipelineReflection& /*renderPipelineReflection*/) {} + +int RenderPipelineState::getIndexByName(const igl::NameHandle& /*name*/, + ShaderStage /*stage*/) const { + return -1; +} + +int RenderPipelineState::getIndexByName(const std::string& /*name*/, + ShaderStage /*stage*/) const { + return -1; +} + +ID3D12PipelineState* RenderPipelineState::getPipelineState( + const D3D12RenderPipelineDynamicState& dynamicState, + Device& device) const { + // Fast path: Check if dynamic state matches base PSO + // This happens when pipeline was created with same formats as framebuffer + const auto& desc = getRenderPipelineDesc(); + bool matchesBasePSO = true; + + // Check render target formats + const UINT numRTs = static_cast( + std::min(desc.targetDesc.colorAttachments.size(), + D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + for (UINT i = 0; i < numRTs; ++i) { + if (dynamicState.rtvFormats[i] != + textureFormatToDXGIFormat(desc.targetDesc.colorAttachments[i].textureFormat)) { + matchesBasePSO = false; + break; + } + } + + // Check depth-stencil format + if (matchesBasePSO) { + const DXGI_FORMAT baseDSVFormat = + (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid) + ? 
textureFormatToDXGIFormat(desc.targetDesc.depthAttachmentFormat) + : DXGI_FORMAT_UNKNOWN; + if (dynamicState.dsvFormat != baseDSVFormat) { + matchesBasePSO = false; + } + } + + // Return base PSO if formats match + if (matchesBasePSO) { + return pipelineState_.Get(); + } + + // Check variant cache + auto it = psoVariants_.find(dynamicState); + if (it != psoVariants_.end()) { + return it->second.Get(); + } + + // Create PSO variant with substituted formats (Vulkan-style on-demand creation) + IGL_LOG_INFO("Creating PSO variant: RTV[0]=%d (base) -> %d (framebuffer)\n", + textureFormatToDXGIFormat(desc.targetDesc.colorAttachments[0].textureFormat), + dynamicState.rtvFormats[0]); + + // Following Vulkan's approach: create modified RenderPipelineDesc with substituted formats + // Create a modified descriptor with framebuffer formats substituted + RenderPipelineDesc variantDesc = desc; // Copy all state + + // Substitute RT formats from actual framebuffer + for (UINT i = 0; i < numRTs; ++i) { + if (dynamicState.rtvFormats[i] != DXGI_FORMAT_UNKNOWN) { + // Convert DXGI format back to IGL TextureFormat + variantDesc.targetDesc.colorAttachments[i].textureFormat = + dxgiFormatToTextureFormat(dynamicState.rtvFormats[i]); + IGL_LOG_INFO(" RTV[%u]: substituted format %d\n", i, dynamicState.rtvFormats[i]); + } + } + + // Substitute DSV format if present + if (dynamicState.dsvFormat != DXGI_FORMAT_UNKNOWN) { + variantDesc.targetDesc.depthAttachmentFormat = + dxgiFormatToTextureFormat(dynamicState.dsvFormat); + } + + // Call Device::createPipelineStateVariant() to create PSO with modified formats + Result variantResult; + auto variantPSO = device.createPipelineStateVariant( + variantDesc, rootSignature_.Get(), &variantResult); + + if (!variantPSO.Get()) { + IGL_LOG_ERROR("PSO variant creation failed: %s\n", variantResult.message.c_str()); + IGL_LOG_ERROR("Falling back to base PSO (this will cause D3D12 validation errors!)\n"); + return pipelineState_.Get(); // Fallback to base PSO 
+ } + + // Cache the variant for future use + psoVariants_[dynamicState] = variantPSO; + IGL_LOG_INFO("PSO variant created and cached successfully: PSO=%p\n", variantPSO.Get()); + + return variantPSO.Get(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/RenderPipelineState.h b/src/igl/d3d12/RenderPipelineState.h new file mode 100644 index 0000000000..768dff68e6 --- /dev/null +++ b/src/igl/d3d12/RenderPipelineState.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +class Device; // Forward declaration + +/** + * @brief Encapsulates dynamic render state that affects PSO selection + * + * Following Vulkan's RenderPipelineDynamicState pattern, this structure serves as a hash key + * for PSO variant caching. D3D12 PSOs are immutable and must match the exact render target + * formats at draw time. 
+ * + * Key differences from Vulkan: + * - Vulkan: renderPassIndex_ encodes all render pass compatibility (formats + load/store ops) + * - D3D12: We only need render target formats (no render pass object exists) + * + * The structure is designed for efficient hashing and comparison: + * - Packed into fixed-size array for fast memcmp + * - Zero-initialized padding for consistent hashing + */ +struct D3D12RenderPipelineDynamicState { + // Render target formats (up to 8 MRT targets) + std::array rtvFormats; + // Depth-stencil format + DXGI_FORMAT dsvFormat; + + D3D12RenderPipelineDynamicState() { + rtvFormats.fill(DXGI_FORMAT_UNKNOWN); + dsvFormat = DXGI_FORMAT_UNKNOWN; + } + + bool operator==(const D3D12RenderPipelineDynamicState& other) const { + return rtvFormats == other.rtvFormats && dsvFormat == other.dsvFormat; + } + + struct HashFunction { + size_t operator()(const D3D12RenderPipelineDynamicState& s) const { + size_t hash = 0; + for (const auto& fmt : s.rtvFormats) { + hash ^= std::hash{}(fmt) + 0x9e3779b9 + (hash << 6) + (hash >> 2); + } + hash ^= std::hash{}(s.dsvFormat) + 0x9e3779b9 + (hash << 6) + (hash >> 2); + return hash; + } + }; +}; + +class RenderPipelineState final : public IRenderPipelineState { + public: + RenderPipelineState(const RenderPipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature); + ~RenderPipelineState() override = default; + + std::shared_ptr renderPipelineReflection() override; + void setRenderPipelineReflection( + const IRenderPipelineReflection& renderPipelineReflection) override; + int getIndexByName(const igl::NameHandle& name, ShaderStage stage) const override; + int getIndexByName(const std::string& name, ShaderStage stage) const override; + + // D3D12-specific accessors + ID3D12PipelineState* getPipelineState() const { return pipelineState_.Get(); } + + /** + * @brief Get PSO variant for specific render target formats (Vulkan-style dynamic PSO selection) + * + * This method follows 
Vulkan's getVkPipeline(dynamicState) pattern to create PSO variants + * on-demand based on actual framebuffer formats. D3D12 PSOs are immutable and must exactly + * match render target formats at creation time. + * + * @param dynamicState Contains actual framebuffer RTVformats and DSV format at draw time + * @param device IGL D3D12 device for PSO creation + * @return PSO variant matching the requested formats, or nullptr on error + */ + ID3D12PipelineState* getPipelineState(const D3D12RenderPipelineDynamicState& dynamicState, + Device& device) const; + + ID3D12RootSignature* getRootSignature() const { return rootSignature_.Get(); } + uint32_t getVertexStride() const { return vertexStride_; } + uint32_t getVertexStride(size_t slot) const { return (slot < IGL_BUFFER_BINDINGS_MAX) ? vertexStrides_[slot] : 0; } + D3D_PRIMITIVE_TOPOLOGY getPrimitiveTopology() const { return primitiveTopology_; } + + // Query push constant binding info from shader reflection + bool hasPushConstants() const { return shaderReflection_.hasPushConstants; } + UINT getPushConstantSlot() const { return shaderReflection_.pushConstantSlot; } + UINT getPushConstantRootParameterIndex() const { return shaderReflection_.pushConstantRootParamIndex; } + + // Query root parameter layout (dynamic based on shader reflection) + UINT getCBVTableRootParameterIndex() const { return rootParamLayout_.cbvTableIndex; } + UINT getSRVTableRootParameterIndex() const { return rootParamLayout_.srvTableIndex; } + UINT getSamplerTableRootParameterIndex() const { return rootParamLayout_.samplerTableIndex; } + UINT getUAVTableRootParameterIndex() const { return rootParamLayout_.uavTableIndex; } + + // Query descriptor range sizes (how many descriptors the root signature expects) + UINT getCBVDescriptorCount() const { return rootParamLayout_.cbvDescriptorCount; } + UINT getSRVDescriptorCount() const { return rootParamLayout_.srvDescriptorCount; } + UINT getSamplerDescriptorCount() const { return 
rootParamLayout_.samplerDescriptorCount; } + UINT getUAVDescriptorCount() const { return rootParamLayout_.uavDescriptorCount; } + + private: + friend class Device; // Device needs access to create PSO variants + + // Base PSO created from RenderPipelineDesc (may not match actual framebuffer formats) + igl::d3d12::ComPtr pipelineState_; + igl::d3d12::ComPtr rootSignature_; + std::shared_ptr reflection_; + uint32_t vertexStride_ = 0; + uint32_t vertexStrides_[IGL_BUFFER_BINDINGS_MAX] = {}; + D3D_PRIMITIVE_TOPOLOGY primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + + // PSO variant cache following Vulkan's pattern + // Maps framebuffer formats → PSO variant + mutable std::unordered_map, + D3D12RenderPipelineDynamicState::HashFunction> + psoVariants_; + + // Shader reflection info for dynamic resource binding + // Stores merged reflection data from vertex + fragment shaders + struct { + bool hasPushConstants = false; + UINT pushConstantSlot = UINT_MAX; + UINT pushConstantSize = 0; + UINT pushConstantRootParamIndex = 0; // Root parameter index for push constants in root signature + } shaderReflection_; + + // Root parameter layout (dynamically computed from shader reflection) + // These indices tell encoders which root parameter to use for each resource type + // Pure reflection-based approach - no hardcoded assumptions + struct { + UINT cbvTableIndex = UINT_MAX; // CBV descriptor table + UINT srvTableIndex = UINT_MAX; // SRV descriptor table + UINT samplerTableIndex = UINT_MAX; // Sampler descriptor table + UINT uavTableIndex = UINT_MAX; // UAV descriptor table + + // Descriptor range sizes (from root signature, 0 to maxSlot inclusive) + // These define how many descriptors the root signature expects in each table + // ResourcesBinder must allocate exactly these counts to match the root signature + UINT cbvDescriptorCount = 0; // Number of CBV descriptors (0 to maxCBVSlot) + UINT srvDescriptorCount = 0; // Number of SRV descriptors (0 to maxSRVSlot) + UINT 
samplerDescriptorCount = 0; // Number of sampler descriptors (0 to maxSamplerSlot) + UINT uavDescriptorCount = 0; // Number of UAV descriptors (0 to maxUAVSlot) + } rootParamLayout_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/SamplerState.cpp b/src/igl/d3d12/SamplerState.cpp new file mode 100644 index 0000000000..9e5757eafd --- /dev/null +++ b/src/igl/d3d12/SamplerState.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +namespace igl::d3d12 { + +size_t SamplerState::hash() const noexcept { + size_t h = 0; + + // Hash all D3D12_SAMPLER_DESC fields using the same technique as Device.cpp + // Magic constant 0x9e3779b9 is the golden ratio used for hash mixing + h ^= std::hash{}(static_cast(desc_.Filter)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.AddressU)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.AddressV)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.AddressW)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.MipLODBias) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.MaxAnisotropy) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.ComparisonFunc)) + 0x9e3779b9 + (h << 6) + (h >> 2); + + // Hash border color array + h ^= std::hash{}(desc_.BorderColor[0]) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.BorderColor[1]) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.BorderColor[2]) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.BorderColor[3]) + 0x9e3779b9 + (h << 6) + (h >> 2); + + h ^= std::hash{}(desc_.MinLOD) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.MaxLOD) + 0x9e3779b9 + (h << 6) + (h >> 2); + + return h; +} + +bool SamplerState::operator==(const SamplerState& rhs) 
const noexcept { + // Compare all D3D12_SAMPLER_DESC fields + return desc_.Filter == rhs.desc_.Filter && + desc_.AddressU == rhs.desc_.AddressU && + desc_.AddressV == rhs.desc_.AddressV && + desc_.AddressW == rhs.desc_.AddressW && + desc_.MipLODBias == rhs.desc_.MipLODBias && + desc_.MaxAnisotropy == rhs.desc_.MaxAnisotropy && + desc_.ComparisonFunc == rhs.desc_.ComparisonFunc && + desc_.BorderColor[0] == rhs.desc_.BorderColor[0] && + desc_.BorderColor[1] == rhs.desc_.BorderColor[1] && + desc_.BorderColor[2] == rhs.desc_.BorderColor[2] && + desc_.BorderColor[3] == rhs.desc_.BorderColor[3] && + desc_.MinLOD == rhs.desc_.MinLOD && + desc_.MaxLOD == rhs.desc_.MaxLOD; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/SamplerState.h b/src/igl/d3d12/SamplerState.h new file mode 100644 index 0000000000..44833bbb44 --- /dev/null +++ b/src/igl/d3d12/SamplerState.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class SamplerState final : public ISamplerState { + public: + explicit SamplerState(const D3D12_SAMPLER_DESC& desc) : desc_(desc) {} + ~SamplerState() override = default; + + bool isYUV() const noexcept override { return false; } + + const D3D12_SAMPLER_DESC& getDesc() const { return desc_; } + + /// Computes hash value based on D3D12_SAMPLER_DESC fields + size_t hash() const noexcept; + + /// Compares two SamplerState objects for equality + bool operator==(const SamplerState& rhs) const noexcept; + + private: + D3D12_SAMPLER_DESC desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ShaderModule.cpp b/src/igl/d3d12/ShaderModule.cpp new file mode 100644 index 0000000000..a872112f20 --- /dev/null +++ b/src/igl/d3d12/ShaderModule.cpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +namespace igl::d3d12 { + +void ShaderModule::setReflection(igl::d3d12::ComPtr reflection) { + reflection_ = reflection; + if (reflection_.Get()) { + extractShaderMetadata(); + } +} + +void ShaderModule::extractShaderMetadata() { + if (!reflection_.Get()) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: reflection_ is NULL!\n"); + return; + } + + D3D12_SHADER_DESC shaderDesc = {}; + HRESULT hr = reflection_->GetDesc(&shaderDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get shader desc: 0x%08X\n", hr); + return; + } + + IGL_D3D12_LOG_VERBOSE("ShaderModule: Reflection extracted - %u constant buffers, %u bound resources, %u input params, %u output params\n", + shaderDesc.ConstantBuffers, + shaderDesc.BoundResources, + shaderDesc.InputParameters, + shaderDesc.OutputParameters); + + // Reset reflection info + reflectionInfo_ = ShaderReflectionInfo{}; + + // Extract resource bindings (textures, buffers, samplers, UAVs) + resourceBindings_.clear(); + for (UINT i = 0; i < shaderDesc.BoundResources; i++) { + D3D12_SHADER_INPUT_BIND_DESC bindDesc = {}; + hr = reflection_->GetResourceBindingDesc(i, &bindDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get resource binding %u: 0x%08X\n", i, hr); + continue; + } + + ResourceBinding binding; + binding.name = bindDesc.Name; + binding.type = bindDesc.Type; + binding.bindPoint = bindDesc.BindPoint; + binding.bindCount = bindDesc.BindCount; + binding.space = bindDesc.Space; + + resourceBindings_.push_back(binding); + + // Populate reflection info for root signature selection + if (bindDesc.Type == D3D_SIT_CBUFFER) { + reflectionInfo_.usedCBVSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxCBVSlot = std::max(reflectionInfo_.maxCBVSlot, bindDesc.BindPoint); + 
IGL_LOG_INFO(" Found CBV: '%s' at b%u\n", bindDesc.Name, bindDesc.BindPoint); + } else if (bindDesc.Type == D3D_SIT_TEXTURE || + bindDesc.Type == D3D_SIT_STRUCTURED || + bindDesc.Type == D3D_SIT_BYTEADDRESS) { + reflectionInfo_.usedSRVSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxSRVSlot = std::max(reflectionInfo_.maxSRVSlot, bindDesc.BindPoint); + IGL_LOG_INFO(" Found SRV: '%s' at t%u\n", bindDesc.Name, bindDesc.BindPoint); + } else if (bindDesc.Type == D3D_SIT_UAV_RWTYPED || + bindDesc.Type == D3D_SIT_UAV_RWSTRUCTURED || + bindDesc.Type == D3D_SIT_UAV_RWBYTEADDRESS || + bindDesc.Type == D3D_SIT_UAV_APPEND_STRUCTURED || + bindDesc.Type == D3D_SIT_UAV_CONSUME_STRUCTURED || + bindDesc.Type == D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER) { + reflectionInfo_.usedUAVSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxUAVSlot = std::max(reflectionInfo_.maxUAVSlot, bindDesc.BindPoint); + IGL_LOG_INFO(" Found UAV: '%s' at u%u\n", bindDesc.Name, bindDesc.BindPoint); + } else if (bindDesc.Type == D3D_SIT_SAMPLER) { + reflectionInfo_.usedSamplerSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxSamplerSlot = std::max(reflectionInfo_.maxSamplerSlot, bindDesc.BindPoint); + IGL_LOG_INFO(" Found Sampler: '%s' at s%u\n", bindDesc.Name, bindDesc.BindPoint); + } + + const char* typeStr = "Unknown"; + switch (bindDesc.Type) { + case D3D_SIT_CBUFFER: typeStr = "CBV (Constant Buffer)"; break; + case D3D_SIT_TBUFFER: typeStr = "TBuffer"; break; + case D3D_SIT_TEXTURE: typeStr = "SRV (Texture)"; break; + case D3D_SIT_SAMPLER: typeStr = "Sampler"; break; + case D3D_SIT_UAV_RWTYPED: typeStr = "UAV (RW Typed)"; break; + case D3D_SIT_STRUCTURED: typeStr = "SRV (StructuredBuffer)"; break; + case D3D_SIT_UAV_RWSTRUCTURED: typeStr = "UAV (RWStructuredBuffer)"; break; + case D3D_SIT_BYTEADDRESS: typeStr = "SRV (ByteAddressBuffer)"; break; + case D3D_SIT_UAV_RWBYTEADDRESS: typeStr = "UAV (RWByteAddressBuffer)"; break; + case D3D_SIT_UAV_APPEND_STRUCTURED: typeStr = "UAV 
(AppendStructuredBuffer)"; break; + case D3D_SIT_UAV_CONSUME_STRUCTURED: typeStr = "UAV (ConsumeStructuredBuffer)"; break; + case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER: typeStr = "UAV (RWStructuredBuffer with counter)"; break; + default: break; + } + + IGL_LOG_DEBUG(" Resource [%u]: '%s' | Type: %s | Slot: t%u/b%u/s%u/u%u | Space: %u | Count: %u\n", + i, + bindDesc.Name, + typeStr, + bindDesc.Type == D3D_SIT_TEXTURE ? bindDesc.BindPoint : 0, + bindDesc.Type == D3D_SIT_CBUFFER ? bindDesc.BindPoint : 0, + bindDesc.Type == D3D_SIT_SAMPLER ? bindDesc.BindPoint : 0, + (bindDesc.Type == D3D_SIT_UAV_RWTYPED || bindDesc.Type == D3D_SIT_UAV_RWSTRUCTURED) ? bindDesc.BindPoint : 0, + bindDesc.Space, + bindDesc.BindCount); + } + + // Extract constant buffer information + constantBuffers_.clear(); + for (UINT i = 0; i < shaderDesc.ConstantBuffers; i++) { + ID3D12ShaderReflectionConstantBuffer* cb = reflection_->GetConstantBufferByIndex(i); + if (!cb) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get constant buffer %u\n", i); + continue; + } + + D3D12_SHADER_BUFFER_DESC bufferDesc = {}; + hr = cb->GetDesc(&bufferDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get CB desc %u: 0x%08X\n", i, hr); + continue; + } + + ConstantBufferInfo cbInfo; + cbInfo.name = bufferDesc.Name; + cbInfo.size = bufferDesc.Size; + cbInfo.numVariables = bufferDesc.Variables; + + constantBuffers_.push_back(cbInfo); + + IGL_LOG_DEBUG(" Constant Buffer [%u]: '%s' | Size: %u bytes | Variables: %u\n", + i, + bufferDesc.Name, + bufferDesc.Size, + bufferDesc.Variables); + + // Optionally log variable details for debugging + for (UINT v = 0; v < bufferDesc.Variables; v++) { + ID3D12ShaderReflectionVariable* var = cb->GetVariableByIndex(v); + if (var) { + D3D12_SHADER_VARIABLE_DESC varDesc = {}; + if (SUCCEEDED(var->GetDesc(&varDesc))) { + IGL_LOG_DEBUG(" Variable [%u]: '%s' | Offset: %u | Size: %u bytes\n", + v, + varDesc.Name, + 
varDesc.StartOffset, + varDesc.Size); + } + } + } + } + + // Detect push constants by name convention: cbuffer must be named "PushConstants" + // This allows distinguishing between push constants (used with bindBytes) and regular + // small uniform buffers (used with bindBuffer), since both may be small (≤64 bytes). + for (const auto& binding : resourceBindings_) { + if (binding.type == D3D_SIT_CBUFFER) { + // Find the corresponding constant buffer info to get size and name + for (const auto& cbInfo : constantBuffers_) { + if (cbInfo.name == binding.name) { + // Check if this is push constants by name (must contain "PushConstant") + if (cbInfo.name.find("PushConstant") != std::string::npos && cbInfo.size <= 64) { + reflectionInfo_.hasPushConstants = true; + reflectionInfo_.pushConstantSlot = binding.bindPoint; + reflectionInfo_.pushConstantSize = (cbInfo.size + 3) / 4; // Convert bytes to DWORDs + IGL_D3D12_LOG_VERBOSE(" Detected push constants: '%s' at b%u (%u DWORDs / %u bytes)\n", + cbInfo.name.c_str(), + binding.bindPoint, + reflectionInfo_.pushConstantSize, + cbInfo.size); + } + break; + } + } + } + } +} + +bool ShaderModule::hasResource(const std::string& name) const { + for (const auto& binding : resourceBindings_) { + if (binding.name == name) { + return true; + } + } + return false; +} + +UINT ShaderModule::getResourceBindPoint(const std::string& name) const { + for (const auto& binding : resourceBindings_) { + if (binding.name == name) { + return binding.bindPoint; + } + } + return UINT_MAX; // Not found +} + +size_t ShaderModule::getConstantBufferSize(const std::string& name) const { + for (const auto& cb : constantBuffers_) { + if (cb.name == name) { + return cb.size; + } + } + return 0; // Not found +} + +bool ShaderModule::validateBytecode() const { + // Check minimum size for signature + if (bytecode_.size() < 4) { + IGL_LOG_ERROR("Shader bytecode too small (< 4 bytes): %zu bytes\n", bytecode_.size()); + return false; + } + + const char* signature = 
reinterpret_cast(bytecode_.data()); + + // Valid signatures: "DXBC" (legacy D3D11/D3D12) or "DXIL" (modern D3D12) + if (std::memcmp(signature, "DXBC", 4) == 0) { + IGL_LOG_DEBUG("Shader bytecode validated: DXBC format (%zu bytes)\n", bytecode_.size()); + return true; // Valid DXBC shader + } + + if (std::memcmp(signature, "DXIL", 4) == 0) { + IGL_LOG_DEBUG("Shader bytecode validated: DXIL format (%zu bytes)\n", bytecode_.size()); + return true; // Valid DXIL shader + } + + // Log the invalid signature for debugging + IGL_LOG_ERROR("Invalid shader bytecode signature: 0x%02X%02X%02X%02X (expected 'DXBC' or 'DXIL')\n", + static_cast(signature[0]), + static_cast(signature[1]), + static_cast(signature[2]), + static_cast(signature[3])); + return false; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ShaderModule.h b/src/igl/d3d12/ShaderModule.h new file mode 100644 index 0000000000..71e5ed7b04 --- /dev/null +++ b/src/igl/d3d12/ShaderModule.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class ShaderModule final : public IShaderModule { + public: + // Resource binding information extracted from shader reflection + struct ResourceBinding { + std::string name; + D3D_SHADER_INPUT_TYPE type; // CBV, SRV, UAV, Sampler + UINT bindPoint; + UINT bindCount; + UINT space; + }; + + // Constant buffer information from reflection + struct ConstantBufferInfo { + std::string name; + UINT size; + UINT numVariables; + }; + + // Shader resource usage summary for root signature selection + struct ShaderReflectionInfo { + // Push constants (inline root constants) + bool hasPushConstants = false; + UINT pushConstantSlot = UINT_MAX; // Which b# register + UINT pushConstantSize = 0; // Size in 32-bit values + + // Resource slot usage (for conflict detection) + std::vector usedCBVSlots; // Constant buffer slots (b#) + std::vector usedSRVSlots; // Shader resource view slots (t#) + std::vector usedUAVSlots; // Unordered access view slots (u#) + std::vector usedSamplerSlots; // Sampler slots (s#) + + // Maximum slot indices used (for root signature sizing) + UINT maxCBVSlot = 0; + UINT maxSRVSlot = 0; + UINT maxUAVSlot = 0; + UINT maxSamplerSlot = 0; + }; + + ShaderModule(ShaderModuleInfo info, std::vector bytecode) + : IShaderModule(info), bytecode_(std::move(bytecode)) { + if (!validateBytecode()) { + IGL_LOG_ERROR("ShaderModule: Created with invalid bytecode (validation failed)\n"); + } + } + ~ShaderModule() override = default; + + const std::vector& getBytecode() const { return bytecode_; } + + // Shader reflection API + void setReflection(igl::d3d12::ComPtr reflection); + const std::vector& getResourceBindings() const { return resourceBindings_; } + const std::vector& getConstantBuffers() const { return constantBuffers_; } + const ShaderReflectionInfo& getReflectionInfo() const { return reflectionInfo_; } + + bool hasResource(const std::string& name) const; + 
UINT getResourceBindPoint(const std::string& name) const; + size_t getConstantBufferSize(const std::string& name) const; + + // Bytecode validation + bool validateBytecode() const; + + private: + std::vector bytecode_; // DXIL bytecode + igl::d3d12::ComPtr reflection_; + std::vector resourceBindings_; + std::vector constantBuffers_; + ShaderReflectionInfo reflectionInfo_; + + void extractShaderMetadata(); +}; + +class ShaderStages final : public IShaderStages { + public: + ShaderStages(ShaderStagesDesc desc) : IShaderStages(desc) {} + ~ShaderStages() override = default; +}; + +class ShaderLibrary final : public IShaderLibrary { + public: + explicit ShaderLibrary(std::vector> modules) + : IShaderLibrary(std::move(modules)) {} + ~ShaderLibrary() override = default; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Texture.cpp b/src/igl/d3d12/Texture.cpp new file mode 100644 index 0000000000..7d06b0ee21 --- /dev/null +++ b/src/igl/d3d12/Texture.cpp @@ -0,0 +1,1323 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include + +// No channel swap needed: DXGI_FORMAT_R8G8B8A8_UNORM matches IGL TextureFormat::RGBA_UNorm8 byte order. 
+ +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +std::shared_ptr Texture::createFromResource(ID3D12Resource* resource, + TextureFormat format, + const TextureDesc& desc, + ID3D12Device* device, + ID3D12CommandQueue* queue, + D3D12_RESOURCE_STATES initialState, + Device* iglDevice) { + if (!resource) { + IGL_LOG_ERROR("Texture::createFromResource - resource is NULL!\n"); + return nullptr; + } + + auto texture = std::make_shared(format); + + // Attach the resource to ComPtr (takes ownership, AddRefs) + resource->AddRef(); + texture->resource_.Attach(resource); + + texture->device_ = device; + texture->queue_ = queue; + texture->iglDevice_ = iglDevice; // Store igl Device for upload-related operations. + texture->format_ = format; + texture->dimensions_ = Dimensions{desc.width, desc.height, desc.depth}; + texture->type_ = desc.type; + texture->numLayers_ = desc.numLayers; + texture->numMipLevels_ = desc.numMipLevels; + texture->samples_ = desc.numSamples; + texture->usage_ = desc.usage; + + texture->initializeStateTracking(initialState); + + IGL_D3D12_LOG_VERBOSE("Texture::createFromResource - SUCCESS: %dx%d format=%d\n", + desc.width, desc.height, (int)format); + + return texture; +} + +std::shared_ptr Texture::createTextureView(std::shared_ptr parent, + const TextureViewDesc& desc) { + if (!parent) { + IGL_LOG_ERROR("Texture::createTextureView - parent is NULL!\n"); + return nullptr; + } + + // Determine the format to use for the view + TextureFormat viewFormat = (desc.format != TextureFormat::Invalid) ? 
desc.format : parent->format_; + + auto view = std::make_shared(viewFormat); + + // Share the D3D12 resource (don't create new one) + // ComPtr doesn't have copy assignment, so we need to use Attach() and AddRef() + auto* parentResource = parent->resource_.Get(); + if (parentResource) { + parentResource->AddRef(); + view->resource_.Attach(parentResource); + } + view->isView_ = true; + view->parentTexture_ = parent; + + // Defensive check: parent and view must share the same underlying D3D12 resource + IGL_DEBUG_ASSERT(parent->resource_.Get() == view->resource_.Get(), + "Parent and view must share the same D3D12 resource"); + + // Store view parameters (cumulative offsets for nested views) + view->mipLevelOffset_ = parent->mipLevelOffset_ + desc.mipLevel; + view->numMipLevelsInView_ = desc.numMipLevels; + + // CRITICAL FIX: D3D12 SRV descriptors require MipLevels >= 1 + // If numMipLevels is 0 (uninitialized), default to 1 to prevent invalid SRV creation + if (view->numMipLevelsInView_ == 0) { + IGL_LOG_ERROR("Texture::createTextureView - numMipLevels is 0, defaulting to 1 (SRV requires MipLevels >= 1)\n"); + view->numMipLevelsInView_ = 1; + } + + // Validate mip level bounds to prevent out-of-range access + const uint32_t parentMipCount = parent->getNumMipLevels(); + const uint32_t requestedMipEnd = desc.mipLevel + view->numMipLevelsInView_; + if (requestedMipEnd > parentMipCount) { + IGL_LOG_ERROR("Texture::createTextureView - mip range [%u, %u) exceeds parent mip count %u, clamping\n", + desc.mipLevel, requestedMipEnd, parentMipCount); + // Clamp to valid range + view->numMipLevelsInView_ = (parentMipCount > desc.mipLevel) ? (parentMipCount - desc.mipLevel) : 1; + } + + view->arraySliceOffset_ = parent->arraySliceOffset_ + desc.layer; + view->numArraySlicesInView_ = desc.numLayers; + + // Copy properties from parent + view->device_ = parent->device_; + view->queue_ = parent->queue_; + view->iglDevice_ = parent->iglDevice_; // Propagate igl Device pointer. 
+ view->format_ = viewFormat; + view->type_ = desc.type; + view->usage_ = parent->usage_; + view->samples_ = parent->samples_; + + // Calculate view dimensions based on mip level + const uint32_t mipDivisor = 1u << desc.mipLevel; + view->dimensions_ = Dimensions{ + std::max(1u, parent->dimensions_.width >> desc.mipLevel), + std::max(1u, parent->dimensions_.height >> desc.mipLevel), + std::max(1u, parent->dimensions_.depth >> desc.mipLevel) + }; + view->numLayers_ = desc.numLayers; + // Use the validated numMipLevelsInView_ value (which has been corrected if it was 0) + view->numMipLevels_ = view->numMipLevelsInView_; + + // Views delegate state tracking to the root texture and do not maintain separate state. + // State is accessed via getStateOwner(), which walks to the root for views. + // Views share the same D3D12 resource and subresourceStates_ tracking with their root. + + IGL_D3D12_LOG_VERBOSE("Texture::createTextureView - SUCCESS: view of %dx%d, mips %u-%u, layers %u-%u\n", + view->dimensions_.width, view->dimensions_.height, + desc.mipLevel, desc.mipLevel + desc.numMipLevels - 1, + desc.layer, desc.layer + desc.numLayers - 1); + + return view; +} + +Texture::~Texture() { + // Texture views share the parent's resource, so they don't own descriptors. + // Only free descriptors for non-view textures. + if (isView_) { + return; + } + + // Get descriptor heap manager from device. + // Note: in the current architecture, descriptors are allocated/freed by RenderCommandEncoder, + // not stored in Texture. This destructor is defensive in case descriptors become per-texture later. + if (!iglDevice_) { + return; + } + + // For now, descriptors are managed by RenderCommandEncoder and freed when the encoder is destroyed. + // The rtvIndices_, dsvIndices_, and srvIndex_ members are currently unused but reserved for future use. 
+} + +Result Texture::upload(const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow) const { + IGL_D3D12_LOG_VERBOSE("Texture::upload() - START: %dx%d\n", range.width, range.height); + + if (!device_ || !queue_ || !resource_.Get()) { + IGL_LOG_ERROR("Texture::upload() - FAILED: device, queue, or resource not available\n"); + return Result(Result::Code::RuntimeError, "Device, queue, or resource not available for upload"); + } + + if (!data) { + IGL_LOG_ERROR("Texture::upload() - FAILED: data is null\n"); + return Result(Result::Code::ArgumentInvalid, "Upload data is null"); + } + + IGL_D3D12_LOG_VERBOSE("Texture::upload() - Proceeding with upload\n"); + + // Calculate dimensions and data size + const uint32_t width = range.width > 0 ? range.width : dimensions_.width; + const uint32_t height = range.height > 0 ? range.height : dimensions_.height; + const uint32_t depth = range.depth > 0 ? range.depth : dimensions_.depth; + + const auto props = TextureFormatProperties::fromTextureFormat(format_); + const bool isBC7 = (format_ == TextureFormat::RGBA_BC7_UNORM_4x4 || + format_ == TextureFormat::RGBA_BC7_SRGB_4x4); + + // Calculate bytes per row if not provided. For block-compressed formats + // like BC7, rows are expressed in blocks, not texels, so use the number + // of blocks in X multiplied by bytesPerBlock. + if (bytesPerRow == 0) { + if (isBC7) { + const uint32_t blocksX = (width + 3u) / 4u; + bytesPerRow = static_cast(blocksX) * props.bytesPerBlock; + } else { + const size_t bpp = std::max(props.bytesPerBlock, 1); + bytesPerRow = static_cast(width) * bpp; + } + } + + // Get the resource description to calculate required size + D3D12_RESOURCE_DESC resourceDesc = resource_->GetDesc(); + + // Determine how many layers/faces and mip levels we need to upload + const uint32_t numSlicesToUpload = (type_ == TextureType::Cube) ? range.numFaces : range.numLayers; + const uint32_t baseSlice = (type_ == TextureType::Cube) ? 
range.face : range.layer; + const uint32_t numMipsToUpload = range.numMipLevels; + const uint32_t baseMip = range.mipLevel; + IGL_D3D12_LOG_VERBOSE("Texture::upload - type=%d, baseSlice=%u, numSlicesToUpload=%u, baseMip=%u, numMipsToUpload=%u\n", + (int)type_, baseSlice, numSlicesToUpload, baseMip, numMipsToUpload); + + // Calculate total staging buffer size for ALL subresources + UINT64 totalStagingSize = 0; + std::vector layouts; + std::vector numRowsArray; + std::vector rowSizesArray; + + for (uint32_t mipOffset = 0; mipOffset < numMipsToUpload; ++mipOffset) { + for (uint32_t sliceOffset = 0; sliceOffset < numSlicesToUpload; ++sliceOffset) { + const uint32_t subresource = calcSubresourceIndex(baseMip + mipOffset, baseSlice + sliceOffset); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout = {}; + UINT numRows = 0; + UINT64 rowSize = 0; + UINT64 subresSize = 0; + device_->GetCopyableFootprints(&resourceDesc, subresource, 1, totalStagingSize, &layout, &numRows, &rowSize, &subresSize); + layouts.push_back(layout); + numRowsArray.push_back(numRows); + rowSizesArray.push_back(rowSize); + totalStagingSize += subresSize; + } + } + + // Try to allocate from upload ring buffer first. + UploadRingBuffer* ringBuffer = nullptr; + UploadRingBuffer::Allocation ringAllocation; + bool useRingBuffer = false; + UINT64 uploadFenceValue = 0; + + if (iglDevice_) { + // Reclaim completed upload buffers before allocating new ones. 
+ iglDevice_->processCompletedUploads(); + + ringBuffer = iglDevice_->getUploadRingBuffer(); + // Get fence value that will signal when this upload completes + uploadFenceValue = iglDevice_->getNextUploadFenceValue(); + + if (ringBuffer) { + // D3D12 requires 512-byte alignment for texture uploads (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT) + constexpr uint64_t kTextureUploadAlignment = 512; + ringAllocation = ringBuffer->allocate(totalStagingSize, kTextureUploadAlignment, uploadFenceValue); + + if (ringAllocation.valid) { + useRingBuffer = true; + } + } + } + + // Fallback: Create temporary staging buffer if ring buffer allocation failed + igl::d3d12::ComPtr stagingBuffer; + void* mappedData = nullptr; + uint64_t stagingBaseOffset = 0; + HRESULT hr = S_OK; + + if (useRingBuffer) { + // Use ring buffer allocation + mappedData = ringAllocation.cpuAddress; + stagingBaseOffset = ringAllocation.offset; + } else { + // Create temporary staging buffer + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + uploadHeapProps.CreationNodeMask = 1; + uploadHeapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC stagingDesc = {}; + stagingDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + stagingDesc.Width = totalStagingSize; + stagingDesc.Height = 1; + stagingDesc.DepthOrArraySize = 1; + stagingDesc.MipLevels = 1; + stagingDesc.Format = DXGI_FORMAT_UNKNOWN; + stagingDesc.SampleDesc.Count = 1; + stagingDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + hr = device_->CreateCommittedResource(&uploadHeapProps, D3D12_HEAP_FLAG_NONE, &stagingDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(stagingBuffer.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create staging buffer"); + } + + // Map staging buffer once + hr = stagingBuffer->Map(0, nullptr, &mappedData); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to map staging buffer"); + } + } + + // Copy all 
subresource data to the staging buffer. + // Direct copy: no channel swap needed for RGBA formats. + // DXGI_FORMAT_R8G8B8A8_UNORM has R,G,B,A byte order matching IGL TextureFormat::RGBA_UNorm8. + size_t srcDataOffset = 0; + size_t layoutIdx = 0; + + for (uint32_t mipOffset = 0; mipOffset < numMipsToUpload; ++mipOffset) { + const uint32_t mipWidth = std::max(width >> (baseMip + mipOffset), 1u); + const uint32_t mipHeight = std::max(height >> (baseMip + mipOffset), 1u); + const uint32_t mipDepth = std::max(depth >> (baseMip + mipOffset), 1u); + + size_t mipBytesPerRow = 0; + if (isBC7) { + const uint32_t blocksX = (mipWidth + 3u) / 4u; + mipBytesPerRow = static_cast(blocksX) * props.bytesPerBlock; + } else { + mipBytesPerRow = (bytesPerRow * mipWidth) / width; + } + + for (uint32_t sliceOffset = 0; sliceOffset < numSlicesToUpload; ++sliceOffset) { + const auto& layout = layouts[layoutIdx]; + const UINT numRows = numRowsArray[layoutIdx]; + const UINT64 rowSize = rowSizesArray[layoutIdx]; + layoutIdx++; + + const uint8_t* srcData = static_cast(data) + srcDataOffset; + uint8_t* dstData = static_cast(mappedData) + layout.Offset; + const size_t copyBytes = std::min(static_cast(rowSize), mipBytesPerRow); + + // For uncompressed formats, the source data is tightly packed by the + // requested region's height (mipHeight). For block-compressed formats + // (e.g. BC7), numRows represents the number of block rows returned by + // GetCopyableFootprints. Use mipHeight for uncompressed uploads and + // numRows for BC7 so that source layout matches the caller's data. + const UINT rowsToCopy = isBC7 ? 
numRows : mipHeight; + + const size_t srcDepthPitch = mipBytesPerRow * rowsToCopy; + const size_t dstDepthPitch = layout.Footprint.RowPitch * layout.Footprint.Height; + + for (UINT z = 0; z < mipDepth; ++z) { + const uint8_t* srcSlice = srcData + z * srcDepthPitch; + uint8_t* dstSlice = dstData + z * dstDepthPitch; + for (UINT row = 0; row < rowsToCopy; ++row) { + const uint8_t* srcRow = srcSlice + row * mipBytesPerRow; + uint8_t* dstRow = dstSlice + row * layout.Footprint.RowPitch; + memcpy(dstRow, srcRow, copyBytes); + } + } + + // Advance source pointer by the size of this subresource (all rows, all slices). + srcDataOffset += mipBytesPerRow * rowsToCopy * mipDepth; + } + } + + // Unmap temporary staging buffer (ring buffer stays persistently mapped) + if (!useRingBuffer && stagingBuffer.Get()) { + stagingBuffer->Unmap(0, nullptr); + } + + // Get command allocator from pool with fence tracking when an iglDevice is available. + igl::d3d12::ComPtr cmdAlloc; + if (iglDevice_) { + cmdAlloc = iglDevice_->getUploadCommandAllocator(); + if (!cmdAlloc.Get()) { + return Result(Result::Code::RuntimeError, "Failed to get command allocator from pool"); + } + } else { + // Fallback for textures created without Device* (shouldn't happen in normal flow) + hr = device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(cmdAlloc.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create command allocator"); + } + } + + igl::d3d12::ComPtr cmdList; + hr = device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, cmdAlloc.Get(), nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())); + if (FAILED(hr)) { + if (iglDevice_) { + // Return allocator to pool with fence value 0 (immediately available) + iglDevice_->returnUploadCommandAllocator(cmdAlloc, 0); + } + return Result(Result::Code::RuntimeError, "Failed to create command list"); + } + + // Record all copy commands + layoutIdx = 0; + for (uint32_t mipOffset = 0; mipOffset < 
numMipsToUpload; ++mipOffset) { + const uint32_t currentMip = baseMip + mipOffset; + const uint32_t mipWidth = std::max(width >> currentMip, 1u); + const uint32_t mipHeight = std::max(height >> currentMip, 1u); + const uint32_t mipDepth = std::max(depth >> currentMip, 1u); + + for (uint32_t sliceOffset = 0; sliceOffset < numSlicesToUpload; ++sliceOffset) { + const uint32_t currentSlice = baseSlice + sliceOffset; + const uint32_t subresource = calcSubresourceIndex(currentMip, currentSlice); + + // const_cast needed because upload is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->transitionTo(cmdList.Get(), D3D12_RESOURCE_STATE_COPY_DEST, currentMip, currentSlice); + + D3D12_TEXTURE_COPY_LOCATION dst = {}; + dst.pResource = resource_.Get(); + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = subresource; + + if (type_ == TextureType::Cube) { + IGL_D3D12_LOG_VERBOSE("CopyTextureRegion: Copying to CUBE subresource=%u (mip=%u, slice=%u)\n", + subresource, currentMip, currentSlice); + } + + D3D12_TEXTURE_COPY_LOCATION src = {}; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + + // Use ring buffer or temporary staging buffer + if (useRingBuffer) { + src.pResource = ringBuffer->getUploadHeap(); + // Adjust layout offset to account for ring buffer base offset + D3D12_PLACED_SUBRESOURCE_FOOTPRINT adjustedLayout = layouts[layoutIdx]; + adjustedLayout.Offset += stagingBaseOffset; + src.PlacedFootprint = adjustedLayout; + } else { + src.pResource = stagingBuffer.Get(); + src.PlacedFootprint = layouts[layoutIdx]; + } + layoutIdx++; + + // For block-compressed formats like BC7, CopyTextureRegion requires the + // source box to be aligned to block boundaries. Small mips (e.g. 2x2) + // violate this if we specify an explicit box in texel units. Since the + // staging layout already matches the subresource footprint, simply copy + // the entire subresource by passing a null box for BC7. 
+ if (isBC7) { + cmdList->CopyTextureRegion(&dst, range.x, range.y, range.z, &src, nullptr); + } else { + D3D12_BOX srcBox = {0, 0, 0, mipWidth, mipHeight, mipDepth}; + cmdList->CopyTextureRegion(&dst, range.x, range.y, range.z, &src, &srcBox); + } + + // const_cast needed (see above) + const_cast(this)->transitionTo(cmdList.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, currentMip, currentSlice); + } + } + + cmdList->Close(); + + // Execute once and wait once + ID3D12CommandList* cmdLists[] = {cmdList.Get()}; + queue_->ExecuteCommandLists(1, cmdLists); + + // Use upload fence for command allocator synchronization. + // Use pre-allocated uploadFenceValue (already incremented for ring buffer). + if (iglDevice_) { + ID3D12Fence* uploadFence = iglDevice_->getUploadFence(); + + hr = queue_->Signal(uploadFence, uploadFenceValue); + if (FAILED(hr)) { + IGL_LOG_ERROR("Texture::upload: Failed to signal upload fence: 0x%08X\n", hr); + // Return allocator with 0 to avoid blocking the pool + iglDevice_->returnUploadCommandAllocator(cmdAlloc, 0); + return Result(Result::Code::RuntimeError, "Failed to signal fence"); + } + + // Return allocator to pool with fence value (will be reused after the fence is signaled). + iglDevice_->returnUploadCommandAllocator(cmdAlloc, uploadFenceValue); + + // Track staging buffer for async cleanup (no synchronous wait). + // Only track temporary staging buffers; ring buffer is persistent. + // Pass uploadFenceValue (already signaled above) to track with the correct fence. 
+ if (!useRingBuffer && stagingBuffer.Get()) { + iglDevice_->trackUploadBuffer(std::move(stagingBuffer), uploadFenceValue); + } + } else { + // Fallback for textures without iglDevice_ (shouldn't happen in normal flow) + // In this case, we need to wait synchronously since we can't track the buffer + igl::d3d12::ComPtr<ID3D12Fence> fence; + hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create fence"); + } + + queue_->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + return waitResult; // Propagate detailed timeout/setup error + } + } + + return Result(); +} + +Result Texture::uploadCube(const TextureRangeDesc& range, + TextureCubeFace face, + const void* data, + size_t bytesPerRow) const { + // Cube textures are stored as texture arrays with 6 slices (one per face). + // The upload() method already handles cube textures correctly when face/numFaces are set. 
+ + // Validate this is a cube texture + if (type_ != TextureType::Cube) { + return Result(Result::Code::ArgumentInvalid, "uploadCube called on non-cube texture"); + } + + // Create a modified range with the correct face index + TextureRangeDesc cubeRange = range; + cubeRange.face = static_cast<uint32_t>(face); // Convert TextureCubeFace enum to face index (0-5) + cubeRange.numFaces = 1; // Upload single face + + // Delegate to upload() which handles cube texture subresource indexing correctly + return upload(cubeRange, data, bytesPerRow); +} + +Result Texture::uploadInternal(TextureType type, + const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow, + const uint32_t* mipLevelBytes) const { + if (!(type == TextureType::TwoD || type == TextureType::TwoDArray || type == TextureType::ThreeD || type == TextureType::Cube)) { + return Result(Result::Code::Unimplemented, "Upload not implemented for this texture type"); + } + + // Delegate to upload() which now handles multi-mip, multi-layer, and cube textures natively + return upload(range, data, bytesPerRow); +} + +Dimensions Texture::getDimensions() const { + return dimensions_; +} + +uint32_t Texture::getNumLayers() const { + return static_cast<uint32_t>(numLayers_); +} + +TextureType Texture::getType() const { + return type_; +} + +TextureDesc::TextureUsage Texture::getUsage() const { + return usage_; +} + +uint32_t Texture::getSamples() const { + return static_cast<uint32_t>(samples_); +} + +uint32_t Texture::getNumMipLevels() const { + return static_cast<uint32_t>(numMipLevels_); +} + +uint64_t Texture::getTextureId() const { + return reinterpret_cast<uint64_t>(resource_.Get()); +} + +TextureFormat Texture::getFormat() const { + return format_; +} + +bool Texture::isRequiredGenerateMipmap() const { + return false; +} + +void Texture::generateMipmap(ICommandQueue& /*cmdQueue*/, const TextureRangeDesc* /*range*/) const { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdQueue) - START: numMips=%u\n", numMipLevels_); + + if (!device_ || !queue_ || 
!resource_.Get() || numMipLevels_ < 2) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Skipping: device=%p queue=%p resource=%p numMips=%u\n", + device_, queue_, resource_.Get(), numMipLevels_); + return; + } + + D3D12_RESOURCE_DESC resourceDesc = resource_->GetDesc(); + + // Only support 2D textures for mipmap generation + if (resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Skipping: only 2D textures supported (dimension=%d)\n", + (int)resourceDesc.Dimension); + return; + } + + // Skip depth/stencil textures entirely. The current D3D12 mipmap path only + // supports color render-target textures; attempting to add ALLOW_RENDER_TARGET + // to a depth/stencil resource would violate D3D12's flag rules. + if (resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { + IGL_D3D12_LOG_VERBOSE( + "Texture::generateMipmap() - Skipping: depth/stencil textures are not " + "handled by this mipmap path (Flags=0x%08X)\n", + resourceDesc.Flags); + return; + } + + // If texture wasn't created with a render-target-capable flag, skip mipmap + // generation gracefully on D3D12. The current implementation only supports + // color 2D textures with ALLOW_RENDER_TARGET; depth/stencil and other usage + // patterns rely on backend-specific paths or pre-generated mips. 
+ if (!(resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Recreating texture with RENDER_TARGET flag for mipmap generation\n"); + + // Save current resource using ComPtr for automatic reference counting + // Note: ComPtr copy is deleted, so we manually AddRef and Attach + ID3D12Resource* rawOldResource = resource_.Get(); + if (rawOldResource) { + rawOldResource->AddRef(); + } + igl::d3d12::ComPtr oldResource; + oldResource.Attach(rawOldResource); + + // Modify descriptor to add RENDER_TARGET flag + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + // Create new resource with RENDER_TARGET flag + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_CLEAR_VALUE clearValue = {}; + clearValue.Format = resourceDesc.Format; + + igl::d3d12::ComPtr newResource; + HRESULT hr = device_->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + &clearValue, + IID_PPV_ARGS(newResource.GetAddressOf())); + + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE( + "Texture::generateMipmap() - Skipping: failed to recreate texture with " + "RENDER_TARGET flag (HRESULT=0x%08X)\n", + static_cast(hr)); + return; + } + + // Copy mip 0 from old resource to new resource + igl::d3d12::ComPtr copyAlloc; + igl::d3d12::ComPtr copyList; + if (FAILED(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(copyAlloc.GetAddressOf())))) { + IGL_LOG_ERROR("Texture::generateMipmap() - Failed to create copy command allocator\n"); + return; + } + if (FAILED(device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, copyAlloc.Get(), nullptr, IID_PPV_ARGS(copyList.GetAddressOf())))) { + IGL_LOG_ERROR("Texture::generateMipmap() - Failed to create copy command list\n"); + return; + } + + // Transition old resource to COPY_SOURCE + D3D12_RESOURCE_BARRIER barrierOld = {}; + barrierOld.Type = 
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrierOld.Transition.pResource = oldResource.Get(); + barrierOld.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + barrierOld.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrierOld.Transition.Subresource = 0; + copyList->ResourceBarrier(1, &barrierOld); + + // Copy mip 0 + D3D12_TEXTURE_COPY_LOCATION srcLoc = {}; + srcLoc.pResource = oldResource.Get(); + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = 0; + + D3D12_TEXTURE_COPY_LOCATION dstLoc = {}; + dstLoc.pResource = newResource.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLoc.SubresourceIndex = 0; + + copyList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); + + // Transition the entire new resource to PIXEL_SHADER_RESOURCE for mipmap + // generation. The resource was created in COPY_DEST; only mip 0 was + // written by the copy above, but all mips will be consumed as SRVs/RTVs + // in the subsequent fullscreen-blit loop. Using ALL_SUBRESOURCES here + // ensures the debug layer's notion of the initial state matches our + // state tracking for every subresource (mip >= 1 included). 
+ D3D12_RESOURCE_BARRIER barrierNew = {}; + barrierNew.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrierNew.Transition.pResource = newResource.Get(); + barrierNew.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrierNew.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + barrierNew.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + copyList->ResourceBarrier(1, &barrierNew); + + copyList->Close(); + ID3D12CommandList* copyLists[] = {copyList.Get()}; + queue_->ExecuteCommandLists(1, copyLists); + + // Wait for copy to complete + igl::d3d12::ComPtr copyFence; + if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(copyFence.GetAddressOf())))) { + IGL_LOG_ERROR("Texture::generateMipmap() - Failed to create copy fence\n"); + return; + } + queue_->Signal(copyFence.Get(), 1); + + FenceWaiter waiter(copyFence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("Texture::generateMipmap() - Fence wait failed: %s\n", + waitResult.message.c_str()); + return; + } + + // oldResource will be automatically released by ComPtr destructor + + // Replace resource with new one (need const_cast since function is const) + auto& mutableResource = const_cast&>(resource_); + mutableResource.Reset(); + mutableResource = std::move(newResource); + + // Update state tracking for new resource - all mips are now in PIXEL_SHADER_RESOURCE + // const_cast needed because generateMipmap is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->initializeStateTracking(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + // Update resourceDesc for the rest of the function + resourceDesc = resource_->GetDesc(); + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Texture recreated successfully\n"); + } + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Proceeding with mipmap generation\n"); + + // Use pre-compiled shaders from Device instead 
of runtime compilation. + // Note: iglDevice_ should always be set in normal flow (see Texture::createFromResource) + // This check is defensive; if it triggers, it indicates a texture creation path that bypassed proper initialization + if (!iglDevice_) { + IGL_LOG_ERROR("Texture::generateMipmap() - No IGL device available (texture not properly initialized)\n"); + IGL_LOG_ERROR(" This is a programming error: textures must be created via Device methods to support mipmap generation\n"); + return; + } + + const auto& vsBytecode = iglDevice_->getMipmapVSBytecode(); + const auto& psBytecode = iglDevice_->getMipmapPSBytecode(); + ID3D12RootSignature* rootSig = iglDevice_->getMipmapRootSignature(); + + // Validate pre-compiled shaders are available + // This can fail if device initialization encountered DXC errors + if (vsBytecode.empty() || psBytecode.empty() || !rootSig) { + IGL_LOG_ERROR("Texture::generateMipmap() - Pre-compiled mipmap shaders unavailable\n"); + IGL_LOG_ERROR(" Device may not support mipmap generation (check Device initialization logs for DXC errors)\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Using pre-compiled shaders (%zu bytes VS, %zu bytes PS)\n", + vsBytecode.size(), psBytecode.size()); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso = {}; + pso.pRootSignature = rootSig; + pso.VS = {vsBytecode.data(), vsBytecode.size()}; + pso.PS = {psBytecode.data(), psBytecode.size()}; + pso.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + pso.RasterizerState.DepthClipEnable = TRUE; + pso.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + pso.SampleMask = UINT_MAX; + pso.SampleDesc.Count = 1; + pso.NumRenderTargets = 1; + pso.RTVFormats[0] = resourceDesc.Format; + pso.DSVFormat = DXGI_FORMAT_UNKNOWN; + + igl::d3d12::ComPtr psoObj; + if 
(FAILED(device_->CreateGraphicsPipelineState(&pso, IID_PPV_ARGS(psoObj.GetAddressOf())))) { + return; + } + + // Create descriptor heap large enough for all mip levels + // We need one SRV descriptor per mip level (numMipLevels_ - 1 blits) + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.NumDescriptors = numMipLevels_ - 1; // One SRV per source mip level + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + igl::d3d12::ComPtr srvHeap; + if (FAILED(device_->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(srvHeap.GetAddressOf())))) return; + + D3D12_DESCRIPTOR_HEAP_DESC smpHeapDesc = {}; + smpHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + smpHeapDesc.NumDescriptors = 1; + smpHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + igl::d3d12::ComPtr smpHeap; + if (FAILED(device_->CreateDescriptorHeap(&smpHeapDesc, IID_PPV_ARGS(smpHeap.GetAddressOf())))) return; + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateSampler"); + IGL_DEBUG_ASSERT(smpHeap.Get() != nullptr, "Sampler heap is null"); + + // Fixed sampler + D3D12_SAMPLER_DESC samp = {}; + samp.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samp.AddressU = samp.AddressV = samp.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samp.MinLOD = 0; samp.MaxLOD = D3D12_FLOAT32_MAX; + + D3D12_CPU_DESCRIPTOR_HANDLE smpHandle = smpHeap->GetCPUDescriptorHandleForHeapStart(); + IGL_DEBUG_ASSERT(smpHandle.ptr != 0, "Sampler descriptor handle is invalid"); + device_->CreateSampler(&samp, smpHandle); + + igl::d3d12::ComPtr<ID3D12CommandAllocator> alloc; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> list; + if (FAILED(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(alloc.GetAddressOf())))) return; + if (FAILED(device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, alloc.Get(), psoObj.Get(), IID_PPV_ARGS(list.GetAddressOf())))) return; + + ID3D12DescriptorHeap* heaps[] = {srvHeap.Get(), smpHeap.Get()}; + list->SetDescriptorHeaps(2, heaps); + list->SetPipelineState(psoObj.Get()); + list->SetGraphicsRootSignature(rootSig); + list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // Get descriptor size for incrementing through the heap + const UINT srvDescriptorSize = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE srvCpuStart = srvHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE srvGpuStart = srvHeap->GetGPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE smpGpu = smpHeap->GetGPUDescriptorHandleForHeapStart(); + + // Create single RTV descriptor heap outside the loop (reused for all mip levels) + D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.NumDescriptors = 1; + igl::d3d12::ComPtr<ID3D12DescriptorHeap> rtvHeap; + if (FAILED(device_->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(rtvHeap.GetAddressOf())))) return; + 
D3D12_CPU_DESCRIPTOR_HANDLE rtvCpu = rtvHeap->GetCPUDescriptorHandleForHeapStart(); + + // Ensure mip 0 is in PIXEL_SHADER_RESOURCE state for first SRV read + // const_cast needed because generateMipmap is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, 0, 0); + + for (UINT mip = 0; mip + 1 < numMipLevels_; ++mip) { + // Calculate descriptor handle for this mip level + D3D12_CPU_DESCRIPTOR_HANDLE srvCpu = srvCpuStart; + srvCpu.ptr += mip * srvDescriptorSize; + D3D12_GPU_DESCRIPTOR_HANDLE srvGpu = srvGpuStart; + srvGpu.ptr += mip * srvDescriptorSize; + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(srvCpu.ptr != 0, "SRV descriptor handle is invalid"); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.Format = resourceDesc.Format; + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Texture2D.MostDetailedMip = mip; + srv.Texture2D.MipLevels = 1; + device_->CreateShaderResourceView(resource_.Get(), &srv, srvCpu); + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(rtvCpu.ptr != 0, "RTV descriptor handle is invalid"); + + D3D12_RENDER_TARGET_VIEW_DESC rtv = {}; + rtv.Format = resourceDesc.Format; + rtv.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtv.Texture2D.MipSlice = mip + 1; + + // Reuse the same RTV heap by recreating the view for each mip level + device_->CreateRenderTargetView(resource_.Get(), &rtv, rtvCpu); + + // Transition mip level to render target using state tracking + // const_cast needed (see above). + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, mip + 1, 0); + + list->OMSetRenderTargets(1, &rtvCpu, FALSE, nullptr); + const UINT w = std::max(1u, (UINT)(resourceDesc.Width >> (mip + 1))); + const UINT h = std::max(1u, (UINT)(resourceDesc.Height >> (mip + 1))); + D3D12_VIEWPORT vp{0.0f, 0.0f, (FLOAT)w, (FLOAT)h, 0.0f, 1.0f}; + D3D12_RECT sc{0, 0, (LONG)w, (LONG)h}; + list->RSSetViewports(1, &vp); + list->RSSetScissorRects(1, &sc); + + list->SetGraphicsRootDescriptorTable(0, srvGpu); + list->SetGraphicsRootDescriptorTable(1, smpGpu); + list->DrawInstanced(3, 1, 0, 0); + + // Transition mip level to shader resource for next iteration + // const_cast needed (see above). 
+ const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, mip + 1, 0); + } + + list->Close(); + ID3D12CommandList* lists[] = {list.Get()}; + queue_->ExecuteCommandLists(1, lists); + + igl::d3d12::ComPtr fence; + if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())))) return; + queue_->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("Texture::generateMipmap() - Fence wait failed: %s\n", + waitResult.message.c_str()); + } +} + +void Texture::generateMipmap(ICommandBuffer& /*cmdBuffer*/, const TextureRangeDesc* /*range*/) const { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - START: numMips=%u\n", numMipLevels_); + + if (!device_ || !queue_ || !resource_.Get() || numMipLevels_ < 2) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Skipping: device=%p queue=%p resource=%p numMips=%u\n", + device_, queue_, resource_.Get(), numMipLevels_); + return; + } + + D3D12_RESOURCE_DESC resourceDesc = resource_->GetDesc(); + + // Only support 2D textures for mipmap generation + if (resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Skipping: only 2D textures supported\n"); + return; + } + + // Check if texture was created with RENDER_TARGET flag (required for mipmap generation) + if (!(resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Skipping: texture not created with RENDER_TARGET usage\n"); + IGL_D3D12_LOG_VERBOSE(" To enable mipmap generation, create texture with TextureDesc::TextureUsageBits::Attachment\n"); + return; + } + + // Use pre-compiled shaders from Device instead of runtime compilation. 
+ // Note: iglDevice_ should always be set in normal flow (see Texture::createFromResource) + // This check is defensive; if it triggers, it indicates a texture creation path that bypassed proper initialization + if (!iglDevice_) { + IGL_LOG_ERROR("Texture::generateMipmap(cmdBuffer) - No IGL device available (texture not properly initialized)\n"); + IGL_LOG_ERROR(" This is a programming error: textures must be created via Device methods to support mipmap generation\n"); + return; + } + + const auto& vsBytecode = iglDevice_->getMipmapVSBytecode(); + const auto& psBytecode = iglDevice_->getMipmapPSBytecode(); + ID3D12RootSignature* rootSig = iglDevice_->getMipmapRootSignature(); + + // Validate pre-compiled shaders are available + // This can fail if device initialization encountered DXC errors + if (vsBytecode.empty() || psBytecode.empty() || !rootSig) { + IGL_LOG_ERROR("Texture::generateMipmap(cmdBuffer) - Pre-compiled mipmap shaders unavailable\n"); + IGL_LOG_ERROR(" Device may not support mipmap generation (check Device initialization logs for DXC errors)\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Using pre-compiled shaders (%zu bytes VS, %zu bytes PS)\n", + vsBytecode.size(), psBytecode.size()); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso = {}; + pso.pRootSignature = rootSig; + pso.VS = {vsBytecode.data(), vsBytecode.size()}; + pso.PS = {psBytecode.data(), psBytecode.size()}; + pso.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + pso.RasterizerState.DepthClipEnable = TRUE; + pso.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + pso.SampleMask = UINT_MAX; + pso.SampleDesc.Count = 1; + pso.NumRenderTargets = 1; + pso.RTVFormats[0] = resourceDesc.Format; + pso.DSVFormat = DXGI_FORMAT_UNKNOWN; + igl::d3d12::ComPtr psoObj; + if 
(FAILED(device_->CreateGraphicsPipelineState(&pso, IID_PPV_ARGS(psoObj.GetAddressOf())))) return; + // Create descriptor heap large enough for all mip levels + // We need one SRV descriptor per mip level (numMipLevels_ - 1 blits) + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.NumDescriptors = numMipLevels_ - 1; // One SRV per source mip level + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + igl::d3d12::ComPtr srvHeap; + if (FAILED(device_->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(srvHeap.GetAddressOf())))) return; + D3D12_DESCRIPTOR_HEAP_DESC smpHeapDesc = {}; + smpHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + smpHeapDesc.NumDescriptors = 1; + smpHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + + igl::d3d12::ComPtr smpHeap; + if (FAILED(device_->CreateDescriptorHeap(&smpHeapDesc, IID_PPV_ARGS(smpHeap.GetAddressOf())))) return; + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateSampler"); + IGL_DEBUG_ASSERT(smpHeap.Get() != nullptr, "Sampler heap is null"); + + D3D12_SAMPLER_DESC samp = {}; + samp.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samp.AddressU = samp.AddressV = samp.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samp.MinLOD = 0; samp.MaxLOD = D3D12_FLOAT32_MAX; + + D3D12_CPU_DESCRIPTOR_HANDLE smpHandle = smpHeap->GetCPUDescriptorHandleForHeapStart(); + IGL_DEBUG_ASSERT(smpHandle.ptr != 0, "Sampler descriptor handle is invalid"); + device_->CreateSampler(&samp, smpHandle); + igl::d3d12::ComPtr alloc; + igl::d3d12::ComPtr list; + if (FAILED(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(alloc.GetAddressOf())))) return; + if (FAILED(device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, alloc.Get(), psoObj.Get(), IID_PPV_ARGS(list.GetAddressOf())))) return; + ID3D12DescriptorHeap* heaps[] = {srvHeap.Get(), smpHeap.Get()}; + list->SetDescriptorHeaps(2, 
heaps); + list->SetPipelineState(psoObj.Get()); + list->SetGraphicsRootSignature(rootSig); + list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + // Get descriptor size for incrementing through the heap + const UINT srvDescriptorSize = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE srvCpuStart = srvHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE srvGpuStart = srvHeap->GetGPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE smpGpu = smpHeap->GetGPUDescriptorHandleForHeapStart(); + + // Create single RTV descriptor heap outside the loop (reused for all mip levels) + D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.NumDescriptors = 1; + igl::d3d12::ComPtr rtvHeap; + if (FAILED(device_->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(rtvHeap.GetAddressOf())))) return; + D3D12_CPU_DESCRIPTOR_HANDLE rtvCpu = rtvHeap->GetCPUDescriptorHandleForHeapStart(); + + // Ensure mip 0 is in PIXEL_SHADER_RESOURCE state for first SRV read + // const_cast needed because generateMipmap is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, 0, 0); + + for (UINT mip = 0; mip + 1 < numMipLevels_; ++mip) { + // Calculate descriptor handle for this mip level + D3D12_CPU_DESCRIPTOR_HANDLE srvCpu = srvCpuStart; + srvCpu.ptr += mip * srvDescriptorSize; + D3D12_GPU_DESCRIPTOR_HANDLE srvGpu = srvGpuStart; + srvGpu.ptr += mip * srvDescriptorSize; + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(srvCpu.ptr != 0, "SRV descriptor handle is invalid"); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.Format = resourceDesc.Format; + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Texture2D.MostDetailedMip = mip; + srv.Texture2D.MipLevels = 1; + device_->CreateShaderResourceView(resource_.Get(), &srv, srvCpu); + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(rtvCpu.ptr != 0, "RTV descriptor handle is invalid"); + + D3D12_RENDER_TARGET_VIEW_DESC rtv = {}; + rtv.Format = resourceDesc.Format; + rtv.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtv.Texture2D.MipSlice = mip + 1; + + // Reuse the same RTV heap by recreating the view for each mip level + device_->CreateRenderTargetView(resource_.Get(), &rtv, rtvCpu); + + // Transition mip level to render target using state tracking + // const_cast needed (see above). 
+ const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, mip + 1, 0); + + list->OMSetRenderTargets(1, &rtvCpu, FALSE, nullptr); + const UINT w = std::max(1u, (UINT)(resourceDesc.Width >> (mip + 1))); + const UINT h = std::max(1u, (UINT)(resourceDesc.Height >> (mip + 1))); + D3D12_VIEWPORT vp{0.0f, 0.0f, (FLOAT)w, (FLOAT)h, 0.0f, 1.0f}; + D3D12_RECT sc{0, 0, (LONG)w, (LONG)h}; + list->RSSetViewports(1, &vp); + list->RSSetScissorRects(1, &sc); + list->SetGraphicsRootDescriptorTable(0, srvGpu); + list->SetGraphicsRootDescriptorTable(1, smpGpu); + list->DrawInstanced(3, 1, 0, 0); + + // Transition mip level to shader resource for next iteration + // const_cast needed (see above). + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, mip + 1, 0); + } + list->Close(); + ID3D12CommandList* lists[] = {list.Get()}; + queue_->ExecuteCommandLists(1, lists); + igl::d3d12::ComPtr fence; + if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())))) return; + queue_->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("Texture::generateMipmap(cmdBuffer) - Fence wait failed: %s\n", + waitResult.message.c_str()); + } +} + +void Texture::initializeStateTracking(D3D12_RESOURCE_STATES initialState) { + // Simplified per-subresource state tracking: always use a vector (no dual-mode). 
+ if (!resource_.Get()) { + subresourceStates_.clear(); + return; + } + + const uint32_t mipLevels = static_cast(std::max(numMipLevels_, 1)); + uint32_t arraySize; + if (type_ == TextureType::ThreeD) { + arraySize = 1u; + } else if (type_ == TextureType::Cube) { + arraySize = static_cast(std::max(numLayers_, 1)) * 6u; + } else { + arraySize = static_cast(std::max(numLayers_, 1)); + } + const size_t numSubresources = static_cast(mipLevels) * arraySize; + subresourceStates_.assign(numSubresources, initialState); +} + +uint32_t Texture::calcSubresourceIndex(uint32_t mipLevel, uint32_t layer) const { + // For views, map view-local coordinates to resource coordinates. + // Note: mipLevelOffset_ and arraySliceOffset_ are resource-relative (accumulated at view creation for nested views). + const uint32_t resourceMip = isView_ ? (mipLevel + mipLevelOffset_) : mipLevel; + const uint32_t resourceLayer = isView_ ? (layer + arraySliceOffset_) : layer; + + // Use state owner's dimensions for subresource calculation + const Texture* owner = getStateOwner(); + IGL_DEBUG_ASSERT(owner != nullptr, "State owner must not be null"); + const uint32_t mipLevels = static_cast(std::max(owner->numMipLevels_, 1)); + uint32_t arraySize; + if (owner->type_ == TextureType::ThreeD) { + arraySize = 1u; + } else if (owner->type_ == TextureType::Cube) { + // Cube textures: 6 faces per layer + arraySize = static_cast(std::max(owner->numLayers_, 1)) * 6u; + } else { + arraySize = static_cast(std::max(owner->numLayers_, 1)); + } + const uint32_t clampedMip = std::min(resourceMip, mipLevels - 1); + const uint32_t clampedLayer = std::min(resourceLayer, arraySize - 1); + // D3D12CalcSubresource formula: MipSlice + (ArraySlice * MipLevels) + const uint32_t subresource = clampedMip + (clampedLayer * mipLevels); +#ifdef IGL_DEBUG + // Reduce log verbosity - only log in debug builds for views + if ((type_ == TextureType::Cube || type_ == TextureType::TwoDArray) && isView_) { + 
IGL_D3D12_LOG_VERBOSE("calcSubresourceIndex (view): type=%d, mip=%u, layer=%u -> resource mip=%u, layer=%u -> subresource=%u\n", + (int)type_, mipLevel, layer, resourceMip, resourceLayer, subresource); + } +#endif + return subresource; +} + +void Texture::transitionTo(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState, + uint32_t mipLevel, + uint32_t layer) { + // Simplified per-subresource state tracking. + Texture* owner = getStateOwner(); + if (!commandList || !owner || !owner->resource_.Get() || owner->subresourceStates_.empty()) { + return; + } + + // For depth-stencil textures, transition all subresources (both depth and stencil planes). + const auto props = getProperties(); + const bool isDepthStencil = + props.isDepthOrStencil() && + (props.hasStencil() || format_ == TextureFormat::Z_UNorm24); + + if (isDepthStencil) { + // Verify all subresources are in the same state before using ALL_SUBRESOURCES. + D3D12_RESOURCE_STATES firstState = owner->subresourceStates_[0]; + bool allSameState = true; + for (const auto& state : owner->subresourceStates_) { + if (state != firstState) { + allSameState = false; + IGL_LOG_ERROR("Depth-stencil texture has divergent subresource states - this violates invariant\n"); + break; + } + } + + if (firstState == newState) { + return; // All subresources already in target state + } + + // Safety check: If states have diverged, return early to avoid invalid ALL_SUBRESOURCES barrier. 
+ if (!allSameState) { + IGL_DEBUG_ASSERT(false, "Depth-stencil textures must have uniform state across all subresources"); + return; // Intentionally skip transition to avoid undefined behavior + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = firstState; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + // Update all subresource states + for (auto& state : owner->subresourceStates_) { + state = newState; + } + return; + } + + // Non-depth-stencil: transition single subresource + const uint32_t subresource = calcSubresourceIndex(mipLevel, layer); + if (subresource >= owner->subresourceStates_.size()) { + return; + } + + auto& currentState = owner->subresourceStates_[subresource]; + if (currentState == newState) { + return; + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = subresource; + barrier.Transition.StateBefore = currentState; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + currentState = newState; +} + +void Texture::transitionAll(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState) { + // Simplified per-subresource state tracking. + Texture* owner = getStateOwner(); + if (!commandList || !owner || !owner->resource_.Get() || owner->subresourceStates_.empty()) { + return; + } + + // For depth-stencil textures (multi-plane in D3D12), keep all planes and + // mips in a single coherent state by using an ALL_SUBRESOURCES barrier. 
+ // This avoids mismatches like depth in DEPTH_WRITE while stencil (plane 1) + // remains in COMMON/PRESENT, which triggers the D3D12 debug error + // INVALID_SUBRESOURCE_STATE on ClearDepthStencilView. + const auto props = getProperties(); + const bool isDepthStencil = + props.isDepthOrStencil() && + (props.hasStencil() || format_ == TextureFormat::Z_UNorm24); + + if (isDepthStencil) { + D3D12_RESOURCE_STATES firstState = owner->subresourceStates_[0]; + bool allSameState = true; + for (const auto& state : owner->subresourceStates_) { + if (state != firstState) { + allSameState = false; + IGL_LOG_ERROR( + "Texture::transitionAll - depth-stencil texture has divergent subresource states; " + "expected uniform state before ALL_SUBRESOURCES barrier\n"); + break; + } + } + + if (firstState == newState) { + // All subresources (planes/mips) already in the requested state. + return; + } + + if (!allSameState) { + // Safety: avoid issuing an ALL_SUBRESOURCES barrier with inconsistent + // tracking; this would make our internal state unreliable. 
+ IGL_DEBUG_ASSERT( + false, + "Texture::transitionAll - depth-stencil textures must have uniform state across all " + "subresources before ALL_SUBRESOURCES transition"); + return; + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = firstState; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + for (auto& state : owner->subresourceStates_) { + state = newState; + } + return; + } + + // Check if all subresources are already in the target state + bool allMatch = true; + for (const auto& state : owner->subresourceStates_) { + if (state != newState) { + allMatch = false; + break; + } + } + if (allMatch) { + return; + } + + // Transition each subresource individually + for (size_t i = 0; i < owner->subresourceStates_.size(); ++i) { + auto& state = owner->subresourceStates_[i]; + if (state == newState) { + continue; + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = static_cast(i); + barrier.Transition.StateBefore = state; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + state = newState; + } +} + +D3D12_RESOURCE_STATES Texture::getSubresourceState(uint32_t mipLevel, uint32_t layer) const { + // Simplified per-subresource state tracking. 
+ const Texture* owner = getStateOwner(); + if (owner->subresourceStates_.empty()) { + return D3D12_RESOURCE_STATE_COMMON; + } + + const uint32_t index = calcSubresourceIndex(mipLevel, layer); + if (index >= owner->subresourceStates_.size()) { + return D3D12_RESOURCE_STATE_COMMON; + } + + return owner->subresourceStates_[index]; +} + +} // namespace igl::d3d12 + + diff --git a/src/igl/d3d12/Texture.h b/src/igl/d3d12/Texture.h new file mode 100644 index 0000000000..a2589659a6 --- /dev/null +++ b/src/igl/d3d12/Texture.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Texture final : public ITexture { + public: + Texture() : ITexture(TextureFormat::Invalid), format_(TextureFormat::Invalid) {} + explicit Texture(TextureFormat format) : ITexture(format), format_(format) {} + + // Explicit destructor to free descriptor heap slots. 
+ ~Texture() override; + + // Factory method to create texture from existing D3D12 resource + static std::shared_ptr createFromResource( + ID3D12Resource* resource, + TextureFormat format, + const TextureDesc& desc, + ID3D12Device* device = nullptr, + ID3D12CommandQueue* queue = nullptr, + D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON, + class Device* iglDevice = nullptr); + + // Factory method to create texture view from parent texture + static std::shared_ptr createTextureView( + std::shared_ptr parent, + const TextureViewDesc& desc); + + // D3D12-specific upload methods (not part of ITexture interface) + Result upload(const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow = 0) const; + Result uploadCube(const TextureRangeDesc& range, + TextureCubeFace face, + const void* data, + size_t bytesPerRow = 0) const; + + Dimensions getDimensions() const override; + uint32_t getNumLayers() const override; + TextureType getType() const override; + TextureDesc::TextureUsage getUsage() const override; + uint32_t getSamples() const override; + uint32_t getNumMipLevels() const override; + uint64_t getTextureId() const override; + bool isRequiredGenerateMipmap() const override; + + void generateMipmap(ICommandQueue& cmdQueue, + const TextureRangeDesc* IGL_NULLABLE range = nullptr) const override; + void generateMipmap(ICommandBuffer& cmdBuffer, + const TextureRangeDesc* IGL_NULLABLE range = nullptr) const override; + + // D3D12-specific accessors (not part of ITexture interface) + TextureFormat getFormat() const; + ID3D12Resource* getResource() const { return resource_.Get(); } + // State transition methods are non-const (state updates not allowed in const methods) + void transitionTo(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState, + uint32_t mipLevel = 0, + uint32_t layer = 0); + void transitionAll(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState); + D3D12_RESOURCE_STATES 
getSubresourceState(uint32_t mipLevel = 0, + uint32_t layer = 0) const; + + // Texture view support + bool isView() const { return isView_; } + uint32_t getMipLevelOffset() const { return mipLevelOffset_; } + uint32_t getNumMipLevelsInView() const { return numMipLevelsInView_; } + uint32_t getArraySliceOffset() const { return arraySliceOffset_; } + uint32_t getNumArraySlicesInView() const { return numArraySlicesInView_; } + + // Subresource calculation helper + uint32_t calcSubresourceIndex(uint32_t mipLevel, uint32_t layer) const; + + protected: + // Override the base class upload method + Result uploadInternal(TextureType type, + const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow = 0, + const uint32_t* mipLevelBytes = nullptr) const override; + + private: + igl::d3d12::ComPtr resource_; + ID3D12Device* device_ = nullptr; // Non-owning pointer + ID3D12CommandQueue* queue_ = nullptr; // Non-owning pointer. + class Device* iglDevice_ = nullptr; // Non-owning pointer to igl::d3d12::Device for upload operations. + TextureFormat format_; + Dimensions dimensions_{0, 0, 0}; + TextureType type_ = TextureType::TwoD; + size_t numLayers_ = 1; + size_t numMipLevels_ = 1; + size_t samples_ = 1; + TextureDesc::TextureUsage usage_ = 0; + void initializeStateTracking(D3D12_RESOURCE_STATES initialState); + + // Simplified per-subresource state tracking. + // Views delegate state tracking to their root texture; only root textures maintain state. + // Always uses a per-subresource vector for simplicity (no dual-mode complexity). 
+ std::vector subresourceStates_; + + // Helper to get the texture that owns state tracking (walks to root for nested views) + Texture* getStateOwner() { + Texture* owner = this; + while (owner->isView_ && owner->parentTexture_) { + owner = owner->parentTexture_.get(); + } + return owner; + } + const Texture* getStateOwner() const { + const Texture* owner = this; + while (owner->isView_ && owner->parentTexture_) { + owner = owner->parentTexture_.get(); + } + return owner; + } + + // Texture view support + bool isView_ = false; + std::shared_ptr parentTexture_; // For views, reference to parent + uint32_t mipLevelOffset_ = 0; // MostDetailedMip for SRV + uint32_t numMipLevelsInView_ = 0; // MipLevels for SRV + uint32_t arraySliceOffset_ = 0; // FirstArraySlice for SRV + uint32_t numArraySlicesInView_ = 0; // ArraySize for SRV + + // Descriptor indices for cleanup in destructor. + // These descriptors are allocated from DescriptorHeapManager and must be freed. + std::vector rtvIndices_; // RTV descriptors (one per mip level) + std::vector dsvIndices_; // DSV descriptors (for depth/stencil textures) + uint32_t srvIndex_ = UINT32_MAX; // SRV descriptor (UINT32_MAX = not allocated) +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/TextureCopyUtils.cpp b/src/igl/d3d12/TextureCopyUtils.cpp new file mode 100644 index 0000000000..b12768fb95 --- /dev/null +++ b/src/igl/d3d12/TextureCopyUtils.cpp @@ -0,0 +1,279 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace igl::d3d12::TextureCopyUtils { + +Result executeCopyTextureToBuffer(D3D12Context& ctx, + Device& iglDevice, + Texture& srcTex, + Buffer& dstBuf, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer) { + ID3D12Resource* srcRes = srcTex.getResource(); + ID3D12Resource* dstRes = dstBuf.getResource(); + + if (!srcRes || !dstRes) { + return Result{Result::Code::ArgumentInvalid, "Invalid source or destination resource"}; + } + + ID3D12Device* device = ctx.getDevice(); + + if (!device) { + return Result{Result::Code::RuntimeError, "Device is null"}; + } + + // Get texture description for GetCopyableFootprints + D3D12_RESOURCE_DESC srcDesc = srcRes->GetDesc(); + + // Calculate subresource index + const uint32_t subresource = srcTex.calcSubresourceIndex(mipLevel, layer); + + // Get copyable footprint for this subresource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout = {}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + + device->GetCopyableFootprints(&srcDesc, + subresource, + 1, + destinationOffset, + &layout, + &numRows, + &rowSizeInBytes, + &totalBytes); + + // Calculate the unpacked texture data size (without D3D12 padding) + // rowSizeInBytes is the unpadded row size, so we can use it directly + const UINT64 unpackedDataSize = rowSizeInBytes * numRows * layout.Footprint.Depth; + + // Check if destination buffer is large enough for the unpacked data + if (destinationOffset + unpackedDataSize > dstBuf.getSizeInBytes()) { + return Result{Result::Code::ArgumentOutOfRange, "Destination buffer too small"}; + } + + // Use centralized staging device for readback buffer allocation. 
+ auto* stagingDevice = iglDevice.getStagingDevice(); + if (!stagingDevice) { + return Result{Result::Code::RuntimeError, "Staging device not available"}; + } + + // Allocate readback staging buffer (D3D12 requires row-pitch alignment) + auto staging = stagingDevice->allocateReadback(layout.Offset + totalBytes); + if (!staging.valid) { + return Result{Result::Code::RuntimeError, "Failed to allocate readback staging buffer"}; + } + + ID3D12Resource* readbackBuffer = staging.buffer.Get(); + ID3D12Resource* copyDestination = readbackBuffer; + + // Use centralized immediate commands instead of creating transient allocator/list. + auto* immediateCommands = iglDevice.getImmediateCommands(); + if (!immediateCommands) { + return Result{Result::Code::RuntimeError, "Immediate commands not available"}; + } + + Result cmdResult; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&cmdResult); + if (!cmdList || !cmdResult.isOk()) { + return Result{Result::Code::RuntimeError, "Failed to begin immediate command list"}; + } + + // Get current texture state (for restoration after the copy) + const D3D12_RESOURCE_STATES srcStateBefore = srcTex.getSubresourceState(mipLevel, layer); + + // Transition texture to COPY_SOURCE using centralized state tracking so + // that subsequent transitions observe a consistent state across all + // command lists and avoid BEFORE/AFTER mismatches. 
+ srcTex.transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, layer); + + // Setup source texture copy location + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = srcRes; + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLocation.SubresourceIndex = subresource; + + // Setup destination buffer copy location + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = copyDestination; + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLocation.PlacedFootprint = layout; + + // Perform the copy + cmdList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition texture back to original state using the same tracking path. + srcTex.transitionTo(cmdList, srcStateBefore, mipLevel, layer); + + // Submit via immediate commands with synchronous wait. + Result submitResult; + const uint64_t fenceValue = immediateCommands->submit(true, &submitResult); + if (!submitResult.isOk() || fenceValue == 0) { + return Result{Result::Code::RuntimeError, + "Failed to submit immediate commands: " + submitResult.message}; + } + + // Copy from readback staging buffer to final destination + void* readbackData = nullptr; + // Map the readback buffer region containing the texture data + D3D12_RANGE readRange{static_cast(layout.Offset), + static_cast(layout.Offset + totalBytes)}; + + if (SUCCEEDED(readbackBuffer->Map(0, &readRange, &readbackData)) && readbackData) { + // Check if destination buffer is in DEFAULT heap (Storage buffers) + // We cannot call map() on DEFAULT heap buffers because Buffer::map() would + // create its own staging buffer and copy FROM (empty) DEFAULT buffer first + D3D12_HEAP_PROPERTIES heapProps; + dstRes->GetHeapProperties(&heapProps, nullptr); + const bool isDefaultHeap = (heapProps.Type == D3D12_HEAP_TYPE_DEFAULT); + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: Destination heap type = %d (1=DEFAULT, 2=UPLOAD, 3=READBACK), 
isDefaultHeap=%d\n", + heapProps.Type, isDefaultHeap); +#endif + + if (!isDefaultHeap) { + // Destination is CPU-mappable (UPLOAD/READBACK heap) - copy via CPU + // Copy row-by-row, removing D3D12's row pitch padding + Result mapResult; + void* dstData = dstBuf.map(BufferRange(unpackedDataSize, destinationOffset), &mapResult); + if (dstData && mapResult.isOk()) { + const uint8_t* src = static_cast(readbackData) + layout.Offset; + uint8_t* dst = static_cast(dstData); + const UINT64 srcRowPitch = layout.Footprint.RowPitch; + const UINT64 dstRowPitch = rowSizeInBytes; // Unpadded row size + + for (UINT z = 0; z < layout.Footprint.Depth; ++z) { + for (UINT row = 0; row < numRows; ++row) { + std::memcpy(dst, src, dstRowPitch); + src += srcRowPitch; + dst += dstRowPitch; + } + } + + dstBuf.unmap(); + } else { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to map destination buffer"}; + } + } else { + // Destination is NOT CPU-mappable (DEFAULT heap) - need GPU copy + // Create temporary UPLOAD buffer with unpacked data, then GPU copy to destination + D3D12_HEAP_PROPERTIES uploadHeap{}; + uploadHeap.Type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC uploadDesc{}; + uploadDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + uploadDesc.Width = unpackedDataSize; + uploadDesc.Height = 1; + uploadDesc.DepthOrArraySize = 1; + uploadDesc.MipLevels = 1; + uploadDesc.Format = DXGI_FORMAT_UNKNOWN; + uploadDesc.SampleDesc.Count = 1; + uploadDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + igl::d3d12::ComPtr uploadBuffer; + HRESULT hr = device->CreateCommittedResource(&uploadHeap, + D3D12_HEAP_FLAG_NONE, + &uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadBuffer.GetAddressOf())); + if (FAILED(hr)) { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to create upload buffer"}; + } + + // Map upload buffer and unpack data from readback + void* uploadData = nullptr; + if 
(SUCCEEDED(uploadBuffer->Map(0, nullptr, &uploadData)) && uploadData) { + const uint8_t* src = static_cast(readbackData) + layout.Offset; + uint8_t* dst = static_cast(uploadData); + const UINT64 srcRowPitch = layout.Footprint.RowPitch; + const UINT64 dstRowPitch = rowSizeInBytes; + + for (UINT z = 0; z < layout.Footprint.Depth; ++z) { + for (UINT row = 0; row < numRows; ++row) { + std::memcpy(dst, src, dstRowPitch); + src += srcRowPitch; + dst += dstRowPitch; + } + } + uploadBuffer->Unmap(0, nullptr); + + // GPU copy from upload buffer to destination DEFAULT buffer using immediate commands. + Result gpuCopyResult; + ID3D12GraphicsCommandList* copyList = immediateCommands->begin(&gpuCopyResult); + if (!copyList || !gpuCopyResult.isOk()) { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to begin immediate command list for GPU copy"}; + } + + // Transition destination buffer to COPY_DEST state + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = dstRes; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + copyList->ResourceBarrier(1, &barrier); + + // Copy unpacked data to destination +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: GPU copy %llu bytes from upload buffer to DEFAULT buffer at offset %llu\n", + unpackedDataSize, destinationOffset); +#endif + copyList->CopyBufferRegion(dstRes, destinationOffset, uploadBuffer.Get(), 0, unpackedDataSize); + + // Transition destination buffer back to UAV state (Storage buffer) + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + copyList->ResourceBarrier(1, &barrier); + + // Submit and wait for GPU copy. 
+ Result copySubmitResult; + const uint64_t copyFenceValue = immediateCommands->submit(true, ©SubmitResult); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: GPU copy complete!\n"); +#endif + if (!copySubmitResult.isOk() || copyFenceValue == 0) { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, + "Failed to submit GPU copy: " + copySubmitResult.message}; + } + } else { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to map upload buffer"}; + } + } + + readbackBuffer->Unmap(0, nullptr); + } else { + return Result{Result::Code::RuntimeError, "Failed to map readback buffer"}; + } + + // Return staging buffer to pool. + stagingDevice->free(staging, fenceValue); + + return Result{}; +} + +} // namespace igl::d3d12::TextureCopyUtils diff --git a/src/igl/d3d12/TextureCopyUtils.h b/src/igl/d3d12/TextureCopyUtils.h new file mode 100644 index 0000000000..d94a75c423 --- /dev/null +++ b/src/igl/d3d12/TextureCopyUtils.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +struct ID3D12Device; +struct ID3D12CommandQueue; +struct ID3D12Resource; + +namespace igl::d3d12 { + +class D3D12Context; +class Device; +class Texture; +class Buffer; + +namespace TextureCopyUtils { + +/** + * Executes a texture-to-buffer copy operation. + * Handles D3D12 row-pitch alignment, readback staging, and unpacking. 
+ * + * @param ctx D3D12 context for device/queue access + * @param iglDevice IGL device for command allocator pooling + * @param srcTex Source texture to copy from + * @param dstBuf Destination buffer to copy to + * @param destinationOffset Offset in bytes into destination buffer + * @param mipLevel Mipmap level to copy from source texture + * @param layer Array layer to copy from source texture + * @return Result indicating success or failure + */ +[[nodiscard]] Result executeCopyTextureToBuffer(D3D12Context& ctx, + Device& iglDevice, + Texture& srcTex, + Buffer& dstBuf, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer); + +} // namespace TextureCopyUtils +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Timer.cpp b/src/igl/d3d12/Timer.cpp new file mode 100644 index 0000000000..7c922a1ced --- /dev/null +++ b/src/igl/d3d12/Timer.cpp @@ -0,0 +1,234 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include + +namespace igl::d3d12 { + +Timer::Timer(const Device& device) { + auto& ctx = device.getD3D12Context(); + auto* d3dDevice = ctx.getDevice(); + auto* commandQueue = ctx.getCommandQueue(); + + // Query GPU timestamp frequency + // This returns the number of ticks per second for GPU timestamps + HRESULT hr = commandQueue->GetTimestampFrequency(×tampFrequency_); + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to get timestamp frequency (0x%08X). Timer disabled.\n", hr); + resourceCreationFailed_ = true; + timestampFrequency_ = 0; // Leave at 0 to indicate timer is disabled + return; + } + + // Create query heap for 2 timestamps (begin and end). + // Use D3D12_QUERY_HEAP_TYPE_TIMESTAMP for GPU timer queries. 
+ D3D12_QUERY_HEAP_DESC queryHeapDesc = {}; + queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + queryHeapDesc.Count = 2; // Begin and end timestamps + queryHeapDesc.NodeMask = 0; // Single GPU + + hr = d3dDevice->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(queryHeap_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to create query heap (0x%08X). Timer disabled.\n", hr); + resourceCreationFailed_ = true; + timestampFrequency_ = 0; + return; + } + + // Create readback buffer to hold query results + // Must use READBACK heap type for CPU access + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC resourceDesc = {}; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Alignment = 0; + resourceDesc.Width = 2 * sizeof(uint64_t); // Space for 2 timestamps + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + hr = d3dDevice->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, // Readback buffers must be in COPY_DEST state + nullptr, + IID_PPV_ARGS(readbackBuffer_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to create readback buffer (0x%08X). 
Timer disabled.\n", hr); + resourceCreationFailed_ = true; + timestampFrequency_ = 0; + queryHeap_.Reset(); // Clean up partially created resources + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("Timer: Created successfully (frequency: %llu Hz)\n", timestampFrequency_); +#endif +} + +Timer::~Timer() { + // ComPtr handles cleanup automatically +} + +void Timer::begin(ID3D12GraphicsCommandList* commandList) { + if (resourceCreationFailed_ || timestampFrequency_ == 0) { + // Timer disabled due to resource creation or frequency query failure - silently no-op + return; + } + + if (!commandList) { + IGL_LOG_ERROR("Timer::begin() called with null command list\n"); + return; + } + + // Record begin timestamp (index 0) at the start of GPU work. + // This is a bottom-of-pipe operation that samples when the GPU finishes preceding work. + commandList->EndQuery(queryHeap_.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 0); +} + +void Timer::end(ID3D12GraphicsCommandList* commandList, ID3D12Fence* fence, uint64_t fenceValue) { + if (resourceCreationFailed_ || timestampFrequency_ == 0) { + // Timer disabled - silently no-op + return; + } + + if (!commandList) { + IGL_LOG_ERROR("Timer::end() called with null command list\n"); + return; + } + + if (!fence) { + IGL_LOG_ERROR("Timer::end() called with null fence\n"); + return; + } + + if (ended_.load(std::memory_order_acquire)) { + IGL_LOG_ERROR("Timer::end() called multiple times\n"); + return; + } + + // Record end timestamp (index 1) at the end of GPU work. + // Bottom-of-pipe operation: samples when the GPU finishes all preceding work. + commandList->EndQuery(queryHeap_.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 1); + + // Resolve query data to the readback buffer. + // This GPU command copies timestamp values from the query heap to a CPU-readable buffer; + // the resolved data is only valid after the fence signals completion. 
+ commandList->ResolveQueryData( + queryHeap_.Get(), + D3D12_QUERY_TYPE_TIMESTAMP, + 0, // Start index + 2, // Count (begin + end) + readbackBuffer_.Get(), + 0 // Destination offset + ); + + // Store fence and fence value for later completion checking. + // Thread-safe: fence_ is written once; atomics ensure visibility. + fence_ = fence; + fenceValue_.store(fenceValue, std::memory_order_release); + ended_.store(true, std::memory_order_release); +} + +uint64_t Timer::getElapsedTimeNanos() const { + if (!readbackBuffer_.Get() || !ended_.load(std::memory_order_acquire)) { + return 0; + } + + // Check if the fence has signaled; results are only valid after GPU completes. + // Thread-safe: fence_ is set once before the ended_ flag, and memory ordering ensures visibility. + uint64_t fenceVal = fenceValue_.load(std::memory_order_acquire); + if (!fence_ || fence_->GetCompletedValue() < fenceVal) { + return 0; // GPU hasn't finished yet, return 0 + } + + // If we've already resolved and cached the result, return it. + // Thread-safe: resolved_ flag prevents multiple threads from mapping simultaneously. + if (resolved_.load(std::memory_order_acquire)) { + return cachedElapsedNanos_.load(std::memory_order_relaxed); + } + + // GPU has completed; it is now safe to read the query results. + // Map the readback buffer to read timestamp values. 
+ void* mappedData = nullptr; + D3D12_RANGE readRange{0, sizeof(uint64_t) * 2}; // Only read the 2 timestamps + HRESULT hr = readbackBuffer_->Map(0, &readRange, &mappedData); + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to map readback buffer: 0x%08X\n", hr); + return 0; + } + + // Read timestamp values + const auto* timestamps = static_cast(mappedData); + uint64_t beginTime = timestamps[0]; + uint64_t endTime = timestamps[1]; + + // Unmap buffer + D3D12_RANGE writeRange{0, 0}; // No writes + readbackBuffer_->Unmap(0, &writeRange); + + // Validate timestamp data + if (endTime <= beginTime) { +#ifdef IGL_DEBUG + IGL_LOG_ERROR("Timer: Invalid timestamp data (begin=%llu, end=%llu) - GPU work may not have executed\n", + beginTime, endTime); +#endif + return 0; + } + + if (timestampFrequency_ == 0) { +#ifdef IGL_DEBUG + IGL_LOG_ERROR("Timer: Invalid timestamp frequency (0 Hz) - timer disabled\n"); +#endif + return 0; + } + + // Calculate elapsed time in GPU ticks + uint64_t deltaTicks = endTime - beginTime; + + // Convert ticks to nanoseconds using floating-point math for accuracy, + // as recommended by Microsoft docs: nanoseconds = (ticks / frequency) * 1,000,000,000. + const double nanosPerSecond = 1000000000.0; + double elapsedNanos = (static_cast(deltaTicks) / static_cast(timestampFrequency_)) * nanosPerSecond; + + // Cache the result so we don't re-read from GPU. + // Thread-safe: store cached value before setting the resolved flag. + cachedElapsedNanos_.store(static_cast(elapsedNanos), std::memory_order_release); + resolved_.store(true, std::memory_order_release); + + return static_cast(elapsedNanos); +} + +bool Timer::resultsAvailable() const { + // Results are available only after the fence has signaled completion. + // This ensures we don't read uninitialized or garbage data from the query heap. + // Thread-safe: use atomic loads with proper memory ordering. 
+ if (!ended_.load(std::memory_order_acquire) || !fence_) { + return false; + } + + // Check if GPU has completed execution (fence signaled) + uint64_t fenceVal = fenceValue_.load(std::memory_order_acquire); + return fence_->GetCompletedValue() >= fenceVal; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Timer.h b/src/igl/d3d12/Timer.h new file mode 100644 index 0000000000..104f04d9e5 --- /dev/null +++ b/src/igl/d3d12/Timer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +/// @brief GPU timer implementation using D3D12 timestamp queries +/// @details Implements ITimer interface for D3D12 backend using query heaps. +/// +/// Lifecycle: +/// - Constructor creates query heap and readback buffer resources +/// - begin() called when command list is reset for recording (CommandBuffer::begin()) +/// - GPU work is encoded in the command list +/// - end() called during submission before command list is closed (CommandQueue::submit()) +/// - Query results are fence-synchronized and only read after GPU completes +/// +/// Cross-platform timestamp semantics +/// ---------------------------------- +/// All timestamps returned by getElapsedTimeNanos() are in nanoseconds, providing +/// cross-platform consistency with Vulkan and other backends. +/// +/// D3D12 GPU timestamps are automatically converted from hardware ticks to nanoseconds +/// using the GPU timestamp frequency (ID3D12CommandQueue::GetTimestampFrequency()). +/// +/// Formula: elapsedNanos = (endTicks - startTicks) * 1,000,000,000 / frequencyHz. +/// +/// This ensures consistent timing across all IGL backends regardless of hardware. 
+/// +/// The implementation measures GPU execution time via timestamp placement +/// and fence-synchronized readback, and is safe for cross-thread queries. +class Timer final : public ITimer { + public: + /// @brief Constructor - creates query heap and readback buffer, starts timer + /// @param device D3D12 device used to create resources + explicit Timer(const Device& device); + ~Timer() override; + + Timer(const Timer&) = delete; + Timer& operator=(const Timer&) = delete; + Timer(Timer&&) = delete; + Timer& operator=(Timer&&) = delete; + + /// @brief Record start timestamp in command list + /// @param commandList D3D12 command list to record start timestamp + void begin(ID3D12GraphicsCommandList* commandList); + + /// @brief Record end timestamp and associate with fence value + /// @param commandList D3D12 command list to record end timestamp and resolve queries + /// @param fence Fence to check for GPU completion + /// @param fenceValue Fence value that will be signaled when GPU completes + void end(ID3D12GraphicsCommandList* commandList, ID3D12Fence* fence, uint64_t fenceValue); + + /// @brief Returns elapsed GPU time in nanoseconds + /// @return Elapsed time in nanoseconds, or 0 if results not yet available + [[nodiscard]] uint64_t getElapsedTimeNanos() const override; + + /// @brief Check if timer results are available + /// @return true if results can be read without blocking (fence has signaled) + [[nodiscard]] bool resultsAvailable() const override; + + private: + igl::d3d12::ComPtr queryHeap_; + igl::d3d12::ComPtr readbackBuffer_; + uint64_t timestampFrequency_ = 0; // GPU timestamp frequency (ticks per second), 0 = timer disabled + bool resourceCreationFailed_ = false; // Track if constructor failed to create resources + + // Fence synchronization for accurate GPU timing. + // Thread-safe: use atomics to allow safe cross-thread queries. 
+ ID3D12Fence* fence_ = nullptr; // Fence to check completion (not owned, set once in end()) + std::atomic<uint64_t> fenceValue_{0}; // Fence value when timer ended + mutable std::atomic<bool> resolved_{false}; // Has query data been resolved and cached? (mutable for lazy resolution in const getter) + std::atomic<bool> ended_{false}; // Has end() been called? + + // Cached results to avoid re-reading from GPU. + // Thread-safe: only written once after the fence signals, then immutable (mutable for lazy resolution in const getter). + mutable std::atomic<uint64_t> cachedElapsedNanos_{0}; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/UploadRingBuffer.cpp b/src/igl/d3d12/UploadRingBuffer.cpp new file mode 100644 index 0000000000..52a97d3149 --- /dev/null +++ b/src/igl/d3d12/UploadRingBuffer.cpp @@ -0,0 +1,284 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <igl/d3d12/UploadRingBuffer.h> +#include <igl/d3d12/D3D12Context.h> + +namespace igl::d3d12 { + +UploadRingBuffer::UploadRingBuffer(ID3D12Device* device, uint64_t size) + : device_(device), size_(size) { + if (!device_) { + IGL_LOG_ERROR("UploadRingBuffer: Device is null\n"); + return; + } + + // Create large upload heap + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + uploadHeapProps.CreationNodeMask = 1; + uploadHeapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Alignment = 0; + bufferDesc.Width = size_; + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + HRESULT hr = device_->CreateCommittedResource( + &uploadHeapProps, + 
D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadHeap_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("UploadRingBuffer: Failed to create upload heap (HRESULT=0x%08X)\n", hr); + return; + } + + // Map the entire buffer persistently + D3D12_RANGE readRange = {0, 0}; // Not reading from GPU + hr = uploadHeap_->Map(0, &readRange, &cpuBase_); + if (FAILED(hr)) { + IGL_LOG_ERROR("UploadRingBuffer: Failed to map upload heap (HRESULT=0x%08X)\n", hr); + cpuBase_ = nullptr; + return; + } + + gpuBase_ = uploadHeap_->GetGPUVirtualAddress(); + + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Created ring buffer (size=%llu MB, cpuBase=%p, gpuBase=0x%llX)\n", + size_ / (1024 * 1024), cpuBase_, gpuBase_); + + // Track resource creation + D3D12Context::trackResourceCreation("UploadRingBuffer", size_); +} + +UploadRingBuffer::~UploadRingBuffer() { + if (uploadHeap_.Get() && cpuBase_) { + uploadHeap_->Unmap(0, nullptr); + cpuBase_ = nullptr; + } + + if (uploadHeap_.Get()) { + // Track resource destruction + D3D12Context::trackResourceDestruction("UploadRingBuffer", size_); + } + + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Destroyed (allocations=%llu, failures=%llu)\n", + allocationCount_, failureCount_); +} + +UploadRingBuffer::Allocation UploadRingBuffer::allocate(uint64_t size, uint64_t alignment, uint64_t fenceValue) { + std::lock_guard lock(mutex_); + + if (!uploadHeap_.Get() || !cpuBase_) { + IGL_LOG_ERROR("UploadRingBuffer::allocate: Ring buffer not initialized\n"); + failureCount_++; + return Allocation{}; + } + + if (size == 0) { + IGL_LOG_ERROR("UploadRingBuffer::allocate: Size is zero\n"); + failureCount_++; + return Allocation{}; + } + + // Align size up for proper alignment of next allocation + const uint64_t alignedSize = alignUp(size, alignment); + + // Invariants (all protected by mutex_): + // - head_ is the next free offset where a new allocation can start + // - tail_ is the offset of the oldest in-flight 
allocation (or equals head_ when empty) + // - pendingAllocations_ is a queue of all in-flight allocations in submission order + // - When pendingAllocations_.empty(), the entire buffer is free: tail_ == head_ + const bool bufferEmpty = pendingAllocations_.empty(); + const uint64_t currentHead = head_; + const uint64_t currentTail = bufferEmpty ? currentHead : tail_; + + // Detect full ring: head == tail with in-flight allocations means buffer is completely occupied + const bool bufferFull = !bufferEmpty && (currentHead == currentTail); + + if (bufferFull) { + // Ring buffer is completely full - no free space available + failureCount_++; + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Ring buffer completely full (size=%llu)\n", size_); + return Allocation{}; + } + + // Align head to requested alignment + const uint64_t alignedHead = alignUp(currentHead, alignment); + + // Determine available free space based on buffer state + // When empty: entire buffer is available starting from head_ + // When head > tail: in-flight region spans [tail, head); free space is [head, size_) and [0, tail) + // When head < tail: in-flight region spans [tail, size_) + [0, head); free space is [head, tail) + + bool canFit = false; + uint64_t allocationOffset = alignedHead; + + if (bufferEmpty) { + // Entire buffer is free + if (alignedHead + alignedSize <= size_) { + canFit = true; + allocationOffset = alignedHead; + } else if (alignedSize <= size_) { + // Wrap to beginning + allocationOffset = 0; + canFit = true; + } + } else if (currentHead >= currentTail) { + // In-flight allocations have wrapped around: free regions are [head, size_) and [0, tail) + if (alignedHead + alignedSize <= size_) { + // Fits at current head position + canFit = true; + allocationOffset = alignedHead; + } else if (alignedSize <= currentTail) { + // Wrap around to beginning + allocationOffset = 0; + canFit = true; + } + } else { + // In-flight allocations have not wrapped: free space is [head, tail) + if 
(alignedHead + alignedSize <= currentTail) { + canFit = true; + allocationOffset = alignedHead; + } + } + + if (!canFit) { + // Not enough space - caller will fall back to dedicated staging buffer + // This is expected behavior when ring is full, not an error condition + // Note: failureCount_ tracks ring-full events as a diagnostic metric, not errors + failureCount_++; + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Insufficient space (request=%llu, approx used=%llu/%llu)\n", + alignedSize, getUsedSizeUnlocked(), size_); + return Allocation{}; + } + + // Final validation: ensure allocation doesn't overlap with in-flight allocations + const uint64_t allocationEnd = allocationOffset + alignedSize; + +#ifdef _DEBUG + // Debug: verify allocation doesn't overlap with in-flight allocations + if (!bufferEmpty) { + if (allocationOffset == 0) { + // Wraparound case: ensure we don't exceed tail + IGL_DEBUG_ASSERT(allocationEnd <= currentTail, + "UploadRingBuffer: Allocation [0, %llu) would overlap tail at %llu", + allocationEnd, currentTail); + } else if (currentHead >= currentTail) { + // In-flight region wrapped: allocation should be in free region [head, size_) + IGL_DEBUG_ASSERT(allocationOffset >= currentHead && allocationEnd <= size_, + "UploadRingBuffer: Allocation [%llu, %llu) outside free region [%llu, %llu)", + allocationOffset, allocationEnd, currentHead, size_); + } else { + // In-flight region not wrapped: allocation should be in free region [head, tail) + IGL_DEBUG_ASSERT(allocationOffset >= currentHead && allocationEnd <= currentTail, + "UploadRingBuffer: Allocation [%llu, %llu) outside free region [%llu, %llu)", + allocationOffset, allocationEnd, currentHead, currentTail); + } + } +#endif + + // Create allocation. 
+ Allocation allocation; + allocation.buffer = uploadHeap_; + allocation.cpuAddress = static_cast<uint8_t*>(cpuBase_) + allocationOffset; + allocation.gpuAddress = gpuBase_ + allocationOffset; + allocation.offset = allocationOffset; + allocation.size = alignedSize; + allocation.valid = true; + + // Track pending allocation for retirement + pendingAllocations_.push({allocationOffset, alignedSize, fenceValue}); + + // Update head pointer + uint64_t newHead = allocationOffset + alignedSize; + if (newHead >= size_) { + newHead = 0; // Wrap around + } + head_ = newHead; + + // Update tail_ for first allocation when buffer transitions from empty + if (bufferEmpty) { + tail_ = allocationOffset; + } + + allocationCount_++; + +#ifdef _DEBUG + // Debug validation: ensure invariants hold after allocation + IGL_DEBUG_ASSERT(newHead <= size_, "Head exceeded buffer size!"); + IGL_DEBUG_ASSERT(!pendingAllocations_.empty() || head_ == tail_, + "Buffer should have pending allocations or head == tail"); + + // Validate that used size is reasonable (use unlocked helper since we hold mutex_) + const uint64_t usedSize = getUsedSizeUnlocked(); + IGL_DEBUG_ASSERT(usedSize <= size_, "Used size %llu exceeds buffer size %llu", usedSize, size_); +#endif + + return allocation; +} + +void UploadRingBuffer::retire(uint64_t completedFenceValue) { + std::lock_guard lock(mutex_); + + // Process all pending allocations that have completed + while (!pendingAllocations_.empty()) { + const auto& pending = pendingAllocations_.front(); + + if (pending.fenceValue > completedFenceValue) { + // This and all subsequent allocations are still pending + break; + } + + // This allocation has completed, reclaim the memory + pendingAllocations_.pop(); + + // Update tail_ to point to the next oldest allocation, or to head_ if buffer is now empty + if (!pendingAllocations_.empty()) { + tail_ = pendingAllocations_.front().offset; + } else { + // Buffer is now empty: reset tail to head to maintain invariant + tail_ = head_; + 
} + } + +#ifdef _DEBUG + // Validate invariant: when empty, tail == head + if (pendingAllocations_.empty()) { + IGL_DEBUG_ASSERT(tail_ == head_, + "Buffer empty but tail (%llu) != head (%llu)", tail_, head_); + } +#endif +} + +uint64_t UploadRingBuffer::getUsedSizeUnlocked() const { + // Note: Caller must hold mutex_ + if (head_ >= tail_) { + return head_ - tail_; + } else { + return (size_ - tail_) + head_; + } +} + +uint64_t UploadRingBuffer::getUsedSize() const { + std::lock_guard lock(mutex_); + return getUsedSizeUnlocked(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/UploadRingBuffer.h b/src/igl/d3d12/UploadRingBuffer.h new file mode 100644 index 0000000000..b3657f7217 --- /dev/null +++ b/src/igl/d3d12/UploadRingBuffer.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include <igl/d3d12/ComPtr.h> +#include <mutex> +#include <queue> + +namespace igl::d3d12 { + +/** + * @brief Upload ring buffer for streaming resources. + * + * Manages a large staging buffer (64-256MB) for efficient resource uploads. + * Implements a ring buffer pattern with fence-based memory retirement to + * reduce allocator churn and memory fragmentation. + * + * Key features: + * - Large pre-allocated upload heap + * - Linear sub-allocation with wraparound + * - Fence-based memory retirement and recycling + * - Thread-safe allocation + */ +class UploadRingBuffer { + public: + /** + * @brief Represents a sub-allocation from the ring buffer + */ + struct Allocation { + igl::d3d12::ComPtr<ID3D12Resource> buffer; // Underlying buffer resource. 
+ void* cpuAddress = nullptr; // CPU-visible mapped address + D3D12_GPU_VIRTUAL_ADDRESS gpuAddress = 0; // GPU virtual address + uint64_t offset = 0; // Offset within ring buffer + uint64_t size = 0; // Size of allocation + bool valid = false; // Whether allocation succeeded + }; + + /** + * @brief Constructs an upload ring buffer + * @param device D3D12 device for resource creation + * @param size Total size of ring buffer (default: 128MB) + * + * T14: Default value (128MB) matches D3D12ContextConfig::defaultConfig().uploadRingBufferSize. + * In production, call sites pass D3D12ContextConfig::uploadRingBufferSize explicitly so the + * ring size is driven by the active context configuration rather than this default. + */ + explicit UploadRingBuffer(ID3D12Device* device, uint64_t size = 128 * 1024 * 1024); + ~UploadRingBuffer(); + + // Non-copyable + UploadRingBuffer(const UploadRingBuffer&) = delete; + UploadRingBuffer& operator=(const UploadRingBuffer&) = delete; + + /** + * @brief Allocates staging memory from the ring buffer + * @param size Size in bytes to allocate + * @param alignment Alignment requirement (e.g., 256 for constant buffers) + * @param fenceValue Fence value when this allocation will be retired + * @return Allocation structure (check valid flag for success) + * + * Note: If allocation fails due to insufficient space, returns invalid allocation. + * Caller should fall back to creating a dedicated staging buffer. + */ + Allocation allocate(uint64_t size, uint64_t alignment, uint64_t fenceValue); + + /** + * @brief Retires allocations that have completed on GPU + * @param completedFenceValue Fence value that has been signaled by GPU + * + * Reclaims memory from allocations associated with fence values <= completedFenceValue. + * This allows the ring buffer to wrap around and reuse memory. 
+ */ + void retire(uint64_t completedFenceValue); + + /** + * @brief Gets total size of ring buffer + */ + uint64_t getTotalSize() const { return size_; } + + /** + * @brief Gets estimated used size based on head/tail distance (for diagnostics) + * + * Note: Returns approximate usage; does not account for internal alignment gaps. + * Returns 0 when buffer is empty (tail == head with no pending allocations). + * Also returns 0 when buffer is completely full (tail == head with pending allocations); + * use pendingAllocations or getFailureCount() to distinguish empty vs. full states. + */ + uint64_t getUsedSize() const; + + /** + * @brief Gets number of allocations made (for performance metrics) + */ + uint64_t getAllocationCount() const { return allocationCount_; } + + /** + * @brief Gets number of times allocation could not be satisfied from ring buffer (for metrics) + * + * Note: This counts ring-full events where callers fall back to dedicated staging buffers, + * not error conditions. It is a diagnostic metric for ring buffer utilization. 
+ */ + uint64_t getFailureCount() const { return failureCount_; } + + /** + * @brief Gets the underlying upload heap resource (for copy operations) + */ + ID3D12Resource* getUploadHeap() const { return uploadHeap_.Get(); } + + private: + /** + * @brief Represents a pending allocation waiting for GPU completion + */ + struct PendingAllocation { + uint64_t offset; // Start offset in ring buffer + uint64_t size; // Size of allocation + uint64_t fenceValue; // Fence value when allocation can be retired + }; + + /** + * @brief Aligns value up to specified alignment + */ + static uint64_t alignUp(uint64_t value, uint64_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); + } + + /** + * @brief Internal helper to compute used size without locking + * @note Caller must hold mutex_ + * @note Returns 0 when head == tail (both empty and full states) + */ + uint64_t getUsedSizeUnlocked() const; + + + ID3D12Device* device_ = nullptr; + igl::d3d12::ComPtr<ID3D12Resource> uploadHeap_; + void* cpuBase_ = nullptr; // CPU-mapped base address + D3D12_GPU_VIRTUAL_ADDRESS gpuBase_ = 0; // GPU base address + + uint64_t size_ = 0; // Total ring buffer size + uint64_t head_ = 0; // Next free offset for new allocations (protected by mutex_) + uint64_t tail_ = 0; // Offset of oldest in-flight allocation; equals head_ when empty (protected by mutex_) + + std::queue<PendingAllocation> pendingAllocations_; // Allocations waiting for GPU + mutable std::mutex mutex_; // Thread safety + + // Metrics + uint64_t allocationCount_ = 0; + uint64_t failureCount_ = 0; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/VertexInputState.h b/src/igl/d3d12/VertexInputState.h new file mode 100644 index 0000000000..119ad4c22c --- /dev/null +++ b/src/igl/d3d12/VertexInputState.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class VertexInputState final : public IVertexInputState { + public: + explicit VertexInputState(const VertexInputStateDesc& desc) : desc_(desc) {} + ~VertexInputState() override = default; + + const VertexInputStateDesc& getDesc() const { return desc_; } + + private: + VertexInputStateDesc desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/metal/DeviceFeatureSet.mm b/src/igl/metal/DeviceFeatureSet.mm index 2e9f7f112f..dfd8395085 100644 --- a/src/igl/metal/DeviceFeatureSet.mm +++ b/src/igl/metal/DeviceFeatureSet.mm @@ -286,6 +286,27 @@ static size_t getGPUFamily(id device) { case DeviceFeatureLimits::MaxBindBytesBytes: result = 4096; return true; + case DeviceFeatureLimits::MaxTextureDimension3D: +#if IGL_PLATFORM_IOS + result = (gpuFamily_ <= 2) ? 2048 : 2048; +#else + result = 2048; +#endif + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + result = 1024; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + result = 1024; + return true; + case DeviceFeatureLimits::MaxVertexInputAttributes: + result = 31; + return true; + case DeviceFeatureLimits::MaxColorAttachments: + result = 8; + return true; default: IGL_DEBUG_ABORT( "invalid feature limit query: feature limit query is not implemented or does not exist\n"); diff --git a/src/igl/opengl/DeviceFeatureSet.cpp b/src/igl/opengl/DeviceFeatureSet.cpp index b22b7bd553..952d0a8378 100644 --- a/src/igl/opengl/DeviceFeatureSet.cpp +++ b/src/igl/opengl/DeviceFeatureSet.cpp @@ -1133,6 +1133,50 @@ bool DeviceFeatureSet::getFeatureLimits(DeviceFeatureLimits featureLimits, size_ case DeviceFeatureLimits::MaxBindBytesBytes: result = 0; return true; + case DeviceFeatureLimits::MaxTextureDimension3D: + glContext_.getIntegerv(GL_MAX_3D_TEXTURE_SIZE, &tsize); + result = (size_t)tsize; + return true; + 
case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + if (hasFeature(DeviceFeatures::Compute)) { + // OpenGL ES 3.1+ and OpenGL 4.3+: use conservative value + result = 1024; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + if (hasFeature(DeviceFeatures::Compute)) { + // OpenGL ES 3.1+ and OpenGL 4.3+: use conservative value + result = 1024; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + if (hasFeature(DeviceFeatures::Compute)) { + // OpenGL ES 3.1+ and OpenGL 4.3+: use conservative value + result = 64; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + if (hasFeature(DeviceFeatures::Compute)) { + glContext_.getIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &tsize); + result = (size_t)tsize; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxVertexInputAttributes: + glContext_.getIntegerv(GL_MAX_VERTEX_ATTRIBS, &tsize); + result = (size_t)tsize; + return true; + case DeviceFeatureLimits::MaxColorAttachments: + glContext_.getIntegerv(GL_MAX_COLOR_ATTACHMENTS, &tsize); + result = (size_t)tsize; + return true; default: IGL_DEBUG_ABORT( "invalid feature limit query: feature limit query is not implemented or does " diff --git a/src/igl/tests/CMakeLists.txt b/src/igl/tests/CMakeLists.txt index 797b18fbb5..2406b83a42 100644 --- a/src/igl/tests/CMakeLists.txt +++ b/src/igl/tests/CMakeLists.txt @@ -37,6 +37,13 @@ if(IGL_WITH_METAL) list(APPEND HEADER_FILES util/device/metal/TestDevice.h) endif() +if(IGL_WITH_D3D12) + file(GLOB D3D12_SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} d3d12/*.cpp) + list(APPEND SRC_FILES ${D3D12_SRC_FILES}) + list(APPEND SRC_FILES util/device/d3d12/TestDevice.cpp) + list(APPEND HEADER_FILES util/device/d3d12/TestDevice.h) +endif() + if(IGL_WITH_IGLU) file(GLOB IGLU_SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} 
iglu/*.cpp) file(GLOB IGLU_SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} iglu/texture_loader/*.cpp) @@ -51,6 +58,8 @@ endif() enable_testing() +# Add custom main to initialize COM and install signal handlers before gtest +list(APPEND SRC_FILES main.cpp) add_executable(IGLTests ${SRC_FILES} ${HEADER_FILES}) if(WIN32) @@ -60,6 +69,23 @@ if(WIN32) target_compile_definitions(IGLTests PRIVATE -DNOMINMAX) target_compile_definitions(IGLTests PRIVATE -DIGL_UNIT_TESTS_GLES_VERSION=3) + # Ensure Unicode matches across GoogleTest and our test binary on Windows + # Ensure tests are built without UNICODE to match GoogleTest defaults and avoid CRT mismatches + if(MSVC) + target_compile_options(IGLTests PRIVATE /UUNICODE /U_UNICODE) + if(TARGET gtest) + target_compile_options(gtest PRIVATE /UUNICODE /U_UNICODE) + endif() + if(TARGET gtest_main) + target_compile_options(gtest_main PRIVATE /UUNICODE /U_UNICODE) + endif() + if(TARGET gmock) + target_compile_options(gmock PRIVATE /UUNICODE /U_UNICODE) + endif() + if(TARGET gmock_main) + target_compile_options(gmock_main PRIVATE /UUNICODE /U_UNICODE) + endif() + endif() target_include_directories(IGLTests PRIVATE "${IGL_ROOT_DIR}/third-party/deps/src/glew/include") # IGL tests use EGL, here's a stub for Windows add_library( @@ -79,22 +105,46 @@ endif() igl_set_cxxstd(IGLTests 20) igl_set_folder(IGLTests "IGL") -# gtest +# gtest - FORCE static linkage to avoid DLL initialization issues +# NOTE: gmock is NOT needed - no tests use it # cmake-format: off -set(BUILD_GMOCK OFF CACHE BOOL "") -set(INSTALL_GTEST OFF CACHE BOOL "") -set(GTEST_HAS_ABSL OFF CACHE BOOL "") +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE) +set(BUILD_GMOCK OFF CACHE BOOL "Build gmock" FORCE) +set(INSTALL_GTEST OFF CACHE BOOL "Install gtest" FORCE) +set(GTEST_HAS_ABSL OFF CACHE BOOL "Use Abseil" FORCE) +set(gtest_force_shared_crt OFF CACHE BOOL "Force shared CRT" FORCE) +# Explicitly disable shared library build 
+set(BUILD_SHARED_LIBS OFF) +# CRITICAL: Set these compile definitions globally BEFORE including gtest +add_compile_definitions(GTEST_LINKED_AS_SHARED_LIBRARY=0 GTEST_CREATE_SHARED_LIBRARY=0) # cmake-format: on -add_subdirectory(${IGL_ROOT_DIR}/third-party/deps/src/gtest "gtest") +add_subdirectory(${IGL_ROOT_DIR}/third-party/deps/src/gtest "gtest" EXCLUDE_FROM_ALL) + +# CRITICAL: Force static linking by removing any DLL export definitions +if(TARGET gtest) + target_compile_definitions(gtest PRIVATE GTEST_CREATE_SHARED_LIBRARY=0) + target_compile_definitions(gtest PUBLIC GTEST_LINKED_AS_SHARED_LIBRARY=0) +endif() igl_set_folder(gtest "third-party") igl_set_folder(gtest_main "third-party") +igl_set_cxxstd(gtest 17) +igl_set_cxxstd(gtest_main 17) target_link_libraries(IGLTests PUBLIC IGLLibrary) target_link_libraries(IGLTests PUBLIC gtest) -target_link_libraries(IGLTests PUBLIC gtest_main) +# Use our custom main.cpp instead of gtest_main +# NOTE: gmock is NOT linked - no tests use gmock, only gtest target_link_libraries(IGLTests PUBLIC IGLUmanagedUniformBuffer) target_link_libraries(IGLTests PUBLIC IGLUshaderCross) +# Add STB include directory for image utilities used by test artifacts +target_include_directories(IGLTests PRIVATE "${IGL_ROOT_DIR}/third-party/deps/src/stb") + +# Enable experimental features ONLY for unit tests (NOT for render sessions) +# This allows D3D12 headless tests to use unsigned DXIL shaders +if(IGL_WITH_D3D12) + target_compile_definitions(IGLTests PRIVATE IGL_ENABLE_EXPERIMENTAL_FEATURES_FOR_TESTS=1) +endif() if(TARGET glfw) target_link_libraries(IGLTests PRIVATE glfw) endif() @@ -108,14 +158,53 @@ if(IGL_WITH_IGLU) target_link_libraries(IGLTests PUBLIC IGLUuniform) endif() -if(IGL_WITH_VULKAN) +if(IGL_WITH_D3D12) + target_compile_definitions(IGLTests PUBLIC -DIGL_BACKEND_TYPE="d3d12") +elseif(IGL_WITH_VULKAN) target_compile_definitions(IGLTests PUBLIC -DIGL_BACKEND_TYPE="vulkan") elseif(IGL_WITH_OPENGL OR IGL_WITH_OPENGLES) 
target_compile_definitions(IGLTests PUBLIC -DIGL_BACKEND_TYPE="ogl") endif() +# Ensure gtest/gmock are linked statically +target_compile_definitions(IGLTests PRIVATE GTEST_LINKED_AS_SHARED_LIBRARY=0 GTEST_CREATE_SHARED_LIBRARY=0) + if(UNIX) if (CMAKE_C_COMPILER_ID STREQUAL "GNU") target_compile_options(IGLTests PUBLIC $<$:-Wno-volatile>) endif() endif() + +# Register tests with CTest so they can be discovered +include(GoogleTest) +gtest_discover_tests(IGLTests + DISCOVERY_TIMEOUT 60 + DISCOVERY_MODE PRE_TEST +) + +# Fallback: also register the test executable directly for manual runs +if (WIN32) + add_test(NAME IGLTests COMMAND ${CMAKE_CURRENT_BINARY_DIR}/$/IGLTests.exe) +else() + add_test(NAME IGLTests COMMAND ${CMAKE_CURRENT_BINARY_DIR}/IGLTests) +endif() + +# Copy dxil.dll for D3D12 tests (required for DXIL signing) +if(IGL_WITH_D3D12 AND WIN32 AND MSVC) + # Try to locate dxil.dll from the Windows SDK without relying on machine-specific SDK versions + find_file(DXIL_DLL_FOR_TESTS + NAMES dxil.dll + HINTS + "$ENV{WindowsSdkBinPath}" + "$ENV{WindowsSdkDir}/bin" + PATH_SUFFIXES x64 + ) + if(DXIL_DLL_FOR_TESTS) + add_custom_command(TARGET IGLTests POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL_FOR_TESTS}" + "$/" + COMMENT "Copying dxil.dll for IGLTests" + ) + endif() +endif() diff --git a/src/igl/tests/ComputeCommandEncoder.cpp b/src/igl/tests/ComputeCommandEncoder.cpp index 125860eeec..af1eeef086 100644 --- a/src/igl/tests/ComputeCommandEncoder.cpp +++ b/src/igl/tests/ComputeCommandEncoder.cpp @@ -52,8 +52,10 @@ class ComputeCommandEncoderTest : public ::testing::Test { BufferDesc::BufferTypeBits::Storage, dataIn.data(), sizeof(float) * dataIn.size()); bufferIn_ = iglDev_->createBuffer(vbInDesc, nullptr); ASSERT_TRUE(bufferIn_ != nullptr); + // Use ResourceStorage::Shared for output buffers so they can be mapped for reading const BufferDesc bufferOutDesc = - BufferDesc(BufferDesc::BufferTypeBits::Storage, nullptr, sizeof(float) * 
dataIn.size()); + BufferDesc(BufferDesc::BufferTypeBits::Storage, nullptr, sizeof(float) * dataIn.size(), + ResourceStorage::Shared); bufferOut0_ = iglDev_->createBuffer(bufferOutDesc, nullptr); ASSERT_TRUE(bufferOut0_ != nullptr); bufferOut1_ = iglDev_->createBuffer(bufferOutDesc, nullptr); @@ -73,6 +75,9 @@ class ComputeCommandEncoderTest : public ::testing::Test { } else if (iglDev_->getBackendType() == igl::BackendType::Metal) { source = igl::tests::data::shader::kMtlSimpleComputeShader; entryName = igl::tests::data::shader::kSimpleComputeFunc; + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = igl::tests::data::shader::kD3D12SimpleComputeShader; + entryName = igl::tests::data::shader::kSimpleComputeFunc; } else { IGL_DEBUG_ASSERT_NOT_REACHED(); } diff --git a/src/igl/tests/Device.cpp b/src/igl/tests/Device.cpp index c3daacdecf..25f9a7ad22 100644 --- a/src/igl/tests/Device.cpp +++ b/src/igl/tests/Device.cpp @@ -9,6 +9,10 @@ #include "data/VertexIndexData.h" #include "util/Common.h" +#if IGL_PLATFORM_WINDOWS +#include +#endif + #include #include #include @@ -31,7 +35,11 @@ namespace igl::tests { class DeviceTest : public ::testing::Test { public: DeviceTest() = default; - ~DeviceTest() override = default; + ~DeviceTest() override { + cmdBuf_.reset(); + cmdQueue_.reset(); + iglDev_.reset(); + } // Set up common resources. This will create a device and a command queue void SetUp() override { @@ -205,10 +213,13 @@ TEST_F(DeviceTest, GetBackendType) { ASSERT_EQ(backend_, util::kBackendOgl); } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { ASSERT_EQ(backend_, util::kBackendVul); + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + ASSERT_EQ(backend_, util::kBackendD3D12); } else { - // Unknow backend. Please add to this test. 
ASSERT_TRUE(0); } } } // namespace igl::tests + + diff --git a/src/igl/tests/DeviceFeatureSet.cpp b/src/igl/tests/DeviceFeatureSet.cpp index fd44f55f77..a3c55fe85f 100644 --- a/src/igl/tests/DeviceFeatureSet.cpp +++ b/src/igl/tests/DeviceFeatureSet.cpp @@ -235,30 +235,31 @@ TEST_F(DeviceFeatureSetTest, hasFeatureForMacOSOrWinOrAndroidTest) { EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ExternalMemoryObjects)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::PushConstants)); } else { - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::Texture2DArray)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::Texture3D)); + // D3D12 backend + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::Texture2DArray)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::Texture3D)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TextureArrayExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TextureExternalImage)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::Multiview)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BindUniform)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TexturePartialMipChain)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::TexturePartialMipChain)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BufferRing)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BufferNoCopy)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ShaderLibrary)); - EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::BindBytes)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::ShaderLibrary)); + EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BindBytes)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BufferDeviceAddress)); EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::ShaderTextureLod)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ShaderTextureLodExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::StandardDerivativeExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::SamplerMinMaxLod)); 
EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::DrawIndexedIndirect)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::MultipleRenderTargets)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::MultipleRenderTargets)); EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::ExplicitBinding)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ExplicitBindingExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TextureFormatRG)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ValidationLayersEnabled)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ExternalMemoryObjects)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::PushConstants)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::PushConstants)); // D3D12 supports push constants via root constants (shader register b2) } } diff --git a/src/igl/tests/RenderCommandEncoder.cpp b/src/igl/tests/RenderCommandEncoder.cpp index a45137377f..0c560a8cfc 100644 --- a/src/igl/tests/RenderCommandEncoder.cpp +++ b/src/igl/tests/RenderCommandEncoder.cpp @@ -684,8 +684,9 @@ TEST_F(RenderCommandEncoderTest, shouldDrawTriangleStrip) { } TEST_F(RenderCommandEncoderTest, shouldDrawTriangleStripCopyTextureToBuffer) { - if (iglDev_->getBackendType() != igl::BackendType::Vulkan) { - GTEST_SKIP() << "Not implemented for non-Vulkan backends"; + if (iglDev_->getBackendType() != igl::BackendType::Vulkan && + iglDev_->getBackendType() != igl::BackendType::D3D12) { + GTEST_SKIP() << "Not implemented for this backend"; return; } @@ -865,8 +866,9 @@ TEST_F(RenderCommandEncoderTest, DepthBiasShouldDrawAPoint) { } TEST_F(RenderCommandEncoderTest, drawUsingBindPushConstants) { - if (iglDev_->getBackendType() != igl::BackendType::Vulkan) { - GTEST_SKIP() << "Push constants are only supported in Vulkan"; + if (iglDev_->getBackendType() != igl::BackendType::Vulkan && + iglDev_->getBackendType() != igl::BackendType::D3D12) { + GTEST_SKIP() << "Push constants are only supported in Vulkan and D3D12"; return; } @@ -878,12 +880,21 @@ 
TEST_F(RenderCommandEncoderTest, drawUsingBindPushConstants) { // Create new shader stages with push constant shaders std::unique_ptr pushConstantStages; - igl::tests::util::createShaderStages(iglDev_, - data::shader::kVulkanPushConstantVertShader, - igl::tests::data::shader::kShaderFunc, - data::shader::kVulkanPushConstantFragShader, - igl::tests::data::shader::kShaderFunc, - pushConstantStages); + if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + igl::tests::util::createShaderStages(iglDev_, + data::shader::kD3D12PushConstantVertShader, + std::string("main"), + data::shader::kD3D12PushConstantFragShader, + std::string("main"), + pushConstantStages); + } else { + igl::tests::util::createShaderStages(iglDev_, + data::shader::kVulkanPushConstantVertShader, + igl::tests::data::shader::kShaderFunc, + data::shader::kVulkanPushConstantFragShader, + igl::tests::data::shader::kShaderFunc, + pushConstantStages); + } ASSERT_TRUE(pushConstantStages); shaderStages_ = std::move(pushConstantStages); diff --git a/src/igl/tests/ShaderLibrary.cpp b/src/igl/tests/ShaderLibrary.cpp index a2fd84f5ce..c49f862f09 100644 --- a/src/igl/tests/ShaderLibrary.cpp +++ b/src/igl/tests/ShaderLibrary.cpp @@ -51,6 +51,8 @@ TEST_F(ShaderLibraryTest, CreateFromSource) { source = data::shader::kMtlSimpleShader.data(); } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = data::shader::kD3D12SimpleShader.data(); } else { IGL_DEBUG_ASSERT_NOT_REACHED(); } @@ -96,6 +98,8 @@ TEST_F(ShaderLibraryTest, CreateFromSourceMultipleModules) { } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { GTEST_SKIP() << "Vulkan does not support multiple modules from the same source code."; return; + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = data::shader::kD3D12SimpleShader.data(); } // Check if source is null 
before passing it to fromStringInput @@ -135,6 +139,8 @@ TEST_F(ShaderLibraryTest, CreateFromSourceNoResult) { source = data::shader::kMtlSimpleShader.data(); } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = data::shader::kD3D12SimpleShader.data(); } else { IGL_DEBUG_ASSERT_NOT_REACHED(); } diff --git a/src/igl/tests/ShaderModule.cpp b/src/igl/tests/ShaderModule.cpp index 479b225604..2505537004 100644 --- a/src/igl/tests/ShaderModule.cpp +++ b/src/igl/tests/ShaderModule.cpp @@ -77,36 +77,70 @@ TEST_F(ShaderModuleTest, CompileShaderModule) { Result ret; const char* source = nullptr; - if (backend_ == util::kBackendOgl) { + const auto be = iglDev_->getBackendType(); + if (be == BackendType::OpenGL) { source = data::shader::kOglSimpleVertShader.data(); - } else if (backend_ == util::kBackendMtl) { + } else if (be == BackendType::Metal) { source = data::shader::kMtlSimpleShader.data(); - } else if (backend_ == util::kBackendVul) { + } else if (be == BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (be == BackendType::D3D12) { + // Minimal HLSL vertex shader for D3D12 backend + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return vertexShader(i); } +)"; } else { - ASSERT_TRUE(0); + // Fallback: use D3D12-compatible HLSL to avoid backend string mismatches + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return 
vertexShader(i); } +)"; } auto shaderModule = ShaderModuleCreator::fromStringInput( - *iglDev_, source, {ShaderStage::Vertex, "vertexShader"}, "test", &ret); + *iglDev_, source, + {ShaderStage::Vertex, (be == BackendType::D3D12) ? std::string("main") : std::string("vertexShader")}, + "test", &ret); ASSERT_TRUE(ret.isOk()) << ret.message.c_str(); ASSERT_TRUE(shaderModule != nullptr); } TEST_F(ShaderModuleTest, CompileShaderModuleNoResult) { const char* source = nullptr; - if (backend_ == util::kBackendOgl) { + const auto be2 = iglDev_->getBackendType(); + if (be2 == BackendType::OpenGL) { source = data::shader::kOglSimpleVertShader.data(); - } else if (backend_ == util::kBackendMtl) { + } else if (be2 == BackendType::Metal) { source = data::shader::kMtlSimpleShader.data(); - } else if (backend_ == util::kBackendVul) { + } else if (be2 == BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (be2 == BackendType::D3D12) { + // Minimal HLSL vertex shader for D3D12 backend + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return vertexShader(i); } +)"; } else { - ASSERT_TRUE(0); + // Fallback to D3D12-compatible HLSL + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return vertexShader(i); } +)"; } auto shaderModule = ShaderModuleCreator::fromStringInput( - *iglDev_, source, {ShaderStage::Vertex, "vertexShader"}, "test", nullptr); + *iglDev_, source, + {ShaderStage::Vertex, (be2 == BackendType::D3D12) ? 
std::string("main") : std::string("vertexShader")}, + "test", nullptr); ASSERT_TRUE(shaderModule != nullptr); } } // namespace igl::tests diff --git a/src/igl/tests/Texture.cpp b/src/igl/tests/Texture.cpp index 847ef102dc..d7db7e1d88 100644 --- a/src/igl/tests/Texture.cpp +++ b/src/igl/tests/Texture.cpp @@ -86,7 +86,9 @@ TEST_F(TextureTest, Passthrough) { cmds->bindRenderPipelineState(pipelineState); cmds->bindTexture(textureUnit_, BindTarget::kFragment, inputTexture_.get()); + IGL_LOG_INFO("TEST: About to bind sampler: textureUnit_=%zu, samp_=%p\n", textureUnit_, samp_.get()); cmds->bindSamplerState(textureUnit_, BindTarget::kFragment, samp_.get()); + IGL_LOG_INFO("TEST: After bind sampler\n"); cmds->bindIndexBuffer(*ib_, IndexFormat::UInt16); cmds->drawIndexed(6); diff --git a/src/igl/tests/TextureArray.cpp b/src/igl/tests/TextureArray.cpp index 2958937cd2..f1415fc39b 100644 --- a/src/igl/tests/TextureArray.cpp +++ b/src/igl/tests/TextureArray.cpp @@ -163,6 +163,13 @@ class TextureArrayTest : public ::testing::Test { igl::tests::data::shader::kSimpleVertFunc, igl::tests::data::shader::kSimpleFragFunc, stages); + } else if (iglDev_->getBackendType() == BackendType::D3D12) { + util::createShaderStages(iglDev_, + igl::tests::data::shader::kD3D12SimpleVertShaderTexArray, + igl::tests::data::shader::kShaderFunc, + igl::tests::data::shader::kD3D12SimpleFragShaderTexArray, + igl::tests::data::shader::kShaderFunc, + stages); } ASSERT_TRUE(stages != nullptr); diff --git a/src/igl/tests/TextureArrayFloat.cpp b/src/igl/tests/TextureArrayFloat.cpp index 02a42a996c..a7dbe6f00c 100644 --- a/src/igl/tests/TextureArrayFloat.cpp +++ b/src/igl/tests/TextureArrayFloat.cpp @@ -178,6 +178,13 @@ class TextureArrayFloatTest : public ::testing::Test { igl::tests::data::shader::kSimpleVertFunc, igl::tests::data::shader::kSimpleFragFunc, stages); + } else if (iglDev_->getBackendType() == BackendType::D3D12) { + util::createShaderStages(iglDev_, + 
igl::tests::data::shader::kD3D12SimpleVertShaderTex2dArray, + igl::tests::data::shader::kShaderFunc, + igl::tests::data::shader::kD3D12SimpleFragShaderTex2dArray, + igl::tests::data::shader::kShaderFunc, + stages); } ASSERT_TRUE(stages != nullptr); diff --git a/src/igl/tests/TextureCube.cpp b/src/igl/tests/TextureCube.cpp index b49fada132..6478945ab2 100644 --- a/src/igl/tests/TextureCube.cpp +++ b/src/igl/tests/TextureCube.cpp @@ -129,6 +129,13 @@ class TextureCubeTest : public ::testing::Test { igl::tests::data::shader::kVulkanSimpleFragShaderCube, igl::tests::data::shader::kShaderFunc, stages); + } else if (iglDev_->getBackendType() == BackendType::D3D12) { + util::createShaderStages(iglDev_, + igl::tests::data::shader::kD3D12SimpleVertShaderCube, + igl::tests::data::shader::kShaderFunc, + igl::tests::data::shader::kD3D12SimpleFragShaderCube, + igl::tests::data::shader::kShaderFunc, + stages); } else { ASSERT_TRUE(false); } diff --git a/src/igl/tests/data/ShaderData.h b/src/igl/tests/data/ShaderData.h index 895cb43b70..f1cfa61355 100644 --- a/src/igl/tests/data/ShaderData.h +++ b/src/igl/tests/data/ShaderData.h @@ -523,6 +523,33 @@ constexpr std::string_view kVulkanPushConstantFragShader = out_FragColor = tex * pushConstants.colorMultiplier; }); +// D3D12 HLSL push constant shaders +constexpr const char* kD3D12PushConstantVertShader = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +PSIn main(VSIn i) { + PSIn o; + o.position = i.position_in; + o.uv = i.uv_in; + return o; +} +)"; + +constexpr const char* kD3D12PushConstantFragShader = R"( +Texture2D inputImage : register(t0); +SamplerState samp0 : register(s0); + +cbuffer PushConstants : register(b2) { + float4 colorMultiplier; +}; + +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +float4 main(PSIn i) : SV_TARGET { + float4 tex = inputImage.Sample(samp0, i.uv); + return tex * 
colorMultiplier; +} +)"; + constexpr std::string_view kVulkanSimpleVertShaderTex2dArray = IGL_TO_STRING( layout(location = 0) in vec4 position_in; @@ -638,4 +665,217 @@ constexpr std::string_view kVulkanSimpleComputeShader = fOut[id] = fIn[id] * 2.0f; }); // clang-format on +//----------------------------------------------------------------------------- +// D3D12/HLSL Shaders +//----------------------------------------------------------------------------- + +// Simple D3D12 Shader with separate vertex and fragment functions +// This is used for ShaderLibrary tests where multiple entry points are in the same source +constexpr std::string_view kD3D12SimpleShader = + IGL_TO_STRING( + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + }; + + VSOut vertexShader(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + return output; + } + + Texture2D inputImage : register(t0); + SamplerState linearSampler : register(s0); + + float4 fragmentShader(VSOut input) : SV_TARGET { + return inputImage.Sample(linearSampler, input.uv); + } + ); + +// Simple D3D12 Vertex shader (standalone) +constexpr std::string_view kD3D12SimpleVertShader = + IGL_TO_STRING( + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + return output; + } + ); + +// Simple D3D12 Fragment shader (standalone) +constexpr std::string_view kD3D12SimpleFragShader = + IGL_TO_STRING( + struct PSIn { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + }; + + Texture2D inputImage : register(t0); + SamplerState linearSampler : register(s0); + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(linearSampler, input.uv); + } + ); + 
+// Simple D3D12 Compute shader +constexpr std::string_view kD3D12SimpleComputeShader = + IGL_TO_STRING( + RWStructuredBuffer floatsIn : register(u0); + RWStructuredBuffer floatsOut : register(u1); + + [numthreads(6, 1, 1)] + void doubleKernel(uint3 threadID : SV_DispatchThreadID) { + uint id = threadID.x; + floatsOut[id] = floatsIn[id] * 2.0; + } + ); + +// D3D12 Texture2DArray Vertex shader +constexpr std::string_view kD3D12SimpleVertShaderTexArray = + IGL_TO_STRING( + cbuffer VertexUniforms : register(b2) { + int layer; + }; + + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + nointerpolation uint layerOut : TEXCOORD1; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + output.layerOut = layer; + return output; + } + ); + +// D3D12 Texture2DArray Fragment shader +constexpr std::string_view kD3D12SimpleFragShaderTexArray = + IGL_TO_STRING( + Texture2DArray inputImage : register(t0); + SamplerState inputSampler : register(s0); + + struct PSIn { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + nointerpolation uint layerIn : TEXCOORD1; + }; + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(inputSampler, float3(input.uv, input.layerIn)); + } + ); + +// D3D12 TextureCube Vertex shader +constexpr std::string_view kD3D12SimpleVertShaderCube = + IGL_TO_STRING( + cbuffer VertexUniforms : register(b1) { + float4 view; + }; + + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float3 viewDir : TEXCOORD0; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.viewDir = view.xyz; + return output; + } + ); + +// D3D12 TextureCube Fragment shader +constexpr std::string_view kD3D12SimpleFragShaderCube = + IGL_TO_STRING( + TextureCube inputImage : 
register(t0); + SamplerState inputSampler : register(s0); + + struct PSIn { + float4 position : SV_POSITION; + float3 viewDir : TEXCOORD0; + }; + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(inputSampler, input.viewDir); + } + ); + +// D3D12 Texture2DArray Vertex shader +constexpr std::string_view kD3D12SimpleVertShaderTex2dArray = + IGL_TO_STRING( + cbuffer VertexUniforms : register(b2) { + int layer; + }; + + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + uint layer : TEXCOORD1; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + output.layer = uint(layer); + return output; + } + ); + +// D3D12 Texture2DArray Fragment shader +constexpr std::string_view kD3D12SimpleFragShaderTex2dArray = + IGL_TO_STRING( + Texture2DArray inputImage : register(t0); + SamplerState inputSampler : register(s0); + + struct PSIn { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + uint layer : TEXCOORD1; + }; + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(inputSampler, float3(input.uv, input.layer)); + } + ); + +// clang-format on } // namespace igl::tests::data::shader diff --git a/src/igl/tests/main.cpp b/src/igl/tests/main.cpp new file mode 100644 index 0000000000..9bd4154570 --- /dev/null +++ b/src/igl/tests/main.cpp @@ -0,0 +1,38 @@ +/* + * Custom test entrypoint: initialize COM for D3D12 before running gtest. 
+ */ + +#include +#include +#include +#if defined(_WIN32) +#include +#include +#endif + +static void signalHandler(int signum) { + std::printf("CRASH: Signal %d caught\n", signum); + std::_Exit(signum); +} + +int main(int argc, char** argv) { + // Install basic signal handler for early crash diagnostics + std::signal(SIGSEGV, signalHandler); + + // Initialize COM in multithreaded mode for D3D12 usage (Windows only) +#if defined(_WIN32) + const HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED); + if (FAILED(hr)) { + std::printf("COM initialization failed: 0x%08X\n", static_cast(hr)); + return 1; + } +#endif + + ::testing::InitGoogleTest(&argc, argv); + const int result = RUN_ALL_TESTS(); + +#if defined(_WIN32) + CoUninitialize(); +#endif + return result; +} diff --git a/src/igl/tests/util/ArtifactUtils.cpp b/src/igl/tests/util/ArtifactUtils.cpp new file mode 100644 index 0000000000..c994c72098 --- /dev/null +++ b/src/igl/tests/util/ArtifactUtils.cpp @@ -0,0 +1,266 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "ArtifactUtils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace igl::tests::util { + +namespace { + +constexpr std::array kInitialState = { + 0x6A09E667u, 0xBB67AE85u, 0x3C6EF372u, 0xA54FF53Au, + 0x510E527Fu, 0x9B05688Cu, 0x1F83D9ABu, 0x5BE0CD19u}; + +constexpr std::array kRoundConstants = { + 0x428A2F98u, 0x71374491u, 0xB5C0FBCFu, 0xE9B5DBA5u, 0x3956C25Bu, 0x59F111F1u, 0x923F82A4u, + 0xAB1C5ED5u, 0xD807AA98u, 0x12835B01u, 0x243185BEu, 0x550C7DC3u, 0x72BE5D74u, 0x80DEB1FEu, + 0x9BDC06A7u, 0xC19BF174u, 0xE49B69C1u, 0xEFBE4786u, 0x0FC19DC6u, 0x240CA1CCu, 0x2DE92C6Fu, + 0x4A7484AAu, 0x5CB0A9DCu, 0x76F988DAu, 0x983E5152u, 0xA831C66Du, 0xB00327C8u, 0xBF597FC7u, + 0xC6E00BF3u, 0xD5A79147u, 0x06CA6351u, 0x14292967u, 0x27B70A85u, 0x2E1B2138u, 0x4D2C6DFCu, + 0x53380D13u, 0x650A7354u, 0x766A0ABBu, 0x81C2C92Eu, 0x92722C85u, 0xA2BFE8A1u, 0xA81A664Bu, + 0xC24B8B70u, 0xC76C51A3u, 0xD192E819u, 0xD6990624u, 0xF40E3585u, 0x106AA070u, 0x19A4C116u, + 0x1E376C08u, 0x2748774Cu, 0x34B0BCB5u, 0x391C0CB3u, 0x4ED8AA4Au, 0x5B9CCA4Fu, 0x682E6FF3u, + 0x748F82EEu, 0x78A5636Fu, 0x84C87814u, 0x8CC70208u, 0x90BEFFFAu, 0xA4506CEBu, 0xBEF9A3F7u, + 0xC67178F2u}; + +inline std::uint32_t rotr(std::uint32_t value, std::uint32_t bits) { + return (value >> bits) | (value << (32u - bits)); +} + +class Sha256Context { + public: + Sha256Context() = default; + + void update(const std::uint8_t* data, std::size_t len) { + if (finalized_) { + throw std::logic_error("SHA256 update after finalization"); + } + + totalBits_ += static_cast(len) * 8u; + + while (len > 0) { + const auto space = 64u - bufferSize_; + const auto toCopy = std::min(len, space); + std::memcpy(buffer_.data() + bufferSize_, data, toCopy); + bufferSize_ += toCopy; + data += toCopy; + len -= toCopy; + + if (bufferSize_ == 64u) { + processBlock(buffer_.data()); + bufferSize_ = 0u; + } + } + } + + std::array finalize() { + if (!finalized_) { + 
finalizeInternal(); + } + return digest_; + } + + private: + void processBlock(const std::uint8_t* block) { + std::array w{}; + for (std::size_t i = 0; i != 16; ++i) { + const auto idx = i * 4; + w[i] = (static_cast(block[idx]) << 24u) | + (static_cast(block[idx + 1]) << 16u) | + (static_cast(block[idx + 2]) << 8u) | + static_cast(block[idx + 3]); + } + + for (std::size_t i = 16; i != 64; ++i) { + const auto s0 = rotr(w[i - 15], 7u) ^ rotr(w[i - 15], 18u) ^ (w[i - 15] >> 3u); + const auto s1 = rotr(w[i - 2], 17u) ^ rotr(w[i - 2], 19u) ^ (w[i - 2] >> 10u); + w[i] = w[i - 16] + s0 + w[i - 7] + s1; + } + + auto a = state_[0]; + auto b = state_[1]; + auto c = state_[2]; + auto d = state_[3]; + auto e = state_[4]; + auto f = state_[5]; + auto g = state_[6]; + auto h = state_[7]; + + for (std::size_t i = 0; i != 64; ++i) { + const auto S1 = rotr(e, 6u) ^ rotr(e, 11u) ^ rotr(e, 25u); + const auto ch = (e & f) ^ ((~e) & g); + const auto temp1 = h + S1 + ch + kRoundConstants[i] + w[i]; + const auto S0 = rotr(a, 2u) ^ rotr(a, 13u) ^ rotr(a, 22u); + const auto maj = (a & b) ^ (a & c) ^ (b & c); + const auto temp2 = S0 + maj; + + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + state_[0] += a; + state_[1] += b; + state_[2] += c; + state_[3] += d; + state_[4] += e; + state_[5] += f; + state_[6] += g; + state_[7] += h; + } + + void finalizeInternal() { + buffer_[bufferSize_] = 0x80u; + ++bufferSize_; + + if (bufferSize_ > 56u) { + std::fill(buffer_.begin() + bufferSize_, buffer_.end(), 0u); + processBlock(buffer_.data()); + bufferSize_ = 0u; + } + + std::fill(buffer_.begin() + bufferSize_, buffer_.begin() + 56u, 0u); + + for (int i = 0; i < 8; ++i) { + buffer_[56u + i] = static_cast((totalBits_ >> (56u - 8u * i)) & 0xFFu); + } + + processBlock(buffer_.data()); + + for (std::size_t i = 0; i != state_.size(); ++i) { + digest_[i * 4u + 0u] = static_cast((state_[i] >> 24u) & 0xFFu); + digest_[i * 4u + 1u] = static_cast((state_[i] >> 
16u) & 0xFFu); + digest_[i * 4u + 2u] = static_cast((state_[i] >> 8u) & 0xFFu); + digest_[i * 4u + 3u] = static_cast(state_[i] & 0xFFu); + } + + finalized_ = true; + } + + std::array state_ = kInitialState; + std::array buffer_{}; + std::array digest_{}; + std::uint64_t totalBits_ = 0u; + std::size_t bufferSize_ = 0u; + bool finalized_ = false; +}; + +} // namespace + +std::string currentBackend() { + return std::string(IGL_BACKEND_TYPE); +} + +std::filesystem::path artifactsRoot() { + if (const char* env = std::getenv("IGL_ARTIFACT_ROOT"); env && *env != '\0') { + return std::filesystem::path(env); + } + return std::filesystem::current_path() / "artifacts"; +} + +std::filesystem::path ensureArtifactDirectory(const std::string& relativeGroup, + const std::string& backend) { + std::filesystem::path base = artifactsRoot() / std::filesystem::path(relativeGroup) / backend; + std::filesystem::create_directories(base); + return base; +} + +ArtifactPaths makeArtifactPaths(const std::string& relativeGroup, + const std::string& backend, + const std::string& testName, + bool includeImage) { + ArtifactPaths paths; + auto base = ensureArtifactDirectory(relativeGroup, backend); + + paths.shaFile = base / (testName + ".sha256"); + + if (includeImage) { + auto imageDir = base / "640x360"; + std::filesystem::create_directories(imageDir); + paths.pngFile = imageDir / (testName + ".png"); + } + + return paths; +} + +void writeBinaryFile(const std::filesystem::path& path, std::span bytes) { + if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + std::ofstream out(path, std::ios::binary); + if (!out) { + throw std::runtime_error("Failed to open file for writing: " + path.string()); + } + out.write(reinterpret_cast(bytes.data()), + static_cast(bytes.size())); + if (!out) { + throw std::runtime_error("Failed to write all bytes to: " + path.string()); + } +} + +void writeTextFile(const std::filesystem::path& path, const std::string& text) { + 
if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + std::ofstream out(path, std::ios::binary); + if (!out) { + throw std::runtime_error("Failed to open file for writing: " + path.string()); + } + out << text; + if (!out) { + throw std::runtime_error("Failed to write text to: " + path.string()); + } +} + +std::string computeSha256(std::span bytes) { + Sha256Context ctx; + ctx.update(bytes.data(), bytes.size()); + const auto digest = ctx.finalize(); + + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (auto byte : digest) { + oss << std::setw(2) << static_cast(byte); + } + return oss.str(); +} + +void writeSha256File(const std::filesystem::path& path, const std::string& hash) { + writeTextFile(path, hash + "\n"); +} + +void writePng(const std::filesystem::path& path, + const std::uint8_t* rgbaPixels, + std::uint32_t width, + std::uint32_t height) { + if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + if (stbi_write_png(path.string().c_str(), static_cast(width), static_cast(height), 4, + rgbaPixels, static_cast(width * 4u)) == 0) { + throw std::runtime_error("Failed to write PNG: " + path.string()); + } +} + +} // namespace igl::tests::util diff --git a/src/igl/tests/util/ArtifactUtils.h b/src/igl/tests/util/ArtifactUtils.h new file mode 100644 index 0000000000..4874e21e58 --- /dev/null +++ b/src/igl/tests/util/ArtifactUtils.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::tests::util { + +struct ArtifactPaths { + std::filesystem::path shaFile; + std::filesystem::path pngFile; +}; + +std::string currentBackend(); + +std::filesystem::path artifactsRoot(); + +std::filesystem::path ensureArtifactDirectory(const std::string& relativeGroup, + const std::string& backend); + +ArtifactPaths makeArtifactPaths(const std::string& relativeGroup, + const std::string& backend, + const std::string& testName, + bool includeImage); + +void writeBinaryFile(const std::filesystem::path& path, std::span bytes); + +void writeTextFile(const std::filesystem::path& path, const std::string& text); + +std::string computeSha256(std::span bytes); + +void writeSha256File(const std::filesystem::path& path, const std::string& hash); + +void writePng(const std::filesystem::path& path, + const std::uint8_t* rgbaPixels, + std::uint32_t width, + std::uint32_t height); + +} // namespace igl::tests::util + diff --git a/src/igl/tests/util/Common.cpp b/src/igl/tests/util/Common.cpp index aa427e3e80..4f9b7b0b82 100644 --- a/src/igl/tests/util/Common.cpp +++ b/src/igl/tests/util/Common.cpp @@ -167,6 +167,25 @@ void createSimpleShaderStages(const std::shared_ptr& dev, fragShader, std::string(igl::tests::data::shader::kShaderFunc), stages); + } else if (backendVersion.flavor == igl::BackendFlavor::D3D12) { + // Minimal HLSL equivalent used for D3D12 tests + const char* vsHlsl = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +PSIn main(VSIn i) { PSIn o; o.position = i.position_in; o.uv = i.uv_in; return o; } +)"; + const char* psHlsl = R"( +Texture2D inputImage : register(t0); +SamplerState samp0 : register(s0); +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +float4 main(PSIn i) : SV_TARGET { return inputImage.Sample(samp0, i.uv); } +)"; + createShaderStages(dev, + 
vsHlsl, + std::string("main"), + psHlsl, + std::string("main"), + stages); } else { ASSERT_TRUE(0); } diff --git a/src/igl/tests/util/Common.h b/src/igl/tests/util/Common.h index 510d930956..76169230d0 100644 --- a/src/igl/tests/util/Common.h +++ b/src/igl/tests/util/Common.h @@ -22,6 +22,7 @@ namespace igl::tests::util { constexpr std::string_view kBackendOgl("ogl"); constexpr std::string_view kBackendMtl("metal"); constexpr std::string_view kBackendVul("vulkan"); +constexpr std::string_view kBackendD3D12("d3d12"); // Creates an IGL device and a command queue void createDeviceAndQueue(std::shared_ptr& dev, std::shared_ptr& cq); diff --git a/src/igl/tests/util/TestDevice.cpp b/src/igl/tests/util/TestDevice.cpp index 854a0392ad..c465045ac0 100644 --- a/src/igl/tests/util/TestDevice.cpp +++ b/src/igl/tests/util/TestDevice.cpp @@ -37,6 +37,8 @@ std::shared_ptr createTestDevice() { return device::createTestDevice(::igl::BackendType::Metal); } else if (backend == "vulkan") { return device::createTestDevice(::igl::BackendType::Vulkan); + } else if (backend == "d3d12") { + return device::createTestDevice(::igl::BackendType::D3D12); // @fb-only // @fb-only } else { diff --git a/src/igl/tests/util/TextureFormatTestBase.cpp b/src/igl/tests/util/TextureFormatTestBase.cpp index de57c9e530..fcfcb36528 100644 --- a/src/igl/tests/util/TextureFormatTestBase.cpp +++ b/src/igl/tests/util/TextureFormatTestBase.cpp @@ -16,13 +16,35 @@ #include #include +#if IGL_PLATFORM_WINDOWS +#include +#include +#endif + namespace igl::tests::util { +#if IGL_PLATFORM_WINDOWS +namespace { +struct SehException : std::exception { + explicit SehException(unsigned int c) : code(c) {} + const char* what() const noexcept override { return "Structured exception"; } + unsigned int code; +}; + +void __cdecl sehTranslator(unsigned int code, EXCEPTION_POINTERS*) { + throw SehException(code); +} +} // namespace +#endif + #define OFFSCREEN_TEX_WIDTH 2 #define OFFSCREEN_TEX_HEIGHT 2 void 
TextureFormatTestBase::SetUp() { setDebugBreakEnabled(false); +#if IGL_PLATFORM_WINDOWS + _set_se_translator(sehTranslator); +#endif util::createDeviceAndQueue(iglDev_, cmdQueue_); ASSERT_TRUE(iglDev_ != nullptr); @@ -199,35 +221,54 @@ void TextureFormatTestBase::render(std::shared_ptr sampledTexture, Dependencies dep; dep.textures[0] = sampledTexture.get(); - Result result; - auto cmds = cmdBuf->createRenderCommandEncoder(renderPass_, framebuffer, dep, &result); - ASSERT_TRUE(result.isOk()); - cmds->bindVertexBuffer(data::shader::kSimplePosIndex, *vb_); - cmds->bindVertexBuffer(data::shader::kSimpleUvIndex, *uv_); - - // Create createFramebuffer fills in proper texture formats and shader stages in - // renderPipelineDesc_ - - auto pipelineState = iglDev_->createRenderPipeline(renderPipelineDesc_, &ret); - ASSERT_EQ(ret.code, Result::Code::Ok) << ret.message; - ASSERT_TRUE(pipelineState != nullptr); - - cmds->bindRenderPipelineState(pipelineState); - - cmds->bindTexture(textureUnit_, BindTarget::kFragment, sampledTexture.get()); - // Choose appropriate sampler. - cmds->bindSamplerState(textureUnit_, - BindTarget::kFragment, - (linearSampling ? 
linearSampler_ : nearestSampler_).get()); - - cmds->bindIndexBuffer(*ib_, IndexFormat::UInt16); - cmds->drawIndexed(6); - - cmds->endEncoding(); - - cmdQueue_->submit(*cmdBuf); - - cmdBuf->waitUntilCompleted(); + try { + Result result; + auto cmds = cmdBuf->createRenderCommandEncoder(renderPass_, framebuffer, dep, &result); + ASSERT_TRUE(result.isOk()); + cmds->bindVertexBuffer(data::shader::kSimplePosIndex, *vb_); + cmds->bindVertexBuffer(data::shader::kSimpleUvIndex, *uv_); + + // Create createFramebuffer fills in proper texture formats and shader stages in + // renderPipelineDesc_ + + auto pipelineState = iglDev_->createRenderPipeline(renderPipelineDesc_, &ret); + ASSERT_EQ(ret.code, Result::Code::Ok) << ret.message; + ASSERT_TRUE(pipelineState != nullptr); + + cmds->bindRenderPipelineState(pipelineState); + + cmds->bindTexture(textureUnit_, BindTarget::kFragment, sampledTexture.get()); + // Choose appropriate sampler. + cmds->bindSamplerState(textureUnit_, + BindTarget::kFragment, + (linearSampling ? 
linearSampler_ : nearestSampler_).get()); + + cmds->bindIndexBuffer(*ib_, IndexFormat::UInt16); + IGL_LOG_INFO("TextureFormatTestBase::render issuing draw for %s\n", testProperties.name); + cmds->drawIndexed(6); + + cmds->endEncoding(); + IGL_LOG_INFO("TextureFormatTestBase::render finished encoding for %s\n", testProperties.name); + + cmdQueue_->submit(*cmdBuf); + IGL_LOG_INFO("TextureFormatTestBase::render submitted work for %s\n", testProperties.name); + + cmdBuf->waitUntilCompleted(); +#if IGL_DEBUG + IGL_LOG_INFO("TextureFormatTestBase::render completed for format %s\n", testProperties.name); +#else + (void)testProperties; +#endif +#if IGL_PLATFORM_WINDOWS + } catch (const SehException& seh) { + IGL_LOG_ERROR("TextureFormatTestBase::render caught SEH exception 0x%08X", seh.code); + ADD_FAILURE() << "TextureFormatTestBase::render caught SEH exception 0x" << std::hex << seh.code; + return; +#endif + } catch (const std::exception& ex) { + ADD_FAILURE() << "TextureFormatTestBase::render threw std::exception: " << ex.what(); + return; + } } std::pair TextureFormatTestBase::checkSupport( diff --git a/src/igl/tests/util/TextureValidationHelpers.h b/src/igl/tests/util/TextureValidationHelpers.h index 83134a9f97..cf922a441a 100644 --- a/src/igl/tests/util/TextureValidationHelpers.h +++ b/src/igl/tests/util/TextureValidationHelpers.h @@ -130,9 +130,10 @@ inline void validateTextureRange(IDevice& device, fb->copyBytesColorAttachment(cmdQueue, 0, actualData.data(), range); if (!isRenderTarget && (device.getBackendType() == igl::BackendType::Metal || - device.getBackendType() == igl::BackendType::Vulkan)) { - // The Vulkan and Metal implementations of copyBytesColorAttachment flip the returned image - // vertically. 
This is the desired behavior for render targets, but for non-render target + device.getBackendType() == igl::BackendType::Vulkan || + device.getBackendType() == igl::BackendType::D3D12)) { + // The Vulkan, Metal, and D3D12 implementations of copyBytesColorAttachment flip the returned + // image vertically. This is the desired behavior for render targets, but for non-render target // textures, we want the unflipped data. This flips the output image again to get the unmodified // data. std::vector tmpData; diff --git a/src/igl/tests/util/device/TestDevice.cpp b/src/igl/tests/util/device/TestDevice.cpp index 48979d2bc3..dd0e5ccf43 100644 --- a/src/igl/tests/util/device/TestDevice.cpp +++ b/src/igl/tests/util/device/TestDevice.cpp @@ -27,6 +27,9 @@ #if IGL_VULKAN_SUPPORTED #include #endif +#if IGL_D3D12_SUPPORTED +#include +#endif // @fb-only // @fb-only // @fb-only @@ -45,6 +48,8 @@ bool isBackendTypeSupported(BackendType backendType) { return IGL_OPENGL_SUPPORTED; case ::igl::BackendType::Vulkan: return IGL_VULKAN_SUPPORTED; + case ::igl::BackendType::D3D12: + return IGL_D3D12_SUPPORTED; // @fb-only // @fb-only } @@ -71,6 +76,20 @@ std::unique_ptr createTestDevice(BackendType backendType, const TestDev return vulkan::createTestDevice(config.enableVulkanValidationLayers); #else return nullptr; +#endif + } + if (backendType == ::igl::BackendType::D3D12) { +#if IGL_D3D12_SUPPORTED + IGL_LOG_INFO("[Tests] Creating D3D12 test device (debug layer: enabled)\n"); + auto dev = d3d12::createTestDevice(true); + if (!dev) { + IGL_LOG_ERROR("[Tests] D3D12 test device creation failed\n"); + } else { + IGL_LOG_INFO("[Tests] D3D12 test device created OK\n"); + } + return dev; +#else + return nullptr; #endif } // @fb-only diff --git a/src/igl/tests/util/device/TestDevice.h b/src/igl/tests/util/device/TestDevice.h index 879516cad6..4fec188d05 100644 --- a/src/igl/tests/util/device/TestDevice.h +++ b/src/igl/tests/util/device/TestDevice.h @@ -32,6 +32,12 @@ #define IGL_VULKAN_SUPPORTED 
0 #endif +#if IGL_PLATFORM_WINDOWS && IGL_BACKEND_ENABLE_D3D12 && !defined(IGL_UNIT_TESTS_NO_D3D12) +#define IGL_D3D12_SUPPORTED 1 +#else +#define IGL_D3D12_SUPPORTED 0 +#endif + namespace igl::tests::util::device { struct TestDeviceConfig { @@ -44,7 +50,9 @@ struct TestDeviceConfig { */ bool isBackendTypeSupported(BackendType backendType); -#if IGL_OPENGL_SUPPORTED +#if IGL_D3D12_SUPPORTED +constexpr BackendType kDefaultBackendType = BackendType::D3D12; +#elif IGL_OPENGL_SUPPORTED constexpr BackendType kDefaultBackendType = BackendType::OpenGL; #elif IGL_VULKAN_SUPPORTED constexpr BackendType kDefaultBackendType = BackendType::Vulkan; diff --git a/src/igl/tests/util/device/d3d12/TestDevice.cpp b/src/igl/tests/util/device/d3d12/TestDevice.cpp new file mode 100644 index 0000000000..7b1cb7124b --- /dev/null +++ b/src/igl/tests/util/device/d3d12/TestDevice.cpp @@ -0,0 +1,28 @@ +/* Minimal D3D12 test device factory using a headless context. */ + +#include +#include +#include +#include +#include + +#include "TestDevice.h" + +namespace igl::tests::util::device::d3d12 { + +std::unique_ptr createTestDevice(bool enableDebugLayer) { + IGL_LOG_INFO("[Tests] D3D12 test device requested (debug layer: %s)\n", + enableDebugLayer ? "enabled" : "disabled"); + + // Enabling the debug layer happens inside D3D12Context::createDevice() when available. + // Build a headless context (no swapchain) suitable for unit tests. 
+ auto ctx = std::make_unique(); + auto res = ctx->initializeHeadless(256, 256); + if (res.code != Result::Code::Ok) { + IGL_LOG_ERROR("[Tests] D3D12 headless context init failed: %s\n", res.message.c_str()); + return nullptr; + } + return std::make_unique(std::move(ctx)); +} + +} // namespace igl::tests::util::device::d3d12 diff --git a/src/igl/tests/util/device/d3d12/TestDevice.h b/src/igl/tests/util/device/d3d12/TestDevice.h new file mode 100644 index 0000000000..edc9b6ca09 --- /dev/null +++ b/src/igl/tests/util/device/d3d12/TestDevice.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::tests::util::device::d3d12 { + +/** + * Create and return an igl::d3d12::Device that is suitable for running tests against. + * This creates a headless device without a swapchain, suitable for unit testing. 
+ */ +std::unique_ptr createTestDevice(bool enableDebugLayer = true); + +} // namespace igl::tests::util::device::d3d12 diff --git a/src/igl/vulkan/Device.cpp b/src/igl/vulkan/Device.cpp index 7f26f23a36..7b00f9e6da 100644 --- a/src/igl/vulkan/Device.cpp +++ b/src/igl/vulkan/Device.cpp @@ -782,6 +782,27 @@ bool Device::getFeatureLimitsInternal(DeviceFeatureLimits featureLimits, size_t& case DeviceFeatureLimits::MaxBindBytesBytes: result = 0; return true; + case DeviceFeatureLimits::MaxTextureDimension3D: + result = limits.maxImageDimension3D; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + result = limits.maxComputeWorkGroupSize[0]; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + result = limits.maxComputeWorkGroupSize[1]; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + result = limits.maxComputeWorkGroupSize[2]; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + result = limits.maxComputeWorkGroupInvocations; + return true; + case DeviceFeatureLimits::MaxVertexInputAttributes: + result = limits.maxVertexInputAttributes; + return true; + case DeviceFeatureLimits::MaxColorAttachments: + result = limits.maxColorAttachments; + return true; } IGL_DEBUG_ABORT("DeviceFeatureLimits value not handled: %d", (int)featureLimits); diff --git a/src/igl/vulkan/VulkanFeatures.cpp b/src/igl/vulkan/VulkanFeatures.cpp index 8ba5909077..ec0ec644d1 100644 --- a/src/igl/vulkan/VulkanFeatures.cpp +++ b/src/igl/vulkan/VulkanFeatures.cpp @@ -124,12 +124,12 @@ VulkanFeatures::VulkanFeatures(VulkanContextConfig config) noexcept : .taskShader = VK_TRUE, .meshShader = VK_TRUE, }), - config(config) { + config_(config) { extensions_.resize(kNumberOfExtensionTypes); enabledExtensions_.resize(kNumberOfExtensionTypes); // All the above get assembled into a feature chain - assembleFeatureChain(config); + assembleFeatureChain(config_); } void VulkanFeatures::populateWithAvailablePhysicalDeviceFeatures( @@ 
-185,7 +185,7 @@ Result VulkanFeatures::checkSelectedFeatures( #define ENABLE_FEATURE_1_1_EXT(requestedFeatureStruct, availableFeatureStruct, feature) \ ENABLE_VULKAN_FEATURE(requestedFeatureStruct, availableFeatureStruct, feature, "1.1 EXT") - if (config.enableDescriptorIndexing) { + if (config_.enableDescriptorIndexing) { ENABLE_FEATURE_1_1_EXT(featuresDescriptorIndexing, availableFeatures.featuresDescriptorIndexing, shaderSampledImageArrayNonUniformIndexing) @@ -250,7 +250,7 @@ Result VulkanFeatures::checkSelectedFeatures( return Result{}; } -void VulkanFeatures::assembleFeatureChain(const VulkanContextConfig& contextConfig) noexcept { +void VulkanFeatures::assembleFeatureChain(const VulkanContextConfig& config) noexcept { // Versions 1.0 and 1.1 are always present // Reset all pNext pointers. We might be copying the chain from another VulkanFeatures object, @@ -310,7 +310,7 @@ void VulkanFeatures::assembleFeatureChain(const VulkanContextConf if (hasExtension(VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME)) { ivkAddNext(&vkPhysicalDeviceFeatures2, &featuresUniformBufferStandardLayout); } - if (contextConfig.enableMultiviewPerViewViewports) { + if (config.enableMultiviewPerViewViewports) { if (hasExtension(VK_QCOM_MULTIVIEW_PER_VIEW_VIEWPORTS_EXTENSION_NAME)) { ivkAddNext(&vkPhysicalDeviceFeatures2, &featuresMultiviewPerViewViewports); } else { @@ -328,7 +328,7 @@ VulkanFeatures& VulkanFeatures::operator=(const VulkanFeatures& other) noexcept } const bool sameConfiguration = - config.enableDescriptorIndexing == other.config.enableDescriptorIndexing; + config_.enableDescriptorIndexing == other.config_.enableDescriptorIndexing; if (!sameConfiguration) { return *this; } @@ -357,7 +357,7 @@ VulkanFeatures& VulkanFeatures::operator=(const VulkanFeatures& other) noexcept enabledExtensions_ = other.enabledExtensions_; extensionProps_ = other.extensionProps_; - assembleFeatureChain(config); + assembleFeatureChain(config_); return *this; } @@ -422,7 
+422,7 @@ bool VulkanFeatures::enable(const char* extensionName, ExtensionType extensionTy return false; } -void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& contextConfig) { +void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& config) { enable(VK_KHR_SURFACE_EXTENSION_NAME, ExtensionType::Instance); enable(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, ExtensionType::Instance); #if IGL_PLATFORM_WINDOWS @@ -443,7 +443,7 @@ void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& c #endif // IGL_PLATFORM_MACOSX #if !IGL_PLATFORM_ANDROID - if (contextConfig.enableValidation) { + if (config.enableValidation) { enable(VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME, ExtensionType::Instance); } #endif // !IGL_PLATFORM_ANDROID @@ -454,12 +454,12 @@ void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& c has_VK_EXT_headless_surface = enable(VK_EXT_HEADLESS_SURFACE_EXTENSION_NAME, ExtensionType::Instance); - if (contextConfig.headless) { + if (config.headless) { if (!has_VK_EXT_headless_surface) { IGL_LOG_ERROR("VK_EXT_headless_surface extension not supported\n"); } } - if (contextConfig.swapChainColorSpace != igl::ColorSpace::SRGB_NONLINEAR) { + if (config.swapChainColorSpace != igl::ColorSpace::SRGB_NONLINEAR) { const bool enabledExtension = enable(VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME, ExtensionType::Instance); if (!enabledExtension) { @@ -468,7 +468,7 @@ void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& c } } -void VulkanFeatures::enableCommonDeviceExtensions(const VulkanContextConfig& contextConfig) { +void VulkanFeatures::enableCommonDeviceExtensions(const VulkanContextConfig& config) { enable(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, ExtensionType::Device); enable(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, ExtensionType::Device); enable(VK_KHR_SWAPCHAIN_EXTENSION_NAME, ExtensionType::Device); @@ -531,7 +531,7 @@ void 
VulkanFeatures::enableCommonDeviceExtensions(const VulkanContextConfig& con has_VK_EXT_fragment_density_map = enable(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME, ExtensionType::Device); - if (contextConfig.enableMultiviewPerViewViewports) { + if (config.enableMultiviewPerViewViewports) { has_VK_QCOM_multiview_per_view_viewports = enable(VK_QCOM_MULTIVIEW_PER_VIEW_VIEWPORTS_EXTENSION_NAME, ExtensionType::Device); IGL_SOFT_ASSERT(has_VK_QCOM_multiview_per_view_viewports, diff --git a/src/igl/vulkan/VulkanFeatures.h index 6743c34bbf..5ee3629733 100644 --- a/src/igl/vulkan/VulkanFeatures.h +++ b/src/igl/vulkan/VulkanFeatures.h @@ -97,15 +97,15 @@ class VulkanFeatures final { /// @param extensionType The type of the extensions /// @param validationEnabled Flag that informs the class whether the Validation Layer is /// enabled or not. - void enableCommonInstanceExtensions(const VulkanContextConfig& contextConfig); - void enableCommonDeviceExtensions(const VulkanContextConfig& contextConfig); + void enableCommonInstanceExtensions(const VulkanContextConfig& config); + void enableCommonDeviceExtensions(const VulkanContextConfig& config); public: friend class Device; friend class VulkanContext; // A copy of the config used by the VulkanContext - VulkanContextConfig config{}; + VulkanContextConfig config_{}; // NOLINTBEGIN(readability-identifier-naming) bool has_VK_EXT_descriptor_indexing = false; // promoted to Vulkan 1.2 @@ -139,7 +139,7 @@ class VulkanFeatures final { /// @brief Assembles the feature chain for the VkPhysicalDeviceFeatures2 structure by connecting /// the existing/required feature structures and their pNext chain. 
- void assembleFeatureChain(const VulkanContextConfig& contextConfig) noexcept; + void assembleFeatureChain(const VulkanContextConfig& config) noexcept; bool hasExtension(const char* ext) const; /// @brief Enables the extension with name `extensionName` of the type `extensionType` if the diff --git a/third-party/bootstrap-deps.json b/third-party/bootstrap-deps.json index 194729043a..c9f3094552 100644 --- a/third-party/bootstrap-deps.json +++ b/third-party/bootstrap-deps.json @@ -111,19 +111,27 @@ "revision": "v1.91.2" } }, -{ - "name": "volk", - "source": { - "type": "git", - "url": "https://github.com/zeux/volk", - "revision": "1.4.304" - } -}, -{ - "name": "vma", - "source": { - "type": "git", - "url": "https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git", +{ + "name": "volk", + "source": { + "type": "git", + "url": "https://github.com/zeux/volk", + "revision": "1.4.304" + } +}, +{ + "name": "DirectX-Headers", + "source": { + "type": "git", + "url": "https://github.com/microsoft/DirectX-Headers.git", + "revision": "v1.614.0" + } +}, +{ + "name": "vma", + "source": { + "type": "git", + "url": "https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git", "revision": "v3.2.0" } },