diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f83f19def..605ff4786c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ option(IGL_WITH_OPENGLES "Enable IGL/OpenGL ES" OFF) option(IGL_WITH_VULKAN "Enable IGL/Vulkan" ON) option(IGL_WITH_METAL "Enable IGL/Metal" ON) option(IGL_WITH_WEBGL "Enable IGL/WebGL" OFF) +option(IGL_WITH_D3D12 "Enable IGL/DirectX 12" OFF) option(IGL_WITH_IGLU "Enable IGLU utils" ON) option(IGL_WITH_SHELL "Enable Shell utils" ON) @@ -49,6 +50,10 @@ if(NOT APPLE) set(IGL_WITH_METAL OFF) endif() +if(NOT WIN32) + set(IGL_WITH_D3D12 OFF) +endif() + if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") # disable for all targets due to warnings in third-party code add_definitions(-Wno-nullability-completeness) @@ -100,6 +105,7 @@ message(STATUS "IGL_WITH_OPENGLES = ${IGL_WITH_OPENGLES}") message(STATUS "IGL_WITH_VULKAN = ${IGL_WITH_VULKAN}") message(STATUS "IGL_WITH_METAL = ${IGL_WITH_METAL}") message(STATUS "IGL_WITH_WEBGL = ${IGL_WITH_WEBGL}") +message(STATUS "IGL_WITH_D3D12 = ${IGL_WITH_D3D12}") message(STATUS "IGL_WITH_IGLU = ${IGL_WITH_IGLU}") message(STATUS "IGL_WITH_SHELL = ${IGL_WITH_SHELL}") @@ -120,8 +126,8 @@ if(APPLE) message(FATAL_ERROR "At least one rendering backend should be defined (OpenGL, Vulkan or Metal).") endif() else() - if(NOT (IGL_WITH_OPENGL OR IGL_WITH_VULKAN OR IGL_WITH_OPENGLES OR IGL_WITH_WEBGL)) - message(FATAL_ERROR "At least one rendering backend should be defined (OpenGL or Vulkan).") + if(NOT (IGL_WITH_OPENGL OR IGL_WITH_VULKAN OR IGL_WITH_OPENGLES OR IGL_WITH_WEBGL OR IGL_WITH_D3D12)) + message(FATAL_ERROR "At least one rendering backend should be defined (OpenGL, Vulkan, or DirectX 12).") endif() endif() @@ -193,6 +199,16 @@ if(IGL_WITH_OPENXR) igl_set_folder(openxr_loader "third-party/OpenXR") endif() +if(WIN32 AND IGL_WITH_D3D12) + set(DIRECTX_HEADERS_ROOT "${IGL_ROOT_DIR}/third-party/deps/src/DirectX-Headers") + if(EXISTS "${DIRECTX_HEADERS_ROOT}/CMakeLists.txt") + 
add_subdirectory("${DIRECTX_HEADERS_ROOT}" "${CMAKE_BINARY_DIR}/DirectX-Headers") + set(DIRECTX_HEADERS_INCLUDE_DIR "${DIRECTX_HEADERS_ROOT}/include/directx") + else() + message(FATAL_ERROR "DirectX-Headers dependency not found. Run deploy_deps.py to download third-party/deps/src/DirectX-Headers.") + endif() +endif() + add_subdirectory(src/igl) if(IGL_WITH_TRACY) @@ -234,11 +250,20 @@ endif() if(APPLE AND IGL_WITH_METAL) target_compile_definitions(IGLLibrary PUBLIC "IGL_BACKEND_ENABLE_METAL=1") endif() +if(WIN32 AND IGL_WITH_D3D12) + target_compile_definitions(IGLLibrary PUBLIC "IGL_BACKEND_ENABLE_D3D12=1") +endif() target_compile_definitions(IGLLibrary PUBLIC "IGL_CMAKE_BUILD=1") include_directories(.) +# Enable CTest at top-level when tests are requested so `ctest` can discover tests +if(IGL_WITH_TESTS) + include(CTest) + enable_testing() +endif() + if(IGL_WITH_IGLU OR IGL_WITH_SAMPLES) add_library(IGLstb third-party/deps/patches/stb_impl/stb_image.c third-party/deps/patches/stb_impl/stb_image_resize.c third-party/deps/patches/stb_impl/stb_image_write.c) diff --git a/samples/desktop/CMakeLists.txt b/samples/desktop/CMakeLists.txt index d4a2d579db..48ff15312c 100644 --- a/samples/desktop/CMakeLists.txt +++ b/samples/desktop/CMakeLists.txt @@ -35,19 +35,52 @@ macro(ADD_DEMO app) target_link_libraries(${app} PRIVATE EGL) endif() target_link_libraries(${app} PRIVATE IGLstb) + + # For D3D12 builds on Windows, ensure dxil.dll is deployed next to sample + # executables so that DXC/DXIL validation and signed DXIL shaders work in + # both Debug and Release configurations. This mirrors the behavior used for + # render sessions (shell/windows/CMakeLists.txt) and unit tests + # (test_all_unittests.bat). 
+ if(IGL_WITH_D3D12 AND WIN32 AND MSVC) + find_file(DXIL_DLL_FOR_${app} + NAMES dxil.dll + PATHS + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22621.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22000.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64" + "$ENV{WindowsSdkBinPath}/x64" + NO_DEFAULT_PATH + ) + if(DXIL_DLL_FOR_${app}) + add_custom_command(TARGET ${app} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL_FOR_${app}}" + "$<TARGET_FILE_DIR:${app}>/" + COMMENT "Copying dxil.dll for ${app}" + ) + endif() + endif() endmacro() -add_demo("Tiny") +if(IGL_WITH_OPENGL OR IGL_WITH_VULKAN) + add_demo("Tiny") +endif() if(IGL_WITH_VULKAN) # this demo app does not work without Vulkan (yet) add_demo("Tiny_Mesh") endif() -add_demo("Tiny_MeshLarge") +# Tiny_MeshLarge can run on Vulkan/OpenGL; expose it for D3D12 configs too so the binary is available. +if(IGL_WITH_OPENGL OR IGL_WITH_VULKAN OR IGL_WITH_D3D12) + add_demo("Tiny_MeshLarge") -target_sources(Tiny_MeshLarge - PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/3D-Graphics-Rendering-Cookbook/shared/UtilsCubemap.cpp") -if(NOT IGL_WITH_VULKAN) - target_sources(Tiny_MeshLarge PUBLIC "${IGL_ROOT_DIR}/src/igl/vulkan/util/TextureFormat.cpp") + target_sources( + Tiny_MeshLarge + PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/3D-Graphics-Rendering-Cookbook/shared/UtilsCubemap.cpp") + if(NOT IGL_WITH_VULKAN) + target_sources(Tiny_MeshLarge PUBLIC "${IGL_ROOT_DIR}/src/igl/vulkan/util/TextureFormat.cpp") + endif() +else() + message(STATUS "Skipping Tiny_MeshLarge: no compatible backend enabled (needs OpenGL/Vulkan/D3D12)") endif() diff --git a/shell/CMakeLists.txt b/shell/CMakeLists.txt index ef205a131a..63c974c1ba 100644 --- a/shell/CMakeLists.txt +++ b/shell/CMakeLists.txt @@ -82,6 +82,7 @@ if(IGL_WITH_OPENXR) add_subdirectory(openxr) endif() + macro(ADD_SHELL_SESSION target libs) set(shell_srcs apps/SessionApp.cpp renderSessions/${target}.cpp renderSessions/${target}.h) 
add_shell_session_with_srcs(${target} "${shell_srcs}" "${libs}") diff --git a/shell/windows/CMakeLists.txt b/shell/windows/CMakeLists.txt index b0ef48b155..f4fd278fed 100644 --- a/shell/windows/CMakeLists.txt +++ b/shell/windows/CMakeLists.txt @@ -41,6 +41,12 @@ endif() if(IGL_WITH_OPENGLES) add_shell_app(opengles) endif() +# Only add D3D12 shell app if the sources are present (removed in this branch) +set(IGL_D3D12_APP_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/../windows/d3d12/App.cpp") +if(IGL_WITH_D3D12 AND EXISTS "${IGL_D3D12_APP_SOURCE}") + add_shell_app(d3d12) + target_link_libraries(IGLShellApp_d3d12 PUBLIC IGLD3D12) +endif() function(ADD_SHELL_SESSION_BACKEND targetApp backend srcs libs) set(target ${targetApp}_${backend}) @@ -50,6 +56,27 @@ function(ADD_SHELL_SESSION_BACKEND targetApp backend srcs libs) target_compile_definitions(${target} PRIVATE "IGL_SHELL_SESSION=${targetApp}") target_link_libraries(${target} PUBLIC ${libs}) target_link_libraries(${target} PUBLIC IGLShellApp_${backend}) + + # Copy dxil.dll for D3D12 executables (required for DXIL signing) + if(backend STREQUAL "d3d12" AND WIN32 AND MSVC) + find_file(DXIL_DLL_FOR_${target} + NAMES dxil.dll + PATHS + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22621.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.22000.0/x64" + "C:/Program Files (x86)/Windows Kits/10/bin/10.0.19041.0/x64" + "$ENV{WindowsSdkBinPath}/x64" + NO_DEFAULT_PATH + ) + if(DXIL_DLL_FOR_${target}) + add_custom_command(TARGET ${target} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL_FOR_${target}}" + "$<TARGET_FILE_DIR:${target}>/" + COMMENT "Copying dxil.dll for ${target}" + ) + endif() + endif() endfunction() function(ADD_SHELL_SESSION_BACKEND_OPENXR_SIM targetApp backend srcs libs compileDefs) @@ -72,4 +99,7 @@ macro(ADD_SHELL_SESSION_WITH_SRCS target srcs libs) if(IGL_WITH_OPENGLES) add_shell_session_backend(${target} opengles "${srcs}" "${libs}") endif() + if(IGL_WITH_D3D12 AND TARGET IGLShellApp_d3d12) + 
add_shell_session_backend(${target} d3d12 "${srcs}" "${libs}") + endif() endmacro() diff --git a/src/igl/Buffer.h b/src/igl/Buffer.h index 38d2c7e008..44ab481873 100644 --- a/src/igl/Buffer.h +++ b/src/igl/Buffer.h @@ -80,6 +80,21 @@ struct BufferDesc { /** @brief Identifier used for debugging */ std::string debugName; + /** + * @brief Element stride in bytes for storage buffers. + * + * For buffers created with BufferTypeBits::Storage, this describes the size of a single + * structured element when the buffer is viewed as a StructuredBuffer / RWStructuredBuffer. + * + * Backends that create structured SRV/UAV views (such as D3D12) use this value to populate + * D3D12_BUFFER_SRV / D3D12_BUFFER_UAV StructureByteStride and to compute NumElements. + * + * A value of 0 means "unknown/unspecified" and backends may fall back to a default + * element size (typically 4 bytes) for compatibility with existing code that assumes + * float / uint elements. + */ + size_t storageStride = 0; + BufferDesc(BufferType type = 0, const void* IGL_NULLABLE data = nullptr, size_t length = 0, diff --git a/src/igl/CMakeLists.txt b/src/igl/CMakeLists.txt index 27c02d3f54..acf1e99f77 100644 --- a/src/igl/CMakeLists.txt +++ b/src/igl/CMakeLists.txt @@ -63,9 +63,16 @@ if(IGL_WITH_METAL) target_link_libraries(IGLLibrary PUBLIC IGLMetal) endif() +if(IGL_WITH_D3D12) + add_subdirectory(d3d12) + target_link_libraries(IGLLibrary PUBLIC IGLD3D12) +endif() + # OpenGL tests use GLES on Windows and we do not use Angle with CMake - so OGL # tests are disabled for now on Windows -if(IGL_WITH_TESTS AND IGL_WITH_IGLU AND (IGL_WITH_VULKAN OR (NOT WIN32))) +# Enable tests when requested. On Windows, allow tests if either Vulkan or D3D12 is enabled +# (previously required Vulkan on Windows, which blocked D3D12-only test runs). 
+if(IGL_WITH_TESTS AND IGL_WITH_IGLU AND (IGL_WITH_VULKAN OR IGL_WITH_D3D12 OR (NOT WIN32))) add_subdirectory(tests) if((IGL_WITH_OPENGL OR IGL_WITH_OPENGLES) AND NOT APPLE) target_sources(IGLTests PRIVATE opengl/egl/Context.cpp opengl/egl/Device.cpp opengl/egl/HWDevice.cpp diff --git a/src/igl/Common.h b/src/igl/Common.h index a5d505d311..127f4e4f67 100644 --- a/src/igl/Common.h +++ b/src/igl/Common.h @@ -28,7 +28,14 @@ using Deleter = void (*)(void* IGL_NULLABLE); /// Device Capabilities or Metal Features constexpr uint32_t IGL_TEXTURE_SAMPLERS_MAX = 16; -constexpr uint32_t IGL_VERTEX_ATTRIBUTES_MAX = 24; + +// Maximum vertex attributes across all backends +// - D3D12: D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT = 32 +// - Vulkan: VkPhysicalDeviceLimits::maxVertexInputAttributes (typically >= 16, commonly 32) +// - Metal: 31 (Metal Feature Set Tables) +// - OpenGL: GL_MAX_VERTEX_ATTRIBS (typically >= 16) +// Setting to 32 ensures compatibility with D3D12 (the most widely-supported modern API) +constexpr uint32_t IGL_VERTEX_ATTRIBUTES_MAX = 32; // maximum number of buffers that can be bound to a shader stage // See maximum number of entries in the buffer argument table, per graphics or kernel function @@ -136,6 +143,7 @@ enum class BackendType { OpenGL, Metal, Vulkan, + D3D12, // @fb-only Custom, }; @@ -146,6 +154,7 @@ enum class BackendFlavor : uint8_t { OpenGL_ES, Metal, Vulkan, + D3D12, // @fb-only }; diff --git a/src/igl/Device.cpp b/src/igl/Device.cpp index 027a990be7..a10831b942 100644 --- a/src/igl/Device.cpp +++ b/src/igl/Device.cpp @@ -50,6 +50,8 @@ Color IDevice::backendDebugColor() const noexcept { return {1.f, 0.f, 1.f, 1.f}; case BackendType::Vulkan: return {0.f, 1.f, 1.f, 1.f}; + case BackendType::D3D12: + return {0.f, 1.f, 1.f, 1.f}; // Match Vulkan for parity testing // @fb-only // @fb-only case BackendType::Custom: diff --git a/src/igl/DeviceFeatures.h b/src/igl/DeviceFeatures.h index 0a065e09f1..8c7fb77cbc 100644 --- a/src/igl/DeviceFeatures.h +++ 
b/src/igl/DeviceFeatures.h @@ -154,18 +154,30 @@ enum class DeviceRequirement { * @brief DeviceFeatureLimits provides specific limitations on certain features supported on the * device * - * BufferAlignment Required byte alignment for buffer data - * BufferNoCopyAlignment Required byte alignment for no copy buffer data - * MaxBindBytesBytes Maximum number of bytes that can be bound with bindBytes - * MaxCubeMapDimension Maximum cube map dimensions - * MaxFragmentUniformVectors Maximum fragment uniform vectors - * MaxMultisampleCount Maximum number of samples - * MaxPushConstantBytes Maximum number of bytes for Push Constants - * MaxTextureDimension1D2D Maximum texture dimensions - * MaxUniformBufferBytes Maximum number of bytes for a uniform buffer - * MaxStorageBufferBytes Maximum number of bytes for storage buffers - * MaxVertexUniformVectors Maximum vertex uniform vectors - * PushConstantsAlignment Required byte alignment for push constants data + * BufferAlignment Required byte alignment for buffer data + * BufferNoCopyAlignment Required byte alignment for no copy buffer data + * MaxBindBytesBytes Maximum number of bytes that can be bound with bindBytes + * MaxCubeMapDimension Maximum cube map dimensions + * MaxFragmentUniformVectors Maximum fragment uniform vectors + * MaxMultisampleCount Maximum number of samples + * MaxPushConstantBytes Maximum number of bytes for Push Constants + * MaxTextureDimension1D2D Maximum texture dimensions for 1D and 2D textures + * MaxTextureDimension3D Maximum texture dimensions for 3D textures + * MaxStorageBufferBytes Maximum number of bytes for storage buffers + * MaxUniformBufferBytes Maximum number of bytes for a uniform buffer + * MaxVertexUniformVectors Maximum vertex uniform vectors + * PushConstantsAlignment Required byte alignment for push constants data + * ShaderStorageBufferOffsetAlignment Required byte alignment for shader storage buffer offset + * MaxComputeWorkGroupSizeX Maximum compute work group size in X 
dimension + * MaxComputeWorkGroupSizeY Maximum compute work group size in Y dimension + * MaxComputeWorkGroupSizeZ Maximum compute work group size in Z dimension + * MaxComputeWorkGroupInvocations Maximum total compute work group invocations + * MaxVertexInputAttributes Maximum number of vertex input attributes + * MaxColorAttachments Maximum number of color attachments (render targets) + * MaxDescriptorHeapCbvSrvUav Maximum CBV/SRV/UAV descriptors in shader-visible heap (I-005) + * MaxDescriptorHeapSamplers Maximum sampler descriptors in shader-visible heap (I-005) + * MaxDescriptorHeapRtvs Maximum RTV descriptors in CPU-visible heap (I-005) + * MaxDescriptorHeapDsvs Maximum DSV descriptors in CPU-visible heap (I-005) */ enum class DeviceFeatureLimits { BufferAlignment = 0, @@ -176,11 +188,23 @@ enum class DeviceFeatureLimits { MaxMultisampleCount, MaxPushConstantBytes, MaxTextureDimension1D2D, + MaxTextureDimension3D, MaxStorageBufferBytes, MaxUniformBufferBytes, MaxVertexUniformVectors, PushConstantsAlignment, ShaderStorageBufferOffsetAlignment, + MaxComputeWorkGroupSizeX, + MaxComputeWorkGroupSizeY, + MaxComputeWorkGroupSizeZ, + MaxComputeWorkGroupInvocations, + MaxVertexInputAttributes, + MaxColorAttachments, + // I-005: Descriptor heap size limits for cross-platform compatibility + MaxDescriptorHeapCbvSrvUav, + MaxDescriptorHeapSamplers, + MaxDescriptorHeapRtvs, + MaxDescriptorHeapDsvs, }; /** @@ -192,7 +216,7 @@ enum class DeviceFeatureLimits { * Metal Metal API (macOS, iOS, etc.) 
* SpirV Standard Portable Intermediate Representation open standard format */ -enum class ShaderFamily : uint8_t { Unknown, Glsl, GlslEs, Metal, SpirV }; +enum class ShaderFamily : uint8_t { Unknown, Glsl, GlslEs, Metal, SpirV, Hlsl }; /** * @brief ShaderVersion provides information on the shader family type and version diff --git a/src/igl/PlatformDevice.h b/src/igl/PlatformDevice.h index 087df19d24..03e1d86edc 100644 --- a/src/igl/PlatformDevice.h +++ b/src/igl/PlatformDevice.h @@ -27,6 +27,7 @@ enum class PlatformDeviceType { OpenGLMacOS, OpenGLWebGL, Vulkan, + D3D12, // @fb-only }; diff --git a/src/igl/d3d12/Buffer.cpp b/src/igl/d3d12/Buffer.cpp new file mode 100644 index 0000000000..e9ae928303 --- /dev/null +++ b/src/igl/d3d12/Buffer.cpp @@ -0,0 +1,601 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +constexpr D3D12_RESOURCE_DESC makeBufferDesc(UINT64 size, D3D12_RESOURCE_FLAGS flags = D3D12_RESOURCE_FLAG_NONE) { + D3D12_RESOURCE_DESC desc = {}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Alignment = 0; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.SampleDesc.Quality = 0; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + desc.Flags = flags; + return desc; +} +} // namespace + +Buffer::Buffer(Device& device, + igl::d3d12::ComPtr resource, + const BufferDesc& desc, + D3D12_RESOURCE_STATES initialState) + : device_(&device), + resource_(std::move(resource)), + desc_(desc), + defaultState_(computeDefaultState(desc)), + currentState_(initialState) { + // Determine storage type based on heap properties + if (resource_.Get()) { + D3D12_HEAP_PROPERTIES heapProps; + D3D12_HEAP_FLAGS 
heapFlags; + resource_->GetHeapProperties(&heapProps, &heapFlags); + + if (heapProps.Type == D3D12_HEAP_TYPE_UPLOAD) { + storage_ = ResourceStorage::Shared; + } else if (heapProps.Type == D3D12_HEAP_TYPE_READBACK) { + storage_ = ResourceStorage::Shared; + } else { + storage_ = ResourceStorage::Private; + } + + if (storage_ != ResourceStorage::Private) { + currentState_ = D3D12_RESOURCE_STATE_GENERIC_READ; + } + + // Track resource creation + D3D12Context::trackResourceCreation("Buffer", desc_.length); + } +} + +Buffer::~Buffer() { + if (resource_.Get()) { + // Track resource destruction + D3D12Context::trackResourceDestruction("Buffer", desc_.length); + } + if (mappedPtr_) { + unmap(); + } +} + +Result Buffer::upload(const void* data, const BufferRange& range) { + if (resource_.Get() == nullptr) { + return Result(Result::Code::ArgumentInvalid, "Buffer resource is null"); + } + + if (!data) { + IGL_LOG_ERROR("Buffer::upload: data is NULL!\n"); + return Result(Result::Code::ArgumentInvalid, "Upload data is null"); + } + + // Validate range + if (range.size == 0 || range.offset + range.size > desc_.length) { + return Result(Result::Code::ArgumentOutOfRange, "Upload range is out of bounds"); + } + + // For UPLOAD heap, map, copy, unmap + if (storage_ == ResourceStorage::Shared) { + void* mappedData = nullptr; + D3D12_RANGE readRange = {0, 0}; // Not reading from GPU + + HRESULT hr = resource_->Map(0, &readRange, &mappedData); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to map buffer"); + } + + uint8_t* dest = static_cast(mappedData) + range.offset; + std::memcpy(dest, data, range.size); + + D3D12_RANGE writtenRange = {range.offset, range.offset + range.size}; + resource_->Unmap(0, &writtenRange); + + return Result(Result::Code::Ok); + } + + // For DEFAULT heap, need upload via intermediate buffer + if (!device_) { + return Result(Result::Code::RuntimeError, "Buffer device is null"); + } + + auto& ctx = device_->getD3D12Context(); + 
ID3D12Device* d3dDevice = ctx.getDevice(); + ID3D12CommandQueue* queue = ctx.getCommandQueue(); + if (!d3dDevice || !queue) { + return Result(Result::Code::RuntimeError, "D3D12 device or command queue unavailable"); + } + + // Reclaim completed upload buffers before allocating new ones. + device_->processCompletedUploads(); + + UploadRingBuffer* ringBuffer = device_->getUploadRingBuffer(); + UploadRingBuffer::Allocation ringAllocation; + bool useRingBuffer = false; + + // Get fence value that will signal when this upload completes + const UINT64 uploadFenceValue = device_->getNextUploadFenceValue(); + + if (ringBuffer) { + // D3D12 requires 256-byte alignment (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + ringAllocation = ringBuffer->allocate(range.size, D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, uploadFenceValue); + + if (ringAllocation.valid) { + // Successfully allocated from ring buffer + std::memcpy(ringAllocation.cpuAddress, data, range.size); + useRingBuffer = true; + } + } + + // Fallback: create temporary upload buffer if ring buffer allocation failed + igl::d3d12::ComPtr uploadBuffer; + HRESULT hr = S_OK; + + if (!useRingBuffer) { + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + const auto uploadDesc = makeBufferDesc(range.size); + hr = d3dDevice->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadBuffer.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create upload buffer"); + } + + void* mapped = nullptr; + D3D12_RANGE rr = {0, 0}; + hr = uploadBuffer->Map(0, &rr, &mapped); + if (FAILED(hr) || mapped == nullptr) { + return Result(Result::Code::RuntimeError, "Failed to map upload buffer"); + } + std::memcpy(mapped, data, range.size); + uploadBuffer->Unmap(0, nullptr); + } + + igl::d3d12::ComPtr allocator = device_->getUploadCommandAllocator(); + if 
(!allocator.Get()) { + return Result(Result::Code::RuntimeError, "Failed to get command allocator from pool"); + } + + igl::d3d12::ComPtr cmdList; + hr = d3dDevice->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())); + if (FAILED(hr)) { + // Return allocator to pool with fence value 0 (immediately available) + device_->returnUploadCommandAllocator(allocator, 0); + return Result(Result::Code::RuntimeError, "Failed to create command list for upload"); + } + + if (currentState_ != D3D12_RESOURCE_STATE_COPY_DEST) { + // Validate state transition and insert intermediate state if needed + const bool needsIntermediate = !D3D12StateTransition::isLegalDirectTransition( + currentState_, D3D12_RESOURCE_STATE_COPY_DEST); + + if (needsIntermediate) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = currentState_; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + currentState_ = D3D12_RESOURCE_STATE_COMMON; + } + + // Now transition to COPY_DEST (guaranteed legal from COMMON or if direct was legal) + D3D12_RESOURCE_BARRIER toCopyDest = {}; + toCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCopyDest.Transition.pResource = resource_.Get(); + toCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCopyDest.Transition.StateBefore = currentState_; + toCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + cmdList->ResourceBarrier(1, &toCopyDest); + } + + // Copy from either ring buffer or temporary upload buffer + if (useRingBuffer) { + cmdList->CopyBufferRegion(resource_.Get(), range.offset, + ringBuffer->getUploadHeap(), ringAllocation.offset, + range.size); + } 
else { + cmdList->CopyBufferRegion(resource_.Get(), range.offset, uploadBuffer.Get(), 0, range.size); + } + + // Prepare state transition barriers but defer state update until after GPU completes + D3D12_RESOURCE_STATES postState = + (defaultState_ == D3D12_RESOURCE_STATE_COMMON) ? D3D12_RESOURCE_STATE_GENERIC_READ : defaultState_; + + if (postState != D3D12_RESOURCE_STATE_COPY_DEST) { + // Validate state transition and insert intermediate state if needed + const bool needsIntermediate = !D3D12StateTransition::isLegalDirectTransition( + D3D12_RESOURCE_STATE_COPY_DEST, postState); + + if (needsIntermediate) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + + // Then transition to final state + D3D12_RESOURCE_BARRIER toFinal = {}; + toFinal.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toFinal.Transition.pResource = resource_.Get(); + toFinal.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toFinal.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + toFinal.Transition.StateAfter = postState; + cmdList->ResourceBarrier(1, &toFinal); + } else { + // Direct transition is legal + D3D12_RESOURCE_BARRIER toDefault = {}; + toDefault.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toDefault.Transition.pResource = resource_.Get(); + toDefault.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toDefault.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + toDefault.Transition.StateAfter = postState; + cmdList->ResourceBarrier(1, &toDefault); + } + } + + hr = cmdList->Close(); + if (FAILED(hr)) { + // Return allocator to pool with fence value 0 
(immediately available) + device_->returnUploadCommandAllocator(allocator, 0); + return Result(Result::Code::RuntimeError, "Failed to close upload command list"); + } + + ID3D12CommandList* lists[] = {cmdList.Get()}; + queue->ExecuteCommandLists(1, lists); + + // Ensure the allocator is not reused until GPU completes execution + ID3D12Fence* uploadFence = device_->getUploadFence(); + + // Signal must succeed; otherwise fence will never reach uploadFenceValue + hr = queue->Signal(uploadFence, uploadFenceValue); + if (FAILED(hr)) { + // Return allocator immediately (no fence wait needed) + device_->returnUploadCommandAllocator(allocator, 0); + + // Check for device removal to provide richer diagnostics + Result deviceStatus = device_->checkDeviceRemoval(); + if (!deviceStatus.isOk()) { + return deviceStatus; // Device removed - return specific error + } + + return Result(Result::Code::RuntimeError, "Failed to signal upload fence"); + } + + // Return allocator to pool with fence value (will be reused after fence is signaled) + device_->returnUploadCommandAllocator(allocator, uploadFenceValue); + + // Only track temporary upload buffers (ring buffer is persistent) + // Pass uploadFenceValue (already signaled above) to track with correct fence + if (!useRingBuffer && uploadBuffer.Get()) { + device_->trackUploadBuffer(std::move(uploadBuffer), uploadFenceValue); + } + + // Wait for upload fence to signal before returning. + // This ensures the buffer upload completes before the caller uses it. + Result waitResult = device_->waitForUploadFence(uploadFenceValue); + if (!waitResult.isOk()) { + return waitResult; + } + + // Now safe to update resource state; GPU upload has completed + currentState_ = (postState != D3D12_RESOURCE_STATE_COPY_DEST) ? 
postState : D3D12_RESOURCE_STATE_COPY_DEST; + + return Result(Result::Code::Ok); +} + +void* Buffer::map(const BufferRange& range, Result* IGL_NULLABLE outResult) { + if (resource_.Get() == nullptr) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Buffer resource is null"); + return nullptr; + } + + // Validate range + if (range.offset > desc_.length || range.size > desc_.length || + (range.offset + range.size) > desc_.length) { + Result::setResult(outResult, Result::Code::ArgumentOutOfRange, "Map range is out of bounds"); + return nullptr; + } + + // Handle mapping of DEFAULT heap storage buffers requested as Shared + // This happens when compute shader output buffers need to be read back + const bool isStorageBuffer = (desc_.type & BufferDesc::BufferTypeBits::Storage) != 0; + const bool requestedShared = (desc_.storage == ResourceStorage::Shared || + desc_.storage == ResourceStorage::Managed); + const bool needsReadbackStaging = (storage_ != ResourceStorage::Shared) && + isStorageBuffer && requestedShared; + + if (needsReadbackStaging) { + // Storage buffer in DEFAULT heap but requested as Shared - need staging + if (!device_) { + Result::setResult(outResult, Result::Code::RuntimeError, "Device is null"); + return nullptr; + } + + auto& ctx = device_->getD3D12Context(); + auto* d3dDevice = ctx.getDevice(); + auto* queue = ctx.getCommandQueue(); + + if (!d3dDevice || !queue) { + Result::setResult(outResult, Result::Code::RuntimeError, "D3D12 device or queue is null"); + return nullptr; + } + + // Create READBACK staging buffer if not already created + if (!readbackStagingBuffer_.Get()) { + D3D12_HEAP_PROPERTIES readbackHeap = {}; + readbackHeap.Type = D3D12_HEAP_TYPE_READBACK; + readbackHeap.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + readbackHeap.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Alignment = 0; + bufferDesc.Width 
= desc_.length; + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + HRESULT hr = d3dDevice->CreateCommittedResource( + &readbackHeap, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(readbackStagingBuffer_.GetAddressOf())); + + if (FAILED(hr)) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create readback staging buffer"); + return nullptr; + } + } + + // ALWAYS copy from DEFAULT buffer to readback staging when mapping + // The DEFAULT buffer content may have changed since the last map() call + // (e.g., via copyTextureToBuffer or compute shader writes) + IGL_D3D12_LOG_VERBOSE("Buffer::map() - Copying from DEFAULT buffer (resource=%p) to readback staging\n", + resource_.Get()); + + // D-001: Use pooled allocator instead of creating transient one + auto allocator = device_->getUploadCommandAllocator(); + if (!allocator.Get()) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to get allocator from pool"); + return nullptr; + } + + igl::d3d12::ComPtr cmdList; + if (FAILED(d3dDevice->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())))) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create command list for buffer copy"); + // D-001: Return allocator to pool even on failure + device_->returnUploadCommandAllocator(allocator, 0); + return nullptr; + } + + // Transition source buffer to COPY_SOURCE with validation. 
+ const D3D12_RESOURCE_STATES assumedState = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + const bool needsIntermediate = !D3D12StateTransition::isLegalDirectTransition( + assumedState, D3D12_RESOURCE_STATE_COPY_SOURCE); + + if (needsIntermediate) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = assumedState; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + + // Then to COPY_SOURCE + D3D12_RESOURCE_BARRIER toCopySource = {}; + toCopySource.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCopySource.Transition.pResource = resource_.Get(); + toCopySource.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCopySource.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + toCopySource.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + cmdList->ResourceBarrier(1, &toCopySource); + } else { + // Direct transition is legal + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = assumedState; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + cmdList->ResourceBarrier(1, &barrier); + } + + // Copy entire buffer. + cmdList->CopyBufferRegion(readbackStagingBuffer_.Get(), 0, resource_.Get(), 0, desc_.length); + + // Transition back with validation. 
+ const bool needsIntermediateBack = !D3D12StateTransition::isLegalDirectTransition( + D3D12_RESOURCE_STATE_COPY_SOURCE, assumedState); + + if (needsIntermediateBack) { + // Transition to COMMON first + D3D12_RESOURCE_BARRIER toCommon = {}; + toCommon.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCommon.Transition.pResource = resource_.Get(); + toCommon.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCommon.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + toCommon.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + cmdList->ResourceBarrier(1, &toCommon); + + // Then back to original state + D3D12_RESOURCE_BARRIER toOriginal = {}; + toOriginal.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toOriginal.Transition.pResource = resource_.Get(); + toOriginal.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toOriginal.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + toOriginal.Transition.StateAfter = assumedState; + cmdList->ResourceBarrier(1, &toOriginal); + } else { + // Direct transition is legal + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrier.Transition.StateAfter = assumedState; + cmdList->ResourceBarrier(1, &barrier); + } + + cmdList->Close(); + ID3D12CommandList* lists[] = {cmdList.Get()}; + queue->ExecuteCommandLists(1, lists); + + // Wait for copy to complete + ctx.waitForGPU(); + + // D-001: Return allocator to pool after synchronous GPU wait + device_->returnUploadCommandAllocator(allocator, 0); + + // Map the READBACK staging buffer + D3D12_RANGE readRange = {static_cast(range.offset), + static_cast(range.offset + range.size)}; + HRESULT hr = readbackStagingBuffer_->Map(0, &readRange, &mappedPtr_); + + if (FAILED(hr)) { + 
Result::setResult(outResult, Result::Code::RuntimeError, "Failed to map readback staging buffer"); + return nullptr; + } + + Result::setOk(outResult); + return static_cast(mappedPtr_) + range.offset; + } + + // Standard path for UPLOAD/READBACK heap buffers + if (storage_ != ResourceStorage::Shared) { + Result::setResult(outResult, Result::Code::Unsupported, + "Cannot map GPU-only buffer (use ResourceStorage::Shared)"); + return nullptr; + } + + if (mappedPtr_) { + // Already mapped, return offset pointer + Result::setOk(outResult); + return static_cast(mappedPtr_) + range.offset; + } + + D3D12_RANGE readRange = {0, 0}; // Not reading from GPU + HRESULT hr = resource_->Map(0, &readRange, &mappedPtr_); + + if (FAILED(hr)) { + Result::setResult(outResult, Result::Code::RuntimeError, "Failed to map buffer"); + return nullptr; + } + + Result::setOk(outResult); + return static_cast(mappedPtr_) + range.offset; +} + +void Buffer::unmap() { + if (!mappedPtr_) { + return; + } + + // Unmap the appropriate resource (staging buffer or main buffer) + if (readbackStagingBuffer_.Get()) { + readbackStagingBuffer_->Unmap(0, nullptr); + } else if (resource_.Get()) { + resource_->Unmap(0, nullptr); + } + + mappedPtr_ = nullptr; +} + +BufferDesc::BufferAPIHint Buffer::requestedApiHints() const noexcept { + return desc_.hint; +} + +BufferDesc::BufferAPIHint Buffer::acceptedApiHints() const noexcept { + return desc_.hint; +} + +ResourceStorage Buffer::storage() const noexcept { + return storage_; +} + +size_t Buffer::getSizeInBytes() const { + return desc_.length; +} + +uint64_t Buffer::gpuAddress(size_t offset) const { + if (resource_.Get() == nullptr) { + return 0; + } + + return resource_->GetGPUVirtualAddress() + offset; +} + +BufferDesc::BufferType Buffer::getBufferType() const { + return desc_.type; +} + +D3D12_RESOURCE_STATES Buffer::computeDefaultState(const BufferDesc& desc) const { + D3D12_RESOURCE_STATES state = D3D12_RESOURCE_STATE_COMMON; + + if ((desc.type & 
BufferDesc::BufferTypeBits::Storage) != 0) { + state |= D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + } + if ((desc.type & BufferDesc::BufferTypeBits::Vertex) != 0 || + (desc.type & BufferDesc::BufferTypeBits::Uniform) != 0) { + state |= D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } + if ((desc.type & BufferDesc::BufferTypeBits::Index) != 0) { + state |= D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + if ((desc.type & BufferDesc::BufferTypeBits::Indirect) != 0) { + state |= D3D12_RESOURCE_STATE_INDIRECT_ARGUMENT; + } + + if (state == D3D12_RESOURCE_STATE_COMMON) { + return D3D12_RESOURCE_STATE_GENERIC_READ; + } + + // Remove COMMON bit if other bits are set. + state &= ~D3D12_RESOURCE_STATE_COMMON; + return state; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Buffer.h b/src/igl/d3d12/Buffer.h new file mode 100644 index 0000000000..1560fee8c2 --- /dev/null +++ b/src/igl/d3d12/Buffer.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +class Buffer final : public IBuffer, public std::enable_shared_from_this { + public: + Buffer(Device& device, + igl::d3d12::ComPtr resource, + const BufferDesc& desc, + D3D12_RESOURCE_STATES initialState); + ~Buffer() override; + + Result upload(const void* data, const BufferRange& range) override; + void* map(const BufferRange& range, Result* IGL_NULLABLE outResult) override; + void unmap() override; + + BufferDesc::BufferAPIHint requestedApiHints() const noexcept override; + BufferDesc::BufferAPIHint acceptedApiHints() const noexcept override; + ResourceStorage storage() const noexcept override; + + size_t getSizeInBytes() const override; + uint64_t gpuAddress(size_t offset = 0) const override; + + BufferDesc::BufferType getBufferType() const override; + + // D3D12-specific accessors + ID3D12Resource* getResource() const { return resource_.Get(); } + // Returns the element stride in bytes for storage buffers, as provided in BufferDesc. + // A value of 0 means "unspecified"; callers should fall back to a reasonable default + // (e.g. 4 bytes) when this occurs. 
+ size_t getStorageElementStride() const noexcept { return desc_.storageStride; } + + private: + [[nodiscard]] D3D12_RESOURCE_STATES computeDefaultState(const BufferDesc& desc) const; + + Device* device_ = nullptr; + igl::d3d12::ComPtr resource_; + BufferDesc desc_; + void* mappedPtr_ = nullptr; + ResourceStorage storage_ = ResourceStorage::Private; + D3D12_RESOURCE_STATES defaultState_ = D3D12_RESOURCE_STATE_GENERIC_READ; + // State tracking: single non-mutable field, updated only via non-const methods + D3D12_RESOURCE_STATES currentState_ = D3D12_RESOURCE_STATE_COMMON; + + // Staging buffer for mapping DEFAULT heap storage buffers requested as Shared + igl::d3d12::ComPtr readbackStagingBuffer_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/CMakeLists.txt b/src/igl/d3d12/CMakeLists.txt new file mode 100644 index 0000000000..9d1e8bf8c8 --- /dev/null +++ b/src/igl/d3d12/CMakeLists.txt @@ -0,0 +1,55 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. 
+ +cmake_minimum_required(VERSION 3.19) + +project(IGLD3D12 CXX C) + +file(GLOB SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp) +file(GLOB HEADER_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.h) + +add_library(IGLD3D12 ${SRC_FILES} ${HEADER_FILES}) + +target_link_libraries(IGLD3D12 PRIVATE IGLLibrary IGLGlslang) + +igl_set_cxxstd(IGLD3D12 20) +igl_set_folder(IGLD3D12 "IGL") + +# Link DirectX 12 system libraries +target_link_libraries(IGLD3D12 PUBLIC + d3d12.lib + dxgi.lib + dxguid.lib + dxcompiler.lib + d3dcompiler.lib +) + +# DirectX headers (fetched via bootstrap-deps) +if(NOT DIRECTX_HEADERS_INCLUDE_DIR) + set(DIRECTX_HEADERS_INCLUDE_DIR "${IGL_ROOT_DIR}/third-party/deps/src/DirectX-Headers/include/directx") +endif() +target_include_directories(IGLD3D12 PUBLIC "${DIRECTX_HEADERS_INCLUDE_DIR}") +if(TARGET Microsoft::DirectX-Headers) + target_link_libraries(IGLD3D12 PUBLIC Microsoft::DirectX-Headers) +endif() + +# Include SPIRV-Cross for potential SPIR-V to HLSL conversion +target_include_directories(IGLD3D12 PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/SPIRV-Cross") +target_include_directories(IGLD3D12 PUBLIC "${IGL_ROOT_DIR}/third-party/deps/src/glslang") + +# Windows-specific definitions (removed - defined in D3D12Headers.h to avoid conflicts) + +if(WIN32 AND MSVC) + # Enable multithreaded compilation + target_compile_options(IGLD3D12 PRIVATE "/MP") + # Disable NOMINMAX warning (already defined in D3D12Headers.h) + target_compile_options(IGLD3D12 PRIVATE "/wd4005") + # Note: /Zc:preprocessor (conformant preprocessor) causes issues with + # some d3dx12 headers and WRL, so we're using the traditional preprocessor + + # Note: dxil.dll deployment is now handled by individual executable targets + # (shell sessions and tests) to ensure it's copied to the correct directories. 
+ # See shell/windows/CMakeLists.txt and src/igl/tests/CMakeLists.txt +endif() diff --git a/src/igl/d3d12/CommandBuffer.cpp b/src/igl/d3d12/CommandBuffer.cpp new file mode 100644 index 0000000000..a42afcac9f --- /dev/null +++ b/src/igl/d3d12/CommandBuffer.cpp @@ -0,0 +1,760 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +CommandBuffer::CommandBuffer(Device& device, const CommandBufferDesc& desc) + : ICommandBuffer(desc), device_(device) { + auto* d3dDevice = device_.getD3D12Context().getDevice(); + + if (!d3dDevice) { + IGL_DEBUG_ASSERT(false, "D3D12 device is null - context not initialized"); + IGL_LOG_ERROR("D3D12 device is null - context not initialized"); + return; // Leave commandList_ null to indicate failure + } + + // Check if device is in good state + HRESULT deviceRemovedReason = d3dDevice->GetDeviceRemovedReason(); + if (FAILED(deviceRemovedReason)) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "D3D12 device was removed before creating command buffer. 
Reason: 0x%08X\n" + " 0x887A0005 = DXGI_ERROR_DEVICE_REMOVED\n" + " 0x887A0006 = DXGI_ERROR_DEVICE_HUNG\n" + " 0x887A0007 = DXGI_ERROR_DEVICE_RESET\n" + " 0x887A0020 = DXGI_ERROR_DRIVER_INTERNAL_ERROR", + static_cast(deviceRemovedReason)); + IGL_LOG_ERROR(errorMsg); + IGL_DEBUG_ASSERT(false, "Device removed - see error above"); + return; // Leave commandList_ null to indicate failure + } + + // Use the current frame's command allocator - allocators are created ready-to-use + // Following Microsoft's D3D12HelloFrameBuffering: each frame has its own allocator + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto* frameAllocator = ctx.getFrameContexts()[frameIdx].allocator.Get(); + + HRESULT hr = d3dDevice->CreateCommandList( + 0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + frameAllocator, // Use frame allocator directly - it's in ready-to-use state after creation + nullptr, + IID_PPV_ARGS(commandList_.GetAddressOf())); + + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), "Failed to create command list: HRESULT = 0x%08X", static_cast(hr)); + IGL_DEBUG_ASSERT(false, "%s", errorMsg); + IGL_LOG_ERROR(errorMsg); + return; // Leave commandList_ null to indicate failure + } + + // Command lists are created in recording state, close it for now + commandList_->Close(); + + // Create scheduling fence for waitUntilScheduled() support + // D-003: Fence event is now created per-wait in waitUntilScheduled() to eliminate TOCTOU race + hr = d3dDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(scheduleFence_.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), "Failed to create scheduling fence: HRESULT = 0x%08X", static_cast(hr)); + IGL_DEBUG_ASSERT(false, "%s", errorMsg); + IGL_LOG_ERROR(errorMsg); + return; // Leave fence null to indicate failure + } +} + +CommandBuffer::~CommandBuffer() { + // D-003: No need to clean up scheduleFenceEvent_ - now using 
dedicated events per wait + // scheduleFence_ is a ComPtr and will be automatically released +} + +// Pre-allocated descriptor heap with fail-fast on exhaustion. +// Allocates from pre-allocated pages, switching pages as needed. +// Fails immediately if all pages are exhausted (Vulkan fail-fast pattern). +Result CommandBuffer::getNextCbvSrvUavDescriptor(uint32_t* outDescriptorIndex) { + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + auto& pages = frameCtx.cbvSrvUavHeapPages; + uint32_t currentPageIdx = frameCtx.currentCbvSrvUavPageIndex; + + // Validate we have at least one page + if (pages.empty()) { + return Result{Result::Code::RuntimeError, "No CBV/SRV/UAV descriptor heap pages available"}; + } + + // Get current page index validation + if (currentPageIdx >= pages.size()) { + return Result{Result::Code::RuntimeError, "Invalid descriptor heap page index"}; + } + + // Check current offset before acquiring reference (avoid use-after-reallocation) + const uint32_t currentOffset = frameCtx.nextCbvSrvUavDescriptor; + + // Check if current page has space; fail fast if pre-allocation is enabled. + if (currentOffset >= pages[currentPageIdx].capacity) { + // Current page is full - check if we can move to next page + const uint32_t nextPageIdx = currentPageIdx + 1; + + // Fail-fast if pre-allocated pages are exhausted (Vulkan pattern). + if (nextPageIdx >= pages.size()) { + char errorMsg[512]; + // All pages exhausted - fail immediately (no mid-frame allocation). + // Calculate actual descriptor capacity from allocated pages. + uint32_t totalCapacity = 0; + for (const auto& page : pages) { + totalCapacity += page.capacity; + } + snprintf(errorMsg, sizeof(errorMsg), + "CBV/SRV/UAV descriptor heap exhausted! Frame %u used all %zu pre-allocated pages (%u descriptors total). " + "This frame requires more descriptors than available. 
" + "Increase D3D12ContextConfig::maxHeapPages or enable preAllocateDescriptorPages=true, or optimize descriptor usage.", + frameIdx, pages.size(), totalCapacity); + return Result{Result::Code::RuntimeError, errorMsg}; + } + + // Move to next pre-allocated page. + currentPageIdx = nextPageIdx; + frameCtx.currentCbvSrvUavPageIndex = currentPageIdx; + frameCtx.nextCbvSrvUavDescriptor = 0; // Reset offset for new page + + IGL_D3D12_LOG_VERBOSE("D3D12: Switching to pre-allocated CBV/SRV/UAV page %u for frame %u\n", + currentPageIdx, frameIdx); + + // Update active heap when switching pages. + frameCtx.activeCbvSrvUavHeap = pages[currentPageIdx].heap; + + // Rebind heap on the command list when switching pages. + if (commandList_.Get()) { + ID3D12DescriptorHeap* heaps[] = { + frameCtx.activeCbvSrvUavHeap.Get(), + frameCtx.samplerHeap.Get() + }; + commandList_->SetDescriptorHeaps(2, heaps); + IGL_D3D12_LOG_VERBOSE("D3D12: Rebound descriptor heaps after switching to page %u\n", currentPageIdx); + } + } + + // SAFE: Acquire reference AFTER any potential reallocation from emplace_back + auto& currentPage = pages[currentPageIdx]; + + // Allocate from current page + const uint32_t descriptorIndex = frameCtx.nextCbvSrvUavDescriptor++; + currentPage.used = frameCtx.nextCbvSrvUavDescriptor; + + // Track peak usage for telemetry + const uint32_t totalUsed = static_cast(currentPageIdx * kDescriptorsPerPage + descriptorIndex); + if (totalUsed > frameCtx.peakCbvSrvUavUsage) { + frameCtx.peakCbvSrvUavUsage = totalUsed; + } + + // Return the descriptor index within the current page + *outDescriptorIndex = descriptorIndex; + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::getNextCbvSrvUavDescriptor() - frame %u, page %u, descriptor %u (total allocated: %u)\n", + frameIdx, currentPageIdx, descriptorIndex, totalUsed); +#endif + + return Result{}; +} + +// Allocate a contiguous range of CBV/SRV/UAV descriptors from pre-allocated pages. 
+// Ensures the range can be bound as a single descriptor table. +// Fails immediately if all pages are exhausted (Vulkan fail-fast pattern). +Result CommandBuffer::allocateCbvSrvUavRange(uint32_t count, uint32_t* outBaseDescriptorIndex) { + if (count == 0) { + return Result{Result::Code::ArgumentInvalid, "Cannot allocate zero descriptors"}; + } + + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + auto& pages = frameCtx.cbvSrvUavHeapPages; + uint32_t currentPageIdx = frameCtx.currentCbvSrvUavPageIndex; + + if (pages.empty()) { + return Result{Result::Code::RuntimeError, "No CBV/SRV/UAV descriptor heap pages available"}; + } + + if (currentPageIdx >= pages.size()) { + return Result{Result::Code::RuntimeError, "Invalid descriptor heap page index"}; + } + + // Check space before acquiring reference (avoid use-after-reallocation) + const uint32_t currentOffset = frameCtx.nextCbvSrvUavDescriptor; + const uint32_t spaceRemaining = pages[currentPageIdx].capacity - currentOffset; + + // Check if the requested range fits in the current page; fail fast on exhaustion. + if (count > spaceRemaining) { + // Not enough space in current page - validate range and check for next page + if (count > kDescriptorsPerPage) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Requested descriptor range (%u) exceeds page capacity (%u)", + count, kDescriptorsPerPage); + return Result{Result::Code::ArgumentOutOfRange, errorMsg}; + } + + // Move to next pre-allocated page (fail-fast if exhausted). + const uint32_t nextPageIdx = currentPageIdx + 1; + if (nextPageIdx >= pages.size()) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "CBV/SRV/UAV descriptor heap exhausted! Frame %u needs page %u for contiguous range of %u descriptors, " + "but only %zu pages are pre-allocated. 
" + "Increase D3D12ContextConfig::maxHeapPages or optimize descriptor usage.", + frameIdx, nextPageIdx, count, pages.size()); + return Result{Result::Code::RuntimeError, errorMsg}; + } + + // Move to next pre-allocated page + currentPageIdx = nextPageIdx; + frameCtx.currentCbvSrvUavPageIndex = currentPageIdx; + frameCtx.nextCbvSrvUavDescriptor = 0; + + IGL_D3D12_LOG_VERBOSE("D3D12: Switching to pre-allocated CBV/SRV/UAV page %u for contiguous range of %u descriptors\n", + currentPageIdx, count); + + // Rebind heap on command list when switching pages + frameCtx.activeCbvSrvUavHeap = pages[currentPageIdx].heap; + if (commandList_.Get()) { + ID3D12DescriptorHeap* heaps[] = { + frameCtx.activeCbvSrvUavHeap.Get(), + frameCtx.samplerHeap.Get() + }; + commandList_->SetDescriptorHeaps(2, heaps); + } + } + + // SAFE: Acquire reference AFTER any potential reallocation from emplace_back + auto& currentPage = pages[currentPageIdx]; + + // Allocate the range from current page + const uint32_t baseIndex = frameCtx.nextCbvSrvUavDescriptor; + frameCtx.nextCbvSrvUavDescriptor += count; + currentPage.used = frameCtx.nextCbvSrvUavDescriptor; + + // Track peak usage + const uint32_t totalUsed = static_cast(currentPageIdx * kDescriptorsPerPage + baseIndex + count); + if (totalUsed > frameCtx.peakCbvSrvUavUsage) { + frameCtx.peakCbvSrvUavUsage = totalUsed; + } + + *outBaseDescriptorIndex = baseIndex; + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::allocateCbvSrvUavRange() - frame %u, page %u, base %u, count %u\n", + frameIdx, currentPageIdx, baseIndex, count); +#endif + + return Result{}; +} + +uint32_t& CommandBuffer::getNextSamplerDescriptor() { + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + // Add bounds checking to prevent sampler descriptor heap overflow. + // The sampler heap is allocated with kSamplerHeapSize descriptors. 
+ const uint32_t currentValue = frameCtx.nextSamplerDescriptor; + + // Track peak usage for telemetry (before incrementing) + if (currentValue > frameCtx.peakSamplerUsage) { + frameCtx.peakSamplerUsage = currentValue; + + // Warn if approaching capacity (>80%) + const float usage = static_cast(currentValue) / static_cast(kSamplerHeapSize); + if (usage > 0.8f) { + IGL_LOG_ERROR("D3D12: Sampler descriptor usage at %.1f%% capacity (%u/%u) for frame %u\n", + usage * 100.0f, currentValue, kSamplerHeapSize, frameIdx); + } + } + + // CRITICAL: Assert on overflow in debug builds + IGL_DEBUG_ASSERT(currentValue < kSamplerHeapSize, + "D3D12: Sampler descriptor heap overflow! Allocated: %u, Capacity: %u (frame %u). " + "This will cause memory corruption and device removal. Increase heap size or optimize descriptor usage.", + currentValue, kSamplerHeapSize, frameIdx); + + // Graceful degradation in release builds: clamp to last valid descriptor + if (currentValue >= kSamplerHeapSize) { + IGL_LOG_ERROR("D3D12: Sampler descriptor heap overflow! Allocated: %u, Capacity: %u (frame %u)\n" + "Clamping to last valid descriptor. 
Rendering artifacts expected.\n", + currentValue, kSamplerHeapSize, frameIdx); + // Return reference to a clamped value to prevent further damage + // This will cause rendering artifacts but prevent crashes + static uint32_t clampedValue = kSamplerHeapSize - 1; + return clampedValue; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::getNextSamplerDescriptor() - frame %u, current value=%u\n", + frameIdx, currentValue); +#endif + return frameCtx.nextSamplerDescriptor; +} + +void CommandBuffer::trackTransientBuffer(std::shared_ptr buffer) { + // Add to the CURRENT frame's transient buffer list + // These will be kept alive until the frame completes GPU execution + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + frameCtx.transientBuffers.push_back(std::move(buffer)); + + // Track high-water mark for telemetry. + const size_t currentCount = frameCtx.transientBuffers.size(); + if (currentCount > frameCtx.transientBuffersHighWater) { + frameCtx.transientBuffersHighWater = currentCount; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::trackTransientBuffer() - Added buffer to frame %u (total=%zu, high-water=%zu)\n", + frameIdx, currentCount, frameCtx.transientBuffersHighWater); +#endif +} + +void CommandBuffer::trackTransientResource(ID3D12Resource* resource) { + if (!resource) { + return; + } + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + igl::d3d12::ComPtr keepAlive; + resource->AddRef(); + keepAlive.Attach(resource); + frameCtx.transientResources.push_back(std::move(keepAlive)); + + // Track high-water mark for telemetry. 
+ const size_t currentCount = frameCtx.transientResources.size(); + if (currentCount > frameCtx.transientResourcesHighWater) { + frameCtx.transientResourcesHighWater = currentCount; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::trackTransientResource() - Added resource to frame %u (total=%zu, high-water=%zu)\n", + frameIdx, currentCount, frameCtx.transientResourcesHighWater); +#endif +} + +Result CommandBuffer::begin() { + if (recording_) { + return Result(); + } + + // NOTE: Transient buffers are now stored in FrameContext and cleared when advancing frames + // NOTE: Descriptor counters are now stored in FrameContext and shared across all CommandBuffers + // They are reset at the start of each frame in CommandQueue::submit(), not here + + // Reset per-command-buffer draw count for this recording + currentDrawCount_ = 0; + + // CRITICAL: Set the per-frame descriptor heaps before recording commands. + // Each frame has its own isolated heaps to prevent descriptor conflicts. + // Uses the current page's heap (will be updated if we grow to new pages). + auto& ctx = device_.getD3D12Context(); + const uint32_t frameIdx = ctx.getCurrentFrameIndex(); + auto& frameCtx = ctx.getFrameContexts()[frameIdx]; + + // Initialize active heap to current page at frame start. 
+ if (frameCtx.cbvSrvUavHeapPages.empty()) { + const char* msg = "No CBV/SRV/UAV heap pages available"; + IGL_LOG_ERROR("CommandBuffer::begin() - %s for frame %u\n", msg, frameIdx); + return Result(Result::Code::RuntimeError, msg); + } + + frameCtx.activeCbvSrvUavHeap = frameCtx.cbvSrvUavHeapPages[frameCtx.currentCbvSrvUavPageIndex].heap; + + if (!frameCtx.activeCbvSrvUavHeap.Get()) { + const char* msg = "No CBV/SRV/UAV heap available"; + IGL_LOG_ERROR("CommandBuffer::begin() - %s for frame %u\n", msg, frameIdx); + return Result(Result::Code::RuntimeError, msg); + } + + // Use the CURRENT FRAME's command allocator from FrameContext + // Following Microsoft's D3D12HelloFrameBuffering pattern + auto* frameAllocator = ctx.getFrameContexts()[frameIdx].allocator.Get(); + + // Microsoft pattern: Reset allocator THEN reset command list + // Allocator was reset in CommandQueue::submit() after fence wait, OR is in initial ready state +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::begin() - Frame %u: Resetting command list with allocator...\n", frameIdx); +#endif + HRESULT hr = commandList_->Reset(frameAllocator, nullptr); + if (FAILED(hr)) { + IGL_LOG_ERROR("CommandBuffer::begin() - Reset command list FAILED: 0x%08X\n", static_cast(hr)); + return getResultFromHRESULT(hr); + } +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::begin() - Command list reset OK\n"); +#endif + recording_ = true; + + // Bind heaps using active heap, not legacy accessor, now that the command list + // has been reset and is in the recording state. + ID3D12DescriptorHeap* heaps[] = { + frameCtx.activeCbvSrvUavHeap.Get(), + frameCtx.samplerHeap.Get() + }; + commandList_->SetDescriptorHeaps(2, heaps); + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::begin() - Set per-frame descriptor heaps for frame %u\n", frameIdx); +#endif + + // Record timer start timestamp after reset and before any GPU work is recorded. 
+ // This ensures the timer measures the actual command buffer workload. + if (desc.timer) { + auto* timer = static_cast(desc.timer.get()); + timer->begin(commandList_.Get()); + } + + return Result(); +} + +void CommandBuffer::end() { + if (!recording_) { + return; + } + + // No timer recording here; timer->begin() was called in begin(), + // and timer->end() will be called in CommandQueue::submit() before close. + + // Close the command list - all recording is complete + commandList_->Close(); + recording_ = false; +} + +D3D12Context& CommandBuffer::getContext() { + return device_.getD3D12Context(); +} + +const D3D12Context& CommandBuffer::getContext() const { + return device_.getD3D12Context(); +} + +// Device draw count is incremented by CommandQueue::submit() using this buffer's count + +std::unique_ptr CommandBuffer::createRenderCommandEncoder( + const RenderPassDesc& renderPass, + const std::shared_ptr& framebuffer, + const Dependencies& /*dependencies*/, + Result* IGL_NULLABLE outResult) { + // Begin command buffer if not already begun + Result beginResult = begin(); + if (!beginResult.isOk()) { + Result::setResult(outResult, std::move(beginResult)); + return nullptr; + } + + // Create encoder with lightweight constructor, then initialize with render pass + auto encoder = std::make_unique(*this, framebuffer); + encoder->begin(renderPass); + Result::setOk(outResult); + return encoder; +} + +std::unique_ptr CommandBuffer::createComputeCommandEncoder() { + // Begin command buffer if not already begun + Result beginResult = begin(); + if (!beginResult.isOk()) { + IGL_LOG_ERROR("CommandBuffer::createComputeCommandEncoder() - begin() failed: %s\n", + beginResult.message.c_str()); + return nullptr; + } + + return std::make_unique(*this); +} + +void CommandBuffer::present(const std::shared_ptr& /*surface*/) const { + // Note: Actual present happens in CommandQueue::submit(). 
This call serves + // as a marker indicating that this command buffer should trigger a swapchain + // Present when submitted. + willPresent_ = true; +} + +void CommandBuffer::waitUntilScheduled() { + // If scheduleValue_ is 0, the command buffer hasn't been submitted yet + if (scheduleValue_ == 0) { +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Not yet submitted, returning immediately\n"); +#endif + return; + } + + // Check if the scheduling fence has already been signaled + if (!scheduleFence_.Get()) { + IGL_LOG_ERROR("CommandBuffer::waitUntilScheduled() - Scheduling fence is null\n"); + return; + } + + const UINT64 completedValue = scheduleFence_->GetCompletedValue(); + if (completedValue >= scheduleValue_) { +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Already scheduled (completed=%llu, target=%llu)\n", + completedValue, scheduleValue_); +#endif + return; + } + + // Wait for the scheduling fence to be signaled +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Waiting for scheduling (completed=%llu, target=%llu)\n", + completedValue, scheduleValue_); +#endif + + // Use FenceWaiter RAII wrapper for proper fence waiting with TOCTOU protection + FenceWaiter waiter(scheduleFence_.Get(), scheduleValue_); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("CommandBuffer::waitUntilScheduled() - Fence wait failed: %s\n", + waitResult.message.c_str()); + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilScheduled() - Scheduling complete (fence now=%llu)\n", + scheduleFence_->GetCompletedValue()); +#endif +} + +void CommandBuffer::waitUntilCompleted() { + // Wait for all submitted GPU work to complete + // The CommandQueue tracks frame completion via fences, so we need to wait for the current frame + auto& ctx = getContext(); + auto* queue = ctx.getCommandQueue(); + if (!queue) { + return; + } + + // Signal a 
fence and wait for it + // This ensures all previously submitted command lists have completed on the GPU + igl::d3d12::ComPtr fence; + auto* device = ctx.getDevice(); + if (!device || FAILED(device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())))) { + return; + } + + queue->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("CommandBuffer::waitUntilCompleted() - Fence wait failed: %s\n", + waitResult.message.c_str()); + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandBuffer::waitUntilCompleted() - GPU work completed\n"); +#endif +} + +void CommandBuffer::pushDebugGroupLabel(const char* label, + const igl::Color& /*color*/) const { + // Only emit GPU debug markers while the command list is in recording state. + if (!recording_ || !commandList_.Get() || !label) { + return; + } + + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList_->BeginEvent( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void CommandBuffer::popDebugGroupLabel() const { + // Only pop GPU debug markers while the command list is in recording state. 
+ if (!recording_ || !commandList_.Get()) { + return; + } + + commandList_->EndEvent(); +} + +void CommandBuffer::copyBuffer(IBuffer& source, + IBuffer& destination, + uint64_t sourceOffset, + uint64_t destinationOffset, + uint64_t size) { + auto* src = static_cast(&source); + auto* dst = static_cast(&destination); + ID3D12Resource* srcRes = src->getResource(); + ID3D12Resource* dstRes = dst->getResource(); + if (!srcRes || !dstRes || size == 0) { + return; + } + + // Use a transient copy with appropriate heap handling + auto& ctx = getContext(); + ID3D12Device* device = ctx.getDevice(); + ID3D12CommandQueue* queue = ctx.getCommandQueue(); + if (!device || !queue) { + return; + } + + auto doCopyOnList = [&](ID3D12GraphicsCommandList* list, + ID3D12Resource* dstResLocal, + uint64_t dstOffsetLocal) { + D3D12_RESOURCE_BARRIER barriers[2] = {}; + barriers[0].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[0].Transition.pResource = srcRes; + barriers[0].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[1].Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barriers[1].Transition.pResource = dstResLocal; + barriers[1].Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_GENERIC_READ; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + list->ResourceBarrier(2, barriers); + + list->CopyBufferRegion(dstResLocal, dstOffsetLocal, srcRes, sourceOffset, size); + + barriers[0].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + barriers[0].Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + barriers[1].Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barriers[1].Transition.StateAfter = D3D12_RESOURCE_STATE_GENERIC_READ; + list->ResourceBarrier(2, barriers); + }; + + if 
(dst->storage() == ResourceStorage::Shared) { + // GPU cannot write into UPLOAD heap; use a READBACK staging buffer and then memcpy into UPLOAD + D3D12_HEAP_PROPERTIES readbackHeap{}; + readbackHeap.Type = D3D12_HEAP_TYPE_READBACK; + D3D12_RESOURCE_DESC desc{}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = size + destinationOffset; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + igl::d3d12::ComPtr<ID3D12Resource> readback; + HRESULT hr = device->CreateCommittedResource(&readbackHeap, + D3D12_HEAP_FLAG_NONE, + &desc, + D3D12_RESOURCE_STATE_COPY_DEST, + nullptr, + IID_PPV_ARGS(readback.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("copyBuffer: Failed to create READBACK buffer, hr=0x%08X\n", static_cast<unsigned int>(hr)); + return; + } + + igl::d3d12::ComPtr<ID3D12CommandAllocator> allocator; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> list; + if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(allocator.GetAddressOf()))) || + FAILED(device->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(list.GetAddressOf())))) { + IGL_LOG_ERROR("copyBuffer: Failed to create transient command list\n"); + return; + } + + // Transition source to COPY_SOURCE + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = srcRes; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + list->ResourceBarrier(1, &barrier); + + // Copy from source to readback (readback is already in COPY_DEST state) + list->CopyBufferRegion(readback.Get(), destinationOffset, srcRes, sourceOffset, size); + + // Transition source back + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_SOURCE; + 
barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COMMON; + list->ResourceBarrier(1, &barrier); + list->Close(); + ID3D12CommandList* lists[] = {list.Get()}; + queue->ExecuteCommandLists(1, lists); + ctx.waitForGPU(); + + // Map readback and copy into the UPLOAD buffer + void* rbPtr = nullptr; + D3D12_RANGE readRange{static_cast<SIZE_T>(destinationOffset), static_cast<SIZE_T>(destinationOffset + size)}; + if (SUCCEEDED(readback->Map(0, &readRange, &rbPtr)) && rbPtr) { + // Map destination upload buffer + Result r1; + void* dstPtr = dst->map(BufferRange(size, destinationOffset), &r1); + if (dstPtr && r1.isOk()) { + std::memcpy(dstPtr, static_cast<const uint8_t*>(rbPtr) + destinationOffset, size); + dst->unmap(); + } + readback->Unmap(0, nullptr); + } + return; + } + + // Default path: copy using a transient command list to DEFAULT/COMMON destinations + igl::d3d12::ComPtr<ID3D12CommandAllocator> allocator; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> list; + if (FAILED(device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(allocator.GetAddressOf()))) || + FAILED(device->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(list.GetAddressOf())))) { + return; + } + + doCopyOnList(list.Get(), dstRes, destinationOffset); + list->Close(); + ID3D12CommandList* lists2[] = {list.Get()}; + queue->ExecuteCommandLists(1, lists2); + ctx.waitForGPU(); +} + +// Public API: Record texture-to-buffer copy for deferred execution +void CommandBuffer::copyTextureToBuffer(ITexture& source, + IBuffer& destination, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer) { + // Like Vulkan, defer the copy operation until command buffer submission + // D3D12 requires this to execute AFTER render commands complete, not during recording + // (Unlike Vulkan which can record into the command buffer, D3D12 has closed command list and padding constraints) + + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: Recording deferred copy operation (will execute in CommandQueue::submit)\n"); + + 
deferredTextureCopies_.push_back({ + &source, + &destination, + destinationOffset, + mipLevel, + layer + }); +} + +} // namespace igl::d3d12 + diff --git a/src/igl/d3d12/CommandBuffer.h b/src/igl/d3d12/CommandBuffer.h new file mode 100644 index 0000000000..18dc1d0249 --- /dev/null +++ b/src/igl/d3d12/CommandBuffer.h @@ -0,0 +1,146 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +class CommandBuffer final : public ICommandBuffer { + public: + CommandBuffer(Device& device, const CommandBufferDesc& desc); + ~CommandBuffer() override; + + std::unique_ptr<IRenderCommandEncoder> createRenderCommandEncoder( + const RenderPassDesc& renderPass, + const std::shared_ptr<IFramebuffer>& framebuffer, + const Dependencies& dependencies, + Result* IGL_NULLABLE outResult) override; + + std::unique_ptr<IComputeCommandEncoder> createComputeCommandEncoder() override; + + void present(const std::shared_ptr<ITexture>& surface) const override; + + void waitUntilScheduled() override; + void waitUntilCompleted() override; + + void pushDebugGroupLabel(const char* label, const igl::Color& color) const override; + void popDebugGroupLabel() const override; + + void copyBuffer(IBuffer& source, + IBuffer& destination, + uint64_t sourceOffset, + uint64_t destinationOffset, + uint64_t size) override; + void copyTextureToBuffer(ITexture& source, + IBuffer& destination, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer) override; + + Result begin(); + void end(); + bool isRecording() const { return recording_; } + + ID3D12GraphicsCommandList* getCommandList() const { return commandList_.Get(); } + D3D12Context& getContext(); + const D3D12Context& getContext() const; + Device& getDevice() { return device_; } + + size_t getCurrentDrawCount() const { return currentDrawCount_; } + void 
incrementDrawCount(size_t count = 1) { currentDrawCount_ += count; } + + // Track transient resources (e.g., push constants buffers) that need to be kept alive + // until this FRAME completes GPU execution (not just until this command buffer is destroyed) + void trackTransientBuffer(std::shared_ptr buffer); + void trackTransientResource(ID3D12Resource* resource); + + // ============================================================================ + // INTERNAL API: Descriptor Allocation (Transient Descriptor Allocator) + // ============================================================================ + // + // These methods are implementation details of the per-frame descriptor heap + // management system (Strategy 1 in D3D12ResourcesBinder.h). + // + // WARNING: Do NOT call these methods directly. Use D3D12ResourcesBinder instead. + // + // These methods delegate to D3D12Context::FrameContext to share descriptor heaps + // across all command buffers in the current frame, ensuring efficient utilization + // and automatic cleanup at frame boundaries. + // + // Access: public for friend class D3D12ResourcesBinder, conceptually private. + // Returns Result to allow error handling on heap overflow. + // ============================================================================ + + /** + * @brief Allocate a single CBV/SRV/UAV descriptor from per-frame heap + * @internal This is an implementation detail - use D3D12ResourcesBinder instead + */ + Result getNextCbvSrvUavDescriptor(uint32_t* outDescriptorIndex); + + /** + * @brief Allocate a contiguous range of CBV/SRV/UAV descriptors on a single page + * @internal This is an implementation detail - use D3D12ResourcesBinder instead + * + * This ensures the range can be bound as a single descriptor table. 
+ * Returns the base descriptor index; descriptors are [baseIndex, baseIndex+count) + */ + Result allocateCbvSrvUavRange(uint32_t count, uint32_t* outBaseDescriptorIndex); + + /** + * @brief Get reference to next sampler descriptor index (for increment) + * @internal This is an implementation detail - use D3D12ResourcesBinder instead + */ + uint32_t& getNextSamplerDescriptor(); + + // Deferred texture-to-buffer copy operations + // These are recorded during command buffer recording and executed in CommandQueue::submit() + // AFTER all render/compute commands have been executed by the GPU + struct DeferredTextureCopy { + ITexture* source; + IBuffer* destination; + uint64_t destinationOffset; + uint32_t mipLevel; + uint32_t layer; + }; + const std::vector<DeferredTextureCopy>& getDeferredTextureCopies() const { + return deferredTextureCopies_; + } + + // Whether this command buffer requested a swapchain present via present(). + bool willPresent() const { return willPresent_; } + + private: + Device& device_; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> commandList_; + // NOTE: Command allocators are now managed per-frame in FrameContext, not per-CommandBuffer + size_t currentDrawCount_ = 0; + bool recording_ = false; + + // Scheduling fence infrastructure (separate from completion fence) + // Used to track when command buffer is submitted to GPU queue (not when GPU completes) + // D-003: Removed scheduleFenceEvent_ - now using dedicated events per wait operation + igl::d3d12::ComPtr<ID3D12Fence> scheduleFence_; + uint64_t scheduleValue_ = 0; + + // Deferred copy operations to execute after command buffer submission + std::vector<DeferredTextureCopy> deferredTextureCopies_; + + // Tracks whether present(surface) was called on this command buffer. + // Mutable to allow modification from the logically-const present() override. 
+ mutable bool willPresent_ = false; + + friend class CommandQueue; // Allow CommandQueue to signal scheduleFence_ +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/CommandQueue.cpp b/src/igl/d3d12/CommandQueue.cpp new file mode 100644 index 0000000000..49435f18aa --- /dev/null +++ b/src/igl/d3d12/CommandQueue.cpp @@ -0,0 +1,222 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { + +/** + * @brief Execute deferred texture-to-buffer copies after render commands + */ +void executeDeferredCopies(D3D12Context& ctx, Device& device, const CommandBuffer& cmdBuffer) { + const auto& deferredCopies = cmdBuffer.getDeferredTextureCopies(); + if (deferredCopies.empty()) { + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue: Executing %zu deferred copyTextureToBuffer operations\n", + deferredCopies.size()); +#endif + + for (const auto& copy : deferredCopies) { + auto* srcTex = static_cast(copy.source); + auto* dstBuf = static_cast(copy.destination); + + Result copyResult = TextureCopyUtils::executeCopyTextureToBuffer( + ctx, device, *srcTex, *dstBuf, copy.destinationOffset, copy.mipLevel, copy.layer); + if (!copyResult.isOk()) { + IGL_LOG_ERROR("Failed to copy texture to buffer: %s\n", copyResult.message.c_str()); + } + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue: All deferred copies executed successfully\n"); +#endif +} + +// No standalone helper needed - this is now inlined in CommandQueue::submit() + +/** + * @brief Update per-frame fence tracking + */ +void updateFrameFences(D3D12Context& ctx, UINT64 currentFenceValue) { + auto& frameCtx = ctx.getFrameContexts()[ctx.getCurrentFrameIndex()]; + + // Update frame fence (backward 
compatibility) + if (frameCtx.fenceValue == 0) { + frameCtx.fenceValue = currentFenceValue; + } + + // Update max allocator fence (critical for safe allocator reset) + if (currentFenceValue > frameCtx.maxAllocatorFence) { + frameCtx.maxAllocatorFence = currentFenceValue; + } + + // Track command buffer count + frameCtx.commandBufferCount++; + +#ifdef IGL_DEBUG + // Keep this at IGL_LOG_INFO because the test harness (test_all_sessions.bat) parses + // "Signaled fence" from the INFO log; do not downgrade to VERBOSE + IGL_LOG_INFO("CommandQueue: Signaled fence for frame %u " + "(value=%llu, maxAllocatorFence=%llu, cmdBufCount=%u)\n", + ctx.getCurrentFrameIndex(), currentFenceValue, + frameCtx.maxAllocatorFence, frameCtx.commandBufferCount); +#endif +} + +} // namespace + +CommandQueue::CommandQueue(Device& device) : device_(device) {} + +std::shared_ptr<ICommandBuffer> CommandQueue::createCommandBuffer(const CommandBufferDesc& desc, + Result* IGL_NULLABLE outResult) { + auto cmdBuffer = std::make_shared<CommandBuffer>(device_, desc); + + // Check if CommandBuffer was successfully initialized + // CommandBuffer leaves commandList_ null on failure + if (!cmdBuffer->getCommandList()) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create D3D12 command list. " + "Possible causes: device removed, out of memory, or device initialization failed. " + "Check debug output for HRESULT error code."); + return nullptr; + } + + Result::setOk(outResult); + return cmdBuffer; +} + +// Error handling behavior for submit(). +// This function executes command lists and presents frames. Error handling: +// - Device removal: Detected via checkDeviceRemoval(), logs diagnostics, sets device.isDeviceLost() +// flag, and triggers IGL_DEBUG_ASSERT. Returns SubmitHandle normally (legacy API limitation). +// - Present failures: Logged with IGL_LOG_ERROR via PresentManager. Device removal during Present +// also triggers IGL_DEBUG_ASSERT. 
Non-removal failures (swapchain/window issues) are logged but +// do not assert. Present result is checked but not propagated as Result (legacy API limitation). +// - Return value: The SubmitHandle is always returned regardless of errors and does NOT reflect +// submission success/failure. Use device.checkDeviceRemoval() or device.isDeviceLost() as the +// authoritative source for fatal error detection. +// Future: Consider Result-based submission API for explicit error propagation. +// +// Refactored from 614 lines to under 100 lines using helper classes: +// - FenceWaiter: RAII fence waiting with TOCTOU protection +// - FrameManager: Frame advancement and resource management +// - PresentManager: Swapchain presentation with device removal detection +SubmitHandle CommandQueue::submit(const ICommandBuffer& commandBuffer, bool /*endOfFrame*/) { + auto& cmdBuffer = const_cast(static_cast(commandBuffer)); + auto& ctx = device_.getD3D12Context(); + auto* commandList = cmdBuffer.getCommandList(); + auto* fence = ctx.getFence(); + + // Defensive: Ensure we have a valid command list + IGL_DEBUG_ASSERT(commandList, "D3D12 CommandQueue::submit() with null command list"); + + // Record timer end timestamp before closing command list + if (commandBuffer.desc.timer) { + auto* timer = static_cast(commandBuffer.desc.timer.get()); + const UINT64 timerFenceValue = ctx.getFenceValue() + 1; + timer->end(commandList, fence, timerFenceValue); + } + + // Close command list + cmdBuffer.end(); + + // Signal scheduling fence + ++scheduleFenceValue_; + cmdBuffer.scheduleValue_ = scheduleFenceValue_; + ctx.getCommandQueue()->Signal(cmdBuffer.scheduleFence_.Get(), scheduleFenceValue_); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue: Signaled scheduling fence (value=%llu)\n", scheduleFenceValue_); +#endif + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Executing command list...\n"); +#endif + + // Execute command list + ID3D12CommandList* commandLists[] = 
{commandList}; + ctx.getCommandQueue()->ExecuteCommandLists(1, commandLists); + + // Execute deferred texture-to-buffer copies + executeDeferredCopies(ctx, device_, cmdBuffer); + + // Check device status + Result deviceCheck = device_.checkDeviceRemoval(); + if (!deviceCheck.isOk()) { + IGL_LOG_ERROR("CommandQueue::submit() - Device removal detected: %s\n", + deviceCheck.message.c_str()); + } + + // Present only if this command buffer requested it (via present()) + // and we have a swapchain. This avoids advancing the swapchain for + // intermediate offscreen passes that do not render to the back buffer. + bool presentOk = true; + if (ctx.getSwapChain() && cmdBuffer.willPresent()) { + PresentManager presentMgr(ctx); + presentOk = presentMgr.present(); + if (!presentOk) { + IGL_LOG_ERROR("CommandQueue::submit() - Present failed; frame advancement may be unsafe\n"); + // Note: Continue with fence signaling for now to maintain legacy behavior, + // but future work should consider early-return or recovery strategy + } + } + + // Signal fence for current frame + const UINT64 currentFenceValue = ++ctx.getFenceValue(); + ctx.getCommandQueue()->Signal(ctx.getFence(), currentFenceValue); + + // Update frame fence tracking + updateFrameFences(ctx, currentFenceValue); + + // Advance to next frame with proper synchronization + if (ctx.getSwapChain()) { + FrameManager frameMgr(ctx); + frameMgr.advanceFrame(currentFenceValue); + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Complete!\n"); +#endif + + // Aggregate per-command-buffer draw count into the device, matching GL/Vulkan behavior + const auto cbDraws = cmdBuffer.getCurrentDrawCount(); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Aggregating %zu draws from CB into device\n", cbDraws); +#endif + device_.incrementDrawCount(cbDraws); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Device drawCount now=%zu\n", device_.getCurrentDrawCount()); + + // Log 
resource stats every 30 draws to track leaks + const size_t drawCount = device_.getCurrentDrawCount(); + if (drawCount == 30 || drawCount == 60 || drawCount == 90 || drawCount == 120 || + drawCount == 150 || drawCount == 300 || drawCount == 600 || drawCount == 900 || + drawCount == 1200 || drawCount == 1500 || drawCount == 1800) { + IGL_D3D12_LOG_VERBOSE("CommandQueue::submit() - Logging resource stats at drawCount=%zu\n", drawCount); + D3D12Context::logResourceStats(); + } +#endif + + return 0; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/CommandQueue.h b/src/igl/d3d12/CommandQueue.h new file mode 100644 index 0000000000..26142503f7 --- /dev/null +++ b/src/igl/d3d12/CommandQueue.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +class CommandQueue final : public ICommandQueue { + public: + explicit CommandQueue(Device& device); + ~CommandQueue() override = default; + + std::shared_ptr createCommandBuffer(const CommandBufferDesc& desc, + Result* IGL_NULLABLE outResult) override; + SubmitHandle submit(const ICommandBuffer& commandBuffer, bool endOfFrame = false) override; + + Device& getDevice() { return device_; } + + private: + Device& device_; + uint64_t scheduleFenceValue_ = 0; // Monotonically increasing fence value used for scheduling. +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Common.cpp b/src/igl/d3d12/Common.cpp new file mode 100644 index 0000000000..5342c1db44 --- /dev/null +++ b/src/igl/d3d12/Common.cpp @@ -0,0 +1,191 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace igl::d3d12 { + +DXGI_FORMAT textureFormatToDXGIFormat(TextureFormat format) { + switch (format) { + case TextureFormat::Invalid: + return DXGI_FORMAT_UNKNOWN; + case TextureFormat::R_UNorm8: + return DXGI_FORMAT_R8_UNORM; + case TextureFormat::R_UNorm16: + return DXGI_FORMAT_R16_UNORM; + case TextureFormat::R_F16: + return DXGI_FORMAT_R16_FLOAT; + case TextureFormat::R_UInt16: + return DXGI_FORMAT_R16_UINT; + case TextureFormat::B5G5R5A1_UNorm: + return DXGI_FORMAT_B5G5R5A1_UNORM; + case TextureFormat::B5G6R5_UNorm: + return DXGI_FORMAT_B5G6R5_UNORM; + case TextureFormat::RG_UNorm8: + return DXGI_FORMAT_R8G8_UNORM; + case TextureFormat::RG_UNorm16: + return DXGI_FORMAT_R16G16_UNORM; + case TextureFormat::R5G5B5A1_UNorm: + return DXGI_FORMAT_B5G5R5A1_UNORM; // DXGI closest match + case TextureFormat::BGRA_UNorm8: + return DXGI_FORMAT_B8G8R8A8_UNORM; + case TextureFormat::RGBA_UNorm8: + case TextureFormat::RGBX_UNorm8: + return DXGI_FORMAT_R8G8B8A8_UNORM; + case TextureFormat::RGBA_SRGB: + return DXGI_FORMAT_R8G8B8A8_UNORM_SRGB; + case TextureFormat::BGRA_SRGB: + return DXGI_FORMAT_B8G8R8A8_UNORM_SRGB; + case TextureFormat::RG_F16: + return DXGI_FORMAT_R16G16_FLOAT; + case TextureFormat::RG_UInt16: + return DXGI_FORMAT_R16G16_UINT; + case TextureFormat::RGB10_A2_UNorm_Rev: + return DXGI_FORMAT_R10G10B10A2_UNORM; + case TextureFormat::RGB10_A2_Uint_Rev: + return DXGI_FORMAT_R10G10B10A2_UINT; + case TextureFormat::R_F32: + return DXGI_FORMAT_R32_FLOAT; + case TextureFormat::R_UInt32: + return DXGI_FORMAT_R32_UINT; + case TextureFormat::RG_F32: + return DXGI_FORMAT_R32G32_FLOAT; + case TextureFormat::RGB_F16: + return DXGI_FORMAT_R16G16B16A16_FLOAT; // DXGI doesn't have RGB16, use RGBA16 + case TextureFormat::RGBA_F16: + return DXGI_FORMAT_R16G16B16A16_FLOAT; + case TextureFormat::RGB_F32: + return DXGI_FORMAT_R32G32B32_FLOAT; + case TextureFormat::RGBA_UInt32: + return DXGI_FORMAT_R32G32B32A32_UINT; + case TextureFormat::RGBA_F32: + return 
DXGI_FORMAT_R32G32B32A32_FLOAT; + // BC7 compressed color formats + case TextureFormat::RGBA_BC7_UNORM_4x4: + return DXGI_FORMAT_BC7_UNORM; + case TextureFormat::RGBA_BC7_SRGB_4x4: + return DXGI_FORMAT_BC7_UNORM_SRGB; + // Depth/stencil formats + case TextureFormat::Z_UNorm16: + return DXGI_FORMAT_D16_UNORM; + case TextureFormat::Z_UNorm24: + return DXGI_FORMAT_D24_UNORM_S8_UINT; // DXGI doesn't have D24 alone + case TextureFormat::Z_UNorm32: + return DXGI_FORMAT_D32_FLOAT; + case TextureFormat::S8_UInt_Z24_UNorm: + return DXGI_FORMAT_D24_UNORM_S8_UINT; + case TextureFormat::S8_UInt_Z32_UNorm: + return DXGI_FORMAT_D32_FLOAT_S8X24_UINT; + case TextureFormat::S_UInt8: + // Stencil-only formats are not natively supported by D3D12. + // TODO: Implement via typed subresource views using stencil plane formats: + // - DXGI_FORMAT_X24_TYPELESS_G8_UINT (for D24_UNORM_S8_UINT backing resource) + // - DXGI_FORMAT_X32_TYPELESS_G8X24_UINT (for D32_FLOAT_S8X24_UINT backing resource) + // See: https://learn.microsoft.com/en-us/windows/win32/api/dxgiformat/ne-dxgiformat-dxgi_format + IGL_LOG_ERROR_ONCE("TextureFormat::S_UInt8 not supported on D3D12 (no stencil-only textures) - use S8_UInt_Z24_UNorm or S8_UInt_Z32_UNorm instead\n"); + return DXGI_FORMAT_UNKNOWN; + default: + return DXGI_FORMAT_UNKNOWN; + } +} + +namespace { +bool isDepthOrStencilFormat(TextureFormat format) { + switch (format) { + case TextureFormat::Z_UNorm16: + case TextureFormat::Z_UNorm24: + case TextureFormat::Z_UNorm32: + case TextureFormat::S8_UInt_Z24_UNorm: + case TextureFormat::S8_UInt_Z32_UNorm: + return true; + default: + return false; + } +} +} // namespace + +DXGI_FORMAT textureFormatToDXGIResourceFormat(TextureFormat format, bool sampledUsage) { + if (!sampledUsage || !isDepthOrStencilFormat(format)) { + return textureFormatToDXGIFormat(format); + } + + switch (format) { + case TextureFormat::Z_UNorm16: + return DXGI_FORMAT_R16_TYPELESS; + case TextureFormat::Z_UNorm24: + case 
TextureFormat::S8_UInt_Z24_UNorm: + return DXGI_FORMAT_R24G8_TYPELESS; + case TextureFormat::Z_UNorm32: + return DXGI_FORMAT_R32_TYPELESS; + case TextureFormat::S8_UInt_Z32_UNorm: + return DXGI_FORMAT_R32G8X24_TYPELESS; + default: + return textureFormatToDXGIFormat(format); + } +} + +DXGI_FORMAT textureFormatToDXGIShaderResourceViewFormat(TextureFormat format) { + if (!isDepthOrStencilFormat(format)) { + return textureFormatToDXGIFormat(format); + } + + switch (format) { + case TextureFormat::Z_UNorm16: + return DXGI_FORMAT_R16_UNORM; + case TextureFormat::Z_UNorm24: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + case TextureFormat::S8_UInt_Z24_UNorm: + return DXGI_FORMAT_R24_UNORM_X8_TYPELESS; + case TextureFormat::Z_UNorm32: + return DXGI_FORMAT_R32_FLOAT; + case TextureFormat::S8_UInt_Z32_UNorm: + return DXGI_FORMAT_R32_FLOAT_X8X24_TYPELESS; + default: + return textureFormatToDXGIFormat(format); + } +} + +TextureFormat dxgiFormatToTextureFormat(DXGI_FORMAT format) { + switch (format) { + case DXGI_FORMAT_UNKNOWN: + return TextureFormat::Invalid; + case DXGI_FORMAT_R8_UNORM: + return TextureFormat::R_UNorm8; + case DXGI_FORMAT_R16_UNORM: + return TextureFormat::R_UNorm16; + case DXGI_FORMAT_R16_FLOAT: + return TextureFormat::R_F16; + case DXGI_FORMAT_R8G8_UNORM: + return TextureFormat::RG_UNorm8; + case DXGI_FORMAT_R8G8B8A8_UNORM: + return TextureFormat::RGBA_UNorm8; + case DXGI_FORMAT_R8G8B8A8_UNORM_SRGB: + return TextureFormat::RGBA_SRGB; + case DXGI_FORMAT_B8G8R8A8_UNORM: + return TextureFormat::BGRA_UNorm8; + case DXGI_FORMAT_B8G8R8A8_UNORM_SRGB: + return TextureFormat::BGRA_SRGB; + case DXGI_FORMAT_R16G16B16A16_FLOAT: + return TextureFormat::RGBA_F16; + case DXGI_FORMAT_R32G32B32A32_FLOAT: + return TextureFormat::RGBA_F32; + case DXGI_FORMAT_D16_UNORM: + return TextureFormat::Z_UNorm16; + case DXGI_FORMAT_D24_UNORM_S8_UINT: + return TextureFormat::S8_UInt_Z24_UNorm; + case DXGI_FORMAT_D32_FLOAT: + return TextureFormat::Z_UNorm32; + case DXGI_FORMAT_BC7_UNORM: 
+ return TextureFormat::RGBA_BC7_UNORM_4x4; + case DXGI_FORMAT_BC7_UNORM_SRGB: + return TextureFormat::RGBA_BC7_SRGB_4x4; + default: + return TextureFormat::Invalid; + } +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Common.h b/src/igl/d3d12/Common.h new file mode 100644 index 0000000000..eef83bcb08 --- /dev/null +++ b/src/igl/d3d12/Common.h @@ -0,0 +1,374 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#ifndef IGL_D3D12_COMMON_H +#define IGL_D3D12_COMMON_H + +#include +#include + +#include +#include +#include +#include +#include + +// Set to 1 to see verbose debug console logs with D3D12 commands +#define IGL_D3D12_PRINT_COMMANDS 0 + +// Set to 1 to enable verbose logging (hot-path logs, detailed state tracking, etc.) +// This is disabled by default to reduce log volume. +#define IGL_D3D12_DEBUG_VERBOSE 0 + +namespace igl::d3d12 { + +// Configuration structure for D3D12 backend. +// Centralizes all size-related configuration with documented rationale. +struct D3D12ContextConfig { + // === Frame Buffering === + // Rationale: Triple buffering (3 frames) provides optimal GPU/CPU parallelism on modern hardware + // while maintaining reasonable memory overhead. Reducing to 2 can save memory on constrained + // devices but may reduce throughput. Increasing beyond 3 provides minimal benefit. + // D3D12 spec: Minimum 2, recommended 2-3 for flip model swapchains + // + // LIMITATION: Currently fixed at 3 due to fixed-size arrays (frameContexts_, renderTargets_). + // Attempting to change this value will be clamped by validate(). To enable true configurability, + // D3D12Context must be refactored to use std::vector instead of fixed-size arrays. 
+ uint32_t maxFramesInFlight = 3; + + // === Descriptor Heap Sizes (Per-Frame Shader-Visible) === + // Rationale: Following Microsoft MiniEngine pattern for dynamic per-frame allocation + // Samplers: D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE (2048) is the hardware limit. + // D3D12 spec limits: CBV/SRV/UAV up to 1,000,000, Samplers max 2048 + uint32_t samplerHeapSize = 2048; // Total sampler descriptors per frame (D3D12 spec limit) + + // === CBV/SRV/UAV Dynamic Heap Growth === + // Rationale: Prevents unbounded memory growth while supporting complex scenes + // Starts with one page, can grow up to maxHeapPages as needed + // 16 pages × 1024 descriptors = 16,384 max descriptors per frame + // This supports ~500-1000 draw calls per frame with typical descriptor usage patterns + uint32_t descriptorsPerPage = 1024; // CBV/SRV/UAV descriptors per heap page + uint32_t maxHeapPages = 16; // Maximum pages per frame (total capacity = pages × descriptorsPerPage) + + // Pre-allocation policy for descriptor pages. + // Rationale: Following Vulkan fail-fast pattern to prevent mid-frame descriptor invalidation. + // When true: All maxHeapPages are pre-allocated at init (recommended). + // When false: Only 1 page pre-allocated at init (minimal memory footprint). + // Both modes fail-fast when pages are exhausted - no dynamic growth to prevent descriptor invalidation. + // Default: true for safety (matches Vulkan behavior and supports complex scenes). 
+ bool preAllocateDescriptorPages = true; + + // DEPRECATED: Use descriptorsPerPage instead + // This field is kept for backward compatibility but has the same value as descriptorsPerPage + uint32_t cbvSrvUavHeapSize = 1024; // Alias for descriptorsPerPage (deprecated) + + // === CPU-Visible Descriptor Heaps (Static) === + // Rationale: RTVs/DSVs are created once per texture and persist across frames + // 256 RTVs: Supports ~128 textures with mips/array layers (typical for games) + // 128 DSVs: Sufficient for depth buffers, shadow maps, and multi-pass rendering + // These values should be tuned based on application texture usage patterns + uint32_t rtvHeapSize = 256; + uint32_t dsvHeapSize = 128; + + // === Upload Ring Buffer === + // Rationale: 128MB provides good balance for streaming resources (textures, constant buffers) + // Smaller values (64MB) reduce memory footprint but increase allocation failures + // Larger values (256MB) reduce failures but waste memory on simple scenes + // Microsoft MiniEngine uses similar sizes (64-256MB range) + uint64_t uploadRingBufferSize = 128 * 1024 * 1024; // 128 MB + + // === Validation Helpers === + // Clamp values to D3D12 spec limits and provide warnings for unusual configurations + void validate() { + // Frame buffering: Allow 2-4 buffers (double/triple/quad buffering) + // T43: Now that renderTargets_ and frameContexts_ are std::vector, we can support runtime counts. + // Practical range: 2 (double-buffer, higher latency), 3 (triple-buffer, balanced), 4 (lower latency, more memory) + // Note: DXGI may adjust the requested count; actual runtime count comes from GetDesc1(). + constexpr uint32_t kMinFrames = 2; + constexpr uint32_t kMaxFrames = 4; + if (maxFramesInFlight < kMinFrames || maxFramesInFlight > kMaxFrames) { + IGL_LOG_ERROR("D3D12ContextConfig: maxFramesInFlight=%u out of range [%u, %u], clamping to %u\n", + maxFramesInFlight, kMinFrames, kMaxFrames, + (maxFramesInFlight < kMinFrames) ? 
kMinFrames : kMaxFrames); + maxFramesInFlight = (maxFramesInFlight < kMinFrames) ? kMinFrames : kMaxFrames; + } + + // Sampler heap: Use D3D12 constant instead of magic number + if (samplerHeapSize > D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE) { + IGL_LOG_INFO("D3D12ContextConfig: samplerHeapSize=%u exceeds D3D12 limit (%u), clamping\n", + samplerHeapSize, D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE); + samplerHeapSize = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; + } + + // Descriptor page limits: Prevent absurd/invalid values + if (descriptorsPerPage == 0) { + IGL_LOG_ERROR("D3D12ContextConfig: descriptorsPerPage=0 is invalid, setting to 1024\n"); + descriptorsPerPage = 1024; + } + if (maxHeapPages == 0) { + IGL_LOG_ERROR("D3D12ContextConfig: maxHeapPages=0 is invalid, setting to 16\n"); + maxHeapPages = 16; + } + + // CBV/SRV/UAV heap: D3D12 spec limit (generic, tier-independent upper bound) + // Note: Actual device limits may be lower depending on resource binding tier; + // use CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS) for precise caps + constexpr uint32_t kMaxCbvSrvUavDescriptors = 1000000; + if (descriptorsPerPage > kMaxCbvSrvUavDescriptors) { + IGL_LOG_INFO("D3D12ContextConfig: descriptorsPerPage=%u exceeds D3D12 limit (%u), clamping\n", + descriptorsPerPage, kMaxCbvSrvUavDescriptors); + descriptorsPerPage = kMaxCbvSrvUavDescriptors; + } + + // Keep deprecated cbvSrvUavHeapSize in sync with descriptorsPerPage + cbvSrvUavHeapSize = descriptorsPerPage; + + // Upload ring buffer: Warn if too small (may cause allocation failures) + constexpr uint64_t kMinRecommendedSize = 32 * 1024 * 1024; // 32 MB + if (uploadRingBufferSize < kMinRecommendedSize) { + IGL_LOG_INFO("D3D12ContextConfig: uploadRingBufferSize=%llu MB is small, " + "may cause allocation failures (recommended minimum: %llu MB)\n", + uploadRingBufferSize / (1024 * 1024), kMinRecommendedSize / (1024 * 1024)); + } + } + + // === Preset Configurations === + // Factory methods for common use cases + 
+ // Default configuration (balanced for most applications) + static D3D12ContextConfig defaultConfig() { + return D3D12ContextConfig{}; // Uses default member initializers + } + + // Low memory configuration (mobile, integrated GPUs, constrained devices) + static D3D12ContextConfig lowMemoryConfig() { + D3D12ContextConfig config; + config.maxFramesInFlight = 2; // Double-buffering to reduce memory (T43) + config.descriptorsPerPage = 512; // Smaller pages + config.cbvSrvUavHeapSize = 512; // Keep in sync (deprecated field) + config.maxHeapPages = 8; // Fewer pages (total: 512 × 8 = 4K descriptors) + config.rtvHeapSize = 128; // Fewer RTVs + config.dsvHeapSize = 64; // Fewer DSVs + config.uploadRingBufferSize = 64 * 1024 * 1024; // 64 MB + config.validate(); + return config; + } + + // High performance configuration (discrete GPUs, desktop, complex scenes) + static D3D12ContextConfig highPerformanceConfig() { + D3D12ContextConfig config; + config.maxFramesInFlight = 3; // Triple-buffering (balanced, default) (T43) + config.descriptorsPerPage = 2048; // Larger pages + config.cbvSrvUavHeapSize = 2048; // Keep in sync (deprecated field) + config.maxHeapPages = 32; // More pages (total: 2048 × 32 = 64K descriptors) + config.rtvHeapSize = 512; // More RTVs for render targets + config.dsvHeapSize = 256; // More DSVs for shadow maps + config.uploadRingBufferSize = 256 * 1024 * 1024; // 256 MB + config.validate(); + return config; + } +}; + +// Default frame buffering count (triple buffering). +// T43: D3D12Context now uses runtime swapchainBufferCount_ queried from the swapchain. +// This constant serves as the default value for D3D12ContextConfig::maxFramesInFlight +// and is used by headless contexts (which have no swapchain to query). +// Applications can configure 2-4 buffers via D3D12ContextConfig::maxFramesInFlight. 
+constexpr uint32_t kMaxFramesInFlight = 3; + +// Maximum number of descriptor sets (matching IGL's Vulkan backend) +constexpr uint32_t kMaxDescriptorSets = 4; + +// Maximum number of samplers; increased to D3D12 spec limit to support complex scenes. +// D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE is defined as 2048 in d3d12.h. +constexpr uint32_t kMaxSamplers = D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE; + +// Descriptor heap sizes (per-frame shader-visible heaps) +// Following Microsoft MiniEngine pattern for dynamic per-frame allocation +constexpr uint32_t kCbvSrvUavHeapSize = 1024; // CBV/SRV/UAV descriptors per page +constexpr uint32_t kSamplerHeapSize = kMaxSamplers; // Sampler descriptors per frame + +// Dynamic heap growth limits (prevent unbounded memory usage). +constexpr uint32_t kDescriptorsPerPage = kCbvSrvUavHeapSize; // 1024 descriptors per page +constexpr uint32_t kMaxHeapPages = 16; // Maximum 16 pages = 16K descriptors per frame +constexpr uint32_t kMaxDescriptorsPerFrame = kMaxHeapPages * kDescriptorsPerPage; // 16384 total + +// Maximum number of vertex attributes (D3D12 spec limit). +// Uses D3D12 spec constant instead of a hard-coded value. +constexpr uint32_t kMaxVertexAttributes = D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; // 32 + +// Normalized error macros - single log per error (no double logging). 
+// Debug builds: IGL_DEBUG_ASSERT logs via _IGLDebugAbort +// Release builds: IGL_LOG_ERROR provides visibility +#if IGL_DEBUG_ABORT_ENABLED + #define D3D12_CHECK(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_DEBUG_ASSERT(false, "D3D12 API call failed: %s, HRESULT: 0x%08X", \ + #func, \ + static_cast(d3d12_check_result)); \ + } \ + } while (0) + + #define D3D12_CHECK_RETURN(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_DEBUG_ASSERT(false, "D3D12 API call failed: %s, HRESULT: 0x%08X", \ + #func, \ + static_cast(d3d12_check_result)); \ + return getResultFromHRESULT(d3d12_check_result); \ + } \ + } while (0) +#else + #define D3D12_CHECK(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_LOG_ERROR("D3D12 API call failed: %s, HRESULT: 0x%08X\n", \ + #func, \ + static_cast(d3d12_check_result)); \ + } \ + } while (0) + + #define D3D12_CHECK_RETURN(func) \ + do { \ + const HRESULT d3d12_check_result = (func); \ + if (FAILED(d3d12_check_result)) { \ + IGL_LOG_ERROR("D3D12 API call failed: %s, HRESULT: 0x%08X\n", \ + #func, \ + static_cast(d3d12_check_result)); \ + return getResultFromHRESULT(d3d12_check_result); \ + } \ + } while (0) +#endif + +// Verbose logging macro (hot-path logs, detailed state tracking). +// Only logs when IGL_D3D12_DEBUG_VERBOSE is enabled (disabled by default) +#if IGL_D3D12_DEBUG_VERBOSE + #define IGL_D3D12_LOG_VERBOSE(format, ...) IGL_LOG_INFO(format, ##__VA_ARGS__) +#else + #define IGL_D3D12_LOG_VERBOSE(format, ...) ((void)0) +#endif + +// Command logging macro (D3D12 API command traces). 
+// Only logs when IGL_D3D12_PRINT_COMMANDS is enabled (disabled by default) +// Use for command recording, state transitions, and D3D12 API call traces +// Note: Treated as INFO-level severity but controlled separately from DEBUG_VERBOSE +// to allow independent toggling of command traces vs general verbose output +#if IGL_D3D12_PRINT_COMMANDS + #define IGL_D3D12_LOG_CMD(format, ...) IGL_LOG_INFO(format, ##__VA_ARGS__) +#else + #define IGL_D3D12_LOG_CMD(format, ...) ((void)0) +#endif + +// Convert HRESULT to IGL Result +inline Result getResultFromHRESULT(HRESULT hr) { + if (SUCCEEDED(hr)) { + return Result(Result::Code::Ok); + } + + // Map common HRESULT codes to IGL Result codes + switch (hr) { + case E_OUTOFMEMORY: + return Result(Result::Code::RuntimeError, "Out of memory"); + case E_INVALIDARG: + return Result(Result::Code::ArgumentInvalid, "Invalid argument"); + case E_NOTIMPL: + return Result(Result::Code::Unimplemented, "Not implemented"); + case DXGI_ERROR_DEVICE_REMOVED: + return Result(Result::Code::RuntimeError, "Device removed"); + case DXGI_ERROR_DEVICE_RESET: + return Result(Result::Code::RuntimeError, "Device reset"); + default: { + // Include HRESULT code for better debugging of unexpected errors. + char buf[64]; + snprintf(buf, sizeof(buf), "D3D12 error (hr=0x%08X)", static_cast(hr)); + return Result(Result::Code::RuntimeError, buf); + } + } +} + +// Helper for COM resource release +template +void SafeRelease(T*& ptr) { + if (ptr) { + ptr->Release(); + ptr = nullptr; + } +} + +// Texture format conversion +DXGI_FORMAT textureFormatToDXGIFormat(TextureFormat format); +DXGI_FORMAT textureFormatToDXGIResourceFormat(TextureFormat format, bool sampledUsage); +DXGI_FORMAT textureFormatToDXGIShaderResourceViewFormat(TextureFormat format); +TextureFormat dxgiFormatToTextureFormat(DXGI_FORMAT format); + +// Align value to specified alignment (must be power-of-two) +// Template allows use with different integer types (UINT64, size_t, etc.) 
+// IMPORTANT: alignment must be a power of 2 (e.g., 256, 4096, 65536) +template +inline T AlignUp(T value, T alignment) { + IGL_DEBUG_ASSERT((alignment & (alignment - 1)) == 0, "AlignUp: alignment must be power-of-two"); + return (value + alignment - 1) & ~(alignment - 1); +} + +// Hash combining utility (boost::hash_combine pattern) +// Used for hashing complex structures like root signatures and pipeline descriptors +template +inline void hashCombine(size_t& seed, const T& value) { + seed ^= std::hash{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2); +} + +// Feature level to string conversion +inline const char* featureLevelToString(D3D_FEATURE_LEVEL level) { + switch (level) { + case D3D_FEATURE_LEVEL_12_2: return "12.2"; + case D3D_FEATURE_LEVEL_12_1: return "12.1"; + case D3D_FEATURE_LEVEL_12_0: return "12.0"; + case D3D_FEATURE_LEVEL_11_1: return "11.1"; + case D3D_FEATURE_LEVEL_11_0: return "11.0"; + default: return "Unknown"; + } +} + +// Shader target helper. +// Convert D3D_SHADER_MODEL enum to shader target string (e.g., "vs_6_6", "ps_5_1"). 
+inline std::string getShaderTarget(D3D_SHADER_MODEL shaderModel, ShaderStage stage) { + // Extract major and minor version from D3D_SHADER_MODEL enum + // Format: 0xMm where M = major, m = minor (e.g., 0x66 = SM 6.6, 0x51 = SM 5.1) + int major = (shaderModel >> 4) & 0xF; + int minor = shaderModel & 0xF; + + // Get stage prefix + const char* stagePrefix = nullptr; + switch (stage) { + case ShaderStage::Vertex: + stagePrefix = "vs"; + break; + case ShaderStage::Fragment: + stagePrefix = "ps"; // DirectX uses "ps" for pixel/fragment shaders + break; + case ShaderStage::Compute: + stagePrefix = "cs"; + break; + default: + return ""; + } + + // Build target string (e.g., "vs_6_6", "ps_5_1", "cs_6_0") + char target[16]; + snprintf(target, sizeof(target), "%s_%d_%d", stagePrefix, major, minor); + return std::string(target); +} + +} // namespace igl::d3d12 + +#endif // IGL_D3D12_COMMON_H diff --git a/src/igl/d3d12/ComputeCommandEncoder.cpp b/src/igl/d3d12/ComputeCommandEncoder.cpp new file mode 100644 index 0000000000..19d225c738 --- /dev/null +++ b/src/igl/d3d12/ComputeCommandEncoder.cpp @@ -0,0 +1,671 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +ComputeCommandEncoder::ComputeCommandEncoder(CommandBuffer& commandBuffer) : + commandBuffer_(commandBuffer), resourcesBinder_(commandBuffer, true /* isCompute */), isEncoding_(true) { + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder created\n"); + + // Set descriptor heaps for this command list. + // Use active heap from frame context, not the legacy accessor. 
+ auto& context = commandBuffer_.getContext(); + auto& frameCtx = context.getFrameContexts()[context.getCurrentFrameIndex()]; + + ID3D12DescriptorHeap* cbvSrvUavHeap = frameCtx.activeCbvSrvUavHeap.Get(); + ID3D12DescriptorHeap* samplerHeap = frameCtx.samplerHeap.Get(); + + // Legacy fallback: if the context does not provide per-frame heaps, try the manager once + if ((!cbvSrvUavHeap || !samplerHeap) && context.getDescriptorHeapManager()) { + auto* heapMgr = context.getDescriptorHeapManager(); + if (!cbvSrvUavHeap) { + cbvSrvUavHeap = heapMgr->getCbvSrvUavHeap(); + } + if (!samplerHeap) { + samplerHeap = heapMgr->getSamplerHeap(); + } + } + + if (cbvSrvUavHeap && samplerHeap) { + auto* commandList = commandBuffer_.getCommandList(); + if (commandList) { + ID3D12DescriptorHeap* heaps[] = {cbvSrvUavHeap, samplerHeap}; + commandList->SetDescriptorHeaps(2, heaps); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Descriptor heaps set (active heap from FrameContext)\n"); + } + } +} + +void ComputeCommandEncoder::endEncoding() { + if (!isEncoding_) { + return; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::endEncoding()\n"); + isEncoding_ = false; +} + +void ComputeCommandEncoder::bindComputePipelineState( + const std::shared_ptr& pipelineState) { + if (!pipelineState) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindComputePipelineState - null pipeline state\n"); + return; + } + + currentPipeline_ = static_cast(pipelineState.get()); + + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindComputePipelineState - command list is closed or null\n"); + return; + } + + // Set compute root signature and pipeline state + commandList->SetComputeRootSignature(currentPipeline_->getRootSignature()); + commandList->SetPipelineState(currentPipeline_->getPipelineState()); + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindComputePipelineState - PSO and root signature set\n"); +} + 
+void ComputeCommandEncoder::dispatchThreadGroups(const Dimensions& threadgroupCount, + const Dimensions& /*threadgroupSize*/, + const Dependencies& dependencies) { + if (!currentPipeline_) { + IGL_LOG_ERROR("ComputeCommandEncoder::dispatchThreadGroups - no pipeline state bound\n"); + return; + } + + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList) { + IGL_LOG_ERROR("ComputeCommandEncoder::dispatchThreadGroups - command list is closed or null\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::dispatchThreadGroups(%u, %u, %u)\n", + threadgroupCount.width, threadgroupCount.height, threadgroupCount.depth); + + // Process dependencies - insert barriers for buffers and textures + const Dependencies* deps = &dependencies; + std::vector uavResources; + + while (deps) { + // Handle buffer dependencies + for (IBuffer* buf : deps->buffers) { + if (!buf) { + break; + } + auto* d3dBuffer = static_cast(buf); + uavResources.push_back(d3dBuffer->getResource()); + } + + // Handle texture dependencies + for (ITexture* tex : deps->textures) { + if (!tex) { + break; + } + auto* d3dTexture = static_cast(tex); + // Ensure texture is in proper state for compute access + d3dTexture->transitionAll(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + uavResources.push_back(d3dTexture->getResource()); + } + + deps = deps->next; + } + + // Insert UAV barriers for dependent resources before dispatch + if (!uavResources.empty()) { + std::vector barriers; + barriers.reserve(uavResources.size()); + + for (ID3D12Resource* resource : uavResources) { + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.UAV.pResource = resource; + barriers.push_back(barrier); + } + + commandList->ResourceBarrier(static_cast(barriers.size()), barriers.data()); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Inserted %zu UAV barriers before dispatch\n", barriers.size()); + } + + // Apply all 
resource bindings (textures, samplers, buffers, UAVs) before dispatch. + // For compute pipelines, pass nullptr since there's no RenderPipelineState + Result bindResult; + if (!resourcesBinder_.updateBindings(nullptr, &bindResult)) { + IGL_LOG_ERROR("dispatchThreadGroups: Failed to update resource bindings: %s\n", bindResult.message.c_str()); + return; + } + + // Bind all cached resources to root parameters + // Root signature layout (from Device::createComputePipeline): + // - Parameter 0: Root Constants (b0) - 16 DWORDs + // - Parameter 1: UAV table (u0-uN) + // - Parameter 2: SRV table (t0-tN) + // - Parameter 3: CBV table (b1-bN) + // - Parameter 4: Sampler table (s0-sN) + + // Bind UAVs (parameter 1), with debug validation to catch sparse binding. + if (boundUavCount_ > 0) { + IGL_DEBUG_ASSERT(cachedUavHandles_[0].ptr != 0, + "UAV count > 0 but base handle is null - did you bind only higher slots?"); + if (cachedUavHandles_[0].ptr != 0) { + commandList->SetComputeRootDescriptorTable(1, cachedUavHandles_[0]); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu UAVs\n", boundUavCount_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: UAV count > 0 but base handle is null - skipping binding and clearing boundUavCount_ to 0\n"); + // Clear count to avoid repeated errors on subsequent dispatches + boundUavCount_ = 0; + } + } + + // Bind SRVs (Parameter 2) + if (boundSrvCount_ > 0) { + IGL_DEBUG_ASSERT(cachedSrvHandles_[0].ptr != 0, + "SRV count > 0 but base handle is null - did you bind only higher slots?"); + if (cachedSrvHandles_[0].ptr != 0) { + commandList->SetComputeRootDescriptorTable(2, cachedSrvHandles_[0]); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu SRVs\n", boundSrvCount_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: SRV count > 0 but base handle is null - skipping binding and clearing boundSrvCount_ to 0\n"); + // Clear count to avoid repeated errors on subsequent dispatches + boundSrvCount_ = 0; + } + } + + // Bind CBVs 
(parameter 3). Only create/allocate CBV descriptors when bindings have + // changed or the heap page has changed. + if (boundCbvCount_ > 0) { + auto& context = commandBuffer_.getContext(); + auto& frameCtx = context.getFrameContexts()[context.getCurrentFrameIndex()]; + const uint32_t currentPageIdx = frameCtx.currentCbvSrvUavPageIndex; + + // Check if heap page changed - invalidates cached descriptors + const bool heapPageChanged = (cachedCbvPageIndex_ != currentPageIdx); + if (heapPageChanged) { + cbvBindingsDirty_ = true; + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Heap page changed (%u -> %u), invalidating CBV cache\n", + cachedCbvPageIndex_, currentPageIdx); + } + + // Only recreate descriptors if bindings are dirty or heap changed + if (cbvBindingsDirty_) { + auto* device = context.getDevice(); + + // Allocate descriptors for CBV table - use fixed-size array to avoid heap allocation + uint32_t cbvIndices[kMaxComputeBuffers] = {}; + for (size_t i = 0; i < boundCbvCount_; ++i) { + uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("ComputeCommandEncoder: Failed to allocate CBV descriptor %zu: %s\n", i, allocResult.message.c_str()); + return; + } + cbvIndices[i] = descriptorIndex; + } + + // Create CBV descriptors for all bound constant buffers + for (size_t i = 0; i < boundCbvCount_; ++i) { + if (cachedCbvAddresses_[i] != 0 && cachedCbvSizes_[i] > 0) { + const uint32_t descriptorIndex = cbvIndices[i]; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + // Enforce 64 KB limit for CBVs. 
+ constexpr size_t kMaxCBVSize = 65536; // 64 KB (D3D12 spec limit) + if (cachedCbvSizes_[i] > kMaxCBVSize) { + IGL_LOG_ERROR("ComputeCommandEncoder: Constant buffer %zu size (%zu bytes) exceeds D3D12 64 KB limit\n", + i, cachedCbvSizes_[i]); + continue; // Skip this CBV + } + + // Align size to 256-byte boundary (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + const size_t alignedSize = (cachedCbvSizes_[i] + 255) & ~255; + + IGL_DEBUG_ASSERT(alignedSize <= kMaxCBVSize, "CBV size exceeds 64 KB after alignment"); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = cachedCbvAddresses_[i]; + cbvDesc.SizeInBytes = static_cast(alignedSize); + + device->CreateConstantBufferView(&cbvDesc, cpuHandle); + } + } + + // Cache the base index and page for reuse + cachedCbvBaseIndex_ = cbvIndices[0]; + cachedCbvPageIndex_ = currentPageIdx; + cbvBindingsDirty_ = false; + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Created %zu CBV descriptors at page %u (descriptors %u-%u)\n", + boundCbvCount_, currentPageIdx, cbvIndices[0], cbvIndices[boundCbvCount_ - 1]); + } + + // Recompute GPU handle from cached base index for current heap + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(cachedCbvBaseIndex_); + + // Defensive check: ensure handle is valid before binding + IGL_DEBUG_ASSERT(gpuHandle.ptr != 0, "CBV count > 0 but GPU handle is null"); + if (gpuHandle.ptr != 0) { + commandList->SetComputeRootDescriptorTable(3, gpuHandle); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu CBVs via descriptor table (base index %u)\n", + boundCbvCount_, cachedCbvBaseIndex_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: CBV GPU handle is null, skipping binding\n"); + } + } + + // Bind Samplers (Parameter 4) + if (boundSamplerCount_ > 0) { + IGL_DEBUG_ASSERT(cachedSamplerHandles_[0].ptr != 0, + "Sampler count > 0 but base handle is null - did you bind only higher slots?"); + if (cachedSamplerHandles_[0].ptr != 0) { + 
commandList->SetComputeRootDescriptorTable(4, cachedSamplerHandles_[0]); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder: Bound %zu samplers\n", boundSamplerCount_); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder: Sampler count > 0 but base handle is null - skipping binding and clearing boundSamplerCount_ to 0\n"); + // Clear count to avoid repeated errors on subsequent dispatches + boundSamplerCount_ = 0; + } + } + + // Dispatch compute work + // Note: threadgroupSize is embedded in the compute shader ([numthreads(...)]) + commandList->Dispatch(threadgroupCount.width, threadgroupCount.height, threadgroupCount.depth); + + // Insert resource-specific UAV barriers for bound UAVs to ensure compute writes are visible. + // Only barrier UAVs that were actually bound (more efficient than a global barrier). + if (boundUavCount_ > 0) { + // Use fixed-size array to avoid heap allocation in hot path + D3D12_RESOURCE_BARRIER barriers[kMaxComputeBuffers]; + UINT barrierCount = 0; + + for (size_t i = 0; i < boundUavCount_; ++i) { + if (boundUavResources_[i] != nullptr) { + D3D12_RESOURCE_BARRIER& barrier = barriers[barrierCount++]; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_UAV; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.UAV.pResource = boundUavResources_[i]; // Resource-specific UAV barrier + } + } + + if (barrierCount > 0) { + commandList->ResourceBarrier(barrierCount, barriers); + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::dispatchThreadGroups - dispatch complete, %u resource-specific UAV barriers inserted\n", + barrierCount); + } + } +} + +void ComputeCommandEncoder::bindPushConstants(const void* data, + size_t length, + size_t offset) { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList || !data || length == 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindPushConstants: Invalid parameters or closed command list (list=%p, data=%p, len=%zu)\n", + commandList, data, length); + return; + } + + 
// Compute root signature parameter 0 is declared as D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS (b0). + // Increased to 32 DWORDs (128 bytes) to match Vulkan. + constexpr size_t kMaxPushConstantBytes = 128; + + if (length + offset > kMaxPushConstantBytes) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindPushConstants: size %zu + offset %zu exceeds maximum %zu bytes\n", + length, offset, kMaxPushConstantBytes); + return; + } + + // Calculate number of 32-bit values and offset in DWORDs + const uint32_t num32BitValues = static_cast((length + 3) / 4); // Round up to DWORDs + const uint32_t destOffsetIn32BitValues = static_cast(offset / 4); + + // Use SetComputeRoot32BitConstants to directly write data to root constants + // Root parameter 0 = b0 (Push Constants), as declared in compute root signature + commandList->SetComputeRoot32BitConstants( + 0, // Root parameter index (push constants at parameter 0) + num32BitValues, // Number of 32-bit values to set + data, // Source data + destOffsetIn32BitValues); // Destination offset in 32-bit values + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindPushConstants: Set %u DWORDs (%zu bytes) at offset %zu to root parameter 0 (b0)\n", + num32BitValues, length, offset); +} + +void ComputeCommandEncoder::bindTexture(uint32_t index, ITexture* texture) { + // Delegate to D3D12ResourcesBinder for centralized descriptor management. 
+ resourcesBinder_.bindTexture(index, texture); +} + +void ComputeCommandEncoder::bindBuffer(uint32_t index, IBuffer* buffer, size_t offset, size_t /*bufferSize*/) { + if (!buffer) { + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer: null buffer\n"); + return; + } + + auto* d3dBuffer = static_cast(buffer); + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + + if (!device || context.getCbvSrvUavHeap() == nullptr) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: missing device or per-frame descriptor heap\n"); + return; + } + + // Determine buffer type + const auto bufferType = d3dBuffer->getBufferType(); + const bool isUniformBuffer = (bufferType & BufferDesc::BufferTypeBits::Uniform) != 0; + const bool isStorageBuffer = (bufferType & BufferDesc::BufferTypeBits::Storage) != 0; + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer(%u): isUniform=%d, isStorage=%d\n", + index, isUniformBuffer, isStorageBuffer); + + if (isStorageBuffer) { + // Storage buffer - bind as UAV (unordered access view) for read/write + if (index >= kMaxComputeBuffers) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: UAV index %u exceeds max %zu\n", + index, kMaxComputeBuffers); + return; + } + + // Determine element stride for structured buffer views + // If storageStride is not specified, default to 4 bytes to preserve existing behavior + size_t elementStride = d3dBuffer->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + + // D3D12 requires UAV buffer views to use element-aligned offsets + if (offset % elementStride != 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Storage buffer offset %zu is not aligned to " + "element stride (%zu bytes). 
UAV FirstElement will be truncated (offset/stride).\n", + offset, elementStride); + // Continue but log warning – FirstElement below uses integer division + } + + // Validate offset doesn't exceed buffer size to prevent underflow + const size_t bufferSizeBytes = d3dBuffer->getSizeInBytes(); + if (offset > bufferSizeBytes) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Storage buffer offset %zu exceeds buffer size %zu; skipping UAV binding\n", + offset, bufferSizeBytes); + return; + } + const size_t remaining = bufferSizeBytes - offset; + + // Check for undersized buffer (would create empty or partial view) + if (remaining < elementStride) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Remaining buffer size %zu is less than element stride %zu; " + "UAV will have NumElements=0 (empty view). Check buffer size and offset.\n", + remaining, elementStride); + // Continue to create the descriptor, but it will be empty (NumElements=0) + } + + // Use Result-based allocation with dynamic heap growth. 
+ uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Failed to allocate UAV descriptor: %s\n", allocResult.message.c_str()); + return; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor for RWStructuredBuffer (structured buffer) + // D3D12 compute shaders expect structured buffers, not raw buffers + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; // Required for structured buffers + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + // Element index and count are expressed in units of elementStride bytes + // Division truncates if offset is not aligned; see warning above + uavDesc.Buffer.FirstElement = static_cast(offset / elementStride); + // CRITICAL: NumElements must be (size - offset) / stride, not total size / stride + uavDesc.Buffer.NumElements = static_cast(remaining / elementStride); + uavDesc.Buffer.StructureByteStride = static_cast(elementStride); + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; // No flags for structured buffers + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateUnorderedAccessView"); + IGL_DEBUG_ASSERT(d3dBuffer->getResource() != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "UAV descriptor handle is invalid"); + + device->CreateUnorderedAccessView(d3dBuffer->getResource(), nullptr, &uavDesc, cpuHandle); + + cachedUavHandles_[index] = gpuHandle; + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + cachedUavHandles_[i] = {}; + } + boundUavCount_ = static_cast(index + 1); + + // Track UAV resource for precise barrier synchronization. 
+ // Note: UAV bindings are assumed to be dense (slots 0..boundUavCount_-1). + // Both cachedUavHandles_ and boundUavResources_ rely on this invariant. + boundUavResources_[index] = d3dBuffer->getResource(); + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + boundUavResources_[i] = nullptr; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer: Created UAV at index %u, descriptor slot %u\n", + index, descriptorIndex); + + commandBuffer_.trackTransientResource(d3dBuffer->getResource()); + } else if (isUniformBuffer) { + // Uniform buffer - bind as CBV (constant buffer view) + if (index >= kMaxComputeBuffers) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: CBV index %u exceeds max %zu\n", + index, kMaxComputeBuffers); + return; + } + + // Enforce dense binding: CBVs must start at slot 0 with no gaps + if (index > 0 && cachedCbvAddresses_[0] == 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: CBV bindings must be dense starting from slot 0. " + "Cannot bind slot %u when slot 0 is not bound.\n", index); + return; + } + + // Check for gaps in bindings + for (size_t i = 0; i < index; ++i) { + if (cachedCbvAddresses_[i] == 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: CBV bindings must be dense. " + "Cannot bind slot %u when slot %zu is not bound (gap detected).\n", index, i); + return; + } + } + + // D3D12 requires constant buffer addresses to be 256-byte aligned + // (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + if ((offset & 255) != 0) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: ERROR - CBV offset %zu is not 256-byte aligned " + "(required by D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT). " + "Constant buffers must be created at aligned offsets. Ignoring bind request.\n", offset); + return; + } + + cachedCbvAddresses_[index] = d3dBuffer->gpuAddress(offset); + // Store buffer size for CBV descriptor creation on the next dispatch. 
+ // Actual descriptor creation happens in dispatchThreadGroups when cbvBindingsDirty_ is set. + size_t bufferSize = d3dBuffer->getSizeInBytes() - offset; + + // D3D12 spec: Constant buffers must be ≤ 64 KB + constexpr size_t kMaxCBVSize = 65536; // 64 KB + if (bufferSize > kMaxCBVSize) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Buffer size (%zu bytes) exceeds D3D12 64 KB limit for constant buffers at index %u. Clamping to 64 KB.\n", + bufferSize, index); + bufferSize = kMaxCBVSize; + } + + cachedCbvSizes_[index] = bufferSize; + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + cachedCbvAddresses_[i] = 0; + cachedCbvSizes_[i] = 0; + } + boundCbvCount_ = static_cast(index + 1); + + // Mark CBV bindings as dirty to trigger descriptor recreation on the next dispatch. + cbvBindingsDirty_ = true; + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindBuffer: Cached CBV at index %u, address 0x%llx, size %zu\n", + index, cachedCbvAddresses_[index], cachedCbvSizes_[index]); + + commandBuffer_.trackTransientResource(d3dBuffer->getResource()); + } else { + IGL_LOG_ERROR("ComputeCommandEncoder::bindBuffer: Buffer must be Uniform or Storage type\n"); + } +} + +void ComputeCommandEncoder::bindUniform(const UniformDesc& /*uniformDesc*/, const void* /*data*/) { + // Single uniform binding not supported in D3D12 + // Use uniform buffers (CBVs) instead + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindUniform - not supported, use uniform buffers\n"); +} + +void ComputeCommandEncoder::bindBytes(uint32_t /*index*/, const void* /*data*/, size_t /*length*/) { + + // D3D12 backend does not support bindBytes + // Applications should use uniform buffers (bindBuffer) instead + // This is a no-op to maintain compatibility with cross-platform code + IGL_DEBUG_ASSERT_NOT_IMPLEMENTED(); + IGL_LOG_INFO_ONCE("bindBytes is not supported in D3D12 backend. 
Use bindBuffer with uniform buffers instead.\n"); +} + +void ComputeCommandEncoder::bindImageTexture(uint32_t index, ITexture* texture, TextureFormat /*format*/) { + if (!texture) { + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindImageTexture: null texture\n"); + return; + } + + if (index >= kMaxComputeBuffers) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: index %u exceeds max %zu\n", + index, kMaxComputeBuffers); + return; + } + + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + auto* d3dTexture = static_cast(texture); + + if (!device || !d3dTexture->getResource() || context.getCbvSrvUavHeap() == nullptr) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: missing device, resource, or per-frame heap\n"); + return; + } + + // Transition texture to UAV state for compute shader read/write access + auto* commandList = commandBuffer_.getCommandList(); + if (commandList) { + d3dTexture->transitionAll(commandList, D3D12_RESOURCE_STATE_UNORDERED_ACCESS); + } + + // Allocate descriptor and create UAV using Result-based allocation with dynamic heap growth. 
+ uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: Failed to allocate UAV descriptor: %s\n", allocResult.message.c_str()); + return; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = textureFormatToDXGIFormat(d3dTexture->getFormat()); + + auto resourceDesc = d3dTexture->getResource()->GetDesc(); + if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE3D; + uavDesc.Texture3D.MipSlice = 0; + uavDesc.Texture3D.FirstWSlice = 0; + uavDesc.Texture3D.WSize = resourceDesc.DepthOrArraySize; + } else if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + if (resourceDesc.DepthOrArraySize > 1) { + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2DARRAY; + uavDesc.Texture2DArray.MipSlice = 0; + uavDesc.Texture2DArray.FirstArraySlice = 0; + uavDesc.Texture2DArray.ArraySize = resourceDesc.DepthOrArraySize; + uavDesc.Texture2DArray.PlaneSlice = 0; + } else { + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_TEXTURE2D; + uavDesc.Texture2D.MipSlice = 0; + uavDesc.Texture2D.PlaneSlice = 0; + } + } else { + IGL_LOG_ERROR("ComputeCommandEncoder::bindImageTexture: unsupported dimension\n"); + return; + } + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateUnorderedAccessView"); + IGL_DEBUG_ASSERT(d3dTexture->getResource() != nullptr, "Texture resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "UAV descriptor handle is invalid"); + + device->CreateUnorderedAccessView(d3dTexture->getResource(), nullptr, &uavDesc, cpuHandle); + + cachedUavHandles_[index] = gpuHandle; + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + cachedUavHandles_[i] = {}; + } + boundUavCount_ = static_cast(index + 1); + + // Track UAV resources for precise barrier synchronization. + // Note: UAV bindings are assumed to be dense (slots 0..boundUavCount_-1). + // Both cachedUavHandles_ and boundUavResources_ rely on this invariant. + boundUavResources_[index] = d3dTexture->getResource(); + for (size_t i = index + 1; i < kMaxComputeBuffers; ++i) { + boundUavResources_[i] = nullptr; + } + + IGL_D3D12_LOG_VERBOSE("ComputeCommandEncoder::bindImageTexture: Created UAV at index %u, descriptor slot %u\n", + index, descriptorIndex); +} + +void ComputeCommandEncoder::bindSamplerState(uint32_t index, ISamplerState* samplerState) { + // Delegate to D3D12ResourcesBinder for centralized descriptor management. 
+ resourcesBinder_.bindSamplerState(index, samplerState); +} + +void ComputeCommandEncoder::pushDebugGroupLabel(const char* label, const Color& /*color*/) const { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList || !label) { + return; + } + // PIX debug markers + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList->BeginEvent( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void ComputeCommandEncoder::insertDebugEventLabel(const char* label, const Color& /*color*/) const { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList || !label) { + return; + } + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList->SetMarker( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void ComputeCommandEncoder::popDebugGroupLabel() const { + auto* commandList = commandBuffer_.getCommandList(); + if (!commandBuffer_.isRecording() || !commandList) { + return; + } + commandList->EndEvent(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ComputeCommandEncoder.h b/src/igl/d3d12/ComputeCommandEncoder.h new file mode 100644 index 0000000000..6c13aae126 --- /dev/null +++ b/src/igl/d3d12/ComputeCommandEncoder.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class CommandBuffer; +class ComputePipelineState; + +class ComputeCommandEncoder final : public IComputeCommandEncoder { + public: + explicit ComputeCommandEncoder(CommandBuffer& commandBuffer); + ~ComputeCommandEncoder() override = default; + + void endEncoding() override; + + void bindComputePipelineState(const std::shared_ptr& pipelineState) override; + void dispatchThreadGroups(const Dimensions& threadgroupCount, + const Dimensions& threadgroupSize, + const Dependencies& dependencies = {}) override; + void bindPushConstants(const void* data, size_t length, size_t offset = 0) override; + void bindTexture(uint32_t index, ITexture* texture) override; + + /** + * @brief Bind a buffer to a compute shader slot + * + * IMPORTANT: For constant buffers (uniform buffers) in compute shaders, bindings MUST be DENSE + * starting from index 0 with NO GAPS. For example: + * - VALID: bindBuffer(0, ...), bindBuffer(1, ...), bindBuffer(2, ...) + * - INVALID: bindBuffer(0, ...), bindBuffer(2, ...) // gap at index 1 + * - INVALID: bindBuffer(1, ...), bindBuffer(2, ...) // index 0 not bound + * + * This constraint is enforced by D3D12ResourcesBinder and will return InvalidOperation if violated. + * See D3D12ResourcesBinder::updateBufferBindings for implementation details. + * + * @param index Buffer slot index (maps to HLSL register b0, b1, etc. 
for CBVs) + * @param buffer Buffer to bind + * @param offset Offset in bytes into the buffer + * @param bufferSize Size of the buffer region to bind + */ + void bindBuffer(uint32_t index, IBuffer* buffer, size_t offset = 0, size_t bufferSize = 0) override; + void bindUniform(const UniformDesc& uniformDesc, const void* data) override; + void bindBytes(uint32_t index, const void* data, size_t length) override; + void bindImageTexture(uint32_t index, ITexture* texture, TextureFormat format) override; + void bindSamplerState(uint32_t index, ISamplerState* samplerState) override; + + // Debug labels + void pushDebugGroupLabel(const char* label, const Color& color) const override; + void insertDebugEventLabel(const char* label, const Color& color) const override; + void popDebugGroupLabel() const override; + + private: + CommandBuffer& commandBuffer_; + const ComputePipelineState* currentPipeline_ = nullptr; + bool isEncoding_ = false; + + // Centralized resource binding management. + D3D12ResourcesBinder resourcesBinder_; + + // Cached GPU handles for resources + // IMPORTANT: Bindings must be DENSE and start at slot 0 for each table. + // SetComputeRootDescriptorTable always uses cached*Handles_[0] as the base, + // so binding only higher slots (e.g., slot 1 without slot 0) will fail. + static constexpr size_t kMaxComputeBuffers = 8; + // Increased from 8 to 16 to match IGL_TEXTURE_SAMPLERS_MAX contract. + static constexpr size_t kMaxComputeTextures = IGL_TEXTURE_SAMPLERS_MAX; // 16 + // Increased from 4 to 16 to match IGL_TEXTURE_SAMPLERS_MAX contract. 
+ static constexpr size_t kMaxComputeSamplers = IGL_TEXTURE_SAMPLERS_MAX; // 16 + + D3D12_GPU_DESCRIPTOR_HANDLE cachedUavHandles_[kMaxComputeBuffers] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSrvHandles_[kMaxComputeTextures] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSamplerHandles_[kMaxComputeSamplers] = {}; + D3D12_GPU_VIRTUAL_ADDRESS cachedCbvAddresses_[kMaxComputeBuffers] = {}; + // Track CBV sizes for descriptor creation. + size_t cachedCbvSizes_[kMaxComputeBuffers] = {}; + + size_t boundUavCount_ = 0; + size_t boundSrvCount_ = 0; + size_t boundCbvCount_ = 0; + size_t boundSamplerCount_ = 0; + + // Cache CBV descriptor indices to avoid per-dispatch allocation. + uint32_t cachedCbvBaseIndex_ = 0; + uint32_t cachedCbvPageIndex_ = UINT32_MAX; // Track heap page for invalidation + bool cbvBindingsDirty_ = true; // Track if CBV bindings have changed + + // Track UAV resources for precise synchronization barriers. + // Tracks UAV resources bound via bindBuffer (storage buffers) and bindImageTexture (RW textures). + ID3D12Resource* boundUavResources_[kMaxComputeBuffers] = {}; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ComputePipelineState.cpp b/src/igl/d3d12/ComputePipelineState.cpp new file mode 100644 index 0000000000..b23547c2c5 --- /dev/null +++ b/src/igl/d3d12/ComputePipelineState.cpp @@ -0,0 +1,173 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +ComputePipelineState::ComputePipelineState(const ComputePipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature) + : desc_(desc), + pipelineState_(std::move(pipelineState)), + rootSignature_(std::move(rootSignature)) { + // Set D3D12 object names for PIX debugging + const std::string& debugName = desc_.debugName; + if (pipelineState_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + pipelineState_->SetName((L"ComputePSO_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("ComputePipelineState: Set PIX debug name 'ComputePSO_%s'\n", debugName.c_str()); + } + if (rootSignature_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + rootSignature_->SetName((L"ComputeRootSig_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("ComputePipelineState: Set PIX root signature name 'ComputeRootSig_%s'\n", debugName.c_str()); + } +} + +std::shared_ptr +ComputePipelineState::computePipelineReflection() { + // Return cached reflection if already created + if (reflection_) { + return reflection_; + } + + // Reflection implementation following the pattern from RenderPipelineState + struct ReflectionImpl final : public IComputePipelineReflection { + std::vector ubs; + std::vector samplers; + std::vector textures; + const std::vector& allUniformBuffers() const override { return ubs; } + const std::vector& allSamplers() const override { return samplers; } + const std::vector& allTextures() const override { return textures; } + }; + + auto out = std::make_shared(); + + // Get compute shader module and reflect it + if (!desc_.shaderStages) { + return out; + } + + auto computeModule = desc_.shaderStages->getComputeModule(); + if (!computeModule) { + return out; + } + + auto* d3dMod = dynamic_cast(computeModule.get()); + if (!d3dMod) { + return out; + } + + const auto& bc = 
d3dMod->getBytecode(); + if (bc.empty()) { + return out; + } + + // Create shader reflection interface using D3DReflect + igl::d3d12::ComPtr refl; + if (FAILED(D3DReflect(bc.data(), bc.size(), IID_PPV_ARGS(refl.GetAddressOf())))) { + return out; + } + + D3D12_SHADER_DESC sd{}; + if (FAILED(refl->GetDesc(&sd))) { + return out; + } + + // Extract constant buffer information + for (UINT i = 0; i < sd.ConstantBuffers; ++i) { + auto* cb = refl->GetConstantBufferByIndex(i); + D3D12_SHADER_BUFFER_DESC cbd{}; + if (FAILED(cb->GetDesc(&cbd))) { + continue; + } + + // Find the bind point for this constant buffer + int bufferIndex = -1; + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (SUCCEEDED(refl->GetResourceBindingDesc(r, &bind))) { + if (bind.Type == D3D_SIT_CBUFFER && + std::string(bind.Name) == std::string(cbd.Name)) { + bufferIndex = static_cast(bind.BindPoint); + break; + } + } + } + + BufferArgDesc ub; + ub.name = igl::genNameHandle(cbd.Name ? cbd.Name : ""); + ub.bufferAlignment = 256; // D3D12 constant buffer alignment + ub.bufferDataSize = cbd.Size; + ub.bufferIndex = bufferIndex; + ub.shaderStage = ShaderStage::Compute; + ub.isUniformBlock = true; + + // Extract member variables from constant buffer + for (UINT v = 0; v < cbd.Variables; ++v) { + auto* var = cb->GetVariableByIndex(v); + D3D12_SHADER_VARIABLE_DESC vd{}; + if (FAILED(var->GetDesc(&vd))) { + continue; + } + + auto* t = var->GetType(); + if (!t) { + continue; + } + + D3D12_SHADER_TYPE_DESC td{}; + if (FAILED(t->GetDesc(&td))) { + continue; + } + + BufferArgDesc::BufferMemberDesc m; + m.name = igl::genNameHandle(vd.Name ? vd.Name : ""); + m.type = ReflectionUtils::mapUniformType(td); + m.offset = vd.StartOffset; + m.arrayLength = td.Elements ? 
td.Elements : 1; + ub.members.push_back(std::move(m)); + } + + out->ubs.push_back(std::move(ub)); + } + + // Extract texture and sampler bindings + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (FAILED(refl->GetResourceBindingDesc(r, &bind))) { + continue; + } + + if (bind.Type == D3D_SIT_TEXTURE) { + TextureArgDesc t; + t.name = bind.Name ? bind.Name : ""; + t.type = TextureType::TwoD; + t.textureIndex = bind.BindPoint; + t.shaderStage = ShaderStage::Compute; + out->textures.push_back(std::move(t)); + } else if (bind.Type == D3D_SIT_SAMPLER) { + SamplerArgDesc s; + s.name = bind.Name ? bind.Name : ""; + s.samplerIndex = bind.BindPoint; + s.shaderStage = ShaderStage::Compute; + out->samplers.push_back(std::move(s)); + } + } + + // Cache the reflection for future calls + reflection_ = out; + return reflection_; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ComputePipelineState.h b/src/igl/d3d12/ComputePipelineState.h new file mode 100644 index 0000000000..6cd8c6e0dd --- /dev/null +++ b/src/igl/d3d12/ComputePipelineState.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class ComputePipelineState final : public IComputePipelineState { + public: + ComputePipelineState(const ComputePipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature); + ~ComputePipelineState() override = default; + + std::shared_ptr computePipelineReflection() override; + + // D3D12-specific accessors + ID3D12PipelineState* getPipelineState() const { return pipelineState_.Get(); } + ID3D12RootSignature* getRootSignature() const { return rootSignature_.Get(); } + + private: + ComputePipelineDesc desc_; + igl::d3d12::ComPtr pipelineState_; + igl::d3d12::ComPtr rootSignature_; + std::shared_ptr reflection_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12AllocatorPool.cpp b/src/igl/d3d12/D3D12AllocatorPool.cpp new file mode 100644 index 0000000000..fcef51b7c5 --- /dev/null +++ b/src/igl/d3d12/D3D12AllocatorPool.cpp @@ -0,0 +1,217 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +void D3D12AllocatorPool::initialize(D3D12Context& ctx, IFenceProvider* fenceProvider) { + auto* device = ctx.getDevice(); + if (!device) { + IGL_LOG_ERROR("D3D12AllocatorPool::initialize: D3D12 device is null\n"); + return; + } + + HRESULT hr = device->CreateFence( + 0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(uploadFence_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::initialize: Failed to create upload fence: 0x%08X\n", + hr); + } else { + uploadFenceValue_ = 0; + IGL_D3D12_LOG_VERBOSE( + "D3D12AllocatorPool::initialize: Upload fence created successfully\n"); + } + + // Use upload ring buffer size from D3D12ContextConfig instead of hardcoding. 
+ const uint64_t uploadRingBufferSize = ctx.getConfig().uploadRingBufferSize; + uploadRingBuffer_ = + std::make_unique(device, uploadRingBufferSize); + + auto* commandQueue = ctx.getCommandQueue(); + if (commandQueue && uploadFence_.Get() && fenceProvider) { + immediateCommands_ = std::make_unique( + device, commandQueue, uploadFence_.Get(), fenceProvider); + stagingDevice_ = std::make_unique( + device, uploadFence_.Get(), uploadRingBuffer_.get()); + } +} + +void D3D12AllocatorPool::processCompletedUploads() { + if (!uploadFence_.Get()) { + return; + } + + const UINT64 completed = uploadFence_->GetCompletedValue(); + + { + std::lock_guard lock(pendingUploadsMutex_); + auto it = pendingUploads_.begin(); + while (it != pendingUploads_.end()) { + if (it->fenceValue <= completed) { + it = pendingUploads_.erase(it); + } else { + ++it; + } + } + } + + if (uploadRingBuffer_) { + uploadRingBuffer_->retire(completed); + } +} + +void D3D12AllocatorPool::trackUploadBuffer( + ComPtr buffer, + UINT64 fenceValue) { + if (!buffer.Get()) { + return; + } + + std::lock_guard lock(pendingUploadsMutex_); + pendingUploads_.push_back(PendingUpload{fenceValue, std::move(buffer)}); +} + +ComPtr D3D12AllocatorPool::getUploadCommandAllocator( + D3D12Context& ctx) { + if (!uploadFence_.Get()) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: Upload fence not " + "initialized\n"); + return nullptr; + } + + std::lock_guard lock(commandAllocatorPoolMutex_); + + const UINT64 completedValue = uploadFence_->GetCompletedValue(); + + for (size_t i = 0; i < commandAllocatorPool_.size(); ++i) { + auto& tracked = commandAllocatorPool_[i]; + + if (completedValue >= tracked.fenceValue) { + auto allocator = tracked.allocator; + + commandAllocatorPool_[i] = commandAllocatorPool_.back(); + commandAllocatorPool_.pop_back(); + + HRESULT hr = allocator->Reset(); + if (FAILED(hr)) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: " + "CommandAllocator::Reset failed: 
0x%08X\n", + hr); + return nullptr; + } + + totalAllocatorReuses_++; + return allocator; + } + } + + static constexpr size_t kMaxCommandAllocators = 256; + + if (totalCommandAllocatorsCreated_ >= kMaxCommandAllocators) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: Command allocator " + "pool exhausted\n"); + return nullptr; + } + + auto* device = ctx.getDevice(); + if (!device) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: D3D12 device is null\n"); + return nullptr; + } + + ComPtr newAllocator; + HRESULT hr = device->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(newAllocator.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR( + "D3D12AllocatorPool::getUploadCommandAllocator: " + "CreateCommandAllocator failed: 0x%08X\n", + hr); + return nullptr; + } + + totalCommandAllocatorsCreated_++; + return newAllocator; +} + +void D3D12AllocatorPool::returnUploadCommandAllocator( + ComPtr allocator, + UINT64 fenceValue) { + if (!allocator.Get()) { + return; + } + + std::lock_guard lock(commandAllocatorPoolMutex_); + + TrackedCommandAllocator tracked; + tracked.allocator = allocator; + tracked.fenceValue = fenceValue; + commandAllocatorPool_.push_back(tracked); + + if (commandAllocatorPool_.size() > peakPoolSize_) { + peakPoolSize_ = commandAllocatorPool_.size(); + } +} + +::igl::Result D3D12AllocatorPool::waitForUploadFence( + const Device& device, + UINT64 fenceValue) const { + if (!uploadFence_.Get()) { + return ::igl::Result( + ::igl::Result::Code::InvalidOperation, "Upload fence not initialized"); + } + + if (uploadFence_->GetCompletedValue() >= fenceValue) { + return ::igl::Result(); + } + + FenceWaiter waiter(uploadFence_.Get(), fenceValue); + ::igl::Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + ::igl::Result deviceStatus = device.checkDeviceRemoval(); + if (!deviceStatus.isOk()) { + return deviceStatus; + } + return waitResult; + } + + return Result(); +} + +void 
D3D12AllocatorPool::clearOnDeviceDestruction() { + { + std::lock_guard lock(commandAllocatorPoolMutex_); + commandAllocatorPool_.clear(); + totalCommandAllocatorsCreated_ = 0; + peakPoolSize_ = 0; + totalAllocatorReuses_ = 0; + } + { + std::lock_guard lock(pendingUploadsMutex_); + pendingUploads_.clear(); + } + + uploadRingBuffer_.reset(); + stagingDevice_.reset(); + immediateCommands_.reset(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12AllocatorPool.h b/src/igl/d3d12/D3D12AllocatorPool.h new file mode 100644 index 0000000000..61bca25c89 --- /dev/null +++ b/src/igl/d3d12/D3D12AllocatorPool.h @@ -0,0 +1,98 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +#include + +namespace igl { +class Result; +} // namespace igl + +namespace igl::d3d12 { + +class Device; +class D3D12Context; +class UploadRingBuffer; +class D3D12ImmediateCommands; +class D3D12StagingDevice; +class IFenceProvider; + +class D3D12AllocatorPool { + public: + D3D12AllocatorPool() = default; + + void initialize(D3D12Context& ctx, IFenceProvider* fenceProvider); + + void processCompletedUploads(); + void trackUploadBuffer(ComPtr buffer, UINT64 fenceValue); + + ComPtr getUploadCommandAllocator(D3D12Context& ctx); + void returnUploadCommandAllocator(ComPtr allocator, + UINT64 fenceValue); + + ID3D12Fence* getUploadFence() const { + return uploadFence_.Get(); + } + + UINT64 getNextUploadFenceValue() { + return ++uploadFenceValue_; + } + + UINT64 getLastUploadFenceValue() const { + return uploadFenceValue_; + } + + UploadRingBuffer* getUploadRingBuffer() const { + return uploadRingBuffer_.get(); + } + + D3D12ImmediateCommands* getImmediateCommands() const { + return immediateCommands_.get(); + } + + D3D12StagingDevice* getStagingDevice() const { + return stagingDevice_.get(); + } + + 
::igl::Result waitForUploadFence(const Device& device, UINT64 fenceValue) const; + + void clearOnDeviceDestruction(); + + private: + struct PendingUpload { + UINT64 fenceValue = 0; + ComPtr resource; + }; + + struct TrackedCommandAllocator { + ComPtr allocator; + UINT64 fenceValue = 0; + }; + + std::mutex pendingUploadsMutex_; + std::vector pendingUploads_; + + std::mutex commandAllocatorPoolMutex_; + std::vector commandAllocatorPool_; + size_t totalCommandAllocatorsCreated_ = 0; + size_t peakPoolSize_ = 0; + size_t totalAllocatorReuses_ = 0; + + ComPtr uploadFence_; + UINT64 uploadFenceValue_ = 0; + + std::unique_ptr uploadRingBuffer_; + std::unique_ptr immediateCommands_; + std::unique_ptr stagingDevice_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12Context.cpp b/src/igl/d3d12/D3D12Context.cpp new file mode 100644 index 0000000000..11875700b5 --- /dev/null +++ b/src/igl/d3d12/D3D12Context.cpp @@ -0,0 +1,1468 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include + +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +// Static member initialization +D3D12Context::ResourceStats D3D12Context::resourceStats_; +std::mutex D3D12Context::resourceStatsMutex_; + +// AdapterInfo helper methods +uint64_t D3D12Context::AdapterInfo::getDedicatedVideoMemoryMB() const { + return desc.DedicatedVideoMemory / (1024 * 1024); +} + +const char* D3D12Context::AdapterInfo::getVendorName() const { + switch (desc.VendorId) { + case 0x10DE: return "NVIDIA"; + case 0x1002: case 0x1022: return "AMD"; + case 0x8086: return "Intel"; + case 0x1414: return "Microsoft"; + default: return "Unknown"; + } +} + +// MemoryBudget helper methods +uint64_t D3D12Context::MemoryBudget::totalAvailableMemory() const { + return dedicatedVideoMemory + sharedSystemMemory; +} + +double D3D12Context::MemoryBudget::getUsagePercentage() const { + if (totalAvailableMemory() == 0) return 0.0; + return (static_cast(estimatedUsage) / totalAvailableMemory()) * 100.0; +} + +bool D3D12Context::MemoryBudget::isMemoryCritical() const { + return getUsagePercentage() > 90.0; +} + +bool D3D12Context::MemoryBudget::isMemoryLow() const { + return getUsagePercentage() > 70.0; +} + +// A-011: Helper function to probe highest supported feature level for an adapter +D3D_FEATURE_LEVEL D3D12Context::getHighestFeatureLevel(IDXGIAdapter1* adapter) { + const D3D_FEATURE_LEVEL featureLevels[] = { + D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + }; + + for (D3D_FEATURE_LEVEL fl : featureLevels) { + if (SUCCEEDED(D3D12CreateDevice(adapter, fl, _uuidof(ID3D12Device), nullptr))) { + return fl; + } + } + + return static_cast(0); // No supported feature level +} + +D3D12Context::~D3D12Context() { + // Wait for GPU to finish before cleanup + waitForGPU(); + + // Explicitly release 
all frame context resources to prevent leaks. + for (uint32_t i = 0; i < frameContexts_.size(); ++i) { + frameContexts_[i].transientBuffers.clear(); + frameContexts_[i].transientResources.clear(); + + // Explicitly reset heaps inside each page before clearing the vector. + for (auto& page : frameContexts_[i].cbvSrvUavHeapPages) { + page.heap.Reset(); + } + frameContexts_[i].cbvSrvUavHeapPages.clear(); + + frameContexts_[i].samplerHeap.Reset(); + frameContexts_[i].activeCbvSrvUavHeap.Reset(); + frameContexts_[i].allocator.Reset(); + } + + // Release render targets explicitly. + for (uint32_t i = 0; i < renderTargets_.size(); ++i) { + renderTargets_[i].Reset(); + } + + // Release command signatures. + drawIndirectSignature_.Reset(); + drawIndexedIndirectSignature_.Reset(); + + // Release core resources explicitly. + rtvHeap_.Reset(); + swapChain_.Reset(); + fence_.Reset(); + commandQueue_.Reset(); + + // Clean up descriptor heap manager's heaps before deleting it. + // Note: heapMgr_ may point to either ownedHeapMgr_ OR external heap manager + // (e.g., HeadlessContext owns it via unique_ptr). We cleanup the heaps regardless. + if (heapMgr_) { + heapMgr_->cleanup(); + } + + // Clean up owned descriptor heap manager (if we own it) + delete ownedHeapMgr_; + ownedHeapMgr_ = nullptr; + heapMgr_ = nullptr; + + // Release device last, after all dependent resources are freed. + device_.Reset(); + adapter_.Reset(); + dxgiFactory_.Reset(); + +#ifdef IGL_DEBUG + IGL_LOG_INFO("[D3D12Context] All resources released\n"); +#endif +} + +Result D3D12Context::initialize(HWND hwnd, uint32_t width, uint32_t height, + const D3D12ContextConfig& config) { + width_ = width; + height_ = height; + + // Store and validate configuration. + config_ = config; + config_.validate(); + + // Pre-allocate vectors to config size (T43). Will be verified/resized after swapchain creation. 
+ swapchainBufferCount_ = config_.maxFramesInFlight; + renderTargets_.resize(swapchainBufferCount_); + frameContexts_.resize(swapchainBufferCount_); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating D3D12 device...\n"); + Result deviceResult = createDevice(); + if (!deviceResult.isOk()) { + return deviceResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Device created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating command queue...\n"); + Result queueResult = createCommandQueue(); + if (!queueResult.isOk()) { + return queueResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Command queue created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating swapchain (%ux%u)...\n", width, height); + Result swapChainResult = createSwapChain(hwnd, width, height); + if (!swapChainResult.isOk()) { + return swapChainResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating RTV heap...\n"); + Result rtvResult = createRTVHeap(); + if (!rtvResult.isOk()) { + return rtvResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: RTV heap created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating back buffers...\n"); + Result backBufferResult = createBackBuffers(); + if (!backBufferResult.isOk()) { + return backBufferResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Back buffers created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating descriptor heaps...\n"); + Result descriptorHeapResult = createDescriptorHeaps(); + if (!descriptorHeapResult.isOk()) { + return descriptorHeapResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Descriptor heaps created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating command signatures...\n"); + Result commandSigResult = createCommandSignatures(); + if (!commandSigResult.isOk()) { + return commandSigResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Command signatures created successfully\n"); + + 
IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating fence for GPU synchronization...\n"); + HRESULT hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create fence (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create fence"); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Fence created successfully\n"); + + // Create per-frame command allocators using runtime buffer count (T43). + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating per-frame command allocators...\n"); + for (UINT i = 0; i < swapchainBufferCount_; i++) { + hr = device_->CreateCommandAllocator( + D3D12_COMMAND_LIST_TYPE_DIRECT, + IID_PPV_ARGS(frameContexts_[i].allocator.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create command allocator for frame %u (HRESULT: 0x%08X)\n", i, static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create command allocator for frame " + std::to_string(i)); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Created command allocator for frame %u\n", i); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Per-frame command allocators created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Initialization complete!\n"); + + return Result(); +} + +Result D3D12Context::resize(uint32_t width, uint32_t height) { + // Validate dimensions + if (width == 0 || height == 0) { + return Result{Result::Code::ArgumentInvalid, + "Invalid resize dimensions: width and height must be non-zero"}; + } + + if (width == width_ && height == height_) { + return Result(); + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Resizing swapchain from %ux%u to %ux%u\n", + width_, height_, width, height); + + width_ = width; + height_ = height; + + // Wait for all GPU work to complete before releasing backbuffers + // This prevents DXGI_ERROR_DEVICE_REMOVED when GPU is still 
rendering to old buffers + if (fence_.Get() && commandQueue_.Get()) { + const UINT64 currentFence = fenceValue_; + commandQueue_->Signal(fence_.Get(), currentFence); + + FenceWaiter waiter(fence_.Get(), currentFence); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("D3D12Context::resize() - Fence wait failed: %s\n", + waitResult.message.c_str()); + // Continue with resize despite error - old buffers will be released anyway + } + } + + // Release old back buffers (T43: use runtime buffer count) + for (UINT i = 0; i < swapchainBufferCount_; i++) { + renderTargets_[i].Reset(); + } + + // Store swapchain format and flags for potential recreation + DXGI_SWAP_CHAIN_DESC1 currentDesc = {}; + if (swapChain_.Get()) { + swapChain_->GetDesc1(¤tDesc); + } + + // Try to resize existing swapchain (T43: use runtime buffer count) + HRESULT hr = swapChain_->ResizeBuffers( + swapchainBufferCount_, + width, + height, + currentDesc.Format ? currentDesc.Format : DXGI_FORMAT_B8G8R8A8_UNORM, + currentDesc.Flags); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: ResizeBuffers failed (HRESULT=0x%08X), attempting to recreate swapchain\n", + static_cast(hr)); + + // Graceful fallback: Recreate swapchain from scratch + Result result = recreateSwapChain(width, height); + if (!result.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to recreate swapchain: %s\n", result.message.c_str()); + return Result{Result::Code::RuntimeError, + "Failed to resize or recreate swapchain"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain recreated successfully\n"); + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: ResizeBuffers succeeded\n"); + } + + // Recreate back buffer views + Result backBufferResult = createBackBuffers(); + if (!backBufferResult.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to recreate back buffers: %s\n", backBufferResult.message.c_str()); + return backBufferResult; + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain resize complete\n"); + + 
return Result(); +} + +Result D3D12Context::recreateSwapChain(uint32_t width, uint32_t height) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Recreating swapchain with dimensions %ux%u\n", width, height); + + // Get window handle from existing swapchain before releasing it + DXGI_SWAP_CHAIN_DESC1 oldDesc = {}; + if (!swapChain_.Get()) { + return Result{Result::Code::RuntimeError, "No existing swapchain to recreate"}; + } + + HRESULT hr = swapChain_->GetDesc1(&oldDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to get swapchain description (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, "Failed to get swapchain description"}; + } + + // Try to get HWND via GetHwnd (IDXGISwapChain3) + HWND hwnd = nullptr; + hr = swapChain_->GetHwnd(&hwnd); + if (FAILED(hr) || !hwnd) { + IGL_LOG_ERROR("D3D12Context: Failed to get HWND from swapchain (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, "Failed to get HWND from swapchain"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Retrieved HWND=%p from existing swapchain\n", hwnd); + + // Release old swapchain completely + swapChain_.Reset(); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Old swapchain released\n"); + + // Create new swapchain with updated dimensions + DXGI_SWAP_CHAIN_DESC1 newDesc = {}; + newDesc.Width = width; + newDesc.Height = height; + newDesc.Format = oldDesc.Format ? 
oldDesc.Format : DXGI_FORMAT_B8G8R8A8_UNORM; + newDesc.Stereo = FALSE; + newDesc.SampleDesc.Count = 1; + newDesc.SampleDesc.Quality = 0; + newDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + newDesc.BufferCount = swapchainBufferCount_; // T43: use runtime buffer count + newDesc.Scaling = DXGI_SCALING_STRETCH; + newDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + newDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; + newDesc.Flags = oldDesc.Flags; // Preserve tearing support flag + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating new swapchain (format=%u, flags=0x%X)\n", + newDesc.Format, newDesc.Flags); + + igl::d3d12::ComPtr swapChain1; + hr = dxgiFactory_->CreateSwapChainForHwnd( + commandQueue_.Get(), + hwnd, + &newDesc, + nullptr, + nullptr, + swapChain1.GetAddressOf()); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: CreateSwapChainForHwnd failed (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, + "Failed to recreate swapchain with CreateSwapChainForHwnd"}; + } + + // Query IDXGISwapChain3 interface + hr = swapChain1->QueryInterface(IID_PPV_ARGS(swapChain_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to query IDXGISwapChain3 (HRESULT=0x%08X)\n", + static_cast(hr)); + return Result{Result::Code::RuntimeError, + "Failed to query IDXGISwapChain3 interface"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain recreated successfully\n"); + return Result{}; +} + +Result D3D12Context::createDevice() { + // DO NOT enable experimental features in windowed mode - it breaks swapchain creation! 
+ // Experimental features are ONLY enabled in HeadlessD3D12Context for unit tests + // Windowed render sessions use signed DXIL (via IDxcValidator) which doesn't need experimental mode + + // A-007: Read debug configuration from environment variables + // Helper function to read boolean env var (returns defaultValue if not set) + auto getEnvBool = [](const char* name, bool defaultValue) -> bool { + const char* value = std::getenv(name); + if (!value) return defaultValue; + return (std::string(value) == "1") || (std::string(value) == "true"); + }; + + // A-007: Debug configuration from environment variables. + // Defaults are tuned for aggressive validation in debug builds so that + // issues like PSO creation failures and binding mismatches are surfaced + // without requiring the user to set environment variables manually. + bool enableDebugLayer = getEnvBool("IGL_D3D12_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableGPUValidation = getEnvBool("IGL_D3D12_GPU_VALIDATION", +#ifdef _DEBUG + true // Default ON in debug builds for better diagnostics +#else + false // Default OFF in release builds +#endif + ); + bool enableDRED = getEnvBool("IGL_D3D12_DRED", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableDXGIDebug = getEnvBool("IGL_DXGI_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool breakOnError = getEnvBool("IGL_D3D12_BREAK_ON_ERROR", +#ifdef _DEBUG + true // Default BREAK on error in debug builds +#else + false // Default LOG only in release builds +#endif + ); + bool breakOnWarning = getEnvBool("IGL_D3D12_BREAK_ON_WARNING", +#ifdef _DEBUG + false // Default LOG warnings in debug builds (can be overridden) +#else + false +#endif + ); + + IGL_D3D12_LOG_VERBOSE("=== D3D12 Debug Configuration ===\n"); + IGL_D3D12_LOG_VERBOSE(" 
Debug Layer: %s\n", enableDebugLayer ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" GPU Validation: %s\n", enableGPUValidation ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DRED: %s\n", enableDRED ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DXGI Debug: %s\n", enableDXGIDebug ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" Break on Error: %s\n", breakOnError ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" Break on Warning: %s\n", breakOnWarning ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE("=================================\n"); + + // Initialize DXGI factory flags + UINT dxgiFactoryFlags = 0; + + // A-007: Enable debug layer if configured + if (enableDebugLayer) { + igl::d3d12::ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(debugController.GetAddressOf())))) { + debugController->EnableDebugLayer(); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Debug layer ENABLED\n"); + + // Enable DXGI debug layer if configured + if (enableDXGIDebug) { + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + IGL_D3D12_LOG_VERBOSE("D3D12Context: DXGI debug layer ENABLED\n"); + } + + // A-007: Enable GPU-Based Validation if configured + // WARNING: This significantly impacts performance (10-100x slower) + if (enableGPUValidation) { + igl::d3d12::ComPtr debugController1; + if (SUCCEEDED(debugController->QueryInterface(IID_PPV_ARGS(debugController1.GetAddressOf())))) { + debugController1->SetEnableGPUBasedValidation(TRUE); + IGL_D3D12_LOG_VERBOSE("D3D12Context: GPU-Based Validation ENABLED (may slow down rendering 10-100x)\n"); + } else { + IGL_LOG_ERROR("D3D12Context: Failed to enable GPU-Based Validation (requires ID3D12Debug1)\n"); + } + } + } else { + IGL_LOG_ERROR("D3D12Context: Failed to get D3D12 debug interface - Graphics Tools may not be installed\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Debug layer DISABLED\n"); + } + + // A-007: Enable DRED if configured (Device Removed Extended Data for better crash 
diagnostics) + if (enableDRED) { + igl::d3d12::ComPtr dredSettings1; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(dredSettings1.GetAddressOf())))) { + dredSettings1->SetAutoBreadcrumbsEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetPageFaultEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetBreadcrumbContextEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + IGL_D3D12_LOG_VERBOSE("D3D12Context: DRED 1.2 fully configured (breadcrumbs + page faults + context)\n"); + } else { + IGL_LOG_ERROR("D3D12Context: Failed to configure DRED (requires Windows 10 19041+)\n"); + } + } + + // Create DXGI factory with debug flag in debug builds. + HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(dxgiFactory_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create DXGI factory (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create DXGI factory"); + } + + // A-011: Enumerate and select best adapter + Result enumResult = enumerateAndSelectAdapter(); + if (!enumResult.isOk()) { + return enumResult; + } + + // A-012: Detect memory budget + detectMemoryBudget(); + + // Create D3D12 device on selected adapter + hr = D3D12CreateDevice( + adapter_.Get(), + selectedFeatureLevel_, + IID_PPV_ARGS(device_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12CreateDevice failed on selected adapter: 0x%08X\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create D3D12 device on selected adapter"); + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Device created with Feature Level %s\n", + featureLevelToString(selectedFeatureLevel_)); + + // A-007: Setup info queue with configurable break-on-severity settings + if (enableDebugLayer) { + igl::d3d12::ComPtr infoQueue; + if (SUCCEEDED(device_->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + // A-007: Configure break-on-severity based 
on environment variables + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, TRUE); // Always break on corruption + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, breakOnError ? TRUE : FALSE); + infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, breakOnWarning ? TRUE : FALSE); + + // Filter out INFO messages and a small set of known performance-only + // clear warnings that are expected in this backend (no functional + // impact). Do NOT filter invalid-shader-bytecode or signature messages so + // that pipeline creation problems surface clearly during debugging. + D3D12_MESSAGE_SEVERITY severities[] = { + D3D12_MESSAGE_SEVERITY_INFO + }; + + // Filter out only clear-value performance hints (IDs 820/821) and the + // known PS float-to-uint RT bitcast warning (677). All other message IDs + // (including invalid shader bytecode or unparseable signatures) are kept. + D3D12_MESSAGE_ID denyIds[] = { + static_cast(820), // ClearRenderTargetView w/o optimized clear value + static_cast(821), // ClearDepthStencilView clear value mismatch + static_cast(677) // PS float output to UINT RT (bitcast) + }; + + D3D12_INFO_QUEUE_FILTER filter = {}; + filter.DenyList.NumSeverities = 1; + filter.DenyList.pSeverityList = severities; + filter.DenyList.NumIDs = static_cast(std::size(denyIds)); + filter.DenyList.pIDList = denyIds; + infoQueue->PushStorageFilter(&filter); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Info queue configured (Corruption=BREAK, Error=%s, Warning=%s)\n", + breakOnError ? "BREAK" : "LOG", breakOnWarning ? "BREAK" : "LOG"); + } + } + + // Query root signature capabilities. + // This is critical for Tier-1 devices which don't support unbounded descriptor ranges. 
+ IGL_D3D12_LOG_VERBOSE("D3D12Context: Querying root signature capabilities...\n"); + + // Query highest supported root signature version + D3D12_FEATURE_DATA_ROOT_SIGNATURE featureDataRootSig = {}; + featureDataRootSig.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; + + hr = device_->CheckFeatureSupport( + D3D12_FEATURE_ROOT_SIGNATURE, + &featureDataRootSig, + sizeof(featureDataRootSig)); + + if (SUCCEEDED(hr)) { + highestRootSignatureVersion_ = featureDataRootSig.HighestVersion; + IGL_D3D12_LOG_VERBOSE(" Highest Root Signature Version: %s\n", + highestRootSignatureVersion_ == D3D_ROOT_SIGNATURE_VERSION_1_1 ? "1.1" : "1.0"); + } else { + // If query fails, assume v1.0 (most conservative) + highestRootSignatureVersion_ = D3D_ROOT_SIGNATURE_VERSION_1_0; + IGL_D3D12_LOG_VERBOSE(" Root Signature query failed (assuming v1.0)\n"); + } + + // Query resource binding tier + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + hr = device_->CheckFeatureSupport( + D3D12_FEATURE_D3D12_OPTIONS, + &options, + sizeof(options)); + + if (SUCCEEDED(hr)) { + resourceBindingTier_ = options.ResourceBindingTier; + const char* tierName = "Unknown"; + switch (resourceBindingTier_) { + case D3D12_RESOURCE_BINDING_TIER_1: tierName = "Tier 1 (bounded descriptors required)"; break; + case D3D12_RESOURCE_BINDING_TIER_2: tierName = "Tier 2 (unbounded arrays except samplers)"; break; + case D3D12_RESOURCE_BINDING_TIER_3: tierName = "Tier 3 (fully unbounded)"; break; + } + IGL_D3D12_LOG_VERBOSE(" Resource Binding Tier: %s\n", tierName); + } else { + // If query fails, assume Tier 1 (most conservative) + resourceBindingTier_ = D3D12_RESOURCE_BINDING_TIER_1; + IGL_D3D12_LOG_VERBOSE(" Resource Binding Tier query failed (assuming Tier 1)\n"); + } + + // Query shader model support with progressive fallback (A-005) + // This is critical for FL11 hardware which only supports SM 5.1, not SM 6.0+ + IGL_D3D12_LOG_VERBOSE("D3D12Context: Querying shader model capabilities for Feature Level %d.%d...\n", + 
(selectedFeatureLevel_ >> 12) & 0xF, (selectedFeatureLevel_ >> 8) & 0xF); + + // Helper to map feature level to expected minimum shader model + auto getMinShaderModelForFeatureLevel = [](D3D_FEATURE_LEVEL fl) -> D3D_SHADER_MODEL { + switch (fl) { + case D3D_FEATURE_LEVEL_12_2: + return D3D_SHADER_MODEL_6_6; // FL 12.2 supports SM 6.6+ + case D3D_FEATURE_LEVEL_12_1: + return D3D_SHADER_MODEL_6_1; // FL 12.1 supports SM 6.1 (mesh shaders) + case D3D_FEATURE_LEVEL_12_0: + return D3D_SHADER_MODEL_6_0; // FL 12.0 supports SM 6.0 (wave operations) + case D3D_FEATURE_LEVEL_11_1: + case D3D_FEATURE_LEVEL_11_0: + return D3D_SHADER_MODEL_5_1; // FL 11.x only supports SM 5.1 + default: + return D3D_SHADER_MODEL_5_1; // Conservative fallback + } + }; + + auto shaderModelToString = [](D3D_SHADER_MODEL sm) -> const char* { + switch (sm) { + case D3D_SHADER_MODEL_6_6: return "6.6"; + case D3D_SHADER_MODEL_6_5: return "6.5"; + case D3D_SHADER_MODEL_6_4: return "6.4"; + case D3D_SHADER_MODEL_6_3: return "6.3"; + case D3D_SHADER_MODEL_6_2: return "6.2"; + case D3D_SHADER_MODEL_6_1: return "6.1"; + case D3D_SHADER_MODEL_6_0: return "6.0"; + case D3D_SHADER_MODEL_5_1: return "5.1"; + default: return "Unknown"; + } + }; + + // Shader models to attempt, from highest to lowest + const D3D_SHADER_MODEL shaderModels[] = { + D3D_SHADER_MODEL_6_6, + D3D_SHADER_MODEL_6_5, + D3D_SHADER_MODEL_6_4, + D3D_SHADER_MODEL_6_3, + D3D_SHADER_MODEL_6_2, + D3D_SHADER_MODEL_6_1, + D3D_SHADER_MODEL_6_0, + D3D_SHADER_MODEL_5_1, + }; + + D3D_SHADER_MODEL detectedShaderModel = D3D_SHADER_MODEL_5_1; + bool shaderModelDetected = false; + + // Try each shader model from highest to lowest + for (D3D_SHADER_MODEL sm : shaderModels) { + D3D12_FEATURE_DATA_SHADER_MODEL shaderModelData = { sm }; + hr = device_->CheckFeatureSupport( + D3D12_FEATURE_SHADER_MODEL, + &shaderModelData, + sizeof(shaderModelData)); + + if (SUCCEEDED(hr)) { + detectedShaderModel = shaderModelData.HighestShaderModel; + shaderModelDetected = 
true; + IGL_D3D12_LOG_VERBOSE(" Detected Shader Model: %s\n", shaderModelToString(detectedShaderModel)); + break; // Found highest supported, stop trying + } else { + IGL_D3D12_LOG_VERBOSE(" Shader Model %s not supported, trying lower version\n", + shaderModelToString(sm)); + } + } + + if (!shaderModelDetected) { + // Fallback based on feature level + D3D_SHADER_MODEL minimumSM = getMinShaderModelForFeatureLevel(selectedFeatureLevel_); + IGL_D3D12_LOG_VERBOSE(" WARNING: Shader model detection failed, using minimum for Feature Level: %s\n", + shaderModelToString(minimumSM)); + detectedShaderModel = minimumSM; + } + + // Validate shader model is appropriate for feature level + D3D_SHADER_MODEL minimumRequired = getMinShaderModelForFeatureLevel(selectedFeatureLevel_); + if (detectedShaderModel < minimumRequired) { + IGL_D3D12_LOG_VERBOSE(" WARNING: Detected Shader Model %s is below minimum for Feature Level: %s\n", + shaderModelToString(detectedShaderModel), + shaderModelToString(minimumRequired)); + } + + maxShaderModel_ = detectedShaderModel; + IGL_D3D12_LOG_VERBOSE("D3D12Context: Final Shader Model selected: %s\n", shaderModelToString(maxShaderModel_)); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Root signature capabilities detected successfully\n"); + + return Result(); +} + +// A-011: Enumerate and select best adapter +Result D3D12Context::enumerateAndSelectAdapter() { + enumeratedAdapters_.clear(); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Enumerating DXGI adapters...\n"); + + // Try IDXGIFactory6 first for high-performance GPU preference + igl::d3d12::ComPtr factory6; + (void)dxgiFactory_->QueryInterface(IID_PPV_ARGS(factory6.GetAddressOf())); + + if (factory6.Get()) { + for (UINT i = 0; ; ++i) { + igl::d3d12::ComPtr adapter; + if (FAILED(factory6->EnumAdapterByGpuPreference(i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + IID_PPV_ARGS(adapter.GetAddressOf())))) { + break; + } + + AdapterInfo info{}; + info.adapter = adapter; + info.index = i; + info.isWarp = false; + + 
adapter->GetDesc1(&info.desc); + + // Skip software adapters in main enumeration (we'll add WARP separately) + if (info.desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + // Determine feature level + info.featureLevel = getHighestFeatureLevel(adapter.Get()); + if (info.featureLevel == static_cast(0)) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Adapter %u does not support D3D12 (skipping)\n", i); + continue; + } + + enumeratedAdapters_.push_back(info); + + // Log adapter details + IGL_D3D12_LOG_VERBOSE("D3D12Context: Adapter %u:\n", i); + IGL_D3D12_LOG_VERBOSE(" Description: %ls\n", info.desc.Description); + IGL_D3D12_LOG_VERBOSE(" Vendor ID: 0x%04X (%s)\n", info.desc.VendorId, info.getVendorName()); + IGL_D3D12_LOG_VERBOSE(" Device ID: 0x%04X\n", info.desc.DeviceId); + IGL_D3D12_LOG_VERBOSE(" Dedicated VRAM: %llu MB\n", info.getDedicatedVideoMemoryMB()); + IGL_D3D12_LOG_VERBOSE(" Shared System Memory: %llu MB\n", info.desc.SharedSystemMemory / (1024 * 1024)); + IGL_D3D12_LOG_VERBOSE(" Feature Level: %s\n", featureLevelToString(info.featureLevel)); + IGL_D3D12_LOG_VERBOSE(" LUID: 0x%08X:0x%08X\n", info.desc.AdapterLuid.HighPart, info.desc.AdapterLuid.LowPart); + } + } + + // Fallback enumeration if Factory6 not available + if (enumeratedAdapters_.empty()) { + for (UINT i = 0; ; ++i) { + igl::d3d12::ComPtr adapter; + if (dxgiFactory_->EnumAdapters1(i, adapter.GetAddressOf()) == DXGI_ERROR_NOT_FOUND) { + break; + } + + AdapterInfo info{}; + info.adapter = adapter; + info.index = i; + info.isWarp = false; + + adapter->GetDesc1(&info.desc); + + // Skip software adapters + if (info.desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + // Determine feature level + info.featureLevel = getHighestFeatureLevel(adapter.Get()); + if (info.featureLevel == static_cast(0)) { + continue; + } + + enumeratedAdapters_.push_back(info); + + // Log adapter details + IGL_D3D12_LOG_VERBOSE("D3D12Context: Adapter %u:\n", i); + IGL_D3D12_LOG_VERBOSE(" Description: %ls\n", 
info.desc.Description); + IGL_D3D12_LOG_VERBOSE(" Vendor ID: 0x%04X (%s)\n", info.desc.VendorId, info.getVendorName()); + IGL_D3D12_LOG_VERBOSE(" Device ID: 0x%04X\n", info.desc.DeviceId); + IGL_D3D12_LOG_VERBOSE(" Dedicated VRAM: %llu MB\n", info.getDedicatedVideoMemoryMB()); + IGL_D3D12_LOG_VERBOSE(" Shared System Memory: %llu MB\n", info.desc.SharedSystemMemory / (1024 * 1024)); + IGL_D3D12_LOG_VERBOSE(" Feature Level: %s\n", featureLevelToString(info.featureLevel)); + } + } + + // Add WARP adapter as fallback option (software rasterizer) + igl::d3d12::ComPtr warpAdapter; + if (SUCCEEDED(dxgiFactory_->EnumWarpAdapter(IID_PPV_ARGS(warpAdapter.GetAddressOf())))) { + igl::d3d12::ComPtr warpAdapter1; + if (SUCCEEDED(warpAdapter->QueryInterface(IID_PPV_ARGS(warpAdapter1.GetAddressOf())))) { + AdapterInfo warpInfo{}; + warpInfo.adapter = warpAdapter1; + warpInfo.index = static_cast(enumeratedAdapters_.size()); + warpInfo.isWarp = true; + + warpAdapter1->GetDesc1(&warpInfo.desc); + warpInfo.featureLevel = getHighestFeatureLevel(warpAdapter1.Get()); + + enumeratedAdapters_.push_back(warpInfo); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: WARP Adapter (Software):\n"); + IGL_D3D12_LOG_VERBOSE(" Description: %ls\n", warpInfo.desc.Description); + IGL_D3D12_LOG_VERBOSE(" Feature Level: %s\n", featureLevelToString(warpInfo.featureLevel)); + } + } + + if (enumeratedAdapters_.empty()) { + IGL_LOG_ERROR("D3D12Context: No compatible D3D12 adapters found!\n"); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "No D3D12-compatible adapters available"); + } + + // Select adapter based on environment variable or heuristic + selectedAdapterIndex_ = 0; // Default to first adapter (discrete GPU on laptops) + + char adapterEnv[64] = {}; + DWORD envResult = GetEnvironmentVariableA("IGL_D3D12_ADAPTER", adapterEnv, sizeof(adapterEnv)); + if (envResult > 0 && envResult < sizeof(adapterEnv)) { + if (strcmp(adapterEnv, "WARP") == 0) { + // Find WARP adapter + for (size_t i = 
0; i < enumeratedAdapters_.size(); ++i) { + if (enumeratedAdapters_[i].isWarp) { + selectedAdapterIndex_ = static_cast(i); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Environment override - using WARP adapter\n"); + break; + } + } + } else { + // Parse adapter index + int requestedIndex = atoi(adapterEnv); + if (requestedIndex >= 0 && requestedIndex < static_cast(enumeratedAdapters_.size())) { + selectedAdapterIndex_ = static_cast(requestedIndex); + IGL_D3D12_LOG_VERBOSE("D3D12Context: Environment override - using adapter %d\n", requestedIndex); + } else { + IGL_LOG_ERROR("D3D12Context: Invalid adapter index %d (available: 0-%zu)\n", + requestedIndex, enumeratedAdapters_.size() - 1); + } + } + } else { + // Heuristic: Choose adapter with highest feature level and most VRAM + D3D_FEATURE_LEVEL highestFL = enumeratedAdapters_[0].featureLevel; + uint64_t largestVRAM = enumeratedAdapters_[0].getDedicatedVideoMemoryMB(); + + for (size_t i = 1; i < enumeratedAdapters_.size(); ++i) { + if (enumeratedAdapters_[i].isWarp) { + continue; // Skip WARP for automatic selection + } + + uint64_t vram = enumeratedAdapters_[i].getDedicatedVideoMemoryMB(); + D3D_FEATURE_LEVEL fl = enumeratedAdapters_[i].featureLevel; + + // Prefer higher feature level, or same feature level with more VRAM + if (fl > highestFL || (fl == highestFL && vram > largestVRAM)) { + selectedAdapterIndex_ = static_cast(i); + highestFL = fl; + largestVRAM = vram; + } + } + } + + adapter_ = enumeratedAdapters_[selectedAdapterIndex_].adapter; + selectedFeatureLevel_ = enumeratedAdapters_[selectedAdapterIndex_].featureLevel; + + // T44: Concise single-line adapter log at INFO level (matches Vulkan/Metal minimalism) + const auto& selected = enumeratedAdapters_[selectedAdapterIndex_]; + IGL_LOG_INFO("D3D12 Adapter: %ls (FL %s, %llu MB VRAM)\n", + selected.desc.Description, + featureLevelToString(selectedFeatureLevel_), + selected.getDedicatedVideoMemoryMB()); + + // Verbose: Detailed adapter info (vendor, device ID, LUID, 
etc.) + IGL_D3D12_LOG_VERBOSE("D3D12Context: Selected adapter %u: %ls (FL %s)\n", + selectedAdapterIndex_, + selected.desc.Description, + featureLevelToString(selectedFeatureLevel_)); + + return Result(); +} + +// A-012: Detect memory budget from selected adapter +void D3D12Context::detectMemoryBudget() { + if (selectedAdapterIndex_ >= enumeratedAdapters_.size()) { + IGL_LOG_ERROR("D3D12Context: No adapter selected for memory budget detection\n"); + return; + } + + const auto& selectedAdapter = enumeratedAdapters_[selectedAdapterIndex_]; + + memoryBudget_.dedicatedVideoMemory = selectedAdapter.desc.DedicatedVideoMemory; + memoryBudget_.sharedSystemMemory = selectedAdapter.desc.SharedSystemMemory; + + IGL_D3D12_LOG_VERBOSE("D3D12Context: GPU Memory Budget:\n"); + IGL_D3D12_LOG_VERBOSE(" Dedicated Video Memory: %.2f MB\n", + memoryBudget_.dedicatedVideoMemory / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE(" Shared System Memory: %.2f MB\n", + memoryBudget_.sharedSystemMemory / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE(" Total Available: %.2f MB\n", + memoryBudget_.totalAvailableMemory() / (1024.0 * 1024.0)); + + // Recommend conservative budget (80% of available) + uint64_t recommendedBudget = static_cast(memoryBudget_.totalAvailableMemory() * 0.8); + IGL_D3D12_LOG_VERBOSE(" Recommended Budget (80%%): %.2f MB\n", + recommendedBudget / (1024.0 * 1024.0)); +} + +// A-010: Detect HDR output capabilities +void D3D12Context::detectHDRCapabilities() { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Detecting HDR output capabilities...\n"); + + // Reset to defaults + hdrCapabilities_ = HDRCapabilities{}; + + // Need a valid swapchain to query output + if (!swapChain_.Get()) { + IGL_D3D12_LOG_VERBOSE(" No swapchain available, HDR detection skipped\n"); + return; + } + + // Get the output (monitor) containing the swapchain + igl::d3d12::ComPtr output; + HRESULT hr = swapChain_->GetContainingOutput(output.GetAddressOf()); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE(" Failed to get 
containing output (0x%08X), HDR not available\n", static_cast(hr)); + return; + } + + // Query for IDXGIOutput6 (required for HDR queries) + igl::d3d12::ComPtr output6; + hr = output->QueryInterface(IID_PPV_ARGS(output6.GetAddressOf())); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE(" IDXGIOutput6 not available (needs Windows 10 1703+), HDR not supported\n"); + return; + } + + // Get output description with color space info + DXGI_OUTPUT_DESC1 outputDesc = {}; + hr = output6->GetDesc1(&outputDesc); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE(" Failed to get output description (0x%08X)\n", static_cast(hr)); + return; + } + + // Store native color space + hdrCapabilities_.nativeColorSpace = outputDesc.ColorSpace; + + // Store luminance information + hdrCapabilities_.maxLuminance = outputDesc.MaxLuminance; + hdrCapabilities_.minLuminance = outputDesc.MinLuminance; + hdrCapabilities_.maxFullFrameLuminance = outputDesc.MaxFullFrameLuminance; + + IGL_D3D12_LOG_VERBOSE(" Native Color Space: %u\n", outputDesc.ColorSpace); + IGL_D3D12_LOG_VERBOSE(" Max Luminance: %.2f nits\n", outputDesc.MaxLuminance); + IGL_D3D12_LOG_VERBOSE(" Min Luminance: %.4f nits\n", outputDesc.MinLuminance); + IGL_D3D12_LOG_VERBOSE(" Max Full Frame Luminance: %.2f nits\n", outputDesc.MaxFullFrameLuminance); + + // Check for HDR10 support (BT.2020 ST2084 - PQ curve) via swapchain + UINT colorSpaceSupport = 0; + hr = swapChain_->CheckColorSpaceSupport(DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020, &colorSpaceSupport); + if (SUCCEEDED(hr) && (colorSpaceSupport & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT)) { + hdrCapabilities_.hdrSupported = true; + IGL_D3D12_LOG_VERBOSE(" HDR10 (BT.2020 PQ): SUPPORTED\n"); + } else { + IGL_D3D12_LOG_VERBOSE(" HDR10 (BT.2020 PQ): NOT SUPPORTED\n"); + } + + // Check for scRGB support (linear floating-point HDR) + hr = swapChain_->CheckColorSpaceSupport(DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709, &colorSpaceSupport); + if (SUCCEEDED(hr) && (colorSpaceSupport & 
DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT)) { + hdrCapabilities_.scRGBSupported = true; + IGL_D3D12_LOG_VERBOSE(" scRGB (Linear FP16): SUPPORTED\n"); + } else { + IGL_D3D12_LOG_VERBOSE(" scRGB (Linear FP16): NOT SUPPORTED\n"); + } + + // Summary + if (hdrCapabilities_.hdrSupported || hdrCapabilities_.scRGBSupported) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: HDR output AVAILABLE (max %.0f nits)\n", outputDesc.MaxLuminance); + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: HDR output NOT AVAILABLE (SDR display)\n"); + } +} + +Result D3D12Context::createCommandQueue() { + D3D12_COMMAND_QUEUE_DESC queueDesc = {}; + queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT; + queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE; + + HRESULT hr = device_->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(commandQueue_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create command queue (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create command queue"); + } + + return Result(); +} + +Result D3D12Context::createSwapChain(HWND hwnd, uint32_t width, uint32_t height) { + DXGI_SWAP_CHAIN_DESC1 swapChainDesc = {}; + swapChainDesc.Width = width; + swapChainDesc.Height = height; + // Use BGRA_UNORM (non-sRGB) for maximum compatibility with all display adapters + // Vulkan baselines use BGRA channel ordering for swapchain and MRT targets + swapChainDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + swapChainDesc.Stereo = FALSE; + swapChainDesc.SampleDesc.Count = 1; + swapChainDesc.SampleDesc.Quality = 0; + swapChainDesc.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + swapChainDesc.BufferCount = config_.maxFramesInFlight; // T43: use configured buffer count + swapChainDesc.Scaling = DXGI_SCALING_STRETCH; + swapChainDesc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + swapChainDesc.AlphaMode = DXGI_ALPHA_MODE_UNSPECIFIED; + + // Query tearing support capability (required for variable refresh rate 
displays) + // This capability must be queried before creating the swapchain + BOOL allowTearing = FALSE; + igl::d3d12::ComPtr factory5; + if (SUCCEEDED(dxgiFactory_.Get()->QueryInterface(IID_PPV_ARGS(factory5.GetAddressOf())))) { + if (SUCCEEDED(factory5->CheckFeatureSupport(DXGI_FEATURE_PRESENT_ALLOW_TEARING, + &allowTearing, + sizeof(allowTearing)))) { + tearingSupported_ = (allowTearing == TRUE); + if (tearingSupported_) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Tearing support available (variable refresh rate)\n"); + } + } + } + + // Set swapchain tearing flag if supported (required to use DXGI_PRESENT_ALLOW_TEARING) + // Without this flag, using DXGI_PRESENT_ALLOW_TEARING in Present() is invalid + swapChainDesc.Flags = tearingSupported_ ? DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING : 0; + + igl::d3d12::ComPtr tempSwapChain; + HRESULT hr = dxgiFactory_->CreateSwapChainForHwnd( + commandQueue_.Get(), + hwnd, + &swapChainDesc, + nullptr, + nullptr, + tempSwapChain.GetAddressOf() + ); + + if (FAILED(hr)) { + IGL_LOG_ERROR("CreateSwapChainForHwnd failed: 0x%08X, trying legacy CreateSwapChain\n", (unsigned)hr); + // Fallback: legacy CreateSwapChain + DXGI_SWAP_CHAIN_DESC legacy = {}; + legacy.BufferDesc.Width = width; + legacy.BufferDesc.Height = height; + legacy.BufferDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM; + legacy.BufferDesc.RefreshRate.Numerator = 60; + legacy.BufferDesc.RefreshRate.Denominator = 1; + legacy.SampleDesc.Count = 1; + legacy.SampleDesc.Quality = 0; + legacy.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT; + legacy.BufferCount = config_.maxFramesInFlight; // T43: use configured buffer count + legacy.OutputWindow = hwnd; + legacy.Windowed = TRUE; + legacy.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + legacy.Flags = 0; + + igl::d3d12::ComPtr legacySwap; + HRESULT hr2 = dxgiFactory_->CreateSwapChain(commandQueue_.Get(), &legacy, legacySwap.GetAddressOf()); + if (FAILED(hr2)) { + IGL_LOG_ERROR("D3D12Context: Failed to create swapchain (hr=0x%08X / 0x%08X)\n", 
(unsigned)hr, (unsigned)hr2); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create swapchain"); + } + // Try to QI to IDXGISwapChain3 + hr2 = legacySwap->QueryInterface(IID_PPV_ARGS(swapChain_.GetAddressOf())); + if (FAILED(hr2)) { + IGL_LOG_ERROR("D3D12Context: Failed to query IDXGISwapChain3 (hr=0x%08X)\n", (unsigned)hr2); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to query IDXGISwapChain3"); + } + return Result(); + } + + // Cast to IDXGISwapChain3 + hr = tempSwapChain->QueryInterface(IID_PPV_ARGS(swapChain_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to query IDXGISwapChain3 interface (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to query IDXGISwapChain3 interface"); + } + + // A-009: Verify swapchain actually supports tearing after creation + if (tearingSupported_) { + DXGI_SWAP_CHAIN_DESC1 actualDesc = {}; + hr = swapChain_->GetDesc1(&actualDesc); + if (SUCCEEDED(hr)) { + const bool actualTearingFlag = (actualDesc.Flags & DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING) != 0; + const bool actualWindowedMode = (actualDesc.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD || + actualDesc.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL); + + if (!actualTearingFlag) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Tearing flag was NOT set on swapchain (downgraded by driver)\n"); + tearingSupported_ = false; + } else if (!actualWindowedMode) { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain not in flip mode (tearing requires flip model)\n"); + tearingSupported_ = false; + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Tearing verified on swapchain (windowed flip model + tearing flag)\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("D3D12Context: Failed to verify swapchain desc, assuming tearing unavailable\n"); + tearingSupported_ = false; + } + } + + // A-010: Detect HDR capabilities now that swapchain is created + 
detectHDRCapabilities(); + + // Query swapchain buffer count for dynamic frame management (T43) + DXGI_SWAP_CHAIN_DESC1 swapDesc = {}; + hr = swapChain_->GetDesc1(&swapDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to query swapchain description (HRESULT: 0x%08X)\n", static_cast<unsigned>(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to query swapchain description"); + } + + swapchainBufferCount_ = swapDesc.BufferCount; + IGL_D3D12_LOG_VERBOSE("D3D12Context: Swapchain created with %u buffers\n", swapchainBufferCount_); + + // Resize frame management arrays to match swapchain buffer count + renderTargets_.resize(swapchainBufferCount_); + frameContexts_.resize(swapchainBufferCount_); + + return Result(); +} + +Result D3D12Context::createRTVHeap() { + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.NumDescriptors = swapchainBufferCount_; // Use queried buffer count + heapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + + HRESULT hr = device_->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(rtvHeap_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create RTV heap (HRESULT: 0x%08X)\n", static_cast<unsigned>(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create RTV heap"); + } + + rtvDescriptorSize_ = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + + return Result(); +} + +Result D3D12Context::createBackBuffers() { + D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle = rtvHeap_->GetCPUDescriptorHandleForHeapStart(); + + for (UINT i = 0; i < swapchainBufferCount_; i++) { // Use queried buffer count + HRESULT hr = swapChain_->GetBuffer(i, IID_PPV_ARGS(renderTargets_[i].GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to get swapchain buffer %u (HRESULT: 0x%08X)\n", i, static_cast<unsigned>(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to get 
swapchain buffer"); + } + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_.Get() != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(renderTargets_[i].Get() != nullptr, "Swapchain buffer is null"); + IGL_DEBUG_ASSERT(rtvHandle.ptr != 0, "RTV descriptor handle is invalid"); + + device_->CreateRenderTargetView(renderTargets_[i].Get(), nullptr, rtvHandle); + rtvHandle.ptr += rtvDescriptorSize_; + } + + return Result(); +} + +Result D3D12Context::createDescriptorHeaps() { + // Cache descriptor sizes + cbvSrvUavDescriptorSize_ = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + samplerDescriptorSize_ = device_->GetDescriptorHandleIncrementSize( + D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + + // Create per-frame shader-visible descriptor heaps using configurable sizes. + // Each frame gets its own isolated heaps to prevent descriptor conflicts between frames. + // Use pre-allocation with fail-fast on exhaustion (Vulkan pattern, no dynamic growth). + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating per-frame descriptor heaps with fail-fast allocation...\n"); + IGL_D3D12_LOG_VERBOSE(" Config: bufferCount=%u, samplerHeapSize=%u, " + "descriptorsPerPage=%u, maxHeapPages=%u, preAllocate=%s\n", + swapchainBufferCount_, config_.samplerHeapSize, + config_.descriptorsPerPage, config_.maxHeapPages, + config_.preAllocateDescriptorPages ? "true" : "false"); + + for (UINT i = 0; i < swapchainBufferCount_; i++) { + // CBV/SRV/UAV heap: pre-allocate pages based on the configuration policy. + // When preAllocateDescriptorPages is true, allocate all maxHeapPages upfront + // to prevent mid-frame allocation and descriptor invalidation (Vulkan fail-fast pattern). + { + frameContexts_[i].cbvSrvUavHeapPages.clear(); + frameContexts_[i].currentCbvSrvUavPageIndex = 0; + + const uint32_t pagesToAllocate = config_.preAllocateDescriptorPages ? 
config_.maxHeapPages : 1; + + for (uint32_t pageIdx = 0; pageIdx < pagesToAllocate; ++pageIdx) { + igl::d3d12::ComPtr heap; + Result result = allocateDescriptorHeapPage( + D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, + config_.descriptorsPerPage, + &heap); + + if (!result.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to create CBV/SRV/UAV heap page %u for frame %u: %s\n", + pageIdx, i, result.message.c_str()); + IGL_DEBUG_ASSERT(false); + return result; + } + + frameContexts_[i].cbvSrvUavHeapPages.emplace_back(heap, config_.descriptorsPerPage); + } + + const uint32_t allocatedDescriptors = pagesToAllocate * config_.descriptorsPerPage; + if (config_.preAllocateDescriptorPages) { + IGL_D3D12_LOG_VERBOSE(" Frame %u: Pre-allocated %u CBV/SRV/UAV heap pages (%u descriptors, fail-fast on exhaustion)\n", + i, pagesToAllocate, allocatedDescriptors); + } else { + IGL_D3D12_LOG_VERBOSE(" Frame %u: Allocated %u CBV/SRV/UAV heap page (%u descriptors, fail-fast on exhaustion)\n", + i, pagesToAllocate, allocatedDescriptors); + } + } + + // Sampler heap: samplerHeapSize descriptors + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + desc.NumDescriptors = config_.samplerHeapSize; // T14: Use configurable size + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + desc.NodeMask = 0; + + HRESULT hr = device_->CreateDescriptorHeap(&desc, + IID_PPV_ARGS(frameContexts_[i].samplerHeap.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create per-frame Sampler heap for frame %u (HRESULT: 0x%08X)\n", i, static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create per-frame Sampler heap for frame " + std::to_string(i)); + } + IGL_D3D12_LOG_VERBOSE(" Frame %u: Created Sampler heap (%u descriptors)\n", i, config_.samplerHeapSize); + } + } + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Per-frame descriptor heaps created successfully\n"); + // Memory calculation reflects actual 
pre-allocation (no dynamic growth). + const uint32_t pagesPerFrame = config_.preAllocateDescriptorPages ? config_.maxHeapPages : 1; + const uint32_t cbvSrvUavDescriptors = config_.descriptorsPerPage * pagesPerFrame; + const uint32_t totalDescriptorsPerFrame = cbvSrvUavDescriptors + config_.samplerHeapSize; + const uint32_t totalMemoryKB = (swapchainBufferCount_ * totalDescriptorsPerFrame * 32) / 1024; + IGL_D3D12_LOG_VERBOSE(" Allocated memory: %u frames * (%u CBV/SRV/UAV + %u Samplers) * 32 bytes = %u KB\n", + swapchainBufferCount_, cbvSrvUavDescriptors, + config_.samplerHeapSize, totalMemoryKB); + + IGL_D3D12_LOG_VERBOSE("D3D12Context: Creating descriptor heap manager...\n"); + + // Create descriptor heap manager using configuration values. + DescriptorHeapManager::Sizes sizes{}; + sizes.cbvSrvUav = 256; // For CPU-visible staging (not used for shader-visible). + sizes.samplers = 16; // For CPU-visible staging (not used for shader-visible). + sizes.rtvs = config_.rtvHeapSize; + sizes.dsvs = config_.dsvHeapSize; + + ownedHeapMgr_ = new DescriptorHeapManager(); + Result result = ownedHeapMgr_->initialize(device_.Get(), sizes); + if (!result.isOk()) { + IGL_LOG_ERROR("D3D12Context: Failed to initialize descriptor heap manager: %s\n", + result.message.c_str()); + delete ownedHeapMgr_; + ownedHeapMgr_ = nullptr; + } else { + heapMgr_ = ownedHeapMgr_; + IGL_D3D12_LOG_VERBOSE("D3D12Context: Descriptor heap manager created successfully\n"); + } + + return Result(); +} + +Result D3D12Context::createCommandSignatures() { + // Create command signature for DrawInstanced (multiDrawIndirect) + // D3D12_DRAW_ARGUMENTS: { VertexCountPerInstance, InstanceCount, StartVertexLocation, StartInstanceLocation } + { + D3D12_INDIRECT_ARGUMENT_DESC drawArg = {}; + drawArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW; + + D3D12_COMMAND_SIGNATURE_DESC drawSigDesc = {}; + drawSigDesc.ByteStride = sizeof(D3D12_DRAW_ARGUMENTS); // 16 bytes (4 x UINT) + drawSigDesc.NumArgumentDescs = 1; + 
drawSigDesc.pArgumentDescs = &drawArg; + drawSigDesc.NodeMask = 0; + + HRESULT hr = device_->CreateCommandSignature( + &drawSigDesc, + nullptr, // No root signature needed for simple draw commands + IID_PPV_ARGS(drawIndirectSignature_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create draw indirect command signature (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create draw indirect command signature"); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Created draw indirect command signature (stride: %u bytes)\n", + drawSigDesc.ByteStride); + } + + // Create command signature for DrawIndexedInstanced (multiDrawIndexedIndirect) + // D3D12_DRAW_INDEXED_ARGUMENTS: { IndexCountPerInstance, InstanceCount, StartIndexLocation, BaseVertexLocation, StartInstanceLocation } + { + D3D12_INDIRECT_ARGUMENT_DESC drawIndexedArg = {}; + drawIndexedArg.Type = D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED; + + D3D12_COMMAND_SIGNATURE_DESC drawIndexedSigDesc = {}; + drawIndexedSigDesc.ByteStride = sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); // 20 bytes (5 x UINT) + drawIndexedSigDesc.NumArgumentDescs = 1; + drawIndexedSigDesc.pArgumentDescs = &drawIndexedArg; + drawIndexedSigDesc.NodeMask = 0; + + HRESULT hr = device_->CreateCommandSignature( + &drawIndexedSigDesc, + nullptr, // No root signature needed for simple draw commands + IID_PPV_ARGS(drawIndexedIndirectSignature_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("D3D12Context: Failed to create draw indexed indirect command signature (HRESULT: 0x%08X)\n", static_cast(hr)); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, "Failed to create draw indexed indirect command signature"); + } + IGL_D3D12_LOG_VERBOSE("D3D12Context: Created draw indexed indirect command signature (stride: %u bytes)\n", + drawIndexedSigDesc.ByteStride); + } + + return Result(); +} + +uint32_t D3D12Context::getCurrentBackBufferIndex() 
const { + if (swapChain_.Get() == nullptr) { + return 0; + } + return swapChain_->GetCurrentBackBufferIndex(); +} + +ID3D12Resource* D3D12Context::getCurrentBackBuffer() const { + uint32_t index = getCurrentBackBufferIndex(); + if (index >= swapchainBufferCount_) { + IGL_LOG_ERROR("getCurrentBackBuffer(): index %u >= swapchainBufferCount %u\n", index, swapchainBufferCount_); + return nullptr; + } + + ID3D12Resource* resource = renderTargets_[index].Get(); + IGL_D3D12_LOG_VERBOSE("getCurrentBackBuffer(): index=%u, resource=%p\n", index, (void*)resource); + return resource; +} + +D3D12_CPU_DESCRIPTOR_HANDLE D3D12Context::getCurrentRTV() const { + if (rtvHeap_.Get() == nullptr) { + return {0}; + } + D3D12_CPU_DESCRIPTOR_HANDLE rtv = rtvHeap_->GetCPUDescriptorHandleForHeapStart(); + rtv.ptr += getCurrentBackBufferIndex() * rtvDescriptorSize_; + return rtv; +} + +void D3D12Context::waitForGPU() { + if (!fence_.Get() || !commandQueue_.Get()) { + return; + } + + // Signal and increment the fence value + const UINT64 fenceToWaitFor = ++fenceValue_; + commandQueue_->Signal(fence_.Get(), fenceToWaitFor); + + // Wait until the fence is crossed using FenceWaiter (TOCTOU-safe) + FenceWaiter waiter(fence_.Get(), fenceToWaitFor); + Result waitResult = waiter.wait(INFINITE); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("D3D12Context::waitForGPU() - Fence wait failed: %s (fence=%llu)\n", + waitResult.message.c_str(), fenceToWaitFor); + } +} + +void D3D12Context::trackResourceCreation(const char* type, size_t sizeBytes) { + std::lock_guard lock(resourceStatsMutex_); + if (strcmp(type, "Buffer") == 0) { + resourceStats_.totalBuffersCreated++; + resourceStats_.bufferMemoryBytes += sizeBytes; + } else if (strcmp(type, "Texture") == 0) { + resourceStats_.totalTexturesCreated++; + resourceStats_.textureMemoryBytes += sizeBytes; + } else if (strcmp(type, "SRV") == 0) { + resourceStats_.totalSRVsCreated++; + } else if (strcmp(type, "Sampler") == 0) { + resourceStats_.totalSamplersCreated++; 
+ } +} + +void D3D12Context::trackResourceDestruction(const char* type, size_t sizeBytes) { + std::lock_guard lock(resourceStatsMutex_); + if (strcmp(type, "Buffer") == 0) { + resourceStats_.totalBuffersDestroyed++; + resourceStats_.bufferMemoryBytes -= sizeBytes; + } else if (strcmp(type, "Texture") == 0) { + resourceStats_.totalTexturesDestroyed++; + resourceStats_.textureMemoryBytes -= sizeBytes; + } +} + +void D3D12Context::logResourceStats() { + std::lock_guard lock(resourceStatsMutex_); + IGL_D3D12_LOG_VERBOSE("=== D3D12 Resource Statistics ===\n"); + IGL_D3D12_LOG_VERBOSE(" Buffers: %zu created, %zu destroyed (leaked: %zd)\n", + resourceStats_.totalBuffersCreated, + resourceStats_.totalBuffersDestroyed, + (int64_t)resourceStats_.totalBuffersCreated - (int64_t)resourceStats_.totalBuffersDestroyed); + IGL_D3D12_LOG_VERBOSE(" Textures: %zu created, %zu destroyed (leaked: %zd)\n", + resourceStats_.totalTexturesCreated, + resourceStats_.totalTexturesDestroyed, + (int64_t)resourceStats_.totalTexturesCreated - (int64_t)resourceStats_.totalTexturesDestroyed); + IGL_D3D12_LOG_VERBOSE(" SRVs created: %zu\n", resourceStats_.totalSRVsCreated); + IGL_D3D12_LOG_VERBOSE(" Samplers created: %zu\n", resourceStats_.totalSamplersCreated); + IGL_D3D12_LOG_VERBOSE(" Buffer memory: %.2f MB\n", resourceStats_.bufferMemoryBytes / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE(" Texture memory: %.2f MB\n", resourceStats_.textureMemoryBytes / (1024.0 * 1024.0)); + IGL_D3D12_LOG_VERBOSE("==================================\n"); +} + +// Allocate a new descriptor heap page for dynamic growth. 
+Result D3D12Context::allocateDescriptorHeapPage( + D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t numDescriptors, + igl::d3d12::ComPtr* outHeap) { + if (!device_.Get()) { + return Result{Result::Code::RuntimeError, "Device is null"}; + } + + D3D12_DESCRIPTOR_HEAP_DESC heapDesc = {}; + heapDesc.Type = type; + heapDesc.NumDescriptors = numDescriptors; + heapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + heapDesc.NodeMask = 0; + + HRESULT hr = device_->CreateDescriptorHeap(&heapDesc, IID_PPV_ARGS(outHeap->GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create descriptor heap page (type=%d, numDescriptors=%u): HRESULT=0x%08X", + static_cast(type), numDescriptors, static_cast(hr)); + return Result{Result::Code::RuntimeError, errorMsg}; + } + + return Result{}; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12Context.h b/src/igl/d3d12/D3D12Context.h new file mode 100644 index 0000000000..7ad8873167 --- /dev/null +++ b/src/igl/d3d12/D3D12Context.h @@ -0,0 +1,423 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl { +class IBuffer; // Forward declaration for igl::IBuffer +} + +namespace igl::d3d12 { + +class DescriptorHeapManager; // fwd decl in igl::d3d12 + +/** + * @brief Descriptor heap page for dynamic multi-page growth + * + * Part of Strategy 1 (Transient Descriptor Allocator) architecture. + * See D3D12ResourcesBinder.h for full architecture documentation. 
+ * + * Following Microsoft MiniEngine's DynamicDescriptorHeap pattern: + * - Start with 1 page of 1024 descriptors per frame + * - Grow to up to 16 pages (16,384 descriptors) on-demand + * - Reset all counters at frame boundary (no deallocation needed) + */ +struct DescriptorHeapPage { + igl::d3d12::ComPtr heap; + uint32_t capacity; // Total descriptors in this page + uint32_t used; // Currently allocated descriptors + + DescriptorHeapPage() : capacity(0), used(0) {} + DescriptorHeapPage(igl::d3d12::ComPtr h, uint32_t cap) + : heap(h), capacity(cap), used(0) {} +}; + +/** + * @brief Per-frame context for CPU/GPU parallelism and descriptor management + * + * ============================================================================ + * ARCHITECTURE: Strategy 1 - Transient Descriptor Allocator + * ============================================================================ + * + * FrameContext implements the per-frame descriptor heap management system + * (Strategy 1 in D3D12ResourcesBinder.h architecture). 
+ * + * **Key Design Decisions**: + * - 3 frames in flight: Prevents CPU/GPU stalls while enabling triple buffering + * - Per-frame isolation: Each frame gets independent descriptor heaps + * - Shared across command buffers: ALL command buffers in a frame share these heaps + * - Linear allocation: O(1) descriptor allocation with simple counter increment + * - Frame-boundary reset: Counters reset to 0, no per-descriptor deallocation + * - Dynamic growth: CBV/SRV/UAV heaps can grow from 1 to 16 pages on-demand + * + * **Descriptor Heap Layout**: + * - CBV/SRV/UAV: Multi-page array (1024 descriptors/page, up to 16 pages = 16K total) + * - Samplers: Single heap (2048 descriptors, D3D12 spec limit, no growth) + * + * **Access Pattern**: + * - CommandBuffer::getNextCbvSrvUavDescriptor() - allocates from current page + * - CommandBuffer::allocateCbvSrvUavRange() - allocates contiguous range + * - CommandBuffer::getNextSamplerDescriptor() - returns reference for increment + * - FrameManager::resetDescriptorCounters() - resets at frame boundary + * + * **Performance Characteristics**: + * - Allocation: O(1) with occasional page growth (O(n) for page vector resize) + * - Deallocation: None (bulk reset at frame boundary) + * - Memory: ~4MB worst case per frame (16 pages * 1024 descriptors * 32 bytes/descriptor) + * + * For architecture overview, see D3D12ResourcesBinder.h documentation. + */ +struct FrameContext { + igl::d3d12::ComPtr allocator; + UINT64 fenceValue = 0; // First fence signaled this frame (backward compatibility) + + // D-002: Track maximum fence value of ALL command lists using this allocator + // CRITICAL: Allocator can only be reset when GPU completes maxAllocatorFence + UINT64 maxAllocatorFence = 0; + + // D-002: Count command buffers submitted with this allocator (telemetry) + uint32_t commandBufferCount = 0; + + // Per-frame shader-visible descriptor heaps (following Microsoft MiniEngine pattern). 
+ // Supports multiple pages for dynamic growth to prevent overflow and corruption. + // Each frame gets its own isolated heap pages to prevent descriptor conflicts. + std::vector cbvSrvUavHeapPages; // Dynamic array of 1024-descriptor pages + igl::d3d12::ComPtr samplerHeap; // 2048 descriptors (kMaxSamplers) + + // Current active page index for CBV/SRV/UAV allocation. + uint32_t currentCbvSrvUavPageIndex = 0; + + // Track the currently active shader-visible heap for command list binding. + // This is updated when allocating new pages and must be rebound to the command list. + // This heap is returned by D3D12Context::getCbvSrvUavHeap() for binding. + igl::d3d12::ComPtr activeCbvSrvUavHeap; + + // Linear allocator counters - reset to 0 each frame + // Incremented by each command buffer's encoders as they allocate descriptors + uint32_t nextCbvSrvUavDescriptor = 0; + uint32_t nextSamplerDescriptor = 0; + + // Transient resources that must be kept alive until this frame completes GPU execution + // Examples: push constant buffers, temporary upload buffers + // CRITICAL: These are cleared when we advance to the next frame AFTER waiting for + // this frame's fence, ensuring the GPU has finished reading them + std::vector> transientBuffers; + std::vector> transientResources; + + // Telemetry for transient resource tracking. + // Tracks high-water mark to observe peak usage and detect unbounded growth. + size_t transientBuffersHighWater = 0; + size_t transientResourcesHighWater = 0; + + // Telemetry for descriptor heap usage tracking. + // Tracks peak descriptor usage per frame to detect heap overflow risks. 
+ uint32_t peakCbvSrvUavUsage = 0; + uint32_t peakSamplerUsage = 0; +}; + +class D3D12Context { + public: + // A-011: Multi-adapter enumeration and tracking + struct AdapterInfo { + igl::d3d12::ComPtr adapter; + DXGI_ADAPTER_DESC1 desc; + D3D_FEATURE_LEVEL featureLevel; + bool isWarp; // Software rasterizer + uint32_t index; // Original enumeration index + + // Helper methods + uint64_t getDedicatedVideoMemoryMB() const; + const char* getVendorName() const; + }; + + // A-012: Memory budget tracking + struct MemoryBudget { + uint64_t dedicatedVideoMemory = 0; // Dedicated GPU memory (bytes) + uint64_t sharedSystemMemory = 0; // Shared system memory accessible to GPU (bytes) + uint64_t estimatedUsage = 0; // Current estimated usage by this device (bytes) + uint64_t userDefinedBudgetLimit = 0; // Optional soft limit + + uint64_t totalAvailableMemory() const; + double getUsagePercentage() const; + bool isMemoryCritical() const; + bool isMemoryLow() const; + }; + + // A-010: HDR output capabilities + struct HDRCapabilities { + bool hdrSupported = false; // HDR10 support + bool scRGBSupported = false; // scRGB (FP16) support + DXGI_COLOR_SPACE_TYPE nativeColorSpace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; // SDR default + float maxLuminance = 80.0f; // Max luminance in nits (SDR default) + float minLuminance = 0.0f; // Min luminance in nits + float maxFullFrameLuminance = 80.0f; // Max full-frame luminance in nits + }; + + D3D12Context() = default; + ~D3D12Context(); + + // initialize() accepts optional D3D12ContextConfig for configurable sizes. 
+ Result initialize(HWND hwnd, uint32_t width, uint32_t height, + const D3D12ContextConfig& config = D3D12ContextConfig::defaultConfig()); + Result resize(uint32_t width, uint32_t height); + + ID3D12Device* getDevice() const { return device_.Get(); } + ID3D12CommandQueue* getCommandQueue() const { return commandQueue_.Get(); } + IDXGISwapChain3* getSwapChain() const { return swapChain_.Get(); } + + // Get currently active CBV/SRV/UAV descriptor heap for current frame. + // Returns the active heap used for descriptor allocation. Use this for heap binding. + // For multi-page access or diagnostics, use getFrameContexts(). + ID3D12DescriptorHeap* getCbvSrvUavHeap() const { + const auto& frameCtx = frameContexts_[currentFrameIndex_]; + return frameCtx.activeCbvSrvUavHeap.Get(); + } + ID3D12DescriptorHeap* getSamplerHeap() const { + return frameContexts_[currentFrameIndex_].samplerHeap.Get(); + } + + // Allocate a new descriptor heap page for dynamic growth. + Result allocateDescriptorHeapPage(D3D12_DESCRIPTOR_HEAP_TYPE type, + uint32_t numDescriptors, + igl::d3d12::ComPtr* outHeap); + + // Get descriptor sizes + UINT getCbvSrvUavDescriptorSize() const { return cbvSrvUavDescriptorSize_; } + UINT getSamplerDescriptorSize() const { return samplerDescriptorSize_; } + + // Get root signature capabilities + D3D_ROOT_SIGNATURE_VERSION getHighestRootSignatureVersion() const { return highestRootSignatureVersion_; } + D3D12_RESOURCE_BINDING_TIER getResourceBindingTier() const { return resourceBindingTier_; } + + // Get shader model capability. + D3D_SHADER_MODEL getMaxShaderModel() const { return maxShaderModel_; } + + // Get selected feature level (A-004, A-005) + D3D_FEATURE_LEVEL getSelectedFeatureLevel() const { return selectedFeatureLevel_; } + + // Get tearing support capability + bool isTearingSupported() const { return tearingSupported_; } + + // Get command signatures for indirect drawing. 
+ ID3D12CommandSignature* getDrawIndirectSignature() const { return drawIndirectSignature_.Get(); } + ID3D12CommandSignature* getDrawIndexedIndirectSignature() const { return drawIndexedIndirectSignature_.Get(); } + + // Get descriptor handles from per-frame heaps using the current page for multi-heap support. + D3D12_CPU_DESCRIPTOR_HANDLE getCbvSrvUavCpuHandle(uint32_t descriptorIndex) const { + const auto& frameCtx = frameContexts_[currentFrameIndex_]; + const auto& pages = frameCtx.cbvSrvUavHeapPages; + const uint32_t pageIdx = frameCtx.currentCbvSrvUavPageIndex; + + if (pages.empty() || pageIdx >= pages.size()) { + return {0}; // Invalid handle + } + + auto h = pages[pageIdx].heap->GetCPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * cbvSrvUavDescriptorSize_; + return h; + } + + D3D12_GPU_DESCRIPTOR_HANDLE getCbvSrvUavGpuHandle(uint32_t descriptorIndex) const { + const auto& frameCtx = frameContexts_[currentFrameIndex_]; + const auto& pages = frameCtx.cbvSrvUavHeapPages; + const uint32_t pageIdx = frameCtx.currentCbvSrvUavPageIndex; + + if (pages.empty() || pageIdx >= pages.size()) { + return {0}; // Invalid handle + } + + auto h = pages[pageIdx].heap->GetGPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * cbvSrvUavDescriptorSize_; + return h; + } + + D3D12_CPU_DESCRIPTOR_HANDLE getSamplerCpuHandle(uint32_t descriptorIndex) const { + auto h = frameContexts_[currentFrameIndex_].samplerHeap->GetCPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * samplerDescriptorSize_; + return h; + } + + D3D12_GPU_DESCRIPTOR_HANDLE getSamplerGpuHandle(uint32_t descriptorIndex) const { + auto h = frameContexts_[currentFrameIndex_].samplerHeap->GetGPUDescriptorHandleForHeapStart(); + h.ptr += descriptorIndex * samplerDescriptorSize_; + return h; + } + + // Optional descriptor heap manager (provided by headless context) + DescriptorHeapManager* getDescriptorHeapManager() const { return heapMgr_; } + + uint32_t getCurrentBackBufferIndex() const; 
+ ID3D12Resource* getCurrentBackBuffer() const; + D3D12_CPU_DESCRIPTOR_HANDLE getCurrentRTV() const; + + void waitForGPU(); + + // Per-frame fence access for CommandQueue + FrameContext* getFrameContexts() { return frameContexts_.data(); } + UINT& getCurrentFrameIndex() { return currentFrameIndex_; } + UINT getSwapchainBufferCount() const { return swapchainBufferCount_; } + UINT64& getFenceValue() { return fenceValue_; } + ID3D12Fence* getFence() const { return fence_.Get(); } + + // Resource tracking for diagnostics + static void trackResourceCreation(const char* type, size_t sizeBytes); + static void trackResourceDestruction(const char* type, size_t sizeBytes); + static void logResourceStats(); + + // A-011: Adapter enumeration and selection + const std::vector& getEnumeratedAdapters() const { return enumeratedAdapters_; } + const AdapterInfo* getSelectedAdapter() const { + if (selectedAdapterIndex_ < enumeratedAdapters_.size()) { + return &enumeratedAdapters_[selectedAdapterIndex_]; + } + return nullptr; + } + uint32_t getSelectedAdapterIndex() const { return selectedAdapterIndex_; } + + // A-012: Memory budget tracking + MemoryBudget getMemoryBudget() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_; + } + + double getMemoryUsagePercentage() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_.getUsagePercentage(); + } + + bool isMemoryLow() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_.isMemoryLow(); + } + + bool isMemoryCritical() const { + std::lock_guard lock(memoryTrackingMutex_); + return memoryBudget_.isMemoryCritical(); + } + + void updateMemoryUsage(int64_t delta) { + std::lock_guard lock(memoryTrackingMutex_); + uint64_t newUsage = memoryBudget_.estimatedUsage; + if (delta < 0) { + uint64_t absDelta = static_cast(-delta); + newUsage = (absDelta > newUsage) ? 
0 : (newUsage - absDelta); + } else { + newUsage += static_cast(delta); + } + memoryBudget_.estimatedUsage = newUsage; + } + + // A-010: HDR output capabilities + const HDRCapabilities& getHDRCapabilities() const { return hdrCapabilities_; } + bool isHDRSupported() const { return hdrCapabilities_.hdrSupported; } + + // Accessor for configuration (sizes, frame buffering, etc.). + const D3D12ContextConfig& getConfig() const { return config_; } + + protected: + [[nodiscard]] Result createDevice(); + [[nodiscard]] Result createCommandQueue(); + [[nodiscard]] Result createSwapChain(HWND hwnd, uint32_t width, uint32_t height); + Result recreateSwapChain(uint32_t width, uint32_t height); + [[nodiscard]] Result createRTVHeap(); + [[nodiscard]] Result createBackBuffers(); + [[nodiscard]] Result createDescriptorHeaps(); + [[nodiscard]] Result createCommandSignatures(); + + // A-011: Adapter enumeration + [[nodiscard]] Result enumerateAndSelectAdapter(); + static D3D_FEATURE_LEVEL getHighestFeatureLevel(IDXGIAdapter1* adapter); + + // A-012: Memory budget detection + void detectMemoryBudget(); + + // A-010: HDR output detection + void detectHDRCapabilities(); + + igl::d3d12::ComPtr dxgiFactory_; + igl::d3d12::ComPtr adapter_; + igl::d3d12::ComPtr device_; + igl::d3d12::ComPtr commandQueue_; + igl::d3d12::ComPtr swapChain_; + UINT swapchainBufferCount_ = 0; // Queried from swapchain, replaces kMaxFramesInFlight + + igl::d3d12::ComPtr rtvHeap_; + std::vector> renderTargets_; // Sized to swapchainBufferCount_ + UINT rtvDescriptorSize_ = 0; + + // Descriptor sizes (cached from device) + UINT cbvSrvUavDescriptorSize_ = 0; + UINT samplerDescriptorSize_ = 0; + + // Feature detection for root signature capabilities + D3D_ROOT_SIGNATURE_VERSION highestRootSignatureVersion_ = D3D_ROOT_SIGNATURE_VERSION_1_0; + D3D12_RESOURCE_BINDING_TIER resourceBindingTier_ = D3D12_RESOURCE_BINDING_TIER_1; + + // Feature detection for device feature level (A-004) + D3D_FEATURE_LEVEL 
selectedFeatureLevel_ = D3D_FEATURE_LEVEL_11_0; + + // Feature detection for shader model. + // DXC requires SM 6.0 minimum (SM 5.x deprecated). + D3D_SHADER_MODEL maxShaderModel_ = D3D_SHADER_MODEL_6_0; + + // Feature detection for variable refresh rate (tearing) support + bool tearingSupported_ = false; + + // A-011: Multi-adapter tracking (structs defined in public section) + std::vector enumeratedAdapters_; + uint32_t selectedAdapterIndex_ = 0; + + // A-012: Memory budget tracking (struct defined in public section) + MemoryBudget memoryBudget_; + mutable std::mutex memoryTrackingMutex_; + + // A-010: HDR output capabilities (struct defined in public section) + HDRCapabilities hdrCapabilities_; + + // Command signatures for indirect drawing. + igl::d3d12::ComPtr drawIndirectSignature_; + igl::d3d12::ComPtr drawIndexedIndirectSignature_; + + // Descriptor heap manager for headless contexts (unit tests) + DescriptorHeapManager* ownedHeapMgr_ = nullptr; // Owned manager for windowed contexts (raw ptr, manually deleted) + DescriptorHeapManager* heapMgr_ = nullptr; // non-owning; points to ownedHeapMgr_ or external (headless) + + // Per-frame synchronization for CPU/GPU parallelism + std::vector frameContexts_; // Sized to swapchainBufferCount_ + UINT currentFrameIndex_ = 0; + + // Global synchronization + igl::d3d12::ComPtr fence_; + UINT64 fenceValue_ = 0; + + uint32_t width_ = 0; + uint32_t height_ = 0; + + // Configuration for customizable sizes. 
+  D3D12ContextConfig config_;
+
+  // Resource tracking (static for global tracking across all contexts)
+  struct ResourceStats {
+    size_t totalBuffersCreated = 0;
+    size_t totalBuffersDestroyed = 0;
+    size_t totalTexturesCreated = 0;
+    size_t totalTexturesDestroyed = 0;
+    size_t totalSRVsCreated = 0;
+    size_t totalSamplersCreated = 0;
+    size_t bufferMemoryBytes = 0;
+    size_t textureMemoryBytes = 0;
+  };
+  static ResourceStats resourceStats_;
+  static std::mutex resourceStatsMutex_;
+};
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12DeviceCapabilities.cpp b/src/igl/d3d12/D3D12DeviceCapabilities.cpp
new file mode 100644
index 0000000000..cccd98eec1
--- /dev/null
+++ b/src/igl/d3d12/D3D12DeviceCapabilities.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): include targets reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12DeviceCapabilities.h>
+
+#include <igl/d3d12/D3D12Context.h>
+
+namespace igl::d3d12 {
+
+// Queries and logs device capability/limit information for the given context.
+void D3D12DeviceCapabilities::initialize(D3D12Context& ctx) {
+  validateDeviceLimits(ctx);
+}
+
+void D3D12DeviceCapabilities::validateDeviceLimits(D3D12Context& ctx) {
+  auto* device = ctx.getDevice();
+  if (!device) {
+    IGL_LOG_ERROR("D3D12DeviceCapabilities::validateDeviceLimits: D3D12 device is null\n");
+    return;
+  }
+
+  IGL_D3D12_LOG_VERBOSE("=== D3D12 Device Capabilities and Limits Validation ===\n");
+
+  // Query D3D12_FEATURE_D3D12_OPTIONS for resource binding tier and other capabilities
+  HRESULT hr =
+      device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, &deviceOptions_, sizeof(deviceOptions_));
+
+  if (SUCCEEDED(hr)) {
+    // Log resource binding tier; unrecognized (future) tiers report "Unknown"
+    const char* tierName = "Unknown";
+    switch (deviceOptions_.ResourceBindingTier) {
+    case D3D12_RESOURCE_BINDING_TIER_1:
+      tierName = "Tier 1 (bounded descriptors required)";
+      break;
+    case D3D12_RESOURCE_BINDING_TIER_2:
+      tierName = "Tier 2 (unbounded arrays except samplers)";
+      break;
+    case D3D12_RESOURCE_BINDING_TIER_3:
+      tierName = "Tier 3 (fully unbounded)";
+      break;
+    }
+    IGL_D3D12_LOG_VERBOSE("  Resource Binding Tier: %s\n", tierName);
+
+    // Log other relevant capabilities
+    IGL_D3D12_LOG_VERBOSE("  Standard Swizzle 64KB Supported: %s\n",
+                          deviceOptions_.StandardSwizzle64KBSupported ? "Yes" : "No");
+    IGL_D3D12_LOG_VERBOSE("  Cross-Node Sharing Tier: %d\n", deviceOptions_.CrossNodeSharingTier);
+    IGL_D3D12_LOG_VERBOSE("  Conservative Rasterization Tier: %d\n",
+                          deviceOptions_.ConservativeRasterizationTier);
+  } else {
+    // Cast: HRESULT is 'long'; %X expects unsigned (matches the casts used elsewhere in this patch)
+    IGL_LOG_ERROR(
+        "  Failed to query D3D12_FEATURE_D3D12_OPTIONS (HRESULT: 0x%08X)\n", static_cast<unsigned>(hr));
+  }
+
+  // Query D3D12_FEATURE_D3D12_OPTIONS1 for wave intrinsics support
+  hr = device->CheckFeatureSupport(
+      D3D12_FEATURE_D3D12_OPTIONS1, &deviceOptions1_, sizeof(deviceOptions1_));
+
+  if (SUCCEEDED(hr)) {
+    IGL_D3D12_LOG_VERBOSE("  Wave Intrinsics Supported: %s\n",
+                          deviceOptions1_.WaveOps ? "Yes" : "No");
+    IGL_D3D12_LOG_VERBOSE("  Wave Lane Count Min: %u\n", deviceOptions1_.WaveLaneCountMin);
+    IGL_D3D12_LOG_VERBOSE("  Wave Lane Count Max: %u\n", deviceOptions1_.WaveLaneCountMax);
+    IGL_D3D12_LOG_VERBOSE("  Total Lane Count: %u\n", deviceOptions1_.TotalLaneCount);
+  } else {
+    IGL_D3D12_LOG_VERBOSE(
+        "  D3D12_FEATURE_D3D12_OPTIONS1 query failed (not critical)\n");
+  }
+
+  // The rest of the original validation logic lives in Device::getFeatureLimits()
+  // and related capability queries, so no additional checks are needed here.
+}
+
+} // namespace igl::d3d12
+
diff --git a/src/igl/d3d12/D3D12DeviceCapabilities.h b/src/igl/d3d12/D3D12DeviceCapabilities.h
new file mode 100644
index 0000000000..338bc8f551
--- /dev/null
+++ b/src/igl/d3d12/D3D12DeviceCapabilities.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#pragma once + +#include + +namespace igl::d3d12 { + +class D3D12Context; + +class D3D12DeviceCapabilities { + public: + void initialize(D3D12Context& ctx); + + [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS& getOptions() const { + return deviceOptions_; + } + + [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS1& getOptions1() const { + return deviceOptions1_; + } + + [[nodiscard]] D3D12_RESOURCE_BINDING_TIER getResourceBindingTier() const { + return deviceOptions_.ResourceBindingTier; + } + + private: + void validateDeviceLimits(D3D12Context& ctx); + + D3D12_FEATURE_DATA_D3D12_OPTIONS deviceOptions_ = {}; + D3D12_FEATURE_DATA_D3D12_OPTIONS1 deviceOptions1_ = {}; +}; + +} // namespace igl::d3d12 + diff --git a/src/igl/d3d12/D3D12FenceWaiter.cpp b/src/igl/d3d12/D3D12FenceWaiter.cpp new file mode 100644 index 0000000000..8c018e088b --- /dev/null +++ b/src/igl/d3d12/D3D12FenceWaiter.cpp @@ -0,0 +1,121 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+// NOTE(review): include target reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12FenceWaiter.h>
+
+namespace igl::d3d12 {
+
+FenceWaiter::FenceWaiter(ID3D12Fence* fence, UINT64 targetValue)
+    : fence_(fence), targetValue_(targetValue) {
+  if (!fence_) {
+    IGL_LOG_ERROR("FenceWaiter: null fence provided\n");
+    setupErrorCode_ = Result::Code::ArgumentNull;
+    setupErrorMessage_ = "Null fence provided to FenceWaiter";
+    return;
+  }
+
+  // Auto-reset event, initially non-signaled
+  event_ = CreateEvent(nullptr, FALSE, FALSE, nullptr);
+  if (!event_) {
+    const DWORD lastError = GetLastError();
+    IGL_LOG_ERROR("FenceWaiter: Failed to create event handle (LastError=0x%08X)\n",
+                  static_cast<unsigned>(lastError));
+    setupErrorCode_ = Result::Code::InvalidOperation;
+    char buf[128];
+    snprintf(buf, sizeof(buf), "CreateEvent failed (OS error 0x%08X)", static_cast<unsigned>(lastError));
+    setupErrorMessage_ = buf;
+    return;
+  }
+
+  HRESULT hr = fence_->SetEventOnCompletion(targetValue_, event_);
+  if (FAILED(hr)) {
+    IGL_LOG_ERROR("FenceWaiter: SetEventOnCompletion failed: 0x%08X\n", static_cast<unsigned>(hr));
+    CloseHandle(event_);
+    event_ = nullptr;
+    setupErrorCode_ = Result::Code::InvalidOperation;
+    char buf[128];
+    snprintf(buf, sizeof(buf), "SetEventOnCompletion failed (HRESULT=0x%08X)", static_cast<unsigned>(hr));
+    setupErrorMessage_ = buf;
+    return;
+  }
+
+  setupSucceeded_ = true;
+}
+
+FenceWaiter::~FenceWaiter() {
+  if (event_) {
+    CloseHandle(event_);
+  }
+}
+
+bool FenceWaiter::isComplete() const {
+  return fence_ && fence_->GetCompletedValue() >= targetValue_;
+}
+
+Result FenceWaiter::wait(DWORD timeoutMs) {
+  // Check if setup succeeded (constructor completed event creation and SetEventOnCompletion)
+  if (!setupSucceeded_ || !event_) {
+    return Result(setupErrorCode_, setupErrorMessage_);
+  }
+
+  // D-003: Re-check fence after SetEventOnCompletion to avoid TOCTOU race
+  if (isComplete()) {
+    return Result(); // Already complete, no wait needed
+  }
+
+  DWORD waitResult = WaitForSingleObject(event_, timeoutMs);
+
+  if (waitResult == WAIT_OBJECT_0) {
+    // Verify fence actually reached target value
+    UINT64 completedValue = fence_->GetCompletedValue();
+    if (completedValue < targetValue_) {
+      IGL_LOG_ERROR("FenceWaiter: Wait returned but fence incomplete (expected=%llu, got=%llu)\n",
+                    targetValue_, completedValue);
+
+      // CRITICAL: This indicates a GPU/driver issue (event signaled but fence not updated)
+      // For INFINITE timeout, try bounded recovery; otherwise honor the timeout contract
+      if (timeoutMs == INFINITE) {
+        // Bounded spin as last resort for INFINITE waits only (~10s at 1ms per spin)
+        const int maxSpins = 10000;
+        int spins = 0;
+        for (; spins < maxSpins && fence_->GetCompletedValue() < targetValue_; ++spins) {
+          Sleep(1);
+        }
+
+        if (fence_->GetCompletedValue() >= targetValue_) {
+          IGL_D3D12_LOG_VERBOSE("FenceWaiter: Fence completed after %d recovery spins\n", spins);
+          return Result(); // Success after recovery
+        }
+
+        IGL_LOG_ERROR("FenceWaiter: Fence still incomplete after %d bounded spins\n", maxSpins);
+      }
+
+      // Honor timeout contract: event signaled but fence incomplete = failure
+      return Result(Result::Code::RuntimeError,
+                    "Fence incomplete after wait (possible GPU hang or driver issue)");
+    }
+    return Result(); // Success
+  } else if (waitResult == WAIT_TIMEOUT) {
+    const UINT64 completedValue = fence_ ? fence_->GetCompletedValue() : 0;
+    IGL_LOG_ERROR("FenceWaiter: Timeout waiting for fence %llu (completed=%llu)\n",
+                  targetValue_, completedValue);
+    return Result(Result::Code::RuntimeError,
+                  "Fence wait timed out (possible GPU hang)");
+  } else {
+    const DWORD lastError = GetLastError();
+    IGL_LOG_ERROR("FenceWaiter: Wait failed with result 0x%08X (LastError=0x%08X)\n",
+                  static_cast<unsigned>(waitResult), static_cast<unsigned>(lastError));
+    char buf[128];
+    snprintf(buf, sizeof(buf), "WaitForSingleObject failed (result=0x%08X, OS error=0x%08X)",
+             static_cast<unsigned>(waitResult), static_cast<unsigned>(lastError));
+    return Result(Result::Code::RuntimeError, buf);
+  }
+}
+
+bool FenceWaiter::isTimeoutError(const Result& result) {
+  // NOTE: string matching is fragile; kept because the timeout message above is the
+  // single source of that text in this class.
+  return !result.isOk() && result.message.find("timed out") != std::string::npos;
+}
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12FenceWaiter.h b/src/igl/d3d12/D3D12FenceWaiter.h
new file mode 100644
index 0000000000..cb0c5cf3a1
--- /dev/null
+++ b/src/igl/d3d12/D3D12FenceWaiter.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// NOTE(review): include target reconstructed — TODO confirm it also provides igl::Result and std::string.
+#include <igl/d3d12/D3D12Headers.h>
+
+namespace igl::d3d12 {
+
+/**
+ * @brief RAII helper for D3D12 fence waiting operations
+ *
+ * Manages event creation, SetEventOnCompletion, and proper cleanup.
+ * Eliminates TOCTOU races by rechecking fence after SetEventOnCompletion.
+ *
+ * IMPORTANT: The fence pointer must remain valid for the lifetime of the FenceWaiter.
+ * Typical usage is with fences owned by long-lived context objects.
+ *
+ * Usage:
+ *   FenceWaiter waiter(fence, targetValue);
+ *   Result result = waiter.wait(timeoutMs);
+ *   if (!result.isOk()) {
+ *     // Handle specific error (timeout, setup failure, etc.)
+ * } + */ +class FenceWaiter final { + public: + FenceWaiter(ID3D12Fence* fence, UINT64 targetValue); + ~FenceWaiter(); + + // Delete copy/move to ensure single ownership of event handle + FenceWaiter(const FenceWaiter&) = delete; + FenceWaiter& operator=(const FenceWaiter&) = delete; + FenceWaiter(FenceWaiter&&) = delete; + FenceWaiter& operator=(FenceWaiter&&) = delete; + + /** + * @brief Wait for fence to reach target value with timeout + * @param timeoutMs Timeout in milliseconds (INFINITE for no timeout) + * @return Result with specific error code and message on failure: + * - ArgumentNull: Null fence provided to constructor + * - InvalidOperation: Event creation or SetEventOnCompletion failed + * - RuntimeError: Wait timed out (use isTimeoutError() to detect) + * - RuntimeError: Wait failed or fence incomplete after event signaled + */ + Result wait(DWORD timeoutMs = INFINITE); + + /** + * @brief Check if fence already reached target without waiting + */ + bool isComplete() const; + + /** + * @brief Check if a Result represents a timeout error + * @param result The Result to check + * @return true if the result indicates a timeout, false otherwise + */ + static bool isTimeoutError(const Result& result); + + private: + ID3D12Fence* fence_; + UINT64 targetValue_; + HANDLE event_ = nullptr; + bool setupSucceeded_ = false; + Result::Code setupErrorCode_ = Result::Code::Ok; + std::string setupErrorMessage_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12FrameManager.cpp b/src/igl/d3d12/D3D12FrameManager.cpp new file mode 100644 index 0000000000..098a8e2f14 --- /dev/null +++ b/src/igl/d3d12/D3D12FrameManager.cpp @@ -0,0 +1,242 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */
+
+// NOTE(review): include targets reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12FrameManager.h>
+#include <igl/d3d12/D3D12Context.h>
+#include <igl/d3d12/D3D12FenceWaiter.h>
+
+namespace igl::d3d12 {
+
+void FrameManager::advanceFrame(UINT64 currentFenceValue) {
+  // Calculate next frame index
+  const uint32_t bufferCount = context_.getSwapchainBufferCount();
+  const uint32_t nextFrameIndex = (context_.getCurrentFrameIndex() + 1) % bufferCount;
+
+  // STEP 1: Pipeline overload protection
+  waitForPipelineSync(currentFenceValue);
+
+  // STEP 2: Wait for next frame's resources to be available
+  if (!waitForFrame(nextFrameIndex)) {
+    IGL_LOG_ERROR("FrameManager: Skipping frame advancement due to fence wait failure\n");
+    return;
+  }
+
+  // STEP 3: Advance to next frame
+  context_.getCurrentFrameIndex() = nextFrameIndex;
+#ifdef IGL_DEBUG
+  IGL_D3D12_LOG_VERBOSE("FrameManager: Advanced to frame index %u\n", nextFrameIndex);
+#endif
+
+  // STEP 4: Reset allocator safely
+  resetAllocator(nextFrameIndex);
+
+  // STEP 5: Clear transient resources
+  clearTransientResources(nextFrameIndex);
+
+  // STEP 6: Reset descriptor counters
+  resetDescriptorCounters(nextFrameIndex);
+}
+
+void FrameManager::waitForPipelineSync(UINT64 currentFenceValue) {
+  auto* fence = context_.getFence();
+
+  // Ensure we don't have more frames in flight than swapchain buffers
+  const uint32_t bufferCount = context_.getSwapchainBufferCount();
+  const UINT64 minimumSafeFence = (currentFenceValue >= bufferCount)
+                                      ? (currentFenceValue - (bufferCount - 1))
+                                      : 0;
+
+  const UINT64 currentCompletedValue = fence->GetCompletedValue();
+  if (currentCompletedValue < minimumSafeFence) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: SAFETY WAIT - Pipeline overload protection (completed=%llu, need=%llu)\n",
+                          currentCompletedValue, minimumSafeFence);
+#endif
+
+    FenceWaiter waiter(fence, minimumSafeFence);
+    Result waitResult = waiter.wait(INFINITE);
+    if (!waitResult.isOk()) {
+      IGL_LOG_ERROR("FrameManager: CRITICAL - Pipeline safety wait failed: %s; continuing but overload protection compromised\n",
+                    waitResult.message.c_str());
+      // Continue anyway - this is a safety net, not a hard requirement
+      // But future work should consider aborting here as well
+    }
+#ifdef IGL_DEBUG
+    else {
+      IGL_D3D12_LOG_VERBOSE("FrameManager: Safety wait completed (fence now=%llu)\n",
+                            fence->GetCompletedValue());
+    }
+#endif
+  }
+}
+
+bool FrameManager::waitForFrame(uint32_t frameIndex) {
+  auto* fence = context_.getFence();
+  const UINT64 frameFence = context_.getFrameContexts()[frameIndex].fenceValue;
+
+  // fenceValue == 0 means the frame slot has never been submitted
+  if (frameFence != 0 && fence->GetCompletedValue() < frameFence) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Waiting for frame %u (fence=%llu, current=%llu)\n",
+                          frameIndex, frameFence, fence->GetCompletedValue());
+#endif
+
+    FenceWaiter waiter(fence, frameFence);
+
+    // Try with 5-second timeout first (handles window drag scenarios)
+    Result waitResult = waiter.wait(5000);
+    if (!waitResult.isOk()) {
+      // Check if it's a timeout or other error
+      if (FenceWaiter::isTimeoutError(waitResult)) {
+        IGL_LOG_ERROR("FrameManager: Wait for frame %u fence %llu timed out after 5s; forcing infinite wait\n",
+                      frameIndex, frameFence);
+      } else {
+        IGL_LOG_ERROR("FrameManager: Wait for frame %u fence %llu failed: %s; forcing infinite wait\n",
+                      frameIndex, frameFence, waitResult.message.c_str());
+      }
+      // Fall back to infinite wait
+      waitResult = waiter.wait(INFINITE);
+      if (!waitResult.isOk()) {
+        IGL_LOG_ERROR("FrameManager: CRITICAL - Infinite wait for frame %u failed: %s; aborting frame advancement\n",
+                      frameIndex, waitResult.message.c_str());
+        return false; // Abort frame advancement - unsafe to proceed
+      }
+    }
+
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Frame %u resources now available (completed=%llu)\n",
+                          frameIndex, fence->GetCompletedValue());
+#endif
+  } else {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Frame %u resources already available (fence=%llu, completed=%llu)\n",
+                          frameIndex, frameFence, fence->GetCompletedValue());
+#endif
+  }
+  return true;
+}
+
+void FrameManager::resetAllocator(uint32_t frameIndex) {
+  auto* fence = context_.getFence();
+  auto& frame = context_.getFrameContexts()[frameIndex];
+  auto* allocator = frame.allocator.Get();
+
+  const UINT64 allocatorFence = frame.maxAllocatorFence;
+
+  if (allocatorFence == 0) {
+    // First time using this allocator
+    HRESULT hr = allocator->Reset();
+    if (FAILED(hr)) {
+      IGL_LOG_ERROR("FrameManager: Failed to reset frame %u allocator: 0x%08X\n",
+                    frameIndex, static_cast<unsigned>(hr));
+    }
+  } else {
+    // Verify GPU completed all command lists using this allocator
+    const UINT64 completedValue = fence->GetCompletedValue();
+
+    if (completedValue < allocatorFence) {
+      IGL_LOG_ERROR("FrameManager: ALLOCATOR SYNC ISSUE - GPU not done with all command lists "
+                    "(completed=%llu, need=%llu, cmdBufCount=%u). Waiting...\n",
+                    completedValue, allocatorFence, frame.commandBufferCount);
+
+      FenceWaiter waiter(fence, allocatorFence);
+      Result waitResult = waiter.wait(INFINITE);
+      if (!waitResult.isOk()) {
+        IGL_LOG_ERROR("FrameManager: CRITICAL - Allocator wait failed: %s; skipping unsafe allocator reset for frame %u\n",
+                      waitResult.message.c_str(), frameIndex);
+        // Do not reset allocator if GPU hasn't completed - would cause sync violations
+        return;
+      }
+      IGL_D3D12_LOG_VERBOSE("FrameManager: Allocator wait completed (fence now=%llu)\n",
+                            fence->GetCompletedValue());
+    }
+
+    // Reset allocator (safe now - GPU has completed all command lists)
+    HRESULT hr = allocator->Reset();
+    if (FAILED(hr)) {
+      IGL_LOG_ERROR("FrameManager: Failed to reset frame %u allocator: 0x%08X "
+                    "(maxFence=%llu, completed=%llu, cmdBufCount=%u)\n",
+                    frameIndex, static_cast<unsigned>(hr),
+                    allocatorFence, fence->GetCompletedValue(),
+                    frame.commandBufferCount);
+    } else {
+#ifdef IGL_DEBUG
+      IGL_D3D12_LOG_VERBOSE("FrameManager: Reset frame %u allocator (waited for %u command buffers, maxFence=%llu)\n",
+                            frameIndex, frame.commandBufferCount, allocatorFence);
+#endif
+    }
+
+#ifdef _DEBUG
+    if (SUCCEEDED(hr)) {
+      const UINT64 currentCompleted = fence->GetCompletedValue();
+      IGL_DEBUG_ASSERT(currentCompleted >= allocatorFence,
+                       "Allocator reset before GPU completed all command lists!");
+    }
+#endif
+  }
+
+  // Reset frame tracking
+  frame.fenceValue = 0;
+  frame.maxAllocatorFence = 0;
+  frame.commandBufferCount = 0;
+}
+
+void FrameManager::clearTransientResources(uint32_t frameIndex) {
+  auto& frame = context_.getFrameContexts()[frameIndex];
+
+  if (!frame.transientBuffers.empty()) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Clearing %zu transient buffers from frame %u (high-water=%zu)\n",
+                          frame.transientBuffers.size(), frameIndex, frame.transientBuffersHighWater);
+#endif
+    frame.transientBuffers.clear();
+  }
+
+  if (!frame.transientResources.empty()) {
+#ifdef IGL_DEBUG
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Releasing %zu transient D3D resources from frame %u (high-water=%zu)\n",
+                          frame.transientResources.size(), frameIndex, frame.transientResourcesHighWater);
+#endif
+    frame.transientResources.clear();
+  }
+}
+
+void FrameManager::resetDescriptorCounters(uint32_t frameIndex) {
+  auto& frame = context_.getFrameContexts()[frameIndex];
+
+  const uint32_t cbvSrvUavUsage = frame.nextCbvSrvUavDescriptor;
+  const uint32_t samplerUsage = frame.nextSamplerDescriptor;
+  const uint32_t peakCbvSrvUav = frame.peakCbvSrvUavUsage;
+  const uint32_t peakSampler = frame.peakSamplerUsage;
+
+  if (cbvSrvUavUsage > 0 || samplerUsage > 0) {
+#ifdef IGL_DEBUG
+    const float cbvSrvUavPercent = (static_cast<float>(cbvSrvUavUsage) / kCbvSrvUavHeapSize) * 100.0f;
+    const float samplerPercent = (static_cast<float>(samplerUsage) / kSamplerHeapSize) * 100.0f;
+    const float peakCbvSrvUavPercent = (static_cast<float>(peakCbvSrvUav) / kCbvSrvUavHeapSize) * 100.0f;
+    const float peakSamplerPercent = (static_cast<float>(peakSampler) / kSamplerHeapSize) * 100.0f;
+
+    IGL_D3D12_LOG_VERBOSE("FrameManager: Frame %u descriptor usage:\n"
+                          "  CBV/SRV/UAV: final=%u/%u (%.1f%%), peak=%u/%u (%.1f%%)\n"
+                          "  Samplers: final=%u/%u (%.1f%%), peak=%u/%u (%.1f%%)\n",
+                          frameIndex,
+                          cbvSrvUavUsage, kCbvSrvUavHeapSize, cbvSrvUavPercent,
+                          peakCbvSrvUav, kCbvSrvUavHeapSize, peakCbvSrvUavPercent,
+                          samplerUsage, kSamplerHeapSize, samplerPercent,
+                          peakSampler, kSamplerHeapSize, peakSamplerPercent);
+#endif
+  }
+
+  // Reset counters
+  frame.nextCbvSrvUavDescriptor = 0;
+  frame.nextSamplerDescriptor = 0;
+
+#ifdef IGL_DEBUG
+  IGL_D3D12_LOG_VERBOSE("FrameManager: Reset descriptor counters for frame %u to 0\n", frameIndex);
+#endif
+}
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12FrameManager.h b/src/igl/d3d12/D3D12FrameManager.h
new file mode 100644
index 0000000000..e416b8987b
--- /dev/null
+++ b/src/igl/d3d12/D3D12FrameManager.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) Meta Platforms, Inc.
and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+// NOTE(review): include target reconstructed; angle-bracket content was lost in extraction.
+#include <igl/d3d12/D3D12Headers.h>
+
+namespace igl::d3d12 {
+
+class D3D12Context;
+
+/**
+ * @brief Manages frame advancement, fence waiting, and resource cleanup
+ *
+ * Centralizes the complex logic for:
+ * - Waiting for next frame's resources to become available
+ * - Pipeline overload protection (ensuring max frames in flight)
+ * - Safe command allocator reset after GPU completion
+ * - Transient resource cleanup
+ * - Descriptor heap reset
+ */
+class FrameManager final {
+ public:
+  explicit FrameManager(D3D12Context& context) : context_(context) {}
+
+  /**
+   * @brief Advance to next frame with proper synchronization
+   *
+   * Handles:
+   * 1. Calculate next frame index
+   * 2. Wait for pipeline overload protection
+   * 3. Wait for next frame's resources
+   * 4. Update frame index
+   * 5. Reset allocator safely
+   * 6. Clear transient resources
+   * 7. Reset descriptor counters
+   *
+   * @param currentFenceValue The fence value just signaled
+   */
+  void advanceFrame(UINT64 currentFenceValue);
+
+ private:
+  /**
+   * @brief Wait for pipeline to avoid overload (max frames in flight)
+   */
+  void waitForPipelineSync(UINT64 currentFenceValue);
+
+  /**
+   * @brief Wait for specific frame's resources to become available
+   * @return true if wait succeeded, false if catastrophic wait failure
+   */
+  bool waitForFrame(uint32_t frameIndex);
+
+  /**
+   * @brief Safely reset command allocator after GPU completion
+   */
+  void resetAllocator(uint32_t frameIndex);
+
+  /**
+   * @brief Clear transient resources from completed frame
+   */
+  void clearTransientResources(uint32_t frameIndex);
+
+  /**
+   * @brief Log and reset descriptor usage counters
+   */
+  void resetDescriptorCounters(uint32_t frameIndex);
+
+  D3D12Context& context_; // borrowed; must outlive this object
+};
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12Headers.h b/src/igl/d3d12/D3D12Headers.h
new file mode 100644
index 0000000000..10e3b90f9f
--- /dev/null
+++ b/src/igl/d3d12/D3D12Headers.h
@@ -0,0 +1,230 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+#pragma once
+
+#ifndef IGL_D3D12_D3D12HEADERS_H
+#define IGL_D3D12_D3D12HEADERS_H
+
+// Windows headers
+#ifndef NOMINMAX
+#define NOMINMAX
+#endif
+// Don't use WIN32_LEAN_AND_MEAN - it excludes wrl/client.h
+// NOTE(review): every include target below was reconstructed (angle-bracket content
+// was lost in extraction) — TODO confirm exact paths against the repo.
+#include <Windows.h>
+
+// DirectX 12 headers
+#include <directx/d3d12.h>
+#include <dxgi1_6.h>
+
+// DirectX Shader Compiler
+#include <dxcapi.h>
+#include <d3dcompiler.h> // For D3DCompile (legacy HLSL compiler)
+
+// D3DX12 helper library (header-only)
+// Disable buggy helper classes that have preprocessor issues or require newer SDK
+#define D3DX12_NO_STATE_OBJECT_HELPERS
+#define D3DX12_NO_CHECK_FEATURE_SUPPORT_CLASS
+
+// Manually include only the d3dx12 headers we need (excluding incompatible ones)
+#include <directx/d3dx12_core.h>
+#include <directx/d3dx12_barriers.h>
+#include <directx/d3dx12_default.h>
+#include <directx/d3dx12_pipeline_state_stream.h>
+#include <directx/d3dx12_render_pass.h>
+#include <directx/d3dx12_root_signature.h>
+// Excluded: d3dx12_resource_helpers.h (requires property_format_table.h which needs newer SDK)
+// Excluded: d3dx12_property_format_table.h (requires newer SDK)
+#include <directx/d3dx12_state_object.h> // NOTE(review): reconstructed — verify this 7th include
+
+// ComPtr for COM object management
+// IGL's minimal ComPtr-like smart pointer implementing the subset of Microsoft::WRL::ComPtr
+// used by the D3D12 backend. We keep it custom to avoid preprocessor issues with <wrl/client.h>.
+//
+// WARNING: Do not include <wrl/client.h> before this header in the same translation unit.
+// Use igl::d3d12::ComPtr instead of Microsoft::WRL::ComPtr throughout the D3D12 backend.
+// (Including <wrl/client.h> after this header is technically safe but unsupported.)
+//
+// Supported operations: Get, GetAddressOf, ReleaseAndGetAddressOf, Reset, Attach, Detach,
+// As, TryAs, CopyTo, comparison operators, bool conversion, move/copy semantics.
+// Operations NOT implemented: ComPtrRef, operator&, CopyTo(REFIID, void**), and other WRL internals.
+
+// Compile-time guard: fail if <wrl/client.h> was already included before this header
+#if defined(__WRL_CLIENT_H__) || defined(_WRL_CLIENT_H_)
+#error "D3D12Headers.h must be included before <wrl/client.h>. The D3D12 backend uses igl::d3d12::ComPtr exclusively."
+#endif
+
+namespace igl {
+namespace d3d12 {
+  // Minimal intrusive-refcount smart pointer for COM interfaces (WRL-compatible subset).
+  template <typename T>
+  class ComPtr {
+   public:
+    ComPtr() noexcept : ptr_(nullptr) {}
+    ComPtr(T* ptr) noexcept : ptr_(ptr) { if (ptr_) ptr_->AddRef(); }
+
+    // Copy constructor - AddRef the pointer
+    ComPtr(const ComPtr& other) noexcept : ptr_(other.ptr_) {
+      if (ptr_) ptr_->AddRef();
+    }
+
+    // Copy assignment - AddRef new, Release old (AddRef first is self-assignment safe)
+    ComPtr& operator=(const ComPtr& other) noexcept {
+      if (this != &other) {
+        if (other.ptr_) other.ptr_->AddRef();
+        if (ptr_) ptr_->Release();
+        ptr_ = other.ptr_;
+      }
+      return *this;
+    }
+
+    // Move constructor
+    ComPtr(ComPtr&& other) noexcept : ptr_(other.ptr_) { other.ptr_ = nullptr; }
+
+    // Move assignment
+    ComPtr& operator=(ComPtr&& other) noexcept {
+      if (this != &other) {
+        if (ptr_) ptr_->Release();
+        ptr_ = other.ptr_;
+        other.ptr_ = nullptr;
+      }
+      return *this;
+    }
+
+    // Destructor
+    ~ComPtr() { if (ptr_) ptr_->Release(); }
+
+    // Accessor methods
+    T* Get() const noexcept { return ptr_; }
+    T** GetAddressOf() noexcept { return &ptr_; }
+    T* operator->() const noexcept { return ptr_; }
+
+    // Dereference operator (caller must ensure ptr_ != nullptr)
+    T& operator*() const noexcept { return *ptr_; }
+
+    // Comparison operators (operator< compares raw addresses for use in containers)
+    bool operator==(const ComPtr& other) const noexcept {
+      return ptr_ == other.ptr_;
+    }
+
+    bool operator!=(const ComPtr& other) const noexcept {
+      return ptr_ != other.ptr_;
+    }
+
+    bool operator<(const ComPtr& other) const noexcept {
+      return ptr_ < other.ptr_;
+    }
+
+    bool operator==(T* other) const noexcept {
+      return ptr_ == other;
+    }
+
+    bool operator!=(T* other) const noexcept {
+      return ptr_ != other;
+    }
+
+    // Boolean conversion for nullptr checks
+    explicit operator bool() const noexcept {
+      return ptr_ != nullptr;
+    }
+
+    // Reset to release current pointer
+    void Reset() noexcept {
+      if (ptr_) {
+        ptr_->Release();
+        ptr_ = nullptr;
+      }
+    }
+
+    // Attach a raw pointer without AddRef
+    void Attach(T* ptr) noexcept {
+      if (ptr_) {
+        ptr_->Release();
+      }
+      ptr_ = ptr;
+    }
+
+    // Detach and return raw pointer without Release
+    T* Detach() noexcept {
+      T* temp = ptr_;
+      ptr_ = nullptr;
+      return temp;
+    }
+
+    // ReleaseAndGetAddressOf - release current and return address for output
+    T** ReleaseAndGetAddressOf() noexcept {
+      Reset();
+      return &ptr_;
+    }
+
+    // As() - QueryInterface to another interface type
+    // Note: Unlike WRL's ComPtr::As, this implementation adds null-safety:
+    //   - Returns E_POINTER if 'other' is null
+    //   - Returns E_FAIL and resets 'other' if this->ptr_ is null (no object to query)
+    //   - Otherwise returns HRESULT from QueryInterface (S_OK or E_NOINTERFACE typically)
+    // WRL assumes non-null and relies only on QueryInterface return value.
+    // Rationale: Explicit null checks give more predictable behavior when pointers may be null.
+    // Callers should treat any non-S_OK result uniformly as "interface query failed".
+    template <typename U>
+    HRESULT As(ComPtr<U>* other) const noexcept {
+      if (!other) {
+        return E_POINTER;
+      }
+      if (!ptr_) {
+        other->Reset();
+        return E_FAIL; // No object to query
+      }
+      return ptr_->QueryInterface(__uuidof(U), reinterpret_cast<void**>(other->ReleaseAndGetAddressOf()));
+    }
+
+    // TryAs() - QueryInterface convenience method that returns ComPtr
+    // WARNING: Silently drops HRESULT; returns empty ComPtr on failure.
+    // For error-sensitive code, prefer the HRESULT-returning As(ComPtr* other) overload.
+    // Use case: Optional interface queries where failure is expected/acceptable and doesn't need diagnosis.
+    // In code paths that return igl::Result or log errors, prefer As() so you can propagate the HRESULT.
+    template <typename U>
+    ComPtr<U> TryAs() const noexcept {
+      ComPtr<U> result;
+      if (ptr_) {
+        ptr_->QueryInterface(__uuidof(U), reinterpret_cast<void**>(result.ReleaseAndGetAddressOf()));
+      }
+      return result;
+    }
+
+    // CopyTo - Copy pointer with AddRef
+    // Note: Returns S_OK even if ptr_ is null; *other will be set to nullptr (matches WRL)
+    HRESULT CopyTo(T** other) const noexcept {
+      if (!other) {
+        return E_POINTER;
+      }
+      *other = ptr_;
+      if (ptr_) {
+        ptr_->AddRef();
+      }
+      return S_OK;
+    }
+
+   private:
+    T* ptr_;
+  };
+} // namespace d3d12
+} // namespace igl
+
+// For convenience in D3D12 implementation files, you may add a local using declaration:
+//   namespace { template <typename T> using ComPtr = igl::d3d12::ComPtr<T>; }
+// This reduces verbosity without polluting the global or igl::d3d12 namespace.
+
+namespace Microsoft {
+namespace WRL {
+  // DO NOT define ComPtr here - it conflicts with <wrl/client.h>
+  // All D3D12 code should use igl::d3d12::ComPtr directly
+} // namespace WRL
+} // namespace Microsoft
+
+// Note: Library linking is handled by CMake (see src/igl/d3d12/CMakeLists.txt)
+// Required libraries: d3d12.lib, dxgi.lib, dxguid.lib, dxcompiler.lib, d3dcompiler.lib
+
+#endif // IGL_D3D12_D3D12HEADERS_H
diff --git a/src/igl/d3d12/D3D12ImmediateCommands.cpp b/src/igl/d3d12/D3D12ImmediateCommands.cpp
new file mode 100644
index 0000000000..266b02c236
--- /dev/null
+++ b/src/igl/d3d12/D3D12ImmediateCommands.cpp
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// NOTE(review): include targets reconstructed (4 in the original); confirm against the repo.
+#include <igl/d3d12/D3D12ImmediateCommands.h>
+
+#include <igl/Common.h>
+#include <igl/d3d12/D3D12FenceWaiter.h>
+#include <igl/d3d12/D3D12Headers.h>
+
+namespace igl::d3d12 {
+
+D3D12ImmediateCommands::D3D12ImmediateCommands(ID3D12Device* device,
+                                               ID3D12CommandQueue* queue,
+                                               ID3D12Fence* fence,
+                                               IFenceProvider* fenceProvider)
+    : device_(device), queue_(queue), fence_(fence), fenceProvider_(fenceProvider) {
+  IGL_DEBUG_ASSERT(device_);
+  IGL_DEBUG_ASSERT(queue_);
+  IGL_DEBUG_ASSERT(fence_);
+  IGL_DEBUG_ASSERT(fenceProvider_);
+
+  IGL_D3D12_LOG_VERBOSE("D3D12ImmediateCommands: Initialized (using shared fence timeline)\n");
+}
+
+D3D12ImmediateCommands::~D3D12ImmediateCommands() {
+  // Wait for all in-flight operations to complete
+  if (fence_) {
+    for (const auto& entry : inFlightAllocators_) {
+      if (fence_->GetCompletedValue() < entry.fenceValue) {
+        FenceWaiter waiter(fence_, entry.fenceValue);
+        Result waitResult = waiter.wait();
+        if (!waitResult.isOk()) {
+          IGL_LOG_ERROR("D3D12ImmediateCommands::~D3D12ImmediateCommands() - Fence wait failed during cleanup: %s\n",
+                        waitResult.message.c_str());
+        }
+      }
+    }
+  }
+
+  IGL_D3D12_LOG_VERBOSE("D3D12ImmediateCommands: Destroyed\n");
+}
+
+ID3D12GraphicsCommandList* D3D12ImmediateCommands::begin(Result* outResult) {
+  std::lock_guard<std::mutex> lock(poolMutex_);
+
+  // Reclaim completed allocators first
+  reclaimCompletedAllocators();
+
+  // Get or create an allocator
+  Result result = getOrCreateAllocator(&currentAllocator_);
+  if (!result.isOk()) {
+    Result::setResult(outResult, result);
+    return nullptr;
+  }
+
+  // Reset the allocator for reuse
+  HRESULT hr = currentAllocator_->Reset();
+  if (FAILED(hr)) {
+    Result::setResult(outResult,
+                      Result{Result::Code::RuntimeError, "Failed to reset command allocator"});
+    return nullptr;
+  }
+
+  // Create or reset command list
+  if (!cmdList_.Get()) {
+    hr = device_->CreateCommandList(0,
+                                    D3D12_COMMAND_LIST_TYPE_DIRECT,
+                                    currentAllocator_.Get(),
+                                    nullptr,
+                                    IID_PPV_ARGS(cmdList_.GetAddressOf()));
+    if (FAILED(hr)) {
+      Result::setResult(outResult,
+                        Result{Result::Code::RuntimeError, "Failed to create command list"});
+      return nullptr;
+    }
+  } else {
+    hr = cmdList_->Reset(currentAllocator_.Get(), nullptr);
+    if (FAILED(hr)) {
+      Result::setResult(outResult,
+                        Result{Result::Code::RuntimeError, "Failed to reset command list"});
+      return nullptr;
+    }
+  }
+
+  Result::setOk(outResult);
+  return cmdList_.Get();
+}
+
+uint64_t D3D12ImmediateCommands::submit(bool wait, Result* outResult) {
+  if (!cmdList_.Get()) {
+    Result::setResult(outResult, Result{Result::Code::RuntimeError, "No active command list"});
+    return 0;
+  }
+
+  // Close the command list
+  HRESULT hr = cmdList_->Close();
+  if (FAILED(hr)) {
+    Result::setResult(outResult,
+                      Result{Result::Code::RuntimeError, "Failed to close command list"});
+    return 0;
+  }
+
+  // Execute command list
+  ID3D12CommandList* lists[] = {cmdList_.Get()};
+  queue_->ExecuteCommandLists(1, lists);
+
+  // Get next fence value from shared timeline
+  const uint64_t fenceValue = fenceProvider_->getNextFenceValue();
+
+  // Signal fence on shared timeline
+  hr = queue_->Signal(fence_, fenceValue);
+  if (FAILED(hr)) {
+    // NOTE(review): on this path the current allocator is never moved to the in-flight
+    // list while the GPU may still be executing it — TODO confirm intended handling.
+    Result::setResult(outResult,
+                      Result{Result::Code::RuntimeError, "Failed to signal fence"});
+    return 0;
+  }
+
+  // Move current allocator to in-flight list
+  {
+    std::lock_guard<std::mutex> lock(poolMutex_);
+    inFlightAllocators_.push_back({currentAllocator_, fenceValue});
+    currentAllocator_.Reset();
+  }
+
+  // Wait if requested
+  if (wait) {
+    Result waitResult = waitForFence(fenceValue);
+    if (!waitResult.isOk()) {
+      Result::setResult(outResult, waitResult);
+      return 0; // Return 0 to signal failure
+    }
+  }
+
+  Result::setOk(outResult);
+  return fenceValue;
+}
+
+bool D3D12ImmediateCommands::isComplete(uint64_t fenceValue) const {
+  if (!fence_) {
+    return false;
+  }
+  return fence_->GetCompletedValue() >= fenceValue;
+}
+
+Result D3D12ImmediateCommands::waitForFence(uint64_t fenceValue) {
+  if (!fence_) {
+    return Result{Result::Code::RuntimeError, "Fence is null"};
+  }
+
+  if (isComplete(fenceValue)) {
+    return Result{};
+  }
+
+  FenceWaiter waiter(fence_, fenceValue);
+  return waiter.wait(); // Directly return the detailed Result
+}
+
+void D3D12ImmediateCommands::reclaimCompletedAllocators() {
+  // Note: Internal helper called by begin() with poolMutex_ already held
+  if (!fence_) {
+    return;
+  }
+
+  const uint64_t completedValue = fence_->GetCompletedValue();
+
+  // Move completed allocators from in-flight to available
+  auto it = inFlightAllocators_.begin();
+  while (it != inFlightAllocators_.end()) {
+    if (it->fenceValue <= completedValue) {
+      availableAllocators_.push_back({it->allocator, 0});
+      it = inFlightAllocators_.erase(it);
+    } else {
+      ++it;
+    }
+  }
+}
+
+Result D3D12ImmediateCommands::getOrCreateAllocator(
+    igl::d3d12::ComPtr<ID3D12CommandAllocator>* outAllocator) {
+  // Try to reuse an available allocator
+  if (!availableAllocators_.empty()) {
+    *outAllocator = availableAllocators_.back().allocator;
+    availableAllocators_.pop_back();
+    return Result{};
+  }
+
+  // Create new allocator
+  HRESULT hr = device_->CreateCommandAllocator(
+      D3D12_COMMAND_LIST_TYPE_DIRECT,
+      IID_PPV_ARGS(outAllocator->GetAddressOf()));
+
+  if (FAILED(hr)) {
+    return Result{Result::Code::RuntimeError, "Failed to create command allocator"};
+  }
+
+  IGL_D3D12_LOG_VERBOSE("D3D12ImmediateCommands: Created new command allocator (pool size: %zu)\n",
+                        availableAllocators_.size() + inFlightAllocators_.size() + 1);
+
+  return Result{};
+}
+
+} // namespace igl::d3d12
diff --git a/src/igl/d3d12/D3D12ImmediateCommands.h b/src/igl/d3d12/D3D12ImmediateCommands.h
new file mode 100644
index 0000000000..4753e1a389
--- /dev/null
+++ b/src/igl/d3d12/D3D12ImmediateCommands.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ *
+ * This source code is licensed under the MIT license found in the
+ * LICENSE file in the root directory of this source tree.
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief Interface for obtaining fence values from shared timeline + * + * This interface allows D3D12ImmediateCommands to participate in the + * device's shared fence timeline without managing its own counter. + */ +class IFenceProvider { +public: + virtual ~IFenceProvider() = default; + + /** + * @brief Get the next fence value from the shared timeline + * @return Monotonically increasing fence value + */ + virtual uint64_t getNextFenceValue() = 0; +}; + +/** + * @brief Centralized management of immediate copy operations + * + * Provides a pooled command allocator/list infrastructure for transient + * upload/readback operations, eliminating per-operation allocator creation + * and redundant GPU synchronization. + * + * Thread-safety: This class is NOT thread-safe for concurrent begin()/submit(). + * Only one begin()/submit() sequence may be active at a time. Multiple threads + * calling begin() concurrently will corrupt the shared command list. + * + * The allocator pool (reclaimCompletedAllocators) is internally synchronized. + * + * Inspired by Vulkan's VulkanImmediateCommands pattern. + */ +class D3D12ImmediateCommands { +public: + /** + * @brief Initialize the immediate commands infrastructure + * @param device D3D12 device for resource creation + * @param queue Command queue for submission + * @param fence Fence for completion tracking (shared with device) + * @param fenceProvider Provider for next fence values from shared timeline + */ + D3D12ImmediateCommands(ID3D12Device* device, + ID3D12CommandQueue* queue, + ID3D12Fence* fence, + IFenceProvider* fenceProvider); + + ~D3D12ImmediateCommands(); + + /** + * @brief Get command list for immediate copy operation + * + * Returns a ready-to-use command list from the pool. The command list + * is already reset and ready for recording. 
+ * + * @param outResult Optional result for error reporting + * @return Command list ready for recording, or nullptr on failure + */ + [[nodiscard]] ID3D12GraphicsCommandList* begin(Result* outResult = nullptr); + + /** + * @brief Submit command list and optionally wait for completion + * + * Closes, submits, and signals the fence. If wait=true, blocks until + * GPU completes the work. + * + * @param wait If true, block until GPU completes + * @param outResult Optional result for error reporting + * @return Fence value that will signal when work completes (0 on failure) + */ + [[nodiscard]] uint64_t submit(bool wait, Result* outResult = nullptr); + + /** + * @brief Check if a fence value has completed + * @param fenceValue Fence value to check + * @return true if GPU has completed this fence value + */ + [[nodiscard]] bool isComplete(uint64_t fenceValue) const; + + /** + * @brief Wait for a specific fence value to complete + * @param fenceValue Fence value to wait for + * @return Result indicating success or failure + */ + [[nodiscard]] Result waitForFence(uint64_t fenceValue); + +private: + /** + * @brief Reclaim completed command allocators back to pool + * + * Internal method called during begin() to recycle allocators. + * Must be called with poolMutex_ held. 
+ */ + void reclaimCompletedAllocators(); + struct AllocatorEntry { + igl::d3d12::ComPtr allocator; + uint64_t fenceValue = 0; // Fence value when this allocator was last used + }; + + ID3D12Device* device_ = nullptr; + ID3D12CommandQueue* queue_ = nullptr; + ID3D12Fence* fence_ = nullptr; // Shared fence (owned by Device) + IFenceProvider* fenceProvider_ = nullptr; // Provides fence values from shared timeline + + // Current command list for recording + igl::d3d12::ComPtr cmdList_; + + // Current allocator being used + igl::d3d12::ComPtr currentAllocator_; + + // Pool of available allocators + std::vector availableAllocators_; + + // Allocators in flight (waiting for GPU) + std::vector inFlightAllocators_; + + // Mutex for thread-safe allocator pool access + std::mutex poolMutex_; + + // Get or create an allocator from the pool + [[nodiscard]] Result getOrCreateAllocator( + igl::d3d12::ComPtr* outAllocator); +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineBuilder.cpp b/src/igl/d3d12/D3D12PipelineBuilder.cpp new file mode 100644 index 0000000000..b479335479 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineBuilder.cpp @@ -0,0 +1,706 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include + +namespace igl::d3d12 { + +namespace { + +// Helper function to calculate root signature cost in DWORDs +uint32_t calculateRootSignatureCostInternal(const D3D12_ROOT_SIGNATURE_DESC& desc) { + uint32_t totalCost = 0; + + for (uint32_t i = 0; i < desc.NumParameters; ++i) { + const auto& param = desc.pParameters[i]; + + switch (param.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + totalCost += param.Constants.Num32BitValues; + break; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + totalCost += 2; // Root descriptors cost 2 DWORDs + break; + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + totalCost += 1; // Descriptor tables cost 1 DWORD + break; + } + } + + return totalCost; +} + +// Helper to convert IGL blend factor to D3D12 +D3D12_BLEND toD3D12Blend(BlendFactor f) { + switch (f) { + case BlendFactor::Zero: return D3D12_BLEND_ZERO; + case BlendFactor::One: return D3D12_BLEND_ONE; + case BlendFactor::SrcColor: return D3D12_BLEND_SRC_COLOR; + case BlendFactor::OneMinusSrcColor: return D3D12_BLEND_INV_SRC_COLOR; + case BlendFactor::SrcAlpha: return D3D12_BLEND_SRC_ALPHA; + case BlendFactor::OneMinusSrcAlpha: return D3D12_BLEND_INV_SRC_ALPHA; + case BlendFactor::DstColor: return D3D12_BLEND_DEST_COLOR; + case BlendFactor::OneMinusDstColor: return D3D12_BLEND_INV_DEST_COLOR; + case BlendFactor::DstAlpha: return D3D12_BLEND_DEST_ALPHA; + case BlendFactor::OneMinusDstAlpha: return D3D12_BLEND_INV_DEST_ALPHA; + case BlendFactor::SrcAlphaSaturated: return D3D12_BLEND_SRC_ALPHA_SAT; + case BlendFactor::BlendColor: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendColor: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::BlendAlpha: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendAlpha: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::Src1Color: return D3D12_BLEND_SRC1_COLOR; + case 
BlendFactor::OneMinusSrc1Color: return D3D12_BLEND_INV_SRC1_COLOR; + case BlendFactor::Src1Alpha: return D3D12_BLEND_SRC1_ALPHA; + case BlendFactor::OneMinusSrc1Alpha: return D3D12_BLEND_INV_SRC1_ALPHA; + default: return D3D12_BLEND_ONE; + } +} + +// Helper to convert IGL blend operation to D3D12 +D3D12_BLEND_OP toD3D12BlendOp(BlendOp op) { + switch (op) { + case BlendOp::Add: return D3D12_BLEND_OP_ADD; + case BlendOp::Subtract: return D3D12_BLEND_OP_SUBTRACT; + case BlendOp::ReverseSubtract: return D3D12_BLEND_OP_REV_SUBTRACT; + case BlendOp::Min: return D3D12_BLEND_OP_MIN; + case BlendOp::Max: return D3D12_BLEND_OP_MAX; + default: return D3D12_BLEND_OP_ADD; + } +} + +} // anonymous namespace + +//============================================================================= +// D3D12GraphicsPipelineBuilder +//============================================================================= + +D3D12GraphicsPipelineBuilder::D3D12GraphicsPipelineBuilder() { + // Zero-initialize the descriptor + psoDesc_ = {}; + + // Set sensible defaults for rasterizer state + psoDesc_.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + psoDesc_.RasterizerState.FrontCounterClockwise = FALSE; + psoDesc_.RasterizerState.DepthBias = 0; + psoDesc_.RasterizerState.DepthBiasClamp = 0.0f; + psoDesc_.RasterizerState.SlopeScaledDepthBias = 0.0f; + psoDesc_.RasterizerState.DepthClipEnable = TRUE; + psoDesc_.RasterizerState.MultisampleEnable = FALSE; + psoDesc_.RasterizerState.AntialiasedLineEnable = FALSE; + psoDesc_.RasterizerState.ForcedSampleCount = 0; + psoDesc_.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + // Set sensible defaults for blend state + psoDesc_.BlendState.AlphaToCoverageEnable = FALSE; + psoDesc_.BlendState.IndependentBlendEnable = FALSE; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + psoDesc_.BlendState.RenderTarget[i].BlendEnable = FALSE; + 
psoDesc_.BlendState.RenderTarget[i].LogicOpEnable = FALSE; + psoDesc_.BlendState.RenderTarget[i].SrcBlend = D3D12_BLEND_ONE; + psoDesc_.BlendState.RenderTarget[i].DestBlend = D3D12_BLEND_ZERO; + psoDesc_.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; + psoDesc_.BlendState.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; + psoDesc_.BlendState.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_ZERO; + psoDesc_.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; + psoDesc_.BlendState.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + psoDesc_.BlendState.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + + // Set sensible defaults for depth-stencil state + psoDesc_.DepthStencilState.DepthEnable = FALSE; + psoDesc_.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + psoDesc_.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + psoDesc_.DepthStencilState.StencilEnable = FALSE; + psoDesc_.DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + psoDesc_.DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + psoDesc_.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc_.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc_.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + psoDesc_.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + psoDesc_.DepthStencilState.BackFace = psoDesc_.DepthStencilState.FrontFace; + + // Defaults for other fields + psoDesc_.SampleMask = UINT_MAX; + psoDesc_.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + psoDesc_.NumRenderTargets = 1; + psoDesc_.RTVFormats[0] = DXGI_FORMAT_R8G8B8A8_UNORM; + psoDesc_.DSVFormat = DXGI_FORMAT_UNKNOWN; + psoDesc_.SampleDesc.Count = 1; + psoDesc_.SampleDesc.Quality = 0; + psoDesc_.NodeMask = 0; + psoDesc_.CachedPSO.pCachedBlob = nullptr; + psoDesc_.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc_.Flags = 
D3D12_PIPELINE_STATE_FLAG_NONE; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::vertexShader( + const std::vector& bytecode) { + vsBytecode_ = bytecode; + psoDesc_.VS = {vsBytecode_.data(), vsBytecode_.size()}; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::pixelShader( + const std::vector& bytecode) { + psBytecode_ = bytecode; + psoDesc_.PS = {psBytecode_.data(), psBytecode_.size()}; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::shaderBytecode( + const std::vector& vs, + const std::vector& ps) { + return vertexShader(vs).pixelShader(ps); +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::vertexInputLayout( + const std::vector& elements) { + inputElements_ = elements; + psoDesc_.InputLayout = {inputElements_.data(), static_cast(inputElements_.size())}; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::blendState( + const D3D12_BLEND_DESC& desc) { + psoDesc_.BlendState = desc; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::blendStateForAttachment( + UINT attachmentIndex, + const RenderPipelineDesc::TargetDesc::ColorAttachment& attachment) { + if (attachmentIndex >= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT) { + return *this; + } + + // Enable independent blending when configuring attachments beyond RT0 + if (attachmentIndex > 0) { + psoDesc_.BlendState.IndependentBlendEnable = TRUE; + } + + auto& rt = psoDesc_.BlendState.RenderTarget[attachmentIndex]; + rt.BlendEnable = attachment.blendEnabled ? 
TRUE : FALSE; + rt.SrcBlend = toD3D12Blend(attachment.srcRGBBlendFactor); + rt.DestBlend = toD3D12Blend(attachment.dstRGBBlendFactor); + rt.BlendOp = toD3D12BlendOp(attachment.rgbBlendOp); + rt.SrcBlendAlpha = toD3D12Blend(attachment.srcAlphaBlendFactor); + rt.DestBlendAlpha = toD3D12Blend(attachment.dstAlphaBlendFactor); + rt.BlendOpAlpha = toD3D12BlendOp(attachment.alphaBlendOp); + + // Convert IGL color write mask to D3D12 + UINT8 writeMask = 0; + if (attachment.colorWriteMask & igl::kColorWriteBitsRed) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_RED; + } + if (attachment.colorWriteMask & igl::kColorWriteBitsGreen) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + } + if (attachment.colorWriteMask & igl::kColorWriteBitsBlue) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + } + if (attachment.colorWriteMask & igl::kColorWriteBitsAlpha) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + } + rt.RenderTargetWriteMask = writeMask; + + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::rasterizerState( + const D3D12_RASTERIZER_DESC& desc) { + psoDesc_.RasterizerState = desc; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::cullMode(CullMode mode) { + switch (mode) { + case CullMode::Back: + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + case CullMode::Front: + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CullMode::Disabled: + default: + psoDesc_.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + } + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::frontFaceWinding(WindingMode mode) { + psoDesc_.RasterizerState.FrontCounterClockwise = + (mode == WindingMode::CounterClockwise) ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::polygonFillMode(PolygonFillMode mode) { + psoDesc_.RasterizerState.FillMode = + (mode == PolygonFillMode::Line) ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthStencilState( + const D3D12_DEPTH_STENCIL_DESC& desc) { + psoDesc_.DepthStencilState = desc; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthTestEnabled(bool enabled) { + psoDesc_.DepthStencilState.DepthEnable = enabled ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthWriteEnabled(bool enabled) { + psoDesc_.DepthStencilState.DepthWriteMask = + enabled ? D3D12_DEPTH_WRITE_MASK_ALL : D3D12_DEPTH_WRITE_MASK_ZERO; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthCompareFunc( + D3D12_COMPARISON_FUNC func) { + psoDesc_.DepthStencilState.DepthFunc = func; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::renderTargetFormat( + UINT index, + DXGI_FORMAT format) { + if (index < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT) { + psoDesc_.RTVFormats[index] = format; + // Auto-update NumRenderTargets to include this slot + if (index + 1 > psoDesc_.NumRenderTargets) { + psoDesc_.NumRenderTargets = index + 1; + // Enable independent blending when using multiple render targets + psoDesc_.BlendState.IndependentBlendEnable = (psoDesc_.NumRenderTargets > 1) ? TRUE : FALSE; + } + } + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::renderTargetFormats( + const std::vector& formats) { + const UINT count = static_cast( + std::min(formats.size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc_.NumRenderTargets = count; + // Enable independent blending when using multiple render targets + psoDesc_.BlendState.IndependentBlendEnable = (count > 1) ? 
TRUE : FALSE; + for (UINT i = 0; i < count; ++i) { + psoDesc_.RTVFormats[i] = formats[i]; + } + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::depthStencilFormat( + DXGI_FORMAT format) { + psoDesc_.DSVFormat = format; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::numRenderTargets(UINT count) { + const UINT clamped = std::min(count, static_cast(D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc_.NumRenderTargets = clamped; + // Enable independent blending when using multiple render targets + psoDesc_.BlendState.IndependentBlendEnable = (clamped > 1) ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::sampleCount(UINT count) { + psoDesc_.SampleDesc.Count = count; + psoDesc_.RasterizerState.MultisampleEnable = (count > 1) ? TRUE : FALSE; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::sampleMask(UINT mask) { + psoDesc_.SampleMask = mask; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::primitiveTopologyType( + D3D12_PRIMITIVE_TOPOLOGY_TYPE type) { + psoDesc_.PrimitiveTopologyType = type; + return *this; +} + +D3D12GraphicsPipelineBuilder& D3D12GraphicsPipelineBuilder::streamOutput( + const D3D12_STREAM_OUTPUT_DESC& desc) { + psoDesc_.StreamOutput = desc; + return *this; +} + +Result D3D12GraphicsPipelineBuilder::build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName) { + if (!device) { + return Result(Result::Code::ArgumentNull, "Device is null"); + } + if (!rootSignature) { + return Result(Result::Code::ArgumentNull, "Root signature is null"); + } + if (!outPipelineState) { + return Result(Result::Code::ArgumentNull, "Output pipeline state is null"); + } + + // Initialize output to null for safety + *outPipelineState = nullptr; + + // Validate shader bytecode + if (psoDesc_.VS.BytecodeLength == 0) { + return 
Result(Result::Code::ArgumentInvalid, "Vertex shader bytecode is required"); + } + if (psoDesc_.PS.BytecodeLength == 0) { + return Result(Result::Code::ArgumentInvalid, "Pixel shader bytecode is required"); + } + + // Set root signature + psoDesc_.pRootSignature = rootSignature; + + // Create pipeline state + igl::d3d12::ComPtr pipelineState; + HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc_, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create graphics pipeline state. HRESULT: 0x%08X", + static_cast(hr)); + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Set debug name if provided + if (debugName && debugName[0] != '\0') { + std::wstring wideName(debugName, debugName + strlen(debugName)); + pipelineState->SetName(wideName.c_str()); + } + + *outPipelineState = pipelineState.Get(); + pipelineState->AddRef(); // Transfer ownership + return Result(); +} + +//============================================================================= +// D3D12ComputePipelineBuilder +//============================================================================= + +D3D12ComputePipelineBuilder::D3D12ComputePipelineBuilder() { + // Zero-initialize the descriptor + psoDesc_ = {}; + psoDesc_.NodeMask = 0; + psoDesc_.CachedPSO.pCachedBlob = nullptr; + psoDesc_.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc_.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; +} + +D3D12ComputePipelineBuilder& D3D12ComputePipelineBuilder::shaderBytecode( + const std::vector& bytecode) { + csBytecode_ = bytecode; + psoDesc_.CS.pShaderBytecode = csBytecode_.data(); + psoDesc_.CS.BytecodeLength = csBytecode_.size(); + return *this; +} + +Result D3D12ComputePipelineBuilder::build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName) { + if (!device) { + return Result(Result::Code::ArgumentNull, "Device is null"); + } + if 
(!rootSignature) { + return Result(Result::Code::ArgumentNull, "Root signature is null"); + } + if (!outPipelineState) { + return Result(Result::Code::ArgumentNull, "Output pipeline state is null"); + } + + // Initialize output to null for safety + *outPipelineState = nullptr; + + // Validate shader bytecode + if (psoDesc_.CS.BytecodeLength == 0) { + return Result(Result::Code::ArgumentInvalid, "Compute shader bytecode is required"); + } + + // Set root signature + psoDesc_.pRootSignature = rootSignature; + + // Create pipeline state + igl::d3d12::ComPtr pipelineState; + HRESULT hr = device->CreateComputePipelineState(&psoDesc_, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create compute pipeline state. HRESULT: 0x%08X", + static_cast(hr)); + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Set debug name if provided + if (debugName && debugName[0] != '\0') { + std::wstring wideName(debugName, debugName + strlen(debugName)); + pipelineState->SetName(wideName.c_str()); + } + + *outPipelineState = pipelineState.Get(); + pipelineState->AddRef(); // Transfer ownership + return Result(); +} + +//============================================================================= +// D3D12RootSignatureBuilder +//============================================================================= + +D3D12RootSignatureBuilder::D3D12RootSignatureBuilder() { + flags_ = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootConstants( + UINT shaderRegister, + UINT num32BitValues, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + param.param.Constants.ShaderRegister = shaderRegister; + param.param.Constants.RegisterSpace = registerSpace; + param.param.Constants.Num32BitValues = num32BitValues; + 
param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootCBV(UINT shaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_CBV; + param.param.Descriptor.ShaderRegister = shaderRegister; + param.param.Descriptor.RegisterSpace = registerSpace; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootSRV(UINT shaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_SRV; + param.param.Descriptor.ShaderRegister = shaderRegister; + param.param.Descriptor.RegisterSpace = registerSpace; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addRootUAV(UINT shaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_UAV; + param.param.Descriptor.ShaderRegister = shaderRegister; + param.param.Descriptor.RegisterSpace = registerSpace; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::addDescriptorTable( + D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace) { + RootParameter param{}; // Zero-initialize + param.param.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + param.param.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Create descriptor range + DescriptorRange range; + range.range.RangeType = rangeType; + range.range.NumDescriptors = numDescriptors; + 
range.range.BaseShaderRegister = baseShaderRegister; + range.range.RegisterSpace = registerSpace; + range.range.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + param.ranges.push_back(range); + rootParameters_.push_back(param); + return *this; +} + +D3D12RootSignatureBuilder& D3D12RootSignatureBuilder::flags(D3D12_ROOT_SIGNATURE_FLAGS flags) { + flags_ = flags; + return *this; +} + +Result D3D12RootSignatureBuilder::build(ID3D12Device* device, + const D3D12Context* context, + ID3D12RootSignature** outRootSignature) { + if (!device) { + return Result(Result::Code::ArgumentNull, "Device is null"); + } + if (!outRootSignature) { + return Result(Result::Code::ArgumentNull, "Output root signature is null"); + } + + // Initialize output to null for safety + *outRootSignature = nullptr; + + // Build arrays of D3D12_ROOT_PARAMETER and descriptor ranges + std::vector d3d12Params; + std::vector> allRanges; + + d3d12Params.reserve(rootParameters_.size()); + allRanges.reserve(rootParameters_.size()); + + for (auto& param : rootParameters_) { + if (param.param.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { + // Store ranges for this table + std::vector ranges; + ranges.reserve(param.ranges.size()); + for (auto& r : param.ranges) { + D3D12_DESCRIPTOR_RANGE range = r.range; + if (context) { + const UINT maxCount = getMaxDescriptorCount(context, range.RangeType); + if (range.NumDescriptors == UINT_MAX || range.NumDescriptors > maxCount) { + range.NumDescriptors = maxCount; + } + } + ranges.push_back(range); + } + allRanges.push_back(std::move(ranges)); + + // Update descriptor table to point to the ranges + D3D12_ROOT_PARAMETER d3d12Param = param.param; + d3d12Param.DescriptorTable.NumDescriptorRanges = + static_cast(allRanges.back().size()); + d3d12Param.DescriptorTable.pDescriptorRanges = allRanges.back().data(); + d3d12Params.push_back(d3d12Param); + } else { + // Not a descriptor table, just copy + 
d3d12Params.push_back(param.param); + } + } + + // Build root signature descriptor + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = static_cast(d3d12Params.size()); + rootSigDesc.pParameters = d3d12Params.data(); + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = flags_; + + // Validate cost (64 DWORD limit) + const uint32_t cost = calculateRootSignatureCostInternal(rootSigDesc); + if (cost > 64) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Root signature cost exceeds 64 DWORD limit: %u DWORDs", cost); + return Result(Result::Code::ArgumentOutOfRange, errorMsg); + } + + // Serialize root signature + igl::d3d12::ComPtr signature; + igl::d3d12::ComPtr error; + HRESULT hr = D3D12SerializeRootSignature(&rootSigDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.GetAddressOf(), + error.GetAddressOf()); + if (FAILED(hr)) { + const char* errorStr = error.Get() ? static_cast(error->GetBufferPointer()) + : "Unknown error"; + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to serialize root signature. HRESULT: 0x%08X, Error: %s", + static_cast(hr), errorStr); + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Create root signature + igl::d3d12::ComPtr rootSignature; + hr = device->CreateRootSignature(0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(rootSignature.GetAddressOf())); + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create root signature. 
HRESULT: 0x%08X", + static_cast(hr)); + return Result(Result::Code::RuntimeError, errorMsg); + } + + *outRootSignature = rootSignature.Get(); + rootSignature->AddRef(); // Transfer ownership + return Result(); +} + +UINT D3D12RootSignatureBuilder::getMaxDescriptorCount(const D3D12Context* context, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType) { + if (!context) { + return 128; // Conservative default + } + + const D3D12_RESOURCE_BINDING_TIER bindingTier = context->getResourceBindingTier(); + const bool needsBoundedRanges = (bindingTier == D3D12_RESOURCE_BINDING_TIER_1); + + if (!needsBoundedRanges) { + return UINT_MAX; // Unbounded + } + + // Conservative bounds for Tier 1 devices + switch (rangeType) { + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: + return 128; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: + return 64; + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: + return 64; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: + return 32; + default: + return 128; + } +} + +uint32_t D3D12RootSignatureBuilder::calculateCost() const { + // Build temporary descriptor for cost calculation + std::vector d3d12Params; + std::vector> allRanges; + + d3d12Params.reserve(rootParameters_.size()); + allRanges.reserve(rootParameters_.size()); + + for (const auto& param : rootParameters_) { + if (param.param.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE) { + std::vector ranges; + ranges.reserve(param.ranges.size()); + for (const auto& r : param.ranges) { + ranges.push_back(r.range); + } + allRanges.push_back(std::move(ranges)); + + D3D12_ROOT_PARAMETER d3d12Param = param.param; + d3d12Param.DescriptorTable.NumDescriptorRanges = + static_cast(allRanges.back().size()); + d3d12Param.DescriptorTable.pDescriptorRanges = allRanges.back().data(); + d3d12Params.push_back(d3d12Param); + } else { + d3d12Params.push_back(param.param); + } + } + + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = static_cast(d3d12Params.size()); + rootSigDesc.pParameters = d3d12Params.data(); + 
rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = flags_; + + return calculateRootSignatureCostInternal(rootSigDesc); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineBuilder.h b/src/igl/d3d12/D3D12PipelineBuilder.h new file mode 100644 index 0000000000..bb5e9c9467 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineBuilder.h @@ -0,0 +1,208 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +class D3D12Context; + +/** + * @brief Fluent builder for D3D12 graphics pipeline state objects + * + * Encapsulates the complex setup of D3D12_GRAPHICS_PIPELINE_STATE_DESC + * and provides a clean, chainable API similar to VulkanPipelineBuilder. + * + * Usage: + * D3D12GraphicsPipelineBuilder builder; + * builder.shaderBytecode(vsBytecode, psBytecode) + * .vertexInputLayout(inputElements) + * .blendState(blendDesc) + * .rasterizerState(rasterizerDesc) + * .depthStencilState(depthStencilDesc) + * .renderTargetFormats(rtvFormats) + * .sampleCount(sampleCount) + * .primitiveTopology(topology); + * auto result = builder.build(device, rootSignature, outPipelineState); + */ +class D3D12GraphicsPipelineBuilder final { + public: + D3D12GraphicsPipelineBuilder(); + ~D3D12GraphicsPipelineBuilder() = default; + + // Shader configuration + D3D12GraphicsPipelineBuilder& vertexShader(const std::vector& bytecode); + D3D12GraphicsPipelineBuilder& pixelShader(const std::vector& bytecode); + D3D12GraphicsPipelineBuilder& shaderBytecode(const std::vector& vs, + const std::vector& ps); + + // Vertex input layout + D3D12GraphicsPipelineBuilder& vertexInputLayout( + const std::vector& elements); + + // Blend state + D3D12GraphicsPipelineBuilder& blendState(const D3D12_BLEND_DESC& desc); + 
D3D12GraphicsPipelineBuilder& blendStateForAttachment( + UINT attachmentIndex, + const RenderPipelineDesc::TargetDesc::ColorAttachment& attachment); + + // Rasterizer state + D3D12GraphicsPipelineBuilder& rasterizerState(const D3D12_RASTERIZER_DESC& desc); + D3D12GraphicsPipelineBuilder& cullMode(CullMode mode); + D3D12GraphicsPipelineBuilder& frontFaceWinding(WindingMode mode); + D3D12GraphicsPipelineBuilder& polygonFillMode(PolygonFillMode mode); + + // Depth-stencil state + D3D12GraphicsPipelineBuilder& depthStencilState(const D3D12_DEPTH_STENCIL_DESC& desc); + D3D12GraphicsPipelineBuilder& depthTestEnabled(bool enabled); + D3D12GraphicsPipelineBuilder& depthWriteEnabled(bool enabled); + D3D12GraphicsPipelineBuilder& depthCompareFunc(D3D12_COMPARISON_FUNC func); + + // Render target configuration + D3D12GraphicsPipelineBuilder& renderTargetFormat(UINT index, DXGI_FORMAT format); + D3D12GraphicsPipelineBuilder& renderTargetFormats(const std::vector& formats); + D3D12GraphicsPipelineBuilder& depthStencilFormat(DXGI_FORMAT format); + D3D12GraphicsPipelineBuilder& numRenderTargets(UINT count); + + // Sample configuration + D3D12GraphicsPipelineBuilder& sampleCount(UINT count); + D3D12GraphicsPipelineBuilder& sampleMask(UINT mask); + + // Primitive topology + D3D12GraphicsPipelineBuilder& primitiveTopologyType(D3D12_PRIMITIVE_TOPOLOGY_TYPE type); + + // Stream output (optional) + D3D12GraphicsPipelineBuilder& streamOutput(const D3D12_STREAM_OUTPUT_DESC& desc); + + // Build the pipeline state object + [[nodiscard]] Result build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName = nullptr); + + // Get the current PSO desc (for inspection/debugging) + [[nodiscard]] const D3D12_GRAPHICS_PIPELINE_STATE_DESC& getDesc() const { + return psoDesc_; + } + + private: + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc_; + std::vector inputElements_; + std::vector vsBytecode_; + std::vector psBytecode_; +}; + 
+/** + * @brief Fluent builder for D3D12 compute pipeline state objects + * + * Simplified builder for compute shaders. + * + * Usage: + * D3D12ComputePipelineBuilder builder; + * builder.shaderBytecode(csBytecode); + * auto result = builder.build(device, rootSignature, outPipelineState); + */ +class D3D12ComputePipelineBuilder final { + public: + D3D12ComputePipelineBuilder(); + ~D3D12ComputePipelineBuilder() = default; + + // Shader configuration + D3D12ComputePipelineBuilder& shaderBytecode(const std::vector& bytecode); + + // Build the pipeline state object + [[nodiscard]] Result build(ID3D12Device* device, + ID3D12RootSignature* rootSignature, + ID3D12PipelineState** outPipelineState, + const char* debugName = nullptr); + + // Get the current PSO desc (for inspection/debugging) + [[nodiscard]] const D3D12_COMPUTE_PIPELINE_STATE_DESC& getDesc() const { + return psoDesc_; + } + + private: + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc_; + std::vector csBytecode_; +}; + +/** + * @brief Builder for D3D12 root signatures + * + * Encapsulates root signature creation with support for: + * - Root constants (push constants) + * - Root descriptors (CBVs) + * - Descriptor tables (CBV/SRV/UAV/Sampler) + * - Automatic cost calculation and validation + * + * Usage: + * D3D12RootSignatureBuilder builder; + * builder.addRootConstants(shaderRegister, num32BitValues) + * .addRootCBV(shaderRegister) + * .addDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, count, baseRegister); + * auto result = builder.build(device, context, outRootSignature); + */ +class D3D12RootSignatureBuilder final { + public: + D3D12RootSignatureBuilder(); + ~D3D12RootSignatureBuilder() = default; + + // Root constants (inline 32-bit values) + D3D12RootSignatureBuilder& addRootConstants(UINT shaderRegister, + UINT num32BitValues, + UINT registerSpace = 0); + + // Root descriptors (CBV/SRV/UAV accessed directly via GPU virtual address) + D3D12RootSignatureBuilder& addRootCBV(UINT shaderRegister, UINT 
registerSpace = 0); + D3D12RootSignatureBuilder& addRootSRV(UINT shaderRegister, UINT registerSpace = 0); + D3D12RootSignatureBuilder& addRootUAV(UINT shaderRegister, UINT registerSpace = 0); + + // Descriptor tables + D3D12RootSignatureBuilder& addDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE rangeType, + UINT numDescriptors, + UINT baseShaderRegister, + UINT registerSpace = 0); + + // Flags + D3D12RootSignatureBuilder& flags(D3D12_ROOT_SIGNATURE_FLAGS flags); + + // Build the root signature + // Note: context parameter is reserved for future tier-based validation. + // Currently, callers should use getMaxDescriptorCount() when configuring + // descriptor tables to ensure hardware compatibility. + [[nodiscard]] Result build(ID3D12Device* device, + const D3D12Context* context, + ID3D12RootSignature** outRootSignature); + + // Query limits from device - use this when calling addDescriptorTable() + // to ensure descriptor counts are within hardware tier limits + static UINT getMaxDescriptorCount(const D3D12Context* context, + D3D12_DESCRIPTOR_RANGE_TYPE rangeType); + + // Calculate root signature cost in DWORDs (must be <= 64) + [[nodiscard]] uint32_t calculateCost() const; + + private: + struct DescriptorRange { + D3D12_DESCRIPTOR_RANGE range; + }; + + struct RootParameter { + D3D12_ROOT_PARAMETER param; + std::vector ranges; // For descriptor tables + }; + + std::vector rootParameters_; + D3D12_ROOT_SIGNATURE_FLAGS flags_ = D3D12_ROOT_SIGNATURE_FLAG_NONE; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineCache.cpp b/src/igl/d3d12/D3D12PipelineCache.cpp new file mode 100644 index 0000000000..8ba02033c6 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineCache.cpp @@ -0,0 +1,212 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +ComPtr D3D12PipelineCache::createRootSignatureFromKey( + ID3D12Device* d3dDevice, + const D3D12RootSignatureKey& key, + D3D12_RESOURCE_BINDING_TIER bindingTier, + Result* IGL_NULLABLE outResult) const { + + if (!d3dDevice) { + Result::setResult(outResult, Result::Code::InvalidOperation, "D3D12 device is null"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Creating root signature from reflection key:\n"); + if (key.hasPushConstants) { + IGL_D3D12_LOG_VERBOSE(" Push constants: b%u (%u DWORDs)\n", + key.pushConstantSlot, key.pushConstantSize); + } + IGL_D3D12_LOG_VERBOSE(" CBV slots: %zu, SRV slots: %zu, UAV slots: %zu, Sampler slots: %zu\n", + key.usedCBVSlots.size(), key.usedSRVSlots.size(), + key.usedUAVSlots.size(), key.usedSamplerSlots.size()); + + // Determine if we need bounded ranges (Tier 1 hardware) + const bool needsBoundedRanges = (bindingTier == D3D12_RESOURCE_BINDING_TIER_1); + const UINT srvBound = needsBoundedRanges ? 128 : UINT_MAX; + const UINT samplerBound = needsBoundedRanges ? 32 : UINT_MAX; + const UINT uavBound = needsBoundedRanges ? 
8 : UINT_MAX; + + // Build descriptor ranges dynamically - only create ranges for resource types the shader uses + // The ranges must remain stable (no reallocation) since root parameters will point to them + std::vector descriptorRanges; + descriptorRanges.reserve(4); // Maximum: CBV, SRV, Sampler, UAV + + // Track which descriptor range index corresponds to each resource type + size_t cbvRangeIndex = SIZE_MAX; + size_t srvRangeIndex = SIZE_MAX; + size_t samplerRangeIndex = SIZE_MAX; + size_t uavRangeIndex = SIZE_MAX; + + // CBV descriptor table (only if shader uses CBVs) + if (!key.usedCBVSlots.empty()) { + cbvRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxCBVSlot = key.maxCBVSlot; + UINT numCBVs = maxCBVSlot + 1; + + D3D12_DESCRIPTOR_RANGE cbvRange = {}; + cbvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + cbvRange.NumDescriptors = numCBVs; + cbvRange.BaseShaderRegister = 0; + cbvRange.RegisterSpace = 0; + cbvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(cbvRange); + } + + // SRV descriptor table (only if shader uses SRVs) + if (!key.usedSRVSlots.empty()) { + srvRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxSRVSlot = key.maxSRVSlot; + UINT numSRVs = maxSRVSlot + 1; + + D3D12_DESCRIPTOR_RANGE srvRange = {}; + srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + srvRange.NumDescriptors = numSRVs; + srvRange.BaseShaderRegister = 0; + srvRange.RegisterSpace = 0; + srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(srvRange); + } + + // Sampler descriptor table (only if shader uses samplers) + if (!key.usedSamplerSlots.empty()) { + samplerRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor 
tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxSamplerSlot = key.maxSamplerSlot; + UINT numSamplers = maxSamplerSlot + 1; + + D3D12_DESCRIPTOR_RANGE samplerRange = {}; + samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + samplerRange.NumDescriptors = numSamplers; + samplerRange.BaseShaderRegister = 0; + samplerRange.RegisterSpace = 0; + samplerRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(samplerRange); + } + + // UAV descriptor table (only if shader uses UAVs) + if (!key.usedUAVSlots.empty()) { + uavRangeIndex = descriptorRanges.size(); + + // D3D12 descriptor tables must start at register 0 + // Calculate range from 0 to max slot (includes unused slots) + UINT maxUAVSlot = key.maxUAVSlot; + UINT numUAVs = maxUAVSlot + 1; + + D3D12_DESCRIPTOR_RANGE uavRange = {}; + uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + uavRange.NumDescriptors = numUAVs; + uavRange.BaseShaderRegister = 0; + uavRange.RegisterSpace = 0; + uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + descriptorRanges.push_back(uavRange); + } + + // Build root parameters dynamically based on shader reflection (Vulkan approach) + // Only include what the shader actually declares - no hardcoded assumptions + // Order: Push constants, CBV table, SRV table, Sampler table, UAV table + std::vector rootParams; + + // Track which root parameter index corresponds to each resource type + UINT pushConstantRootParamIndex = UINT_MAX; + UINT cbvTableRootParamIndex = UINT_MAX; + UINT srvTableRootParamIndex = UINT_MAX; + UINT samplerTableRootParamIndex = UINT_MAX; + UINT uavTableRootParamIndex = UINT_MAX; + + // Add push constants if shader uses them (always first if present) + if (key.hasPushConstants) { + pushConstantRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER pushConstParam = {}; + pushConstParam.ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + pushConstParam.Constants.ShaderRegister = key.pushConstantSlot; + pushConstParam.Constants.RegisterSpace = 0; + pushConstParam.Constants.Num32BitValues = key.pushConstantSize; + pushConstParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(pushConstParam); + } + + // Add CBV descriptor table if shader uses any CBV slots + if (!key.usedCBVSlots.empty() && cbvRangeIndex != SIZE_MAX) { + cbvTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER cbvTableParam = {}; + cbvTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + cbvTableParam.DescriptorTable.NumDescriptorRanges = 1; + cbvTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[cbvRangeIndex]; + cbvTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(cbvTableParam); + } + + // Add SRV descriptor table if shader uses any SRV slots + if (!key.usedSRVSlots.empty() && srvRangeIndex != SIZE_MAX) { + srvTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER srvTableParam = {}; + srvTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + srvTableParam.DescriptorTable.NumDescriptorRanges = 1; + srvTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[srvRangeIndex]; + srvTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(srvTableParam); + } + + // Add Sampler descriptor table if shader uses any sampler slots + if (!key.usedSamplerSlots.empty() && samplerRangeIndex != SIZE_MAX) { + samplerTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER samplerTableParam = {}; + samplerTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + samplerTableParam.DescriptorTable.NumDescriptorRanges = 1; + samplerTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[samplerRangeIndex]; + samplerTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + 
rootParams.push_back(samplerTableParam); + } + + // Add UAV descriptor table if shader uses any UAV slots + if (!key.usedUAVSlots.empty() && uavRangeIndex != SIZE_MAX) { + uavTableRootParamIndex = static_cast(rootParams.size()); + + D3D12_ROOT_PARAMETER uavTableParam = {}; + uavTableParam.ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + uavTableParam.DescriptorTable.NumDescriptorRanges = 1; + uavTableParam.DescriptorTable.pDescriptorRanges = &descriptorRanges[uavRangeIndex]; + uavTableParam.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + rootParams.push_back(uavTableParam); + } + + // Create root signature desc + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = static_cast(rootParams.size()); + rootSigDesc.pParameters = rootParams.data(); + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = key.flags; + + IGL_D3D12_LOG_VERBOSE(" Root signature has %u parameters\n", rootSigDesc.NumParameters); + + // Use existing caching infrastructure + return getOrCreateRootSignature(d3dDevice, rootSigDesc, outResult); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PipelineCache.h b/src/igl/d3d12/D3D12PipelineCache.h new file mode 100644 index 0000000000..7888e620f3 --- /dev/null +++ b/src/igl/d3d12/D3D12PipelineCache.h @@ -0,0 +1,471 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class D3D12PipelineCache { + public: + D3D12PipelineCache() = default; + + void clear(); + + private: + size_t hashRootSignature(const D3D12_ROOT_SIGNATURE_DESC& desc) const; + ComPtr getOrCreateRootSignature( + ID3D12Device* d3dDevice, + const D3D12_ROOT_SIGNATURE_DESC& desc, + Result* IGL_NULLABLE outResult) const; + + // Create root signature from reflection-based key (for dynamic binding) + ComPtr createRootSignatureFromKey( + ID3D12Device* d3dDevice, + const struct D3D12RootSignatureKey& key, + D3D12_RESOURCE_BINDING_TIER bindingTier, + Result* IGL_NULLABLE outResult) const; + + size_t hashRenderPipelineDesc(const RenderPipelineDesc& desc) const; + size_t hashComputePipelineDesc(const ComputePipelineDesc& desc) const; + + mutable std::unordered_map> graphicsPSOCache_; + mutable std::unordered_map> computePSOCache_; + mutable std::mutex psoCacheMutex_; + mutable size_t graphicsPSOCacheHits_ = 0; + mutable size_t graphicsPSOCacheMisses_ = 0; + mutable size_t computePSOCacheHits_ = 0; + mutable size_t computePSOCacheMisses_ = 0; + + mutable std::unordered_map> rootSignatureCache_; + mutable std::mutex rootSignatureCacheMutex_; + mutable size_t rootSignatureCacheHits_ = 0; + mutable size_t rootSignatureCacheMisses_ = 0; + + std::vector mipmapVSBytecode_; + std::vector mipmapPSBytecode_; + ComPtr mipmapRootSignature_; + bool mipmapShadersAvailable_ = false; + + friend class Device; +}; + +inline void D3D12PipelineCache::clear() { + { + std::lock_guard lock(psoCacheMutex_); + graphicsPSOCache_.clear(); + computePSOCache_.clear(); + graphicsPSOCacheHits_ = 0; + graphicsPSOCacheMisses_ = 0; + computePSOCacheHits_ = 0; + computePSOCacheMisses_ = 0; + } + { + std::lock_guard lock(rootSignatureCacheMutex_); + rootSignatureCache_.clear(); + rootSignatureCacheHits_ = 0; + rootSignatureCacheMisses_ = 0; + } + 
mipmapVSBytecode_.clear(); + mipmapPSBytecode_.clear(); + mipmapRootSignature_.Reset(); + mipmapShadersAvailable_ = false; +} + +inline size_t D3D12PipelineCache::hashRootSignature( + const D3D12_ROOT_SIGNATURE_DESC& desc) const { + size_t hash = 0; + + hashCombine(hash, static_cast(desc.Flags)); + hashCombine(hash, static_cast(desc.NumParameters)); + + for (UINT i = 0; i < desc.NumParameters; ++i) { + const auto& param = desc.pParameters[i]; + + hashCombine(hash, static_cast(param.ParameterType)); + hashCombine(hash, static_cast(param.ShaderVisibility)); + + switch (param.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: { + hashCombine(hash, + static_cast(param.DescriptorTable.NumDescriptorRanges)); + + for (UINT j = 0; j < param.DescriptorTable.NumDescriptorRanges; ++j) { + const auto& range = param.DescriptorTable.pDescriptorRanges[j]; + hashCombine(hash, static_cast(range.RangeType)); + hashCombine(hash, static_cast(range.NumDescriptors)); + hashCombine(hash, static_cast(range.BaseShaderRegister)); + hashCombine(hash, static_cast(range.RegisterSpace)); + hashCombine( + hash, + static_cast(range.OffsetInDescriptorsFromTableStart)); + } + break; + } + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: { + hashCombine(hash, static_cast(param.Constants.ShaderRegister)); + hashCombine(hash, static_cast(param.Constants.RegisterSpace)); + hashCombine(hash, static_cast(param.Constants.Num32BitValues)); + break; + } + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: { + hashCombine(hash, static_cast(param.Descriptor.ShaderRegister)); + hashCombine(hash, static_cast(param.Descriptor.RegisterSpace)); + break; + } + } + } + + hashCombine(hash, static_cast(desc.NumStaticSamplers)); + for (UINT i = 0; i < desc.NumStaticSamplers; ++i) { + const auto& sampler = desc.pStaticSamplers[i]; + hashCombine(hash, static_cast(sampler.Filter)); + hashCombine(hash, static_cast(sampler.AddressU)); + 
hashCombine(hash, static_cast(sampler.AddressV)); + hashCombine(hash, static_cast(sampler.AddressW)); + hashCombine(hash, static_cast(sampler.ComparisonFunc)); + hashCombine(hash, static_cast(sampler.ShaderRegister)); + hashCombine(hash, static_cast(sampler.RegisterSpace)); + hashCombine(hash, static_cast(sampler.ShaderVisibility)); + } + + return hash; +} + +inline ComPtr D3D12PipelineCache::getOrCreateRootSignature( + ID3D12Device* d3dDevice, + const D3D12_ROOT_SIGNATURE_DESC& desc, + Result* IGL_NULLABLE outResult) const { + const size_t hash = hashRootSignature(desc); + + { + std::lock_guard lock(rootSignatureCacheMutex_); + auto it = rootSignatureCache_.find(hash); + if (it != rootSignatureCache_.end()) { + rootSignatureCacheHits_++; + IGL_D3D12_LOG_VERBOSE( + " Root signature cache HIT (hash=0x%zx, hits=%zu, misses=%zu)\n", + hash, + rootSignatureCacheHits_, + rootSignatureCacheMisses_); + return it->second; + } + } + + rootSignatureCacheMisses_++; + IGL_D3D12_LOG_VERBOSE( + " Root signature cache MISS (hash=0x%zx, hits=%zu, misses=%zu)\n", + hash, + rootSignatureCacheHits_, + rootSignatureCacheMisses_); + + if (!d3dDevice) { + Result::setResult(outResult, + Result::Code::InvalidOperation, + "D3D12 device is null"); + return nullptr; + } + + ComPtr signature; + ComPtr error; + + // Query highest supported root signature version for this device. + D3D12_FEATURE_DATA_ROOT_SIGNATURE featureData{}; + featureData.HighestVersion = D3D_ROOT_SIGNATURE_VERSION_1_1; + HRESULT featureHr = d3dDevice->CheckFeatureSupport( + D3D12_FEATURE_ROOT_SIGNATURE, &featureData, sizeof(featureData)); + D3D_ROOT_SIGNATURE_VERSION highestVersion = + SUCCEEDED(featureHr) ? featureData.HighestVersion : D3D_ROOT_SIGNATURE_VERSION_1_0; + + HRESULT hr = E_FAIL; + + if (highestVersion >= D3D_ROOT_SIGNATURE_VERSION_1_1) { + // Use versioned root signature (1.1) when available and preserve NumDescriptors + // as-is (Tier 2/3 unbounded ranges are expressed via UINT_MAX). 
+ std::vector params1; + std::vector> rangesPerParam; + params1.reserve(desc.NumParameters); + rangesPerParam.reserve(desc.NumParameters); + + for (UINT i = 0; i < desc.NumParameters; ++i) { + const D3D12_ROOT_PARAMETER& srcParam = desc.pParameters[i]; + D3D12_ROOT_PARAMETER1 dstParam{}; + dstParam.ParameterType = srcParam.ParameterType; + dstParam.ShaderVisibility = srcParam.ShaderVisibility; + + switch (srcParam.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + dstParam.Constants.ShaderRegister = srcParam.Constants.ShaderRegister; + dstParam.Constants.RegisterSpace = srcParam.Constants.RegisterSpace; + dstParam.Constants.Num32BitValues = srcParam.Constants.Num32BitValues; + break; + case D3D12_ROOT_PARAMETER_TYPE_CBV: + case D3D12_ROOT_PARAMETER_TYPE_SRV: + case D3D12_ROOT_PARAMETER_TYPE_UAV: + dstParam.Descriptor.ShaderRegister = srcParam.Descriptor.ShaderRegister; + dstParam.Descriptor.RegisterSpace = srcParam.Descriptor.RegisterSpace; + dstParam.Descriptor.Flags = D3D12_ROOT_DESCRIPTOR_FLAG_NONE; + break; + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: { + const UINT numRanges = srcParam.DescriptorTable.NumDescriptorRanges; + const D3D12_DESCRIPTOR_RANGE* srcRanges = srcParam.DescriptorTable.pDescriptorRanges; + + if (numRanges > 0 && srcRanges) { + rangesPerParam.emplace_back(); + auto& dstRanges = rangesPerParam.back(); + dstRanges.resize(numRanges); + + for (UINT j = 0; j < numRanges; ++j) { + const D3D12_DESCRIPTOR_RANGE& srcRange = srcRanges[j]; + D3D12_DESCRIPTOR_RANGE1 dstRange{}; + dstRange.RangeType = srcRange.RangeType; + dstRange.NumDescriptors = srcRange.NumDescriptors; + dstRange.BaseShaderRegister = srcRange.BaseShaderRegister; + dstRange.RegisterSpace = srcRange.RegisterSpace; + // Mark descriptor ranges as DESCRIPTORS_VOLATILE to match the dynamic + // per-draw descriptor update pattern used by D3D12ResourcesBinder. 
+ // This avoids D3D12 WARNING/ERROR ID=646, which requires all descriptors + // in STATIC ranges to be initialized before binding the table. + dstRange.Flags = D3D12_DESCRIPTOR_RANGE_FLAG_DESCRIPTORS_VOLATILE; + dstRange.OffsetInDescriptorsFromTableStart = + srcRange.OffsetInDescriptorsFromTableStart; + dstRanges[j] = dstRange; + } + + dstParam.DescriptorTable.NumDescriptorRanges = numRanges; + dstParam.DescriptorTable.pDescriptorRanges = dstRanges.data(); + } else { + dstParam.DescriptorTable.NumDescriptorRanges = 0; + dstParam.DescriptorTable.pDescriptorRanges = nullptr; + } + break; + } + } + + params1.push_back(dstParam); + } + + D3D12_ROOT_SIGNATURE_DESC1 desc1{}; + desc1.NumParameters = static_cast(params1.size()); + desc1.pParameters = params1.data(); + desc1.NumStaticSamplers = desc.NumStaticSamplers; + desc1.pStaticSamplers = desc.pStaticSamplers; + desc1.Flags = desc.Flags; + + D3D12_VERSIONED_ROOT_SIGNATURE_DESC versionedDesc{}; + versionedDesc.Version = D3D_ROOT_SIGNATURE_VERSION_1_1; + versionedDesc.Desc_1_1 = desc1; + + IGL_D3D12_LOG_VERBOSE(" Serializing root signature (version 1.1)...\n"); + hr = D3D12SerializeVersionedRootSignature( + &versionedDesc, signature.GetAddressOf(), error.GetAddressOf()); + } else { + // Device only supports Root Signature 1.0. Clamp any unbounded descriptor ranges + // (NumDescriptors == UINT_MAX) to a large but finite conservative value so that + // the serialized root signature is portable across RS 1.0 implementations. + constexpr UINT kMaxDescriptorsFallback = 16384; // Sufficient for current heap sizes. 
+ + std::vector params; + std::vector> rangesPerParam; + params.reserve(desc.NumParameters); + rangesPerParam.reserve(desc.NumParameters); + + for (UINT i = 0; i < desc.NumParameters; ++i) { + const D3D12_ROOT_PARAMETER& srcParam = desc.pParameters[i]; + D3D12_ROOT_PARAMETER dstParam = srcParam; + + if (srcParam.ParameterType == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE && + srcParam.DescriptorTable.NumDescriptorRanges > 0 && + srcParam.DescriptorTable.pDescriptorRanges) { + const UINT numRanges = srcParam.DescriptorTable.NumDescriptorRanges; + const D3D12_DESCRIPTOR_RANGE* srcRanges = srcParam.DescriptorTable.pDescriptorRanges; + + rangesPerParam.emplace_back(); + auto& dstRanges = rangesPerParam.back(); + dstRanges.resize(numRanges); + + for (UINT j = 0; j < numRanges; ++j) { + dstRanges[j] = srcRanges[j]; + if (dstRanges[j].NumDescriptors == UINT_MAX) { + dstRanges[j].NumDescriptors = kMaxDescriptorsFallback; + } + } + + dstParam.DescriptorTable.NumDescriptorRanges = numRanges; + dstParam.DescriptorTable.pDescriptorRanges = dstRanges.data(); + } + + params.push_back(dstParam); + } + + D3D12_ROOT_SIGNATURE_DESC adjustedDesc{}; + adjustedDesc.NumParameters = static_cast(params.size()); + adjustedDesc.pParameters = params.data(); + adjustedDesc.NumStaticSamplers = desc.NumStaticSamplers; + adjustedDesc.pStaticSamplers = desc.pStaticSamplers; + adjustedDesc.Flags = desc.Flags; + + IGL_D3D12_LOG_VERBOSE(" Serializing root signature (version 1.0, bounded ranges)...\n"); + hr = D3D12SerializeRootSignature( + &adjustedDesc, + D3D_ROOT_SIGNATURE_VERSION_1, + signature.GetAddressOf(), + error.GetAddressOf()); + } + + if (FAILED(hr)) { + if (error.Get()) { + const char* errorMsg = + static_cast(error->GetBufferPointer()); + IGL_LOG_ERROR("Root signature serialization error: %s\n", errorMsg); + } + Result::setResult(outResult, + Result::Code::RuntimeError, + "Failed to serialize root signature"); + return nullptr; + } + + ComPtr rootSignature; + hr = 
d3dDevice->CreateRootSignature(0, + signature->GetBufferPointer(), + signature->GetBufferSize(), + IID_PPV_ARGS(rootSignature.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR( + " CreateRootSignature FAILED: 0x%08X\n", + static_cast(hr)); + Result::setResult(outResult, + Result::Code::RuntimeError, + "Failed to create root signature"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Root signature created successfully\n"); + + { + std::lock_guard lock(rootSignatureCacheMutex_); + rootSignatureCache_[hash] = rootSignature; + } + + return rootSignature; +} + +inline size_t D3D12PipelineCache::hashRenderPipelineDesc( + const RenderPipelineDesc& desc) const { + size_t hash = 0; + + if (desc.shaderStages) { + auto* vertexModule = + static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = + static_cast(desc.shaderStages->getFragmentModule().get()); + + if (vertexModule) { + const auto& vsBytecode = vertexModule->getBytecode(); + hashCombine(hash, vsBytecode.size()); + size_t bytesToHash = std::min(256, vsBytecode.size()); + for (size_t i = 0; i < bytesToHash; i += 8) { + hashCombine(hash, static_cast(vsBytecode[i])); + } + } + + if (fragmentModule) { + const auto& psBytecode = fragmentModule->getBytecode(); + hashCombine(hash, psBytecode.size()); + size_t bytesToHash = std::min(256, psBytecode.size()); + for (size_t i = 0; i < bytesToHash; i += 8) { + hashCombine(hash, static_cast(psBytecode[i])); + } + } + } + + if (desc.vertexInputState) { + auto* d3d12VertexInput = + static_cast(desc.vertexInputState.get()); + const auto& vertexDesc = d3d12VertexInput->getDesc(); + hashCombine(hash, vertexDesc.numAttributes); + for (size_t i = 0; i < vertexDesc.numAttributes; ++i) { + hashCombine(hash, + static_cast(vertexDesc.attributes[i].format)); + hashCombine(hash, vertexDesc.attributes[i].offset); + hashCombine(hash, vertexDesc.attributes[i].bufferIndex); + hashCombine( + hash, + std::hash{}(vertexDesc.attributes[i].name)); + } + } + + 
hashCombine(hash, desc.targetDesc.colorAttachments.size()); + for (const auto& att : desc.targetDesc.colorAttachments) { + hashCombine(hash, static_cast(att.textureFormat)); + } + hashCombine(hash, + static_cast(desc.targetDesc.depthAttachmentFormat)); + hashCombine(hash, + static_cast(desc.targetDesc.stencilAttachmentFormat)); + + for (const auto& att : desc.targetDesc.colorAttachments) { + hashCombine(hash, att.blendEnabled ? 1 : 0); + hashCombine(hash, static_cast(att.srcRGBBlendFactor)); + hashCombine(hash, static_cast(att.dstRGBBlendFactor)); + hashCombine(hash, static_cast(att.rgbBlendOp)); + hashCombine(hash, static_cast(att.srcAlphaBlendFactor)); + hashCombine(hash, static_cast(att.dstAlphaBlendFactor)); + hashCombine(hash, static_cast(att.alphaBlendOp)); + hashCombine(hash, static_cast(att.colorWriteMask)); + } + + hashCombine(hash, static_cast(desc.cullMode)); + hashCombine(hash, static_cast(desc.frontFaceWinding)); + hashCombine(hash, static_cast(desc.polygonFillMode)); + + hashCombine(hash, static_cast(desc.topology)); + + hashCombine(hash, desc.sampleCount); + + return hash; +} + +inline size_t D3D12PipelineCache::hashComputePipelineDesc( + const ComputePipelineDesc& desc) const { + size_t hash = 0; + + if (desc.shaderStages) { + auto* computeModule = + static_cast(desc.shaderStages->getComputeModule().get()); + + if (computeModule) { + const auto& csBytecode = computeModule->getBytecode(); + hashCombine(hash, csBytecode.size()); + size_t bytesToHash = std::min(256, csBytecode.size()); + for (size_t i = 0; i < bytesToHash; i += 8) { + hashCombine(hash, static_cast(csBytecode[i])); + } + } + } + + for (char c : desc.debugName) { + hashCombine(hash, static_cast(c)); + } + + return hash; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PresentManager.cpp b/src/igl/d3d12/D3D12PresentManager.cpp new file mode 100644 index 0000000000..638bff867f --- /dev/null +++ b/src/igl/d3d12/D3D12PresentManager.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (c) 
Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +namespace igl::d3d12 { + +bool PresentManager::present() { + auto* swapChain = context_.getSwapChain(); + if (!swapChain) { + return true; // No swapchain, nothing to present + } + + auto* device = context_.getDevice(); + + // Check device status before presenting + if (!checkDeviceStatus("before Present")) { + return false; + } + + // Configure VSync via environment variable + UINT syncInterval = 1; + UINT presentFlags = 0; + { + char buf[8] = {}; + if (GetEnvironmentVariableA("IGL_D3D12_VSYNC", buf, sizeof(buf)) > 0) { + if (buf[0] == '0') { + syncInterval = 0; + if (context_.isTearingSupported()) { + presentFlags |= DXGI_PRESENT_ALLOW_TEARING; + } + } + } + } + + // Present + HRESULT presentHr = swapChain->Present(syncInterval, presentFlags); + if (FAILED(presentHr)) { + IGL_LOG_ERROR("PresentManager: Present failed: 0x%08X\n", static_cast(presentHr)); + + // Check if device was removed during Present + HRESULT deviceStatus = device->GetDeviceRemovedReason(); + if (FAILED(deviceStatus)) { + IGL_LOG_ERROR("PresentManager: DEVICE REMOVED during Present! 
Reason: 0x%08X\n", + static_cast(deviceStatus)); +#ifdef IGL_DEBUG + logInfoQueueMessages(device); + logDredInfo(device); +#endif + IGL_DEBUG_ASSERT(false); + } else { + IGL_LOG_ERROR("PresentManager: Present failed but device reports OK; check swapchain/window state\n"); + } + // Present failed - return false regardless of whether device was removed + return false; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("PresentManager: Present OK\n"); +#endif + + // Check device status after Present + if (!checkDeviceStatus("after Present")) { + return false; + } + + return true; +} + +bool PresentManager::checkDeviceStatus(const char* contextStr) { + auto* device = context_.getDevice(); + HRESULT deviceStatus = device->GetDeviceRemovedReason(); + + if (FAILED(deviceStatus)) { + IGL_LOG_ERROR("PresentManager: DEVICE REMOVED %s! Reason: 0x%08X\n", + contextStr, static_cast(deviceStatus)); +#ifdef IGL_DEBUG + logInfoQueueMessages(device); + logDredInfo(device); +#endif + IGL_DEBUG_ASSERT(false); + return false; + } + + return true; +} + +#ifdef IGL_DEBUG +void PresentManager::logInfoQueueMessages(ID3D12Device* device) { + igl::d3d12::ComPtr infoQueue; + if (FAILED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + return; + } + + UINT64 numMessages = infoQueue->GetNumStoredMessages(); + IGL_D3D12_LOG_VERBOSE("D3D12 Info Queue has %llu messages:\n", numMessages); + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + infoQueue->GetMessage(i, nullptr, &messageLength); + if (messageLength == 0) { + continue; + } + // Use RAII vector instead of malloc/free + std::vector messageBuffer(messageLength); + auto* message = reinterpret_cast(messageBuffer.data()); + if (SUCCEEDED(infoQueue->GetMessage(i, message, &messageLength))) { + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case D3D12_MESSAGE_SEVERITY_CORRUPTION: severityStr = "CORRUPTION"; break; + case D3D12_MESSAGE_SEVERITY_ERROR: severityStr = "ERROR"; break; + 
case D3D12_MESSAGE_SEVERITY_WARNING: severityStr = "WARNING"; break; + case D3D12_MESSAGE_SEVERITY_INFO: severityStr = "INFO"; break; + case D3D12_MESSAGE_SEVERITY_MESSAGE: severityStr = "MESSAGE"; break; + } + IGL_D3D12_LOG_VERBOSE(" [%s] %s\n", severityStr, message->pDescription); + } + // messageBuffer automatically freed at end of scope + } +} + +void PresentManager::logDredInfo(ID3D12Device* device) { +#if defined(__ID3D12DeviceRemovedExtendedData1_INTERFACE_DEFINED__) + igl::d3d12::ComPtr dred; + if (FAILED(device->QueryInterface(IID_PPV_ARGS(dred.GetAddressOf())))) { + IGL_D3D12_LOG_VERBOSE("DRED: ID3D12DeviceRemovedExtendedData1 not available.\n"); + return; + } + + D3D12_DRED_AUTO_BREADCRUMBS_OUTPUT1 breadcrumbs = {}; + if (SUCCEEDED(dred->GetAutoBreadcrumbsOutput1(&breadcrumbs)) && breadcrumbs.pHeadAutoBreadcrumbNode) { + IGL_LOG_ERROR("DRED AutoBreadcrumbs (most recent first):\n"); + const D3D12_AUTO_BREADCRUMB_NODE1* node = breadcrumbs.pHeadAutoBreadcrumbNode; + uint32_t nodeIndex = 0; + constexpr uint32_t kMaxNodesToPrint = 16; + while (node && nodeIndex < kMaxNodesToPrint) { + const char* listName = node->pCommandListDebugNameA ? node->pCommandListDebugNameA : ""; + const char* queueName = node->pCommandQueueDebugNameA ? node->pCommandQueueDebugNameA : ""; + IGL_LOG_ERROR(" Node #%u: CommandList=%p (%s) CommandQueue=%p (%s) Breadcrumbs=%u completed=%u\n", + nodeIndex, + node->pCommandList, + listName, + node->pCommandQueue, + queueName, + node->BreadcrumbCount, + node->pLastBreadcrumbValue ? *node->pLastBreadcrumbValue : 0); + if (node->pCommandHistory && node->BreadcrumbCount > 0) { + D3D12_AUTO_BREADCRUMB_OP lastOp = node->pCommandHistory[node->BreadcrumbCount - 1]; + IGL_LOG_ERROR(" Last command: %d (history count=%u)\n", static_cast(lastOp), node->BreadcrumbCount); + } + node = node->pNext; + ++nodeIndex; + } + if (node) { + IGL_LOG_ERROR(" ... 
additional breadcrumbs omitted ...\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("DRED: No auto breadcrumbs captured.\n"); + } + + D3D12_DRED_PAGE_FAULT_OUTPUT1 pageFault = {}; + if (SUCCEEDED(dred->GetPageFaultAllocationOutput1(&pageFault)) && pageFault.PageFaultVA != 0) { + IGL_LOG_ERROR("DRED PageFault: VA=0x%016llx\n", pageFault.PageFaultVA); + if (pageFault.pHeadExistingAllocationNode) { + const auto* alloc = pageFault.pHeadExistingAllocationNode; + IGL_LOG_ERROR(" Existing allocation: Object=%p Name=%s Type=%u\n", + alloc->pObject, + alloc->ObjectNameA ? alloc->ObjectNameA : "", + static_cast(alloc->AllocationType)); + } + if (pageFault.pHeadRecentFreedAllocationNode) { + const auto* freed = pageFault.pHeadRecentFreedAllocationNode; + IGL_LOG_ERROR(" Recently freed allocation: Object=%p Name=%s Type=%u\n", + freed->pObject, + freed->ObjectNameA ? freed->ObjectNameA : "", + static_cast(freed->AllocationType)); + } + } else { + IGL_D3D12_LOG_VERBOSE("DRED: No page fault data available.\n"); + } +#else + (void)device; + IGL_D3D12_LOG_VERBOSE("DRED: Extended data interfaces not available on this SDK.\n"); +#endif +} +#endif // IGL_DEBUG + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12PresentManager.h b/src/igl/d3d12/D3D12PresentManager.h new file mode 100644 index 0000000000..5ea0c86c30 --- /dev/null +++ b/src/igl/d3d12/D3D12PresentManager.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include + +namespace igl::d3d12 { + +class D3D12Context; + +/** + * @brief Manages swapchain presentation with device removal detection + * + * Handles: + * - VSync configuration via environment variable + * - Present flags (tearing support) + * - Device removal detection before/after Present + * - DRED and Info Queue diagnostics on failure + */ +class PresentManager final { + public: + explicit PresentManager(D3D12Context& context) : context_(context) {} + + /** + * @brief Present the current frame with proper error handling + * + * Checks device status before and after Present, logs diagnostics on failure. + * Does not throw - sets device lost flag for application to check. + * + * @return true if present succeeded, false if device was removed or present failed + */ + bool present(); + + private: + /** + * @brief Check device status and log diagnostics if removed + */ + bool checkDeviceStatus(const char* context); + +#ifdef IGL_DEBUG + /** + * @brief Log Info Queue messages for debugging (debug builds only) + */ + void logInfoQueueMessages(ID3D12Device* device); + + /** + * @brief Log DRED breadcrumbs and page fault info (debug builds only) + */ + void logDredInfo(ID3D12Device* device); +#endif + + D3D12Context& context_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12ReflectionUtils.cpp b/src/igl/d3d12/D3D12ReflectionUtils.cpp new file mode 100644 index 0000000000..bb4554252d --- /dev/null +++ b/src/igl/d3d12/D3D12ReflectionUtils.cpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +namespace igl::d3d12::ReflectionUtils { + +igl::UniformType mapUniformType(const D3D12_SHADER_TYPE_DESC& td) { + if ((td.Class == D3D_SVC_MATRIX_ROWS || td.Class == D3D_SVC_MATRIX_COLUMNS) && + td.Rows == 4 && td.Columns == 4) { + return igl::UniformType::Mat4x4; + } + if (td.Type == D3D_SVT_FLOAT) { + if (td.Class == D3D_SVC_SCALAR) return igl::UniformType::Float; + if (td.Class == D3D_SVC_VECTOR) { + switch (td.Columns) { + case 2: return igl::UniformType::Float2; + case 3: return igl::UniformType::Float3; + case 4: return igl::UniformType::Float4; + default: return igl::UniformType::Invalid; + } + } + } + return igl::UniformType::Invalid; +} + +} // namespace igl::d3d12::ReflectionUtils diff --git a/src/igl/d3d12/D3D12ReflectionUtils.h b/src/igl/d3d12/D3D12ReflectionUtils.h new file mode 100644 index 0000000000..ffc8061044 --- /dev/null +++ b/src/igl/d3d12/D3D12ReflectionUtils.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12::ReflectionUtils { + +/** + * Maps D3D12 shader type descriptor to IGL uniform type + * + * Supported types: + * - float (D3D_SVT_FLOAT + D3D_SVC_SCALAR) → UniformType::Float + * - float2/3/4 (D3D_SVT_FLOAT + D3D_SVC_VECTOR) → UniformType::Float2/3/4 + * - float4x4 (D3D_SVC_MATRIX_ROWS/COLUMNS, 4x4) → UniformType::Mat4x4 + * + * All other types (int, uint, bool, matrices other than 4x4, etc.) 
map to UniformType::Invalid + * + * @param td D3D12 shader type descriptor from reflection + * @return Corresponding IGL UniformType, or UniformType::Invalid for unsupported types + */ +igl::UniformType mapUniformType(const D3D12_SHADER_TYPE_DESC& td); + +} // namespace igl::d3d12::ReflectionUtils diff --git a/src/igl/d3d12/D3D12ResourcesBinder.cpp b/src/igl/d3d12/D3D12ResourcesBinder.cpp new file mode 100644 index 0000000000..6476d0b9e3 --- /dev/null +++ b/src/igl/d3d12/D3D12ResourcesBinder.cpp @@ -0,0 +1,929 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// D3D12 alignment requirement for constant buffer views +constexpr size_t kConstantBufferAlignment = 256; +constexpr size_t kMaxCBVSize = 65536; // 64 KB (D3D12 spec limit) + +// Compute pipeline hardcoded root parameter layout +// Note: Graphics pipelines use pure reflection-based layout queried from RenderPipelineState +// Compute pipelines still use this hardcoded layout (should be migrated to reflection) +constexpr uint32_t kComputeRootParam_PushConstants = 0; +constexpr uint32_t kComputeRootParam_UAVTable = 1; +constexpr uint32_t kComputeRootParam_SRVTable = 2; +constexpr uint32_t kComputeRootParam_CBVTable = 3; +constexpr uint32_t kComputeRootParam_SamplerTable = 4; + +} // namespace + +D3D12ResourcesBinder::D3D12ResourcesBinder(CommandBuffer& commandBuffer, bool isCompute) + : commandBuffer_(commandBuffer), isCompute_(isCompute) {} + +void D3D12ResourcesBinder::bindTexture(uint32_t index, ITexture* texture) { + if (index >= IGL_TEXTURE_SAMPLERS_MAX) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindTexture: index %u exceeds maximum %u\n", + index, + IGL_TEXTURE_SAMPLERS_MAX); + return; + } + + if (!texture) { + // Unbind 
texture at this slot + if (index < bindingsTextures_.count) { + bindingsTextures_.textures[index] = nullptr; + bindingsTextures_.handles[index] = {}; + // Update count to highest bound slot + 1 + while (bindingsTextures_.count > 0 && bindingsTextures_.textures[bindingsTextures_.count - 1] == nullptr) { + bindingsTextures_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_Textures; + return; + } + + auto* d3dTexture = static_cast(texture); + ID3D12Resource* resource = d3dTexture->getResource(); + + if (!resource) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindTexture: texture resource is null\n"); + return; + } + + // Transition texture to shader resource state + // Note: This must happen immediately, not deferred until updateBindings() + // Use pipeline-specific states for optimal barrier tracking: + // - Graphics: PIXEL_SHADER_RESOURCE (pixel shader read) + // - Compute: NON_PIXEL_SHADER_RESOURCE (compute/vertex/geometry shader read) + auto* commandList = commandBuffer_.getCommandList(); + const auto targetState = isCompute_ ? 
D3D12_RESOURCE_STATE_NON_PIXEL_SHADER_RESOURCE + : D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + d3dTexture->transitionAll(commandList, targetState); + + // Store texture pointer for descriptor creation in updateBindings() + bindingsTextures_.textures[index] = texture; + + // Mark textures dirty - descriptor will be created in updateBindings() + dirtyFlags_ |= DirtyFlagBits_Textures; + + // Update binding count + if (index >= bindingsTextures_.count) { + bindingsTextures_.count = index + 1; + } +} + +void D3D12ResourcesBinder::bindSamplerState(uint32_t index, ISamplerState* samplerState) { + if (index >= IGL_TEXTURE_SAMPLERS_MAX) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindSamplerState: index %u exceeds maximum %u\n", + index, + IGL_TEXTURE_SAMPLERS_MAX); + return; + } + + if (!samplerState) { + // Unbind sampler at this slot + if (index < bindingsSamplers_.count) { + bindingsSamplers_.samplers[index] = nullptr; + bindingsSamplers_.handles[index] = {}; + // Update count to highest bound slot + 1 + while (bindingsSamplers_.count > 0 && bindingsSamplers_.samplers[bindingsSamplers_.count - 1] == nullptr) { + bindingsSamplers_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_Samplers; + return; + } + + // Store sampler pointer for descriptor creation in updateBindings() + bindingsSamplers_.samplers[index] = samplerState; + + // Mark samplers dirty - descriptor will be created in updateBindings() + dirtyFlags_ |= DirtyFlagBits_Samplers; + + // Update binding count + if (index >= bindingsSamplers_.count) { + bindingsSamplers_.count = index + 1; + } +} + +void D3D12ResourcesBinder::bindBuffer(uint32_t index, + IBuffer* buffer, + size_t offset, + size_t size, + bool isUAV, + size_t elementStride) { + if (index >= IGL_BUFFER_BINDINGS_MAX) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindBuffer: index %u exceeds maximum %u\n", + index, + IGL_BUFFER_BINDINGS_MAX); + return; + } + + if (!buffer) { + // Unbind buffer/UAV at this slot + if (isUAV) { + if (index < 
bindingsUAVs_.count) { + bindingsUAVs_.buffers[index] = nullptr; + bindingsUAVs_.offsets[index] = 0; + bindingsUAVs_.elementStrides[index] = 0; + bindingsUAVs_.handles[index] = {}; + while (bindingsUAVs_.count > 0 && bindingsUAVs_.buffers[bindingsUAVs_.count - 1] == nullptr) { + bindingsUAVs_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_UAVs; + } else { + if (index < bindingsBuffers_.count) { + bindingsBuffers_.buffers[index] = nullptr; + bindingsBuffers_.addresses[index] = 0; + bindingsBuffers_.offsets[index] = 0; + bindingsBuffers_.sizes[index] = 0; + while (bindingsBuffers_.count > 0 && bindingsBuffers_.buffers[bindingsBuffers_.count - 1] == nullptr) { + bindingsBuffers_.count--; + } + } + dirtyFlags_ |= DirtyFlagBits_Buffers; + } + return; + } + + auto* d3dBuffer = static_cast(buffer); + ID3D12Resource* resource = d3dBuffer->getResource(); + + if (!resource) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindBuffer: buffer resource is null\n"); + return; + } + + if (isUAV) { + // Storage buffer (UAV) - store buffer pointer, offset, and element stride for descriptor creation + if (elementStride == 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder::bindBuffer: UAV binding requires non-zero elementStride\n"); + return; + } + bindingsUAVs_.buffers[index] = buffer; + bindingsUAVs_.offsets[index] = offset; + bindingsUAVs_.elementStrides[index] = elementStride; + dirtyFlags_ |= DirtyFlagBits_UAVs; + if (index >= bindingsUAVs_.count) { + bindingsUAVs_.count = index + 1; + } + } else { + // Uniform buffer (CBV) - D3D12 requires 256-byte alignment for CBV addresses + // Compute base address (must be 256-byte aligned) + D3D12_GPU_VIRTUAL_ADDRESS baseAddress = resource->GetGPUVirtualAddress(); + D3D12_GPU_VIRTUAL_ADDRESS alignedAddress = (baseAddress + offset) & ~(kConstantBufferAlignment - 1); + + bindingsBuffers_.buffers[index] = buffer; + bindingsBuffers_.addresses[index] = alignedAddress; + bindingsBuffers_.offsets[index] = offset; + bindingsBuffers_.sizes[index] = size; + 
dirtyFlags_ |= DirtyFlagBits_Buffers; + if (index >= bindingsBuffers_.count) { + bindingsBuffers_.count = index + 1; + } + } +} + +bool D3D12ResourcesBinder::updateBindings(const RenderPipelineState* renderPipeline, Result* outResult) { + auto* commandList = commandBuffer_.getCommandList(); + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + + if (!commandList || !device) { + if (outResult) { + *outResult = Result{Result::Code::RuntimeError, "Invalid command list or device"}; + } + return false; + } + + bool success = true; + + // Update textures (SRV table) + if (dirtyFlags_ & DirtyFlagBits_Textures) { + if (!updateTextureBindings(commandList, device, renderPipeline, outResult)) { + success = false; + } + } + + // Update samplers (sampler table) + if (dirtyFlags_ & DirtyFlagBits_Samplers) { + if (!updateSamplerBindings(commandList, device, renderPipeline, outResult)) { + success = false; + } + } + + // Update buffers (CBV table) + if (dirtyFlags_ & DirtyFlagBits_Buffers) { + if (!updateBufferBindings(commandList, device, renderPipeline, outResult)) { + success = false; + } + } + + // Update UAVs (UAV table for compute) + if ((dirtyFlags_ & DirtyFlagBits_UAVs) && isCompute_) { + if (!updateUAVBindings(commandList, device, outResult)) { + success = false; + } + } + + // Clear dirty flags + dirtyFlags_ = 0; + + return success; +} + +void D3D12ResourcesBinder::reset() { + bindingsTextures_ = {}; + bindingsSamplers_ = {}; + bindingsBuffers_ = {}; + bindingsUAVs_ = {}; + dirtyFlags_ = DirtyFlagBits_Textures | DirtyFlagBits_Samplers | DirtyFlagBits_Buffers | + DirtyFlagBits_UAVs; +} + +bool D3D12ResourcesBinder::updateTextureBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const RenderPipelineState* renderPipeline, + Result* outResult) { + if (bindingsTextures_.count == 0) { + return true; // Nothing to bind + } + + auto& context = commandBuffer_.getContext(); + + // Determine how many descriptors to allocate based 
on pipeline's root signature + // For graphics: Use pipeline's declared SRV range (0 to maxSRVSlot inclusive) + // For compute: Use bindingsTextures_.count (legacy sparse allocation) + uint32_t descriptorRangeSize = bindingsTextures_.count; + + if (!isCompute_ && renderPipeline) { + // Graphics pipeline: Match root signature's SRV descriptor range exactly + const UINT pipelineSRVCount = renderPipeline->getSRVDescriptorCount(); + if (pipelineSRVCount > 0) { + descriptorRangeSize = pipelineSRVCount; + IGL_D3D12_LOG_VERBOSE("updateTextureBindings: Using pipeline SRV range size=%u (bound=%u)\n", + descriptorRangeSize, bindingsTextures_.count); + } + } + + // Allocate a contiguous range of descriptors for all textures on a single page + // This ensures we can bind them as a single descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = + commandBuffer_.allocateCbvSrvUavRange(descriptorRangeSize, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR( + "D3D12ResourcesBinder: Failed to allocate contiguous SRV range (%u descriptors): %s\n", + descriptorRangeSize, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + return false; + } + + // Create SRV descriptors for all texture slots from 0 to descriptorRangeSize-1. + // For unbound slots, emit a null SRV so that the descriptor table is fully + // initialized and matches the root signature descriptor range exactly. + for (uint32_t i = 0; i < descriptorRangeSize; ++i) { + const uint32_t descriptorIndex = baseDescriptorIndex + i; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = + context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = + context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Check if this slot is bound (may be null if beyond bindingsTextures_.count) + auto* texture = (i < bindingsTextures_.count) ? bindingsTextures_.textures[i] : nullptr; + if (!texture) { + // Create an explicit null SRV descriptor. 
D3D12 does not permit both + // the resource AND the descriptor pointer to be null, so we bind a + // well-formed descriptor with zeroed fields instead. This is treated as + // a null descriptor by the runtime. + D3D12_SHADER_RESOURCE_VIEW_DESC nullSrv = {}; + nullSrv.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + nullSrv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + nullSrv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + nullSrv.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(nullptr, &nullSrv, cpuHandle); + D3D12Context::trackResourceCreation("SRV", 0); + // Only cache handle if within bounds (avoid out-of-bounds write) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsTextures_.handles[i] = gpuHandle; + } + continue; + } + + auto* d3dTexture = static_cast(texture); + ID3D12Resource* resource = d3dTexture->getResource(); + if (!resource) { + D3D12_SHADER_RESOURCE_VIEW_DESC nullSrv = {}; + nullSrv.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + nullSrv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + nullSrv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + nullSrv.Texture2D.MipLevels = 1; + device->CreateShaderResourceView(nullptr, &nullSrv, cpuHandle); + D3D12Context::trackResourceCreation("SRV", 0); + // Only cache handle if within bounds (avoid out-of-bounds write) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsTextures_.handles[i] = gpuHandle; + } + continue; + } + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = textureFormatToDXGIShaderResourceViewFormat(d3dTexture->getFormat()); + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + + auto resourceDesc = resource->GetDesc(); + const bool isView = d3dTexture->isView(); + const uint32_t mostDetailedMip = isView ? d3dTexture->getMipLevelOffset() : 0; + const uint32_t mipLevels = + isView ? 
d3dTexture->getNumMipLevelsInView() : d3dTexture->getNumMipLevels(); + + if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE3D; + srvDesc.Texture3D.MipLevels = mipLevels; + srvDesc.Texture3D.MostDetailedMip = mostDetailedMip; + } else if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + const auto textureType = d3dTexture->getType(); + const bool isArrayTexture = + (isView && d3dTexture->getNumArraySlicesInView() > 0) || + (!isView && resourceDesc.DepthOrArraySize > 1); + + // Prioritize cube textures so that cubemaps created as 2D arrays + // with 6 faces are exposed as TEXTURECUBE to shaders that declare + // TextureCube / samplerCube. + if (textureType == TextureType::Cube) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURECUBE; + srvDesc.TextureCube.MostDetailedMip = mostDetailedMip; + srvDesc.TextureCube.MipLevels = mipLevels; + } else if (textureType == TextureType::TwoDArray || isArrayTexture) { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2DARRAY; + srvDesc.Texture2DArray.MostDetailedMip = mostDetailedMip; + srvDesc.Texture2DArray.MipLevels = mipLevels; + srvDesc.Texture2DArray.FirstArraySlice = + isView ? d3dTexture->getArraySliceOffset() : 0; + srvDesc.Texture2DArray.ArraySize = + isView ? 
d3dTexture->getNumArraySlicesInView() + : resourceDesc.DepthOrArraySize; + } else { + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srvDesc.Texture2D.MostDetailedMip = mostDetailedMip; + srvDesc.Texture2D.MipLevels = mipLevels; + } + } else { + IGL_LOG_ERROR("D3D12ResourcesBinder: Unsupported texture dimension %d\n", + resourceDesc.Dimension); + if (outResult) { + *outResult = + Result{Result::Code::Unsupported, "Unsupported texture dimension for SRV"}; + } + return false; + } + + device->CreateShaderResourceView(resource, &srvDesc, cpuHandle); + D3D12Context::trackResourceCreation("SRV", 0); + + // Cache the GPU handle (only if within bounds) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsTextures_.handles[i] = gpuHandle; + } + } + + // Bind the SRV table to the appropriate root parameter + // Use the first descriptor in the allocated range (baseDescriptorIndex) + D3D12_GPU_DESCRIPTOR_HANDLE tableBaseHandle = context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + + if (isCompute_) { + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_SRVTable, tableBaseHandle); + } else { + // Graphics pipeline: Query reflection-based root parameter index from pipeline + if (!renderPipeline) { + IGL_LOG_ERROR("updateTextureBindings: renderPipeline is NULL, cannot bind SRV table\n"); + } else { + const UINT srvTableIndex = renderPipeline->getSRVTableRootParameterIndex(); + IGL_D3D12_LOG_VERBOSE("updateTextureBindings: srvTableIndex=%u (UINT_MAX=%u)\n", srvTableIndex, UINT_MAX); + if (srvTableIndex != UINT_MAX) { + cmdList->SetGraphicsRootDescriptorTable(srvTableIndex, tableBaseHandle); + IGL_D3D12_LOG_VERBOSE("updateTextureBindings: Bound SRV table to root param %u (range size %u)\n", + srvTableIndex, descriptorRangeSize); + } else { + IGL_LOG_ERROR("updateTextureBindings: srvTableIndex is UINT_MAX, shader doesn't use SRVs?\n"); + } + } + } + + return true; +} + +bool D3D12ResourcesBinder::updateSamplerBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* 
device, + const RenderPipelineState* renderPipeline, + Result* outResult) { + if (bindingsSamplers_.count == 0) { + return true; // Nothing to bind + } + + auto& context = commandBuffer_.getContext(); + + // Determine how many descriptors to allocate based on pipeline's root signature + // For graphics: Use pipeline's declared sampler range (0 to maxSamplerSlot inclusive) + // For compute: Use bindingsSamplers_.count (legacy behavior) + uint32_t descriptorRangeSize = bindingsSamplers_.count; + + if (!isCompute_ && renderPipeline) { + // Graphics pipeline: Match root signature's sampler descriptor range exactly + const UINT pipelineSamplerCount = renderPipeline->getSamplerDescriptorCount(); + if (pipelineSamplerCount > 0) { + descriptorRangeSize = pipelineSamplerCount; + IGL_D3D12_LOG_VERBOSE("updateSamplerBindings: Using pipeline sampler range size=%u (bound=%u)\n", + descriptorRangeSize, bindingsSamplers_.count); + } + } + + // Get base sampler descriptor index for contiguous allocation + uint32_t baseSamplerIndex = commandBuffer_.getNextSamplerDescriptor(); + + // Create sampler descriptors for all slots from 0 to descriptorRangeSize-1 + // For unbound slots, create a default sampler to fill the table + for (uint32_t i = 0; i < descriptorRangeSize; ++i) { + const uint32_t descriptorIndex = baseSamplerIndex + i; + + // Get descriptor handles + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getSamplerCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getSamplerGpuHandle(descriptorIndex); + + // Check if this slot is bound (may be null if beyond bindingsSamplers_.count) + auto* samplerState = (i < bindingsSamplers_.count) ? 
bindingsSamplers_.samplers[i] : nullptr; + + // Create sampler descriptor + D3D12_SAMPLER_DESC samplerDesc = {}; + if (samplerState) { + if (auto* d3dSampler = dynamic_cast(samplerState)) { + samplerDesc = d3dSampler->getDesc(); + } else { + // Fallback for bound but invalid sampler + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_WRAP; + samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.BorderColor[0] = 0.0f; + samplerDesc.BorderColor[1] = 0.0f; + samplerDesc.BorderColor[2] = 0.0f; + samplerDesc.BorderColor[3] = 0.0f; + samplerDesc.MinLOD = 0.0f; + samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + } + } else { + // Unbound slot: Create default sampler for unused descriptor table entries + samplerDesc.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samplerDesc.AddressU = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressV = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samplerDesc.MipLODBias = 0.0f; + samplerDesc.MaxAnisotropy = 1; + samplerDesc.ComparisonFunc = D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.BorderColor[0] = 0.0f; + samplerDesc.BorderColor[1] = 0.0f; + samplerDesc.BorderColor[2] = 0.0f; + samplerDesc.BorderColor[3] = 0.0f; + samplerDesc.MinLOD = 0.0f; + samplerDesc.MaxLOD = D3D12_FLOAT32_MAX; + } + + device->CreateSampler(&samplerDesc, cpuHandle); + D3D12Context::trackResourceCreation("Sampler", 0); + + // Cache the GPU handle (only if within bounds) + if (i < IGL_TEXTURE_SAMPLERS_MAX) { + bindingsSamplers_.handles[i] = gpuHandle; + } + } + + // Update sampler descriptor counter to reserve the allocated range + commandBuffer_.getNextSamplerDescriptor() = baseSamplerIndex + descriptorRangeSize; + + // Bind the sampler table to the appropriate root 
parameter + // Use the first descriptor in the allocated range + D3D12_GPU_DESCRIPTOR_HANDLE tableBaseHandle = context.getSamplerGpuHandle(baseSamplerIndex); + if (isCompute_) { + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_SamplerTable, tableBaseHandle); + } else { + // Graphics pipeline: Query reflection-based root parameter index from pipeline + if (!renderPipeline) { + IGL_LOG_ERROR("updateSamplerBindings: renderPipeline is NULL, cannot bind sampler table\n"); + } else { + const UINT samplerTableIndex = renderPipeline->getSamplerTableRootParameterIndex(); + IGL_D3D12_LOG_VERBOSE("updateSamplerBindings: samplerTableIndex=%u (UINT_MAX=%u)\n", samplerTableIndex, UINT_MAX); + if (samplerTableIndex != UINT_MAX) { + cmdList->SetGraphicsRootDescriptorTable(samplerTableIndex, tableBaseHandle); + IGL_D3D12_LOG_VERBOSE("updateSamplerBindings: Bound sampler table to root param %u (range size %u)\n", + samplerTableIndex, descriptorRangeSize); + } else { + IGL_LOG_ERROR("updateSamplerBindings: samplerTableIndex is UINT_MAX, shader doesn't use samplers?\n"); + } + } + } + + return true; +} + +bool D3D12ResourcesBinder::updateBufferBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const RenderPipelineState* renderPipeline, + Result* outResult) { + if (bindingsBuffers_.count == 0) { + return true; // Nothing to bind + } + + if (isCompute_) { + // Compute pipeline: all CBVs go through descriptor table (root parameter 3) + auto& context = commandBuffer_.getContext(); + + // Count bound CBVs and validate dense binding + uint32_t boundCbvCount = 0; + for (uint32_t i = 0; i < bindingsBuffers_.count; ++i) { + if (bindingsBuffers_.addresses[i] != 0) { + boundCbvCount++; + } + } + + if (boundCbvCount == 0) { + return true; // No CBVs to bind + } + + // CRITICAL VALIDATION: Enforce dense CBV binding for compute shaders + // ===================================================================== + // D3D12 descriptor tables bind contiguously starting from 
the base register. + // For compute CBVs, this means: + // - VALID: binding slots 0, 1, 2 (dense from b0) + // - INVALID: binding slots 0, 2 (gap at slot 1) + // - INVALID: binding slots 1, 2 (slot 0 not bound) + // + // This is FATAL validation - sparse bindings will return InvalidOperation error. + // Application code must ensure CBVs are bound densely from index 0 with no gaps. + // + // Rationale: When we call SetComputeRootDescriptorTable with N descriptors at base b0, + // D3D12 expects HLSL registers b0, b1, ..., b(N-1) to map 1:1 with descriptor table + // entries. Gaps would cause shader register mismatches and undefined behavior. + + if (bindingsBuffers_.addresses[0] == 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Compute CBV bindings are sparse (slot 0 not bound). " + "D3D12 requires dense bindings starting at index 0.\n"); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, + "Compute CBV bindings must be dense starting at slot 0"}; + } + return false; + } + + // Verify no gaps in binding range (all slots from 0 to boundCbvCount-1 must be bound) + for (uint32_t i = 1; i < boundCbvCount; ++i) { + if (bindingsBuffers_.addresses[i] == 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Sparse compute CBV binding detected at slot %u " + "(expected dense binding through slot %u)\n", i, boundCbvCount - 1); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, "Compute CBV bindings must be dense"}; + } + return false; + } + } + + // Allocate a contiguous range of descriptors for all CBVs on a single page + // This ensures we can bind them as a single descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(boundCbvCount, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Failed to allocate contiguous CBV range (%u descriptors): %s\n", + boundCbvCount, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + 
return false; + } + + // Create CBV descriptors for all bound buffers + uint32_t descriptorOffset = 0; + for (uint32_t i = 0; i < bindingsBuffers_.count; ++i) { + if (bindingsBuffers_.addresses[i] == 0) { + continue; // Skip unbound slots + } + + // Validate address alignment (D3D12 requires 256-byte alignment) + if (bindingsBuffers_.addresses[i] % kConstantBufferAlignment != 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u address 0x%llx is not 256-byte aligned\n", + i, bindingsBuffers_.addresses[i]); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, + "Constant buffer address must be 256-byte aligned"}; + } + return false; + } + + // Validate size + size_t size = bindingsBuffers_.sizes[i]; + if (size > kMaxCBVSize) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u size (%zu bytes) exceeds 64 KB limit\n", + i, size); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, + "Constant buffer size exceeds 64 KB D3D12 limit"}; + } + return false; + } + + // Align size to 256-byte boundary + const size_t alignedSize = (size + kConstantBufferAlignment - 1) & ~(kConstantBufferAlignment - 1); + + // Use contiguous descriptor index (baseDescriptorIndex + descriptorOffset) + const uint32_t descriptorIndex = baseDescriptorIndex + descriptorOffset; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = bindingsBuffers_.addresses[i]; + cbvDesc.SizeInBytes = static_cast(alignedSize); + + device->CreateConstantBufferView(&cbvDesc, cpuHandle); + descriptorOffset++; + } + + // Sanity check: descriptorOffset should match boundCbvCount after dense packing + IGL_DEBUG_ASSERT(descriptorOffset == boundCbvCount, + "CBV descriptor packing mismatch: allocated %u but created %u", + boundCbvCount, descriptorOffset); + + // Bind the CBV descriptor table to root parameter 3 + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = 
context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_CBVTable, gpuHandle); + } else { + // Graphics pipeline: Reflection-based CBV descriptor table binding + auto& context = commandBuffer_.getContext(); + + // Count bound CBVs + uint32_t boundCbvCount = 0; + for (uint32_t i = 0; i < bindingsBuffers_.count; ++i) { + if (bindingsBuffers_.addresses[i] != 0) { + boundCbvCount++; + } + } + + if (boundCbvCount == 0) { + return true; // No CBVs to bind + } + + // Determine how many descriptors to allocate based on pipeline's root signature + // Use pipeline's declared CBV range (0 to maxCBVSlot inclusive) to match root signature + uint32_t descriptorRangeSize = bindingsBuffers_.count; + + if (renderPipeline) { + const UINT pipelineCBVCount = renderPipeline->getCBVDescriptorCount(); + if (pipelineCBVCount > 0) { + descriptorRangeSize = pipelineCBVCount; + } + } + + // Allocate a contiguous range of descriptors from 0 to descriptorRangeSize-1 + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(descriptorRangeSize, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Failed to allocate CBV range (%u descriptors): %s\n", + descriptorRangeSize, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + return false; + } + + IGL_D3D12_LOG_VERBOSE("updateBufferBindings: Graphics CBV binding - range b0-b%u, %u descriptors\n", + descriptorRangeSize - 1, descriptorRangeSize); + + // Create CBV descriptors for all slots from 0 to descriptorRangeSize-1 + // For unbound slots, create null descriptors to match the root signature range + for (uint32_t slotIndex = 0; slotIndex < descriptorRangeSize; ++slotIndex) { + const uint32_t descriptorIndex = baseDescriptorIndex + slotIndex; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + // Check if this slot is bound (may be null if 
beyond bindingsBuffers_.count) + const bool isSlotBound = (slotIndex < bindingsBuffers_.count) && + (bindingsBuffers_.addresses[slotIndex] != 0); + + if (isSlotBound) { + // Bound slot: Create valid CBV descriptor + // Validate address alignment (D3D12 requires 256-byte alignment) + if (bindingsBuffers_.addresses[slotIndex] % kConstantBufferAlignment != 0) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u address 0x%llx is not 256-byte aligned\n", + slotIndex, bindingsBuffers_.addresses[slotIndex]); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, + "Constant buffer address must be 256-byte aligned"}; + } + return false; + } + + // Validate size + size_t size = bindingsBuffers_.sizes[slotIndex]; + if (size > kMaxCBVSize) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Constant buffer %u size (%zu bytes) exceeds 64 KB limit\n", + slotIndex, size); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, + "Constant buffer size exceeds 64 KB D3D12 limit"}; + } + return false; + } + + // Align size to 256-byte boundary + const size_t alignedSize = (size + kConstantBufferAlignment - 1) & ~(kConstantBufferAlignment - 1); + + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = bindingsBuffers_.addresses[slotIndex]; + cbvDesc.SizeInBytes = static_cast(alignedSize); + + device->CreateConstantBufferView(&cbvDesc, cpuHandle); + IGL_D3D12_LOG_VERBOSE("D3D12ResourcesBinder: Created CBV descriptor for b%u (address=0x%llx, size=%u)\n", + slotIndex, cbvDesc.BufferLocation, cbvDesc.SizeInBytes); + } else { + // Unbound slot: Create NULL descriptor to fill the root signature descriptor range + D3D12_CONSTANT_BUFFER_VIEW_DESC nullCbvDesc = {}; + nullCbvDesc.BufferLocation = 0; // NULL CBV + nullCbvDesc.SizeInBytes = D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16; // Minimum valid size + + device->CreateConstantBufferView(&nullCbvDesc, cpuHandle); + IGL_D3D12_LOG_VERBOSE("D3D12ResourcesBinder: Created NULL CBV descriptor 
for b%u\n", slotIndex); + } + } + + // Query pipeline for reflection-based CBV table root parameter index + if (!renderPipeline) { + IGL_LOG_ERROR("updateBufferBindings: renderPipeline is NULL, cannot bind CBV table\n"); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, "renderPipeline is required for graphics CBV binding"}; + } + return false; + } + + const UINT cbvTableIndex = renderPipeline->getCBVTableRootParameterIndex(); + + if (cbvTableIndex != UINT_MAX) { + // Bind the CBV descriptor table to the reflection-based root parameter + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + cmdList->SetGraphicsRootDescriptorTable(cbvTableIndex, gpuHandle); + } + } + + return true; +} + +bool D3D12ResourcesBinder::updateUAVBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + Result* outResult) { + if (bindingsUAVs_.count == 0) { + return true; // Nothing to bind + } + + // Validate dense bindings + if (bindingsUAVs_.buffers[0] == nullptr) { + IGL_LOG_ERROR("D3D12ResourcesBinder: UAV bindings are sparse (slot 0 not bound). 
" + "D3D12 requires dense bindings starting at index 0.\n"); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, + "UAV bindings must be dense starting at slot 0"}; + } + return false; + } + + auto& context = commandBuffer_.getContext(); + + // Verify all UAVs are bound (dense binding requirement) + for (uint32_t i = 0; i < bindingsUAVs_.count; ++i) { + if (bindingsUAVs_.buffers[i] == nullptr) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Sparse UAV binding detected at slot %u\n", i); + if (outResult) { + *outResult = Result{Result::Code::InvalidOperation, "UAV bindings must be dense"}; + } + return false; + } + } + + // Allocate a contiguous range of descriptors for all UAVs on a single page + // This ensures we can bind them as a single descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(bindingsUAVs_.count, &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("D3D12ResourcesBinder: Failed to allocate contiguous UAV range (%u descriptors): %s\n", + bindingsUAVs_.count, + allocResult.message.c_str()); + if (outResult) { + *outResult = allocResult; + } + return false; + } + + // Create UAV descriptors for all bound storage buffers + for (uint32_t i = 0; i < bindingsUAVs_.count; ++i) { + auto* buffer = bindingsUAVs_.buffers[i]; + auto* d3dBuffer = static_cast(buffer); + ID3D12Resource* resource = d3dBuffer->getResource(); + + const size_t offset = bindingsUAVs_.offsets[i]; + const size_t elementStride = bindingsUAVs_.elementStrides[i]; + const size_t bufferSize = d3dBuffer->getSizeInBytes(); + + // FATAL VALIDATION: UAV offset must be aligned to element stride + // This check immediately fails the entire updateBindings() call and returns InvalidOperation. + // Misaligned offsets would create invalid D3D12 UAV descriptors and cause device removal. 
+ if (offset % elementStride != 0) { + IGL_LOG_ERROR( + "D3D12ResourcesBinder: UAV offset %zu is not aligned to element stride %zu. " + "This is a FATAL error - updateBindings() will fail.\n", + offset, + elementStride); + if (outResult) { + *outResult = Result{Result::Code::ArgumentInvalid, + "UAV offset must be aligned to element stride"}; + } + return false; + } + + // FATAL VALIDATION: UAV offset must be within buffer bounds + // This check immediately fails the entire updateBindings() call and returns ArgumentOutOfRange. + // Out-of-bounds offsets would access invalid memory and cause GPU faults. + if (offset > bufferSize) { + IGL_LOG_ERROR("D3D12ResourcesBinder: UAV offset %zu exceeds buffer size %zu. " + "This is a FATAL error - updateBindings() will fail.\n", + offset, + bufferSize); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, "UAV offset exceeds buffer size"}; + } + return false; + } + + const size_t remaining = bufferSize - offset; + // FATAL VALIDATION: At least one full element must fit in remaining buffer space + // This check immediately fails the entire updateBindings() call and returns ArgumentOutOfRange. + // Creating a UAV with zero elements or partial elements would be invalid. + if (remaining < elementStride) { + IGL_LOG_ERROR("D3D12ResourcesBinder: UAV remaining size %zu < element stride %zu. 
" + "This is a FATAL error - updateBindings() will fail.\n", + remaining, + elementStride); + if (outResult) { + *outResult = Result{Result::Code::ArgumentOutOfRange, + "UAV remaining size less than element stride"}; + } + return false; + } + + // Use contiguous descriptor index (baseDescriptorIndex + i) + const uint32_t descriptorIndex = baseDescriptorIndex + i; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor for structured buffer + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; // Required for structured buffers + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = static_cast(offset / elementStride); + uavDesc.Buffer.NumElements = static_cast(remaining / elementStride); + uavDesc.Buffer.StructureByteStride = static_cast(elementStride); + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + device->CreateUnorderedAccessView(resource, nullptr, &uavDesc, cpuHandle); + D3D12Context::trackResourceCreation("UAV", 0); + + // Cache the GPU handle + bindingsUAVs_.handles[i] = gpuHandle; + } + + // Bind the UAV table to root parameter 1 (compute only) + cmdList->SetComputeRootDescriptorTable(kComputeRootParam_UAVTable, bindingsUAVs_.handles[0]); + + return true; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12ResourcesBinder.h b/src/igl/d3d12/D3D12ResourcesBinder.h new file mode 100644 index 0000000000..f549eff9da --- /dev/null +++ b/src/igl/d3d12/D3D12ResourcesBinder.h @@ -0,0 +1,353 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class CommandBuffer; +class PipelineState; + +/** + * @brief Binding state for textures and their associated GPU descriptor handles + * + * Stores up to IGL_TEXTURE_SAMPLERS_MAX texture bindings (t0-t15 in HLSL). + * Each binding stores the texture pointer (for descriptor creation) and the + * resulting GPU descriptor handle (for root parameter binding). + */ +struct BindingsTextures { + ITexture* textures[IGL_TEXTURE_SAMPLERS_MAX] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE handles[IGL_TEXTURE_SAMPLERS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Binding state for samplers and their associated GPU descriptor handles + * + * Stores up to IGL_TEXTURE_SAMPLERS_MAX sampler bindings (s0-s15 in HLSL). + * Each binding stores the sampler state pointer (for descriptor creation) and the + * resulting GPU descriptor handle (for root parameter binding). + */ +struct BindingsSamplers { + ISamplerState* samplers[IGL_TEXTURE_SAMPLERS_MAX] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE handles[IGL_TEXTURE_SAMPLERS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Binding state for uniform buffers (constant buffers in D3D12) + * + * Stores up to IGL_BUFFER_BINDINGS_MAX buffer bindings (b0-b30 in HLSL). + * D3D12 has two binding methods: + * - Root CBV (direct GPU virtual address) - used for b0-b1 (legacy/frequent) + * - CBV descriptor table - used for b2+ (less frequent) + * + * This struct stores buffer pointers and GPU virtual addresses/sizes for all bindings. + * The actual binding method is determined by the pipeline root signature. 
+ */ +struct BindingsBuffers { + IBuffer* buffers[IGL_BUFFER_BINDINGS_MAX] = {}; + D3D12_GPU_VIRTUAL_ADDRESS addresses[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t offsets[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t sizes[IGL_BUFFER_BINDINGS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Binding state for unordered access views (UAVs) + * + * Stores up to IGL_BUFFER_BINDINGS_MAX UAV bindings (u0-u30 in HLSL). + * Used for storage buffers in compute shaders and writable resources. + * Each binding stores the buffer pointer, offset, element stride (for descriptor creation), + * and the resulting GPU descriptor handle (for root parameter binding). + */ +struct BindingsUAVs { + IBuffer* buffers[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t offsets[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t elementStrides[IGL_BUFFER_BINDINGS_MAX] = {}; // Byte stride per element for structured buffers + D3D12_GPU_DESCRIPTOR_HANDLE handles[IGL_BUFFER_BINDINGS_MAX] = {}; + uint32_t count = 0; +}; + +/** + * @brief Centralized resource binding management for D3D12 command encoders + * + * D3D12ResourcesBinder is the single entry point for shader-visible descriptor binding + * (CBV/SRV/UAV/Sampler) used by command encoders. It consolidates descriptor allocation + * and resource binding logic that was previously fragmented across RenderCommandEncoder + * and ComputeCommandEncoder. + * + * Note: RTV/DSV descriptors are managed separately by DescriptorHeapManager and bound + * directly by encoders during render pass setup. + * + * ============================================================================ + * ARCHITECTURE: D3D12 Descriptor Management Overview + * ============================================================================ + * + * The D3D12 backend uses THREE distinct descriptor management strategies: + * + * 1. 
**Transient Descriptor Allocator** (Per-Frame Heaps) + * - Location: D3D12Context::FrameContext, CommandBuffer allocation methods + * - Purpose: Shader-visible descriptors (CBV/SRV/UAV/Samplers) for rendering + * - Lifecycle: Allocated during command encoding, reset at frame boundary + * - Strategy: Linear allocation with dynamic multi-page growth + * - Used for: SRVs (textures), UAVs (storage buffers), CBVs, Samplers + * - Access: ONLY through D3D12ResourcesBinder (internal detail) + * + * 2. **Persistent Descriptor Allocator** (DescriptorHeapManager) + * - Location: DescriptorHeapManager class + * - Purpose: CPU-visible descriptors (RTV/DSV) with explicit lifecycle + * - Lifecycle: Allocated at resource creation, freed at resource destruction + * - Strategy: Free-list allocation with double-free protection + * - Used for: Render target views, depth-stencil views + * - Access: Directly by Texture and Framebuffer classes + * + * 3. **Root Descriptor Optimization** (Inline Binding) + * - Location: D3D12ResourcesBinder::updateBufferBindings() + * - Purpose: Bypass descriptor heaps for frequently-updated constant buffers + * - Lifecycle: No descriptor created - binds GPU virtual address directly + * - Strategy: D3D12 root CBVs (graphics b0-b1 only) + * - Used for: Hot-path constant buffers in graphics pipeline + * - Access: ONLY through D3D12ResourcesBinder (internal optimization) + * + * **Design Rationale**: + * - Strategies 1 and 2 handle DIFFERENT descriptor types (shader-visible vs CPU-visible) + * and lifecycles (transient vs persistent), so they cannot be merged + * - Strategy 3 is a D3D12-specific optimization, not a separate "system" + * - D3D12ResourcesBinder abstracts these details, providing a unified binding interface + * + * ============================================================================ + * Key Responsibilities of D3D12ResourcesBinder + * ============================================================================ + * + * - Cache resource 
bindings locally until updateBindings() is called + * - Allocate descriptors from per-frame shader-visible heaps on-demand (Strategy 1) + * - Create SRV/UAV/CBV/Sampler descriptors in GPU-visible heaps + * - Decide when to use root CBVs vs descriptor tables (Strategy 3) + * - Track dirty state to minimize descriptor creation and root parameter updates + * - Support both graphics and compute pipeline bind points + * - Transition texture resources to appropriate shader-resource states (buffers must + * be created in the correct state and are not transitioned here) + * + * Design principles: + * - **Lazy update**: Bindings are cached locally and only applied to GPU on updateBindings() + * - **Dirty tracking**: Only update descriptor sets when resources change + * - **Pipeline awareness**: Different root signature layouts for graphics vs compute + * - **Per-frame isolation**: Uses per-frame descriptor heaps to prevent race conditions + * - **Implementation hiding**: External code should never directly access CommandBuffer + * descriptor allocation methods - always go through ResourcesBinder + * + * Thread-safety: This class is NOT thread-safe. Each encoder should own its own binder. 
+ * + * Dependencies: + * - T01: Correct descriptor binding patterns + * - T06: Shared helper utilities for descriptor creation + * - T16: Unified logging controls + * - T20: Consolidated descriptor management architecture + * + * Related to Vulkan ResourcesBinder pattern (src/igl/vulkan/ResourcesBinder.h) + */ +class D3D12ResourcesBinder final { + public: + /** + * @brief Initialize the resource binder for a command buffer + * + * @param commandBuffer Command buffer to bind resources to (provides context/device access) + * @param isCompute True for compute pipelines, false for graphics pipelines + */ + D3D12ResourcesBinder(CommandBuffer& commandBuffer, bool isCompute); + + /** + * @brief Bind a texture (shader resource view) to a specific slot + * + * Creates or updates an SRV descriptor in the per-frame CBV/SRV/UAV heap + * and caches the GPU handle. The binding is not applied to the command list + * until updateBindings() is called. + * + * @param index Texture slot (t0-t15 in HLSL, 0-based index) + * @param texture Texture to bind (nullptr to unbind) + */ + void bindTexture(uint32_t index, ITexture* texture); + + /** + * @brief Bind a sampler state to a specific slot + * + * Creates or updates a sampler descriptor in the per-frame sampler heap + * and caches the GPU handle. The binding is not applied to the command list + * until updateBindings() is called. + * + * @param index Sampler slot (s0-s15 in HLSL, 0-based index) + * @param samplerState Sampler state to bind (nullptr to unbind) + */ + void bindSamplerState(uint32_t index, ISamplerState* samplerState); + + /** + * @brief Bind a buffer (constant buffer or storage buffer) to a specific slot + * + * For uniform buffers (constant buffers): + * - Stores GPU virtual address for root CBV binding (b0-b1) + * - Or creates CBV descriptor for descriptor table binding (b2+) + * - **COMPUTE SHADERS**: CBV bindings MUST be dense starting from index 0 with no gaps. 
+ * For example, binding slots 0, 1, 2 is valid; binding 0, 2 (skipping 1) will fail. + * This constraint is enforced because descriptor tables bind contiguously from b0. + * + * For storage buffers: + * - Creates UAV descriptor in the per-frame CBV/SRV/UAV heap + * - Requires elementStride for structured buffer descriptor creation + * + * The binding is not applied to the command list until updateBindings() is called. + * + * @param index Buffer slot (b0-b30 for CBVs, u0-u30 for UAVs in HLSL) + * @param buffer Buffer to bind (nullptr to unbind) + * @param offset Offset in bytes into the buffer + * @param size Size in bytes to bind + * @param isUAV True to bind as UAV (storage buffer), false for CBV (uniform buffer) + * @param elementStride For UAVs: byte stride per element for structured buffers (required) + */ + void bindBuffer(uint32_t index, + IBuffer* buffer, + size_t offset, + size_t size, + bool isUAV = false, + size_t elementStride = 0); + + /** + * @brief Apply all pending bindings to the command list + * + * This method performs the actual GPU binding work: + * 1. Creates descriptors for any dirty bindings (textures/samplers/buffers/UAVs) + * 2. Sets root descriptor tables (SetGraphicsRootDescriptorTable/SetComputeRootDescriptorTable) + * 3. Sets root constants/root CBVs if applicable + * 4. Clears dirty flags + * + * This should be called before draw/dispatch commands to ensure all bindings are active. + * + * @param renderPipeline For graphics pipelines: current pipeline to query reflection-based root parameter indices. + * For compute pipelines: pass nullptr (uses hardcoded layout). + * @param outResult Optional result for error reporting (e.g., descriptor heap overflow). + * If nullptr, caller receives only success/fail boolean. If non-null, + * all failure paths populate both error code and diagnostic message. 
+ * @return true if bindings applied successfully, false on error + */ + [[nodiscard]] bool updateBindings(const class RenderPipelineState* renderPipeline = nullptr, + Result* outResult = nullptr); + + /** + * @brief Reset all bindings and dirty flags + * + * Called at the start of a new frame or when switching pipelines to ensure + * clean binding state. Does not affect the underlying descriptor heaps. + */ + void reset(); + + private: + /** + * @brief Bitwise flags for dirty resource types + * + * Used to track which resource types have been modified since the last + * updateBindings() call, allowing us to skip descriptor creation and + * root parameter updates for unchanged resources. + */ + enum DirtyFlagBits : uint8_t { + DirtyFlagBits_Textures = 1 << 0, + DirtyFlagBits_Samplers = 1 << 1, + DirtyFlagBits_Buffers = 1 << 2, + DirtyFlagBits_UAVs = 1 << 3, + }; + + /** + * @brief Update texture bindings (SRV descriptor table) + * + * Creates SRV descriptors for all bound textures in the per-frame heap + * and sets the root descriptor table parameter. + * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param renderPipeline Pipeline to query reflection-based root parameter indices (graphics only) + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateTextureBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const class RenderPipelineState* renderPipeline, + Result* outResult); + + /** + * @brief Update sampler bindings (sampler descriptor table) + * + * Creates sampler descriptors for all bound samplers in the per-frame heap + * and sets the root descriptor table parameter. 
+ * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param renderPipeline Pipeline to query reflection-based root parameter indices (graphics only) + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateSamplerBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const class RenderPipelineState* renderPipeline, + Result* outResult); + + /** + * @brief Update buffer bindings (CBV descriptor table) + * + * For graphics pipelines: + * - Creates CBV descriptor table for all bound CBVs + * - Queries pipeline for reflection-based root parameter index + * + * For compute pipelines: + * - Creates CBV descriptor table for all bindings (hardcoded root parameter) + * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param renderPipeline Pipeline to query reflection-based root parameter indices (graphics only) + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateBufferBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + const class RenderPipelineState* renderPipeline, + Result* outResult); + + /** + * @brief Update UAV bindings (UAV descriptor table for compute shaders) + * + * Creates UAV descriptors for all bound storage buffers in the per-frame heap + * and sets the root descriptor table parameter. Only used for compute pipelines. 
+ * + * @param cmdList Command list to update + * @param device D3D12 device for descriptor creation + * @param outResult Optional result for error reporting + * @return true on success, false on error + */ + [[nodiscard]] bool updateUAVBindings(ID3D12GraphicsCommandList* cmdList, + ID3D12Device* device, + Result* outResult); + + CommandBuffer& commandBuffer_; + bool isCompute_ = false; + + // Cached binding state + BindingsTextures bindingsTextures_; + BindingsSamplers bindingsSamplers_; + BindingsBuffers bindingsBuffers_; + BindingsUAVs bindingsUAVs_; + + // Dirty tracking flags + uint32_t dirtyFlags_ = DirtyFlagBits_Textures | DirtyFlagBits_Samplers | + DirtyFlagBits_Buffers | DirtyFlagBits_UAVs; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12RootSignatureKey.h b/src/igl/d3d12/D3D12RootSignatureKey.h new file mode 100644 index 0000000000..9dcf918cc2 --- /dev/null +++ b/src/igl/d3d12/D3D12RootSignatureKey.h @@ -0,0 +1,325 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief Key structure for root signature cache lookup based on shader resource usage + * + * This structure captures the essential shader resource requirements needed to construct + * a compatible root signature. It enables Vulkan-style dynamic root signature selection + * where the root signature is chosen based on actual shader resource usage rather than + * being globally fixed. + * + * The key includes: + * - Push constant configuration (slot and size) + * - Resource slot usage (CBV/SRV/UAV/Sampler ranges) + * - Flags for shader visibility and optimization + * + * Root signatures with the same key are compatible and can be reused across pipelines. 
+ */ +struct D3D12RootSignatureKey { + // Push constants configuration + bool hasPushConstants = false; + UINT pushConstantSlot = UINT_MAX; // Which b# register + UINT pushConstantSize = 0; // Size in 32-bit values + + // Resource slot ranges (sorted for consistent hashing) + std::vector usedCBVSlots; + std::vector usedSRVSlots; + std::vector usedUAVSlots; + std::vector usedSamplerSlots; + + // Minimum / maximum slot indices (for determining descriptor table windows) + UINT minCBVSlot = 0; + UINT maxCBVSlot = 0; + UINT minSRVSlot = 0; + UINT maxSRVSlot = 0; + UINT minUAVSlot = 0; + UINT maxUAVSlot = 0; + UINT minSamplerSlot = 0; + UINT maxSamplerSlot = 0; + + // Root signature flags + D3D12_ROOT_SIGNATURE_FLAGS flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + /** + * @brief Construct key from vertex + fragment shader reflection + * + * Merges resource usage from both shaders to create a unified key. + * Handles push constant slot conflicts (prefers vertex shader if both use different slots). 
+ */ + static D3D12RootSignatureKey fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* vsReflection, + const ShaderModule::ShaderReflectionInfo* psReflection); + + /** + * @brief Construct key from compute shader reflection + */ + static D3D12RootSignatureKey fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* csReflection); + + bool operator==(const D3D12RootSignatureKey& other) const { + return hasPushConstants == other.hasPushConstants && + pushConstantSlot == other.pushConstantSlot && + pushConstantSize == other.pushConstantSize && + usedCBVSlots == other.usedCBVSlots && + usedSRVSlots == other.usedSRVSlots && + usedUAVSlots == other.usedUAVSlots && + usedSamplerSlots == other.usedSamplerSlots && + minCBVSlot == other.minCBVSlot && + maxCBVSlot == other.maxCBVSlot && + minSRVSlot == other.minSRVSlot && + maxSRVSlot == other.maxSRVSlot && + minUAVSlot == other.minUAVSlot && + maxUAVSlot == other.maxUAVSlot && + minSamplerSlot == other.minSamplerSlot && + maxSamplerSlot == other.maxSamplerSlot && + flags == other.flags; + } + + struct HashFunction { + size_t operator()(const D3D12RootSignatureKey& key) const { + size_t hash = 0; + + // Hash push constants + hashCombine(hash, key.hasPushConstants ? 
1 : 0); + hashCombine(hash, static_cast(key.pushConstantSlot)); + hashCombine(hash, static_cast(key.pushConstantSize)); + + // Hash resource slots + for (UINT slot : key.usedCBVSlots) { + hashCombine(hash, static_cast(slot)); + } + for (UINT slot : key.usedSRVSlots) { + hashCombine(hash, static_cast(slot)); + } + for (UINT slot : key.usedUAVSlots) { + hashCombine(hash, static_cast(slot)); + } + for (UINT slot : key.usedSamplerSlots) { + hashCombine(hash, static_cast(slot)); + } + + // Hash min/max slots + hashCombine(hash, static_cast(key.minCBVSlot)); + hashCombine(hash, static_cast(key.maxCBVSlot)); + hashCombine(hash, static_cast(key.minSRVSlot)); + hashCombine(hash, static_cast(key.maxSRVSlot)); + hashCombine(hash, static_cast(key.minUAVSlot)); + hashCombine(hash, static_cast(key.maxUAVSlot)); + hashCombine(hash, static_cast(key.minSamplerSlot)); + hashCombine(hash, static_cast(key.maxSamplerSlot)); + + // Hash flags + hashCombine(hash, static_cast(key.flags)); + + return hash; + } + }; + +private: + // Helper to merge two slot vectors and sort + static std::vector mergeAndSort(const std::vector& a, const std::vector& b) { + std::vector result = a; + result.insert(result.end(), b.begin(), b.end()); + std::sort(result.begin(), result.end()); + // Remove duplicates + result.erase(std::unique(result.begin(), result.end()), result.end()); + return result; + } +}; + +// Implementation of fromShaderReflection for graphics pipeline +inline D3D12RootSignatureKey D3D12RootSignatureKey::fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* vsReflection, + const ShaderModule::ShaderReflectionInfo* psReflection) { + D3D12RootSignatureKey key; + + // Merge push constants (prefer vertex shader if conflict) + if (vsReflection && vsReflection->hasPushConstants) { + key.hasPushConstants = true; + key.pushConstantSlot = vsReflection->pushConstantSlot; + key.pushConstantSize = vsReflection->pushConstantSize; + } else if (psReflection && psReflection->hasPushConstants) 
{ + key.hasPushConstants = true; + key.pushConstantSlot = psReflection->pushConstantSlot; + key.pushConstantSize = psReflection->pushConstantSize; + } + + // Merge resource slots + // IMPORTANT: Exclude push constant slot from CBV descriptor table + // Push constants use inline root constants (D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS), + // not a CBV descriptor. Including the push constant slot in usedCBVSlots would cause + // a root signature overlap error. + if (vsReflection && psReflection) { + key.usedCBVSlots = mergeAndSort(vsReflection->usedCBVSlots, psReflection->usedCBVSlots); + key.usedSRVSlots = mergeAndSort(vsReflection->usedSRVSlots, psReflection->usedSRVSlots); + key.usedUAVSlots = mergeAndSort(vsReflection->usedUAVSlots, psReflection->usedUAVSlots); + key.usedSamplerSlots = mergeAndSort(vsReflection->usedSamplerSlots, psReflection->usedSamplerSlots); + + key.maxCBVSlot = std::max(vsReflection->maxCBVSlot, psReflection->maxCBVSlot); + key.maxSRVSlot = std::max(vsReflection->maxSRVSlot, psReflection->maxSRVSlot); + key.maxUAVSlot = std::max(vsReflection->maxUAVSlot, psReflection->maxUAVSlot); + key.maxSamplerSlot = std::max(vsReflection->maxSamplerSlot, psReflection->maxSamplerSlot); + } else if (vsReflection) { + key.usedCBVSlots = vsReflection->usedCBVSlots; + key.usedSRVSlots = vsReflection->usedSRVSlots; + key.usedUAVSlots = vsReflection->usedUAVSlots; + key.usedSamplerSlots = vsReflection->usedSamplerSlots; + + std::sort(key.usedCBVSlots.begin(), key.usedCBVSlots.end()); + key.usedCBVSlots.erase(std::unique(key.usedCBVSlots.begin(), key.usedCBVSlots.end()), + key.usedCBVSlots.end()); + + std::sort(key.usedSRVSlots.begin(), key.usedSRVSlots.end()); + key.usedSRVSlots.erase(std::unique(key.usedSRVSlots.begin(), key.usedSRVSlots.end()), + key.usedSRVSlots.end()); + + std::sort(key.usedUAVSlots.begin(), key.usedUAVSlots.end()); + key.usedUAVSlots.erase(std::unique(key.usedUAVSlots.begin(), key.usedUAVSlots.end()), + key.usedUAVSlots.end()); + + 
std::sort(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()); + key.usedSamplerSlots.erase(std::unique(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()), + key.usedSamplerSlots.end()); + + key.maxCBVSlot = vsReflection->maxCBVSlot; + key.maxSRVSlot = vsReflection->maxSRVSlot; + key.maxUAVSlot = vsReflection->maxUAVSlot; + key.maxSamplerSlot = vsReflection->maxSamplerSlot; + } else if (psReflection) { + key.usedCBVSlots = psReflection->usedCBVSlots; + key.usedSRVSlots = psReflection->usedSRVSlots; + key.usedUAVSlots = psReflection->usedUAVSlots; + key.usedSamplerSlots = psReflection->usedSamplerSlots; + + std::sort(key.usedCBVSlots.begin(), key.usedCBVSlots.end()); + key.usedCBVSlots.erase(std::unique(key.usedCBVSlots.begin(), key.usedCBVSlots.end()), + key.usedCBVSlots.end()); + + std::sort(key.usedSRVSlots.begin(), key.usedSRVSlots.end()); + key.usedSRVSlots.erase(std::unique(key.usedSRVSlots.begin(), key.usedSRVSlots.end()), + key.usedSRVSlots.end()); + + std::sort(key.usedUAVSlots.begin(), key.usedUAVSlots.end()); + key.usedUAVSlots.erase(std::unique(key.usedUAVSlots.begin(), key.usedUAVSlots.end()), + key.usedUAVSlots.end()); + + std::sort(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()); + key.usedSamplerSlots.erase(std::unique(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()), + key.usedSamplerSlots.end()); + + key.maxCBVSlot = psReflection->maxCBVSlot; + key.maxSRVSlot = psReflection->maxSRVSlot; + key.maxUAVSlot = psReflection->maxUAVSlot; + key.maxSamplerSlot = psReflection->maxSamplerSlot; + } + + // Compute min slots (if any resources are present) + if (!key.usedCBVSlots.empty()) { + key.minCBVSlot = key.usedCBVSlots.front(); + } + if (!key.usedSRVSlots.empty()) { + key.minSRVSlot = key.usedSRVSlots.front(); + } + if (!key.usedUAVSlots.empty()) { + key.minUAVSlot = key.usedUAVSlots.front(); + } + if (!key.usedSamplerSlots.empty()) { + key.minSamplerSlot = key.usedSamplerSlots.front(); + } + + // Remove push constant slot 
from CBV slots (if present) + // Push constants are bound via root constants, not CBV descriptor table + if (key.hasPushConstants) { + key.usedCBVSlots.erase( + std::remove(key.usedCBVSlots.begin(), key.usedCBVSlots.end(), key.pushConstantSlot), + key.usedCBVSlots.end()); + } + + return key; +} + +// Implementation of fromShaderReflection for compute pipeline +inline D3D12RootSignatureKey D3D12RootSignatureKey::fromShaderReflection( + const ShaderModule::ShaderReflectionInfo* csReflection) { + D3D12RootSignatureKey key; + + if (!csReflection) { + return key; + } + + // Copy push constants + key.hasPushConstants = csReflection->hasPushConstants; + key.pushConstantSlot = csReflection->pushConstantSlot; + key.pushConstantSize = csReflection->pushConstantSize; + + // Copy resource slots + key.usedCBVSlots = csReflection->usedCBVSlots; + key.usedSRVSlots = csReflection->usedSRVSlots; + key.usedUAVSlots = csReflection->usedUAVSlots; + key.usedSamplerSlots = csReflection->usedSamplerSlots; + + // Ensure resource slot lists are sorted and unique for stable hashing / min/max tracking + std::sort(key.usedCBVSlots.begin(), key.usedCBVSlots.end()); + key.usedCBVSlots.erase(std::unique(key.usedCBVSlots.begin(), key.usedCBVSlots.end()), + key.usedCBVSlots.end()); + + std::sort(key.usedSRVSlots.begin(), key.usedSRVSlots.end()); + key.usedSRVSlots.erase(std::unique(key.usedSRVSlots.begin(), key.usedSRVSlots.end()), + key.usedSRVSlots.end()); + + std::sort(key.usedUAVSlots.begin(), key.usedUAVSlots.end()); + key.usedUAVSlots.erase(std::unique(key.usedUAVSlots.begin(), key.usedUAVSlots.end()), + key.usedUAVSlots.end()); + + std::sort(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()); + key.usedSamplerSlots.erase(std::unique(key.usedSamplerSlots.begin(), key.usedSamplerSlots.end()), + key.usedSamplerSlots.end()); + + key.maxCBVSlot = csReflection->maxCBVSlot; + key.maxSRVSlot = csReflection->maxSRVSlot; + key.maxUAVSlot = csReflection->maxUAVSlot; + key.maxSamplerSlot = 
csReflection->maxSamplerSlot; + + // Remove push constant slot from CBV slots (if present) + // Push constants are bound via root constants, not CBV descriptor table + if (key.hasPushConstants) { + key.usedCBVSlots.erase( + std::remove(key.usedCBVSlots.begin(), key.usedCBVSlots.end(), key.pushConstantSlot), + key.usedCBVSlots.end()); + } + + // Compute min slots (if any resources are present) + if (!key.usedCBVSlots.empty()) { + key.minCBVSlot = key.usedCBVSlots.front(); + } + if (!key.usedSRVSlots.empty()) { + key.minSRVSlot = key.usedSRVSlots.front(); + } + if (!key.usedUAVSlots.empty()) { + key.minUAVSlot = key.usedUAVSlots.front(); + } + if (!key.usedSamplerSlots.empty()) { + key.minSamplerSlot = key.usedSamplerSlots.front(); + } + + // Compute shaders don't need input assembler + key.flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + return key; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12SamplerCache.h b/src/igl/d3d12/D3D12SamplerCache.h new file mode 100644 index 0000000000..faa0b610fa --- /dev/null +++ b/src/igl/d3d12/D3D12SamplerCache.h @@ -0,0 +1,219 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace igl::d3d12 { + +struct SamplerCacheStats { + size_t cacheHits = 0; + size_t cacheMisses = 0; + size_t activeSamplers = 0; + float hitRate = 0.0f; +}; + +class D3D12SamplerCache { + public: + D3D12SamplerCache() = default; + + [[nodiscard]] std::shared_ptr createSamplerState( + const SamplerStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + const size_t samplerHash = std::hash{}(desc); + + { + std::lock_guard lock(samplerCacheMutex_); + + auto it = samplerCache_.find(samplerHash); + if (it != samplerCache_.end()) { + std::shared_ptr existingSampler = it->second.lock(); + + if (existingSampler) { + samplerCacheHits_++; + const size_t totalRequests = + samplerCacheHits_ + samplerCacheMisses_; + IGL_D3D12_LOG_VERBOSE( + "D3D12SamplerCache::createSamplerState: Cache HIT " + "(hash=0x%zx, hits=%zu, misses=%zu, hit rate=%.1f%%)\n", + samplerHash, + samplerCacheHits_, + samplerCacheMisses_, + totalRequests > 0 + ? 
100.0 * samplerCacheHits_ / + static_cast(totalRequests) + : 0.0); + Result::setOk(outResult); + // Upcast shared_ptr -> shared_ptr + return existingSampler; + } else { + samplerCache_.erase(it); + } + } + } + + D3D12_SAMPLER_DESC samplerDesc = {}; + + auto toD3D12Address = [](SamplerAddressMode m) { + switch (m) { + case SamplerAddressMode::Repeat: + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + case SamplerAddressMode::MirrorRepeat: + return D3D12_TEXTURE_ADDRESS_MODE_MIRROR; + case SamplerAddressMode::Clamp: + return D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + default: + return D3D12_TEXTURE_ADDRESS_MODE_WRAP; + } + }; + + auto toD3D12Compare = [](CompareFunction f) { + switch (f) { + case CompareFunction::Less: + return D3D12_COMPARISON_FUNC_LESS; + case CompareFunction::LessEqual: + return D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CompareFunction::Greater: + return D3D12_COMPARISON_FUNC_GREATER; + case CompareFunction::GreaterEqual: + return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CompareFunction::Equal: + return D3D12_COMPARISON_FUNC_EQUAL; + case CompareFunction::NotEqual: + return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CompareFunction::AlwaysPass: + return D3D12_COMPARISON_FUNC_ALWAYS; + case CompareFunction::Never: + return D3D12_COMPARISON_FUNC_NEVER; + default: + return D3D12_COMPARISON_FUNC_NEVER; + } + }; + + const bool useComparison = desc.depthCompareEnabled; + + const bool minLinear = (desc.minFilter != SamplerMinMagFilter::Nearest); + const bool magLinear = (desc.magFilter != SamplerMinMagFilter::Nearest); + const bool mipLinear = (desc.mipFilter == SamplerMipFilter::Linear); + const bool anisotropic = (desc.maxAnisotropic > 1); + + if (anisotropic) { + samplerDesc.Filter = useComparison + ? 
D3D12_FILTER_COMPARISON_ANISOTROPIC + : D3D12_FILTER_ANISOTROPIC; + samplerDesc.MaxAnisotropy = + std::min(desc.maxAnisotropic, 16); + } else { + D3D12_FILTER filter = D3D12_FILTER_MIN_MAG_MIP_POINT; + if (minLinear && magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + } else if (minLinear && magLinear && !mipLinear) { + filter = D3D12_FILTER_MIN_MAG_LINEAR_MIP_POINT; + } else if (minLinear && !magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR; + } else if (minLinear && !magLinear && !mipLinear) { + filter = D3D12_FILTER_MIN_LINEAR_MAG_MIP_POINT; + } else if (!minLinear && magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_POINT_MAG_MIP_LINEAR; + } else if (!minLinear && magLinear && !mipLinear) { + filter = D3D12_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT; + } else if (!minLinear && !magLinear && mipLinear) { + filter = D3D12_FILTER_MIN_MAG_POINT_MIP_LINEAR; + } + + if (useComparison) { + filter = static_cast( + filter | D3D12_FILTER_COMPARISON_MIN_MAG_MIP_POINT - + D3D12_FILTER_MIN_MAG_MIP_POINT); + } + samplerDesc.Filter = filter; + samplerDesc.MaxAnisotropy = 1; + } + + samplerDesc.AddressU = toD3D12Address(desc.addressModeU); + samplerDesc.AddressV = toD3D12Address(desc.addressModeV); + samplerDesc.AddressW = toD3D12Address(desc.addressModeW); + samplerDesc.MipLODBias = 0.0f; + // For comparison samplers, use the requested depth comparison function. + // For non-comparison samplers, set ComparisonFunc to NEVER so that the + // debug layer does not flag spurious D3D12_MESSAGE_ID 1361 warnings when + // Filter is not a comparison filter. The value is ignored in this case. + samplerDesc.ComparisonFunc = + useComparison ? 
toD3D12Compare(desc.depthCompareFunction) + : D3D12_COMPARISON_FUNC_NEVER; + samplerDesc.BorderColor[0] = 0.0f; + samplerDesc.BorderColor[1] = 0.0f; + samplerDesc.BorderColor[2] = 0.0f; + samplerDesc.BorderColor[3] = 0.0f; + samplerDesc.MinLOD = static_cast(desc.mipLodMin); + samplerDesc.MaxLOD = static_cast(desc.mipLodMax); + + auto concreteSampler = std::make_shared(samplerDesc); + std::shared_ptr samplerState = + std::static_pointer_cast(concreteSampler); + + { + std::lock_guard lock(samplerCacheMutex_); + samplerCache_[samplerHash] = concreteSampler; + samplerCacheMisses_++; + IGL_D3D12_LOG_VERBOSE( + "D3D12SamplerCache::createSamplerState: Cache MISS " + "(hash=0x%zx, total misses=%zu)\n", + samplerHash, + samplerCacheMisses_); + } + + Result::setOk(outResult); + return samplerState; + } + + [[nodiscard]] SamplerCacheStats getStats() const { + std::lock_guard lock(samplerCacheMutex_); + + SamplerCacheStats stats; + stats.cacheHits = samplerCacheHits_; + stats.cacheMisses = samplerCacheMisses_; + + stats.activeSamplers = 0; + for (const auto& [hash, weakPtr] : samplerCache_) { + (void)hash; + if (!weakPtr.expired()) { + stats.activeSamplers++; + } + } + + const size_t totalRequests = stats.cacheHits + stats.cacheMisses; + if (totalRequests > 0) { + stats.hitRate = 100.0f * static_cast(stats.cacheHits) / + static_cast(totalRequests); + } + + return stats; + } + + void clear() { + std::lock_guard lock(samplerCacheMutex_); + samplerCache_.clear(); + samplerCacheHits_ = 0; + samplerCacheMisses_ = 0; + } + + private: + mutable std::unordered_map> samplerCache_; + mutable std::mutex samplerCacheMutex_; + mutable size_t samplerCacheHits_ = 0; + mutable size_t samplerCacheMisses_ = 0; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12StagingDevice.cpp b/src/igl/d3d12/D3D12StagingDevice.cpp new file mode 100644 index 0000000000..df76ef8bb4 --- /dev/null +++ b/src/igl/d3d12/D3D12StagingDevice.cpp @@ -0,0 +1,236 @@ +/* + * Copyright (c) Meta Platforms, Inc. 
and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +D3D12StagingDevice::D3D12StagingDevice(ID3D12Device* device, + ID3D12Fence* fence, + UploadRingBuffer* uploadRingBuffer) + : device_(device), fence_(fence), uploadRingBuffer_(uploadRingBuffer) { + IGL_DEBUG_ASSERT(device_); + IGL_DEBUG_ASSERT(fence_); + + IGL_D3D12_LOG_VERBOSE("D3D12StagingDevice: Initialized (ring buffer: %s)\n", + uploadRingBuffer_ ? "yes" : "no"); +} + +D3D12StagingDevice::~D3D12StagingDevice() { + // Wait for all in-flight buffers to complete + if (fence_) { + for (const auto& entry : inFlightBuffers_) { + if (fence_->GetCompletedValue() < entry.fenceValue) { + FenceWaiter waiter(fence_, entry.fenceValue); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("D3D12StagingDevice::~D3D12StagingDevice() - Fence wait failed during cleanup: %s\n", + waitResult.message.c_str()); + } + } + } + } + + IGL_D3D12_LOG_VERBOSE("D3D12StagingDevice: Destroyed\n"); +} + +D3D12StagingDevice::StagingBuffer D3D12StagingDevice::allocateUpload(size_t size, + size_t alignment, + uint64_t fenceValue) { + // Try ring buffer first for small allocations + if (uploadRingBuffer_ && size <= kMaxRingBufferAllocation) { + auto ringAlloc = uploadRingBuffer_->allocate(size, alignment, fenceValue); + if (ringAlloc.valid) { + StagingBuffer result; + result.buffer = ringAlloc.buffer; + result.mappedPtr = ringAlloc.cpuAddress; + result.size = ringAlloc.size; + result.offset = ringAlloc.offset; + result.valid = true; + result.isFromRingBuffer = true; + return result; + } + } + + // Fall back to dedicated staging buffer + std::lock_guard lock(poolMutex_); + + // Reclaim completed buffers + reclaimCompletedBuffers(); + + igl::d3d12::ComPtr buffer; + + // Try to find a reusable buffer + if (!findReusableBuffer(size, false, 
&buffer)) { + // Create new buffer + Result result = createStagingBuffer(size, false, &buffer); + if (!result.isOk() || !buffer.Get()) { + return StagingBuffer{}; // Return invalid buffer + } + } + + // Map the buffer + void* mappedPtr = nullptr; + D3D12_RANGE readRange{0, 0}; // Not reading + HRESULT hr = buffer->Map(0, &readRange, &mappedPtr); + if (FAILED(hr) || !mappedPtr) { + IGL_LOG_ERROR("D3D12StagingDevice: Failed to map upload buffer\n"); + return StagingBuffer{}; + } + + StagingBuffer staging; + staging.buffer = buffer; + staging.mappedPtr = mappedPtr; + staging.size = size; + staging.offset = 0; + staging.valid = true; + staging.isFromRingBuffer = false; + + return staging; +} + +D3D12StagingDevice::StagingBuffer D3D12StagingDevice::allocateReadback(size_t size) { + std::lock_guard lock(poolMutex_); + + // Reclaim completed buffers + reclaimCompletedBuffers(); + + igl::d3d12::ComPtr buffer; + + // Try to find a reusable buffer + if (!findReusableBuffer(size, true, &buffer)) { + // Create new buffer + Result result = createStagingBuffer(size, true, &buffer); + if (!result.isOk() || !buffer.Get()) { + return StagingBuffer{}; // Return invalid buffer + } + } + + // Readback buffers are mapped on-demand when needed + StagingBuffer staging; + staging.buffer = buffer; + staging.mappedPtr = nullptr; + staging.size = size; + staging.offset = 0; + staging.valid = true; + staging.isFromRingBuffer = false; + + return staging; +} + +void D3D12StagingDevice::free(StagingBuffer buffer, uint64_t fenceValue) { + if (!buffer.valid) { + return; + } + + // Ring buffer allocations are handled automatically + if (buffer.isFromRingBuffer) { + return; + } + + std::lock_guard lock(poolMutex_); + + // Unmap if it was mapped + if (buffer.mappedPtr) { + buffer.buffer->Unmap(0, nullptr); + } + + // Add to in-flight list + BufferEntry entry; + entry.buffer = buffer.buffer; + entry.size = buffer.size; + entry.fenceValue = fenceValue; + + // Determine if it's a readback buffer + 
D3D12_HEAP_PROPERTIES heapProps; + buffer.buffer->GetHeapProperties(&heapProps, nullptr); + entry.isReadback = (heapProps.Type == D3D12_HEAP_TYPE_READBACK); + + inFlightBuffers_.push_back(std::move(entry)); +} + +void D3D12StagingDevice::reclaimCompletedBuffers() { + // Note: Internal helper called by allocate* methods with poolMutex_ already held + if (!fence_) { + return; + } + + const uint64_t completedValue = fence_->GetCompletedValue(); + + // Move completed buffers from in-flight to available + auto it = inFlightBuffers_.begin(); + while (it != inFlightBuffers_.end()) { + if (it->fenceValue <= completedValue) { + availableBuffers_.push_back({it->buffer, it->size, 0, it->isReadback}); + it = inFlightBuffers_.erase(it); + } else { + ++it; + } + } +} + +Result D3D12StagingDevice::createStagingBuffer( + size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer) { + D3D12_HEAP_PROPERTIES heapProps{}; + heapProps.Type = forReadback ? D3D12_HEAP_TYPE_READBACK : D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC desc{}; + desc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + desc.Width = size; + desc.Height = 1; + desc.DepthOrArraySize = 1; + desc.MipLevels = 1; + desc.Format = DXGI_FORMAT_UNKNOWN; + desc.SampleDesc.Count = 1; + desc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + D3D12_RESOURCE_STATES initialState = forReadback ? D3D12_RESOURCE_STATE_COPY_DEST + : D3D12_RESOURCE_STATE_GENERIC_READ; + + HRESULT hr = device_->CreateCommittedResource(&heapProps, + D3D12_HEAP_FLAG_NONE, + &desc, + initialState, + nullptr, + IID_PPV_ARGS(outBuffer->GetAddressOf())); + + if (FAILED(hr)) { + return Result{Result::Code::RuntimeError, "Failed to create staging buffer"}; + } + + IGL_D3D12_LOG_VERBOSE("D3D12StagingDevice: Created new %s buffer (size: %zu bytes)\n", + forReadback ? 
"readback" : "upload", + size); + + return Result{}; +} + +bool D3D12StagingDevice::findReusableBuffer(size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer) { + // Find a buffer that matches type and is large enough + for (auto it = availableBuffers_.begin(); it != availableBuffers_.end(); ++it) { + if (it->isReadback == forReadback && it->size >= size) { + // Prefer buffers that are close in size (within 2x) to avoid waste + if (it->size <= size * 2) { + *outBuffer = it->buffer; + availableBuffers_.erase(it); + return true; + } + } + } + + return false; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12StagingDevice.h b/src/igl/d3d12/D3D12StagingDevice.h new file mode 100644 index 0000000000..f2e8b04d57 --- /dev/null +++ b/src/igl/d3d12/D3D12StagingDevice.h @@ -0,0 +1,134 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class UploadRingBuffer; + +/** + * @brief Centralized management of staging buffers for upload/readback + * + * Provides pooled staging buffer allocation for upload and readback operations, + * eliminating per-operation staging buffer creation and improving reuse. + * + * Inspired by Vulkan's VulkanStagingDevice pattern. 
+ */ +class D3D12StagingDevice { +public: + /** + * @brief Staging buffer allocation + */ + struct StagingBuffer { + igl::d3d12::ComPtr buffer; + void* mappedPtr = nullptr; + size_t size = 0; + uint64_t offset = 0; // Offset within buffer (for ring buffer allocations) + bool valid = false; + bool isFromRingBuffer = false; // True if allocated from ring buffer + + StagingBuffer() = default; + }; + + /** + * @brief Initialize the staging device + * @param device D3D12 device for resource creation + * @param fence Fence for completion tracking + * @param uploadRingBuffer Optional existing upload ring buffer to integrate + */ + D3D12StagingDevice(ID3D12Device* device, + ID3D12Fence* fence, + UploadRingBuffer* uploadRingBuffer = nullptr); + + ~D3D12StagingDevice(); + + /** + * @brief Allocate a staging buffer for upload operations + * + * First attempts to use the upload ring buffer if available and size permits. + * Falls back to creating a dedicated staging buffer for large allocations. + * + * @param size Size in bytes + * @param alignment Required alignment (default 256 for constant buffers) + * @param fenceValue Fence value when this allocation will be used + * @return Staging buffer allocation + */ + [[nodiscard]] StagingBuffer allocateUpload(size_t size, + size_t alignment = 256, + uint64_t fenceValue = 0); + + /** + * @brief Allocate a staging buffer for readback operations + * + * Readback buffers are in READBACK heap (CPU-readable after GPU write). + * + * @param size Size in bytes + * @return Staging buffer allocation + */ + [[nodiscard]] StagingBuffer allocateReadback(size_t size); + + /** + * @brief Free a staging buffer + * + * Buffers allocated from ring buffer are automatically recycled. + * Dedicated buffers are pooled for reuse. 
+ * + * @param buffer Buffer to free + * @param fenceValue Fence value when GPU is done using this buffer + */ + void free(StagingBuffer buffer, uint64_t fenceValue); + +private: + /** + * @brief Reclaim completed staging buffers back to pool + * + * Internal method called during allocate* to recycle buffers. + * Must be called with poolMutex_ held. + */ + void reclaimCompletedBuffers(); + struct BufferEntry { + igl::d3d12::ComPtr buffer; + size_t size = 0; + uint64_t fenceValue = 0; // Fence value when this buffer was last used + bool isReadback = false; // True for READBACK heap, false for UPLOAD heap + }; + + ID3D12Device* device_ = nullptr; + ID3D12Fence* fence_ = nullptr; + UploadRingBuffer* uploadRingBuffer_ = nullptr; + + // Pool of available staging buffers + std::vector availableBuffers_; + + // Buffers in flight (waiting for GPU) + std::vector inFlightBuffers_; + + // Mutex for thread-safe pool access + std::mutex poolMutex_; + + // Maximum size to use ring buffer (larger allocations get dedicated buffers) + static constexpr size_t kMaxRingBufferAllocation = 1024 * 1024; // 1MB + + // Create a new staging buffer + [[nodiscard]] Result createStagingBuffer(size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer); + + // Find a reusable buffer from the pool + [[nodiscard]] bool findReusableBuffer(size_t size, + bool forReadback, + igl::d3d12::ComPtr* outBuffer); +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12StateTransition.h b/src/igl/d3d12/D3D12StateTransition.h new file mode 100644 index 0000000000..89dc980647 --- /dev/null +++ b/src/igl/d3d12/D3D12StateTransition.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +/** + * @brief D3D12 Resource State Transition Helper + * + * Provides conservative validation for D3D12 resource state transitions. + * Enforces write-to-write transitions through COMMON intermediate state. + * + * Conservative Policy (voluntary, not D3D12 spec requirement): + * - Any state with write bits -> any state with write bits: Use COMMON intermediate + * (e.g., RENDER_TARGET -> COMMON -> COPY_DEST) + * - All other transitions: Direct transition allowed + * + * Note: D3D12 spec allows direct write-to-write transitions with a single barrier. + * This helper uses COMMON intermediate as an extra-conservative policy. + * + * See: https://learn.microsoft.com/windows/win32/direct3d12/using-resource-barriers-to-synchronize-gpu-access-to-resources + */ +class D3D12StateTransition { + public: + /** + * @brief Check if a state contains any write bits + * + * Tests whether the state mask includes any write-capable bits. + * Used to enforce conservative "write-to-write requires COMMON" policy. 
+ */ + static bool isWriteState(D3D12_RESOURCE_STATES state) { + constexpr D3D12_RESOURCE_STATES kWriteMask = + D3D12_RESOURCE_STATE_RENDER_TARGET | + D3D12_RESOURCE_STATE_UNORDERED_ACCESS | + D3D12_RESOURCE_STATE_DEPTH_WRITE | + D3D12_RESOURCE_STATE_COPY_DEST | + D3D12_RESOURCE_STATE_RESOLVE_DEST | + D3D12_RESOURCE_STATE_STREAM_OUT | + D3D12_RESOURCE_STATE_VIDEO_DECODE_WRITE | + D3D12_RESOURCE_STATE_VIDEO_PROCESS_WRITE | + D3D12_RESOURCE_STATE_VIDEO_ENCODE_WRITE; + return (state & kWriteMask) != 0; + } + + /** + * @brief Check if a direct state transition is allowed + * + * @return true if direct transition allowed, false if COMMON intermediate required + */ + static bool isLegalDirectTransition(D3D12_RESOURCE_STATES from, + D3D12_RESOURCE_STATES to) { + if (from == to) { + return true; + } + + // COMMON can transition to/from anything directly + if (from == D3D12_RESOURCE_STATE_COMMON || to == D3D12_RESOURCE_STATE_COMMON) { + return true; + } + + // Write-to-write requires COMMON intermediate + if (isWriteState(from) && isWriteState(to)) { + return false; + } + + return true; + } +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/D3D12Telemetry.h b/src/igl/d3d12/D3D12Telemetry.h new file mode 100644 index 0000000000..2b1c2ca61e --- /dev/null +++ b/src/igl/d3d12/D3D12Telemetry.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class D3D12Telemetry { + public: + void incrementDrawCount(size_t count) noexcept { + currentDrawCount_.fetch_add(count, std::memory_order_relaxed); + } + + void incrementShaderCompilationCount() noexcept { + shaderCompilationCount_.fetch_add(1, std::memory_order_relaxed); + } + + [[nodiscard]] size_t getDrawCount() const noexcept { + return currentDrawCount_.load(std::memory_order_relaxed); + } + + [[nodiscard]] size_t getShaderCompilationCount() const noexcept { + return shaderCompilationCount_.load(std::memory_order_relaxed); + } + + private: + std::atomic currentDrawCount_{0}; + std::atomic shaderCompilationCount_{0}; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DXCCompiler.cpp b/src/igl/d3d12/DXCCompiler.cpp new file mode 100644 index 0000000000..42352d1239 --- /dev/null +++ b/src/igl/d3d12/DXCCompiler.cpp @@ -0,0 +1,240 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +DXCCompiler::DXCCompiler() = default; +DXCCompiler::~DXCCompiler() = default; + +Result DXCCompiler::initialize() { + if (initialized_) { + return Result(); + } + + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Initializing DXC compiler...\n"); + + // Create DXC utils + HRESULT hr = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(utils_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create DxcUtils: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create DxcUtils"); + } + + // Create DXC compiler + hr = DxcCreateInstance(CLSID_DxcCompiler, IID_PPV_ARGS(compiler_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create DxcCompiler: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create DxcCompiler"); + } + + // Create default include handler + hr = utils_->CreateDefaultIncludeHandler(includeHandler_.GetAddressOf()); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create include handler: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create include handler"); + } + + // Create DXC validator for DXIL signing (optional but highly recommended) + hr = DxcCreateInstance(CLSID_DxcValidator, IID_PPV_ARGS(validator_.GetAddressOf())); + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validator not available (0x%08X) - DXIL will be unsigned\n", static_cast(hr)); + // Not a fatal error - continue without validator + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validator initialized - DXIL signing available\n"); + } + + initialized_ = true; + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Initialization successful (Shader Model 6.0+ enabled)\n"); + + return Result(); +} + +Result DXCCompiler::compile( + const char* source, + size_t 
sourceLength, + const char* entryPoint, + const char* target, + const char* debugName, + uint32_t flags, + std::vector& outBytecode, + std::string& outErrors) { + + if (!initialized_) { + return Result(Result::Code::InvalidOperation, "DXC compiler not initialized"); + } + + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Compiling shader '%s' with target '%s' (%zu bytes source)\n", + debugName ? debugName : "unnamed", + target, + sourceLength); + + // Create source blob + igl::d3d12::ComPtr sourceBlob; + HRESULT hr = utils_->CreateBlob(source, static_cast(sourceLength), CP_UTF8, sourceBlob.GetAddressOf()); + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Failed to create source blob: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "Failed to create source blob"); + } + + // Convert strings to wide char for DXC API + std::wstring_convert> converter; + std::wstring wEntryPoint = converter.from_bytes(entryPoint); + std::wstring wTarget = converter.from_bytes(target); + + // Build compilation arguments + std::vector arguments; + + // Entry point + arguments.push_back(L"-E"); + arguments.push_back(wEntryPoint.c_str()); + + // Target profile + arguments.push_back(L"-T"); + arguments.push_back(wTarget.c_str()); + + // Debug info and optimization + if (flags & D3DCOMPILE_DEBUG) { + IGL_D3D12_LOG_VERBOSE(" DXC: Debug mode enabled\n"); + arguments.push_back(L"-Zi"); // Debug info + arguments.push_back(L"-Qembed_debug"); // Embed debug info in shader + arguments.push_back(L"-Od"); // Disable optimizations + } else { + IGL_D3D12_LOG_VERBOSE(" DXC: Release mode - maximum optimization\n"); + arguments.push_back(L"-O3"); // Maximum optimization + } + + // Skip optimization flag + if (flags & D3DCOMPILE_SKIP_OPTIMIZATION) { + arguments.push_back(L"-Od"); + } + + // Warnings as errors + if (flags & D3DCOMPILE_WARNINGS_ARE_ERRORS) { + IGL_D3D12_LOG_VERBOSE(" DXC: Treating warnings as errors\n"); + arguments.push_back(L"-WX"); + } + + // Compile + DxcBuffer sourceBuffer 
= {}; + sourceBuffer.Ptr = sourceBlob->GetBufferPointer(); + sourceBuffer.Size = sourceBlob->GetBufferSize(); + sourceBuffer.Encoding = CP_UTF8; + + igl::d3d12::ComPtr result; + hr = compiler_->Compile( + &sourceBuffer, + arguments.data(), + static_cast(arguments.size()), + includeHandler_.Get(), + IID_PPV_ARGS(result.GetAddressOf()) + ); + + if (FAILED(hr)) { + IGL_LOG_ERROR("DXCCompiler: Compilation invocation failed: 0x%08X\n", static_cast(hr)); + return Result(Result::Code::RuntimeError, "DXC compilation invocation failed"); + } + + // Check compilation status + HRESULT compileStatus; + result->GetStatus(&compileStatus); + + // Get errors/warnings + igl::d3d12::ComPtr errors; + igl::d3d12::ComPtr errorsName; + result->GetOutput(DXC_OUT_ERRORS, IID_PPV_ARGS(errors.GetAddressOf()), errorsName.GetAddressOf()); + if (errors.Get() && errors->GetStringLength() > 0) { + outErrors = std::string(errors->GetStringPointer(), errors->GetStringLength()); + } + + if (FAILED(compileStatus)) { + IGL_LOG_ERROR("DXCCompiler: Shader compilation failed\n"); + if (!outErrors.empty()) { + IGL_LOG_ERROR("%s\n", outErrors.c_str()); + } + return Result(Result::Code::RuntimeError, "Shader compilation failed: " + outErrors); + } + + // Log warnings if any + if (!outErrors.empty()) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Compilation warnings:\n%s\n", outErrors.c_str()); + } + + // Get compiled bytecode (DXIL) + igl::d3d12::ComPtr bytecode; + igl::d3d12::ComPtr bytecodeName; + result->GetOutput(DXC_OUT_OBJECT, IID_PPV_ARGS(bytecode.GetAddressOf()), bytecodeName.GetAddressOf()); + + if (!bytecode.Get()) { + IGL_LOG_ERROR("DXCCompiler: No bytecode produced\n"); + return Result(Result::Code::RuntimeError, "No bytecode produced"); + } + + // Validate and sign DXIL if validator is available + if (validator_.Get()) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Attempting DXIL validation and signing...\n"); + igl::d3d12::ComPtr validationResult; + hr = validator_->Validate(bytecode.Get(), 
DxcValidatorFlags_InPlaceEdit, validationResult.GetAddressOf()); + + if (SUCCEEDED(hr)) { + HRESULT validationStatus; + validationResult->GetStatus(&validationStatus); + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validation status: 0x%08X\n", static_cast(validationStatus)); + + if (SUCCEEDED(validationStatus)) { + // Get the validated (signed) bytecode - this replaces the original + igl::d3d12::ComPtr validatedBlob; + validationResult->GetResult(validatedBlob.GetAddressOf()); + + if (validatedBlob.Get()) { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Got validated blob (%zu bytes)\n", validatedBlob->GetBufferSize()); + // Replace bytecode with validated version using move semantics + bytecode.Reset(); + bytecode = std::move(validatedBlob); + IGL_D3D12_LOG_VERBOSE("DXCCompiler: DXIL validated and signed successfully\n"); + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Validation succeeded but no blob returned\n"); + } + } else { + // Validation failed - get error messages + igl::d3d12::ComPtr validationErrors; + validationResult->GetErrorBuffer(validationErrors.GetAddressOf()); + if (validationErrors.Get() && validationErrors->GetBufferSize() > 0) { + std::string errMsg(static_cast(validationErrors->GetBufferPointer()), + validationErrors->GetBufferSize()); + IGL_D3D12_LOG_VERBOSE("DXCCompiler: DXIL validation failed:\n%s\n", errMsg.c_str()); + } + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Using unsigned DXIL (may require experimental features)\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: DXIL validation skipped (validator error 0x%08X)\n", static_cast(hr)); + } + } else { + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Using unsigned DXIL (validator not available)\n"); + } + + // Copy bytecode to output (either signed or unsigned) + const uint8_t* data = static_cast(bytecode->GetBufferPointer()); + size_t size = bytecode->GetBufferSize(); + outBytecode.assign(data, data + size); + + IGL_D3D12_LOG_VERBOSE("DXCCompiler: Compilation successful (%zu bytes DXIL bytecode)\n", size); + + 
return Result(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DXCCompiler.h b/src/igl/d3d12/DXCCompiler.h new file mode 100644 index 0000000000..49e77011a0 --- /dev/null +++ b/src/igl/d3d12/DXCCompiler.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief DXC (DirectX Shader Compiler) wrapper for modern HLSL compilation + * + * Replaces legacy FXC (D3DCompile) with DXC for: + * - Shader Model 6.0+ support + * - 10-20% better shader performance + * - Modern optimization passes + * - Future features (raytracing, mesh shaders, wave intrinsics) + */ +class DXCCompiler { + public: + DXCCompiler(); + ~DXCCompiler(); + + /** + * @brief Initialize DXC compiler (call once) + * @return Result indicating success or failure + */ + Result initialize(); + + /** + * @brief Check if DXC is available and initialized + */ + bool isInitialized() const { return initialized_; } + + /** + * @brief Compile HLSL source to DXIL bytecode (Shader Model 6.0+) + * + * @param source HLSL source code + * @param sourceLength Length of source code + * @param entryPoint Entry point function name (e.g., "main") + * @param target Shader target profile (e.g., "vs_6_0", "ps_6_0", "cs_6_0") + * @param debugName Debug name for error messages + * @param flags Compilation flags (D3DCOMPILE_* constants) + * @param outBytecode Output DXIL bytecode + * @param outErrors Output compilation errors/warnings + * @return Result indicating success or failure + */ + Result compile( + const char* source, + size_t sourceLength, + const char* entryPoint, + const char* target, + const char* debugName, + uint32_t flags, + std::vector& outBytecode, + std::string& outErrors + ); + + private: + igl::d3d12::ComPtr utils_; + igl::d3d12::ComPtr 
compiler_; + igl::d3d12::ComPtr includeHandler_; + igl::d3d12::ComPtr validator_; + bool initialized_ = false; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DepthStencilState.h b/src/igl/d3d12/DepthStencilState.h new file mode 100644 index 0000000000..d765465625 --- /dev/null +++ b/src/igl/d3d12/DepthStencilState.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class DepthStencilState final : public IDepthStencilState { + public: + explicit DepthStencilState(const DepthStencilStateDesc& desc) : desc_(desc) {} + ~DepthStencilState() override = default; + + const DepthStencilStateDesc& getDesc() const { return desc_; } + + private: + DepthStencilStateDesc desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DescriptorHeapManager.cpp b/src/igl/d3d12/DescriptorHeapManager.cpp new file mode 100644 index 0000000000..a0ea524477 --- /dev/null +++ b/src/igl/d3d12/DescriptorHeapManager.cpp @@ -0,0 +1,730 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include + +namespace igl::d3d12 { + +Result DescriptorHeapManager::initialize(ID3D12Device* device, const Sizes& sizes) { + if (!device) { + return Result(Result::Code::ArgumentInvalid, "Null device for DescriptorHeapManager"); + } + + // A-006: Copy requested sizes, then validate/clamp against device limits + sizes_ = sizes; + validateAndClampSizes(device); + + // Create shader-visible CBV/SRV/UAV heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + desc.NumDescriptors = sizes_.cbvSrvUav; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(cbvSrvUavHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create CBV/SRV/UAV heap " + "(size=%u descriptors)\n", sizes_.cbvSrvUav); + return Result(Result::Code::RuntimeError, "Failed to create CBV/SRV/UAV heap"); + } + cbvSrvUavDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + // Populate free list + freeCbvSrvUav_.reserve(sizes_.cbvSrvUav); + for (uint32_t i = 0; i < sizes_.cbvSrvUav; ++i) { + freeCbvSrvUav_.push_back(i); + } + } + + // Create shader-visible sampler heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + desc.NumDescriptors = sizes_.samplers; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(samplerHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create sampler heap " + "(size=%u descriptors, limit=2048)\n", sizes_.samplers); + return Result(Result::Code::RuntimeError, "Failed to create sampler heap"); + } + samplerDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER); + // Populate free list + 
freeSamplers_.reserve(sizes_.samplers); + for (uint32_t i = 0; i < sizes_.samplers; ++i) { + freeSamplers_.push_back(i); + } + } + + // Create CPU-visible RTV heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + desc.NumDescriptors = sizes_.rtvs; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(rtvHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create RTV heap " + "(size=%u descriptors)\n", sizes_.rtvs); + return Result(Result::Code::RuntimeError, "Failed to create RTV heap"); + } + rtvDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_RTV); + // Populate free list + freeRtvs_.reserve(sizes_.rtvs); + for (uint32_t i = 0; i < sizes_.rtvs; ++i) { + freeRtvs_.push_back(i); + } + } + + // Create CPU-visible DSV heap + { + D3D12_DESCRIPTOR_HEAP_DESC desc = {}; + desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + desc.NumDescriptors = sizes_.dsvs; + desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + if (FAILED(device->CreateDescriptorHeap(&desc, IID_PPV_ARGS(dsvHeap_.GetAddressOf())))) { + // A-006: Enhanced error message with size context + IGL_LOG_ERROR("DescriptorHeapManager: Failed to create DSV heap " + "(size=%u descriptors)\n", sizes_.dsvs); + return Result(Result::Code::RuntimeError, "Failed to create DSV heap"); + } + dsvDescriptorSize_ = device->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_DSV); + // Populate free list + freeDsvs_.reserve(sizes_.dsvs); + for (uint32_t i = 0; i < sizes_.dsvs; ++i) { + freeDsvs_.push_back(i); + } + } + + return Result(); +} + +uint32_t DescriptorHeapManager::allocateRTV() { + std::lock_guard lock(mutex_); + if (freeRtvs_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: RTV heap exhausted! 
" + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.rtvs); + return UINT32_MAX; + } + const uint32_t idx = freeRtvs_.back(); + freeRtvs_.pop_back(); + + return idx; +} + +uint32_t DescriptorHeapManager::allocateDSV() { + std::lock_guard lock(mutex_); + if (freeDsvs_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: DSV heap exhausted! " + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.dsvs); + return UINT32_MAX; + } + const uint32_t idx = freeDsvs_.back(); + freeDsvs_.pop_back(); + + return idx; +} + +void DescriptorHeapManager::freeRTV(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.rtvs) { + return; + } + + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - acceptable for RTV heap (typically ~256 descriptors) + if (std::find(freeRtvs_.begin(), freeRtvs_.end(), index) != freeRtvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - RTV index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of RTV descriptor - caller bug detected"); + return; // Prevent corruption even in release builds + } + + // Add to free list + freeRtvs_.push_back(index); +} + +void DescriptorHeapManager::freeDSV(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.dsvs) { + return; + } + + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - acceptable for DSV heap (typically ~128 descriptors) + if (std::find(freeDsvs_.begin(), freeDsvs_.end(), index) != freeDsvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - DSV index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of DSV descriptor - caller bug detected"); + return; // Prevent corruption even in release builds + } + + // Add to free list + freeDsvs_.push_back(index); +} + 
+uint32_t DescriptorHeapManager::allocateCbvSrvUav() { + std::lock_guard lock(mutex_); + if (freeCbvSrvUav_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: CBV/SRV/UAV heap exhausted! " + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.cbvSrvUav); + return UINT32_MAX; + } + const uint32_t idx = freeCbvSrvUav_.back(); + freeCbvSrvUav_.pop_back(); + + return idx; +} + +uint32_t DescriptorHeapManager::allocateSampler() { + std::lock_guard lock(mutex_); + if (freeSamplers_.empty()) { + IGL_LOG_ERROR("DescriptorHeapManager: Sampler heap exhausted! " + "Requested allocation failed (capacity: %u descriptors)\n", + sizes_.samplers); + return UINT32_MAX; + } + const uint32_t idx = freeSamplers_.back(); + freeSamplers_.pop_back(); + + return idx; +} + +void DescriptorHeapManager::freeCbvSrvUav(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.cbvSrvUav) { + return; + } + +#if IGL_DEBUG + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - can be expensive for large heaps (~4096 descriptors). + // Only enabled in debug builds to avoid overhead in production. + if (std::find(freeCbvSrvUav_.begin(), freeCbvSrvUav_.end(), index) != freeCbvSrvUav_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - CBV/SRV/UAV index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of CBV/SRV/UAV descriptor - caller bug detected"); + return; // Prevent corruption even in debug builds + } +#endif + + // Add to free list + freeCbvSrvUav_.push_back(index); +} + +void DescriptorHeapManager::freeSampler(uint32_t index) { + std::lock_guard lock(mutex_); + + // Validate bounds + if (index == UINT32_MAX || index >= sizes_.samplers) { + return; + } + +#if IGL_DEBUG + // CRITICAL: Detect double-free bugs by checking if index is already in free list + // Note: O(N) scan - can be expensive for large heaps (~2048 descriptors). 
+ // Only enabled in debug builds to avoid overhead in production. + if (std::find(freeSamplers_.begin(), freeSamplers_.end(), index) != freeSamplers_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager: DOUBLE-FREE DETECTED - Sampler index %u already freed!\n", index); + IGL_DEBUG_ASSERT(false, "Double-free of Sampler descriptor - caller bug detected"); + return; // Prevent corruption even in debug builds + } +#endif + + // Add to free list + freeSamplers_.push_back(index); +} + +// Explicit error checking with bool return, building on the basic index validation helpers. +bool DescriptorHeapManager::getRTVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: outHandle is null\n"); + return false; + } + + // Initialize to zero in case of error + *outHandle = {}; + + if (!rtvHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: RTV heap is null\n"); + IGL_DEBUG_ASSERT(false, "RTV heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get RTV handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.rtvs) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: Index %u exceeds heap size %u\n", + index, sizes_.rtvs); + IGL_DEBUG_ASSERT(false, "RTV descriptor index out of bounds"); + return false; + } + + // Check if descriptor has been freed (use-after-free detection) + { + std::lock_guard lock(mutex_); + if (std::find(freeRtvs_.begin(), freeRtvs_.end(), index) != freeRtvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getRTVHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed RTV descriptor"); + return false; + } + } + + *outHandle = rtvHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr 
+= index * rtvDescriptorSize_; + + // Validate final handle is non-null + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getRTVHandle returned null CPU descriptor handle"); + + return true; +} + +// Explicit error checking with bool return, building on descriptor validation helpers. +bool DescriptorHeapManager::getDSVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: outHandle is null\n"); + return false; + } + + // Initialize to zero in case of error + *outHandle = {}; + + if (!dsvHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: DSV heap is null\n"); + IGL_DEBUG_ASSERT(false, "DSV heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get DSV handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.dsvs) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: Index %u exceeds heap size %u\n", + index, sizes_.dsvs); + IGL_DEBUG_ASSERT(false, "DSV descriptor index out of bounds"); + return false; + } + + // Check if descriptor has been freed (use-after-free detection) + { + std::lock_guard lock(mutex_); + if (std::find(freeDsvs_.begin(), freeDsvs_.end(), index) != freeDsvs_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getDSVHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed DSV descriptor"); + return false; + } + } + + *outHandle = dsvHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * dsvDescriptorSize_; + + // Validate final handle is non-null + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getDSVHandle returned null CPU descriptor handle"); + + return true; +} + +// Bool-returning CBV/SRV/UAV handle getters +bool DescriptorHeapManager::getCbvSrvUavCpuHandle(uint32_t index, 
D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!cbvSrvUavHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: CBV/SRV/UAV heap is null\n"); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get CBV/SRV/UAV handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.cbvSrvUav) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.cbvSrvUav); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeCbvSrvUav_.begin(), freeCbvSrvUav_.end(), index) != freeCbvSrvUav_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavCpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed CBV/SRV/UAV descriptor"); + return false; + } + } +#endif + + *outHandle = cbvSrvUavHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * cbvSrvUavDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getCbvSrvUavCpuHandle returned null CPU descriptor handle"); + + return true; +} + +bool DescriptorHeapManager::getCbvSrvUavGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!cbvSrvUavHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: CBV/SRV/UAV heap is null\n"); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV heap is 
null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get CBV/SRV/UAV GPU handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.cbvSrvUav) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.cbvSrvUav); + IGL_DEBUG_ASSERT(false, "CBV/SRV/UAV descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeCbvSrvUav_.begin(), freeCbvSrvUav_.end(), index) != freeCbvSrvUav_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getCbvSrvUavGpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed CBV/SRV/UAV descriptor"); + return false; + } + } +#endif + + *outHandle = cbvSrvUavHeap_->GetGPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * cbvSrvUavDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getCbvSrvUavGpuHandle returned null GPU descriptor handle"); + + return true; +} + +bool DescriptorHeapManager::getSamplerCpuHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!samplerHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Sampler heap is null\n"); + IGL_DEBUG_ASSERT(false, "Sampler heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get Sampler handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.samplers) { + 
IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.samplers); + IGL_DEBUG_ASSERT(false, "Sampler descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeSamplers_.begin(), freeSamplers_.end(), index) != freeSamplers_.end()) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerCpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed Sampler descriptor"); + return false; + } + } +#endif + + *outHandle = samplerHeap_->GetCPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * samplerDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getSamplerCpuHandle returned null CPU descriptor handle"); + + return true; +} + +bool DescriptorHeapManager::getSamplerGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const { + if (!outHandle) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: outHandle is null\n"); + return false; + } + + *outHandle = {}; + + if (!samplerHeap_.Get()) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Sampler heap is null\n"); + IGL_DEBUG_ASSERT(false, "Sampler heap is null"); + return false; + } + + if (index == UINT32_MAX) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Invalid index UINT32_MAX (allocation failure sentinel)\n"); + IGL_DEBUG_ASSERT(false, "Attempted to get Sampler GPU handle with invalid index UINT32_MAX"); + return false; + } + + if (index >= sizes_.samplers) { + IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Index %u exceeds heap size %u\n", + index, sizes_.samplers); + IGL_DEBUG_ASSERT(false, "Sampler descriptor index out of bounds"); + return false; + } + +#if IGL_DEBUG + { + std::lock_guard lock(mutex_); + if (std::find(freeSamplers_.begin(), freeSamplers_.end(), index) != freeSamplers_.end()) { + 
IGL_LOG_ERROR("DescriptorHeapManager::getSamplerGpuHandle: Descriptor index %u has been freed (use-after-free)\n", index); + IGL_DEBUG_ASSERT(false, "Use-after-free: Accessing freed Sampler descriptor"); + return false; + } + } +#endif + + *outHandle = samplerHeap_->GetGPUDescriptorHandleForHeapStart(); + outHandle->ptr += index * samplerDescriptorSize_; + + IGL_DEBUG_ASSERT(outHandle->ptr != 0, "getSamplerGpuHandle returned null GPU descriptor handle"); + + return true; +} + +// Descriptor handle validation helpers. +bool DescriptorHeapManager::isValidRTVIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.rtvs) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated (not in free list) + // This helps detect use-after-free bugs + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeRtvs_) { + if (freeIdx == index) { + return false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +bool DescriptorHeapManager::isValidDSVIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.dsvs) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeDsvs_) { + if (freeIdx == index) { + return false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +bool DescriptorHeapManager::isValidCbvSrvUavIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.cbvSrvUav) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeCbvSrvUav_) { + if (freeIdx == index) { + return 
false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +bool DescriptorHeapManager::isValidSamplerIndex(uint32_t index) const { + if (index == UINT32_MAX) { + return false; // Sentinel value for allocation failure + } + if (index >= sizes_.samplers) { + return false; // Out of bounds + } + // Check if descriptor is currently allocated + std::lock_guard lock(mutex_); + for (const auto& freeIdx : freeSamplers_) { + if (freeIdx == index) { + return false; // Index is in free list, so it's not allocated + } + } + return true; // Not in free list, so it's allocated +} + +void DescriptorHeapManager::logUsageStats() const { + std::lock_guard lock(mutex_); + IGL_D3D12_LOG_VERBOSE("=== Descriptor Heap Usage Statistics ===\n"); + + // CBV/SRV/UAV heap + const uint32_t cbvSrvUavUsed = sizes_.cbvSrvUav - static_cast(freeCbvSrvUav_.size()); + const float cbvSrvUavPercent = (cbvSrvUavUsed * 100.0f) / sizes_.cbvSrvUav; + IGL_D3D12_LOG_VERBOSE(" CBV/SRV/UAV: %u / %u (%.1f%% used)\n", + cbvSrvUavUsed, sizes_.cbvSrvUav, cbvSrvUavPercent); + + // Sampler heap + const uint32_t samplersUsed = sizes_.samplers - static_cast(freeSamplers_.size()); + const float samplersPercent = (samplersUsed * 100.0f) / sizes_.samplers; + IGL_D3D12_LOG_VERBOSE(" Samplers: %u / %u (%.1f%% used)\n", + samplersUsed, sizes_.samplers, samplersPercent); + + // RTV heap + const uint32_t rtvsUsed = sizes_.rtvs - static_cast(freeRtvs_.size()); + const float rtvsPercent = (rtvsUsed * 100.0f) / sizes_.rtvs; + IGL_D3D12_LOG_VERBOSE(" RTVs: %u / %u (%.1f%% used)\n", + rtvsUsed, sizes_.rtvs, rtvsPercent); + + // DSV heap + const uint32_t dsvsUsed = sizes_.dsvs - static_cast(freeDsvs_.size()); + const float dsvsPercent = (dsvsUsed * 100.0f) / sizes_.dsvs; + IGL_D3D12_LOG_VERBOSE(" DSVs: %u / %u (%.1f%% used)\n", + dsvsUsed, sizes_.dsvs, dsvsPercent); + + IGL_D3D12_LOG_VERBOSE("========================================\n"); +} + +// Explicit cleanup to 
release descriptor heaps before device destruction +void DescriptorHeapManager::cleanup() { + std::lock_guard lock(mutex_); + + // Release all descriptor heaps explicitly to prevent leaks + cbvSrvUavHeap_.Reset(); + samplerHeap_.Reset(); + rtvHeap_.Reset(); + dsvHeap_.Reset(); + + // Clear free lists + freeCbvSrvUav_.clear(); + freeSamplers_.clear(); + freeRtvs_.clear(); + freeDsvs_.clear(); +} + +void DescriptorHeapManager::validateAndClampSizes(ID3D12Device* device) { + // A-006: Validate descriptor heap sizes against D3D12 device limits + IGL_D3D12_LOG_VERBOSE("=== Descriptor Heap Size Validation ===\n"); + + // Query device options for resource binding tier (affects limits) + D3D12_FEATURE_DATA_D3D12_OPTIONS options = {}; + HRESULT hr = device->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS, + &options, + sizeof(options)); + + if (SUCCEEDED(hr)) { + const char* tierName = "Unknown"; + switch (options.ResourceBindingTier) { + case D3D12_RESOURCE_BINDING_TIER_1: tierName = "Tier 1"; break; + case D3D12_RESOURCE_BINDING_TIER_2: tierName = "Tier 2"; break; + case D3D12_RESOURCE_BINDING_TIER_3: tierName = "Tier 3"; break; + } + IGL_D3D12_LOG_VERBOSE(" Resource Binding Tier: %s\n", tierName); + } + + // === SHADER-VISIBLE CBV/SRV/UAV HEAP === + // D3D12 spec: Max 1,000,000 descriptors for shader-visible heaps (FL 11.0+) + // Conservative limit: 1,000,000 (actual limit may be lower on some hardware) + constexpr uint32_t kMaxCbvSrvUavDescriptors = 1000000; + + if (sizes_.cbvSrvUav > kMaxCbvSrvUavDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested CBV/SRV/UAV heap size (%u) exceeds " + "D3D12 spec limit (%u)\n", + sizes_.cbvSrvUav, kMaxCbvSrvUavDescriptors); + IGL_LOG_ERROR(" Clamping to %u descriptors\n", kMaxCbvSrvUavDescriptors); + sizes_.cbvSrvUav = kMaxCbvSrvUavDescriptors; + } else { + IGL_D3D12_LOG_VERBOSE(" CBV/SRV/UAV heap size: %u (limit: %u) - OK\n", + sizes_.cbvSrvUav, kMaxCbvSrvUavDescriptors); + } + + // === SHADER-VISIBLE SAMPLER HEAP === + // D3D12 
spec: Max 2,048 descriptors (D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE) + constexpr uint32_t kMaxSamplerDescriptors = 2048; + + if (sizes_.samplers > kMaxSamplerDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested sampler heap size (%u) exceeds " + "D3D12 limit (%u)\n", + sizes_.samplers, kMaxSamplerDescriptors); + IGL_LOG_ERROR(" Clamping to %u descriptors\n", kMaxSamplerDescriptors); + sizes_.samplers = kMaxSamplerDescriptors; + } else { + IGL_D3D12_LOG_VERBOSE(" Sampler heap size: %u (limit: %u) - OK\n", + sizes_.samplers, kMaxSamplerDescriptors); + } + + // === CPU-VISIBLE RTV HEAP === + // D3D12 spec: Typically 64K+ descriptors (device-dependent) + // Conservative validation: Warn if exceeding 16K (reasonable limit) + constexpr uint32_t kMaxRtvDescriptors = 16384; + + if (sizes_.rtvs > kMaxRtvDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested RTV heap size (%u) is unusually large\n", + sizes_.rtvs); + IGL_LOG_ERROR(" Recommended maximum: %u descriptors\n", kMaxRtvDescriptors); + // Don't clamp - let CreateDescriptorHeap fail if truly excessive + } else { + IGL_D3D12_LOG_VERBOSE(" RTV heap size: %u (recommended max: %u) - OK\n", + sizes_.rtvs, kMaxRtvDescriptors); + } + + // === CPU-VISIBLE DSV HEAP === + // Similar limits to RTV heap + constexpr uint32_t kMaxDsvDescriptors = 16384; + + if (sizes_.dsvs > kMaxDsvDescriptors) { + IGL_LOG_ERROR(" WARNING: Requested DSV heap size (%u) is unusually large\n", + sizes_.dsvs); + IGL_LOG_ERROR(" Recommended maximum: %u descriptors\n", kMaxDsvDescriptors); + // Don't clamp - let CreateDescriptorHeap fail if truly excessive + } else { + IGL_D3D12_LOG_VERBOSE(" DSV heap size: %u (recommended max: %u) - OK\n", + sizes_.dsvs, kMaxDsvDescriptors); + } + + IGL_D3D12_LOG_VERBOSE("========================================\n"); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/DescriptorHeapManager.h b/src/igl/d3d12/DescriptorHeapManager.h new file mode 100644 index 0000000000..c985606214 --- /dev/null +++ 
b/src/igl/d3d12/DescriptorHeapManager.h @@ -0,0 +1,144 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +/** + * @brief Persistent Descriptor Allocator for CPU-visible and long-lived descriptors + * + * ============================================================================ + * ARCHITECTURE: Strategy 2 - Persistent Descriptor Allocator + * ============================================================================ + * + * DescriptorHeapManager handles descriptors with EXPLICIT lifecycle management: + * - **PRIMARY USE**: CPU-visible descriptors (RTV/DSV) for Texture and Framebuffer + * - **SECONDARY USE**: Shader-visible descriptors for headless/unit test contexts + * + * **Key Differences from Per-Frame System (Strategy 1)**: + * - Lifecycle: Allocated at resource creation, freed at destruction (not per-frame reset) + * - Allocation: Free-list pattern (not linear) - supports arbitrary alloc/free + * - Safety: Double-free detection, mutex protection for thread-safety + * - Visibility: Creates both CPU-visible AND shader-visible heaps + * + * **When to Use This vs Per-Frame (D3D12ResourcesBinder)**: + * - Use DescriptorHeapManager for: RTV/DSV allocation for textures/framebuffers + * - Use DescriptorHeapManager for: Headless contexts without per-frame infrastructure + * - Do NOT use for: Transient SRV/UAV/CBV/Samplers during rendering + * - Do NOT use for: Descriptor table binding in encoders + * + * **Design Note**: This class creates shader-visible heaps (CBV/SRV/UAV, Samplers) + * for backward compatibility with headless contexts. 
In normal rendering contexts: + * - D3D12Context uses per-frame heaps (Strategy 1) for shader-visible descriptors + * - DescriptorHeapManager is only used for RTV/DSV allocation + * - Its shader-visible heaps serve as a fallback when per-frame heaps are unavailable + * (e.g., headless/unit-test contexts - see ComputeCommandEncoder.cpp:32-40) + * + * For architecture overview, see D3D12ResourcesBinder.h documentation. + * + * Thread-safety: This class IS thread-safe (uses mutex for allocation/free). + */ +class DescriptorHeapManager { + public: + // Descriptor heap sizes configuration. + // Default values match D3D12ContextConfig for consistency but can be customized at runtime. + struct Sizes { + uint32_t cbvSrvUav = 4096; // shader-visible (kept larger for unit tests/headless) + uint32_t samplers = 2048; // shader-visible (D3D12 spec limit) + uint32_t rtvs = 256; // CPU-visible (default from D3D12ContextConfig) + uint32_t dsvs = 128; // CPU-visible (default from D3D12ContextConfig) + + // Note: D3D12Context and HeadlessContext construct Sizes manually based on their + // specific needs (environment overrides, test requirements, etc.) rather than using + // a generic factory method. To customize, construct Sizes with desired values. 
+ }; + + DescriptorHeapManager() = default; + Result initialize(ID3D12Device* device, const Sizes& sizes = {}); + + // Shader-visible heaps for binding + ID3D12DescriptorHeap* getCbvSrvUavHeap() const { return cbvSrvUavHeap_.Get(); } + ID3D12DescriptorHeap* getSamplerHeap() const { return samplerHeap_.Get(); } + + // Allocate a CPU descriptor from RTV/DSV heaps + uint32_t allocateRTV(); + uint32_t allocateDSV(); + void freeRTV(uint32_t index); + void freeDSV(uint32_t index); + + // Allocate indices inside shader-visible heaps (for creating CBV/SRV/UAV or Samplers) + uint32_t allocateCbvSrvUav(); + uint32_t allocateSampler(); + void freeCbvSrvUav(uint32_t index); + void freeSampler(uint32_t index); + + // Get CPU/GPU descriptor handles with validation. + // Returns false on error (invalid index, null heap, use-after-free) and leaves outHandle zeroed. + // Returns true on success and writes the valid handle to outHandle. + [[nodiscard]] bool getRTVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getDSVHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getCbvSrvUavCpuHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getCbvSrvUavGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getSamplerCpuHandle(uint32_t index, D3D12_CPU_DESCRIPTOR_HANDLE* outHandle) const; + [[nodiscard]] bool getSamplerGpuHandle(uint32_t index, D3D12_GPU_DESCRIPTOR_HANDLE* outHandle) const; + + uint32_t getCbvSrvUavDescriptorSize() const { return cbvSrvUavDescriptorSize_; } + uint32_t getSamplerDescriptorSize() const { return samplerDescriptorSize_; } + uint32_t getRtvDescriptorSize() const { return rtvDescriptorSize_; } + uint32_t getDsvDescriptorSize() const { return dsvDescriptorSize_; } + + // Descriptor handle validation helpers for diagnostics/telemetry. 
+ // Note: These are NOT optimized for hot-path usage (O(N) free-list scans). + // For per-draw/dispatch validation, prefer the get*Handle methods which cache results. + [[nodiscard]] bool isValidRTVIndex(uint32_t index) const; + [[nodiscard]] bool isValidDSVIndex(uint32_t index) const; + [[nodiscard]] bool isValidCbvSrvUavIndex(uint32_t index) const; + [[nodiscard]] bool isValidSamplerIndex(uint32_t index) const; + + // Telemetry: Log current heap usage statistics + void logUsageStats() const; + + // Explicit cleanup of descriptor heaps to prevent leaks. + void cleanup(); + + private: + // Heaps + igl::d3d12::ComPtr cbvSrvUavHeap_; + igl::d3d12::ComPtr samplerHeap_; + igl::d3d12::ComPtr rtvHeap_; + igl::d3d12::ComPtr dsvHeap_; + + // Increments + UINT cbvSrvUavDescriptorSize_ = 0; + UINT samplerDescriptorSize_ = 0; + UINT rtvDescriptorSize_ = 0; + UINT dsvDescriptorSize_ = 0; + + // Free lists for CPU-only heaps + std::vector freeRtvs_; + std::vector freeDsvs_; + // Free lists for shader-visible heaps + std::vector freeCbvSrvUav_; + std::vector freeSamplers_; + + // Total sizes + Sizes sizes_{}; + + // Thread safety + mutable std::mutex mutex_; + + // A-006: Validate and clamp descriptor heap sizes to device limits + void validateAndClampSizes(ID3D12Device* device); +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Device.cpp b/src/igl/d3d12/Device.cpp new file mode 100644 index 0000000000..ec2d04b0ca --- /dev/null +++ b/src/igl/d3d12/Device.cpp @@ -0,0 +1,3648 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For IGL_DEBUG_ASSERT in waitForUploadFence. 
+#include +#include +#include +#include +#include +#include +#include // For std::call_once. +#include +#include +#include + +#pragma comment(lib, "d3dcompiler.lib") + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; + +// Capture D3D12 InfoQueue messages (warnings/errors) into an artifacts log when enabled. +void captureInfoQueueForDevice(ID3D12Device* device) { + const char* captureEnv = std::getenv("IGL_D3D12_CAPTURE_VALIDATION"); + if (!captureEnv || (captureEnv[0] != '1' && captureEnv[0] != 'T' && captureEnv[0] != 't' && + captureEnv[0] != 'Y' && captureEnv[0] != 'y')) { + return; + } + + if (!device) { + return; + } + + ComPtr infoQueue; + if (FAILED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + return; + } + + const UINT64 numMessages = infoQueue->GetNumStoredMessages(); + + namespace fs = std::filesystem; + + fs::path root; + if (const char* rootEnv = std::getenv("IGL_ARTIFACT_ROOT"); + rootEnv && *rootEnv != '\0') { + root = fs::path(rootEnv); + } else { + root = fs::current_path() / "artifacts"; + } + + fs::path logPath = root / "validation" / "D3D12_InfoQueue.log"; + std::error_code ec; + fs::create_directories(logPath.parent_path(), ec); + + std::ofstream out(logPath, std::ios::app); + if (!out) { + return; + } + + out << "=== D3D12 InfoQueue Dump ===\n"; + + if (numMessages == 0) { + out << "[INFO] No non-info D3D12 messages recorded for this device.\n"; + return; + } + + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + if (FAILED(infoQueue->GetMessage(i, nullptr, &messageLength)) || + messageLength == 0) { + continue; + } + + std::vector buffer(messageLength); + auto* message = + reinterpret_cast(buffer.data()); + if (FAILED(infoQueue->GetMessage(i, message, &messageLength))) { + continue; + } + + // Skip informational messages; capture warnings, errors, and corruption. 
+ if (message->Severity == D3D12_MESSAGE_SEVERITY_INFO || + message->Severity == D3D12_MESSAGE_SEVERITY_MESSAGE) { + continue; + } + + // Explicitly ignore well-understood performance-only clear warnings from + // the validation layer (IDs 820 and 821). These indicate that optimized + // clear values were not provided or do not match but do not affect + // correctness; they are tracked separately in the audit documentation. + if (message->ID == 820 || message->ID == 821 || message->ID == 677) { + continue; + } + + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case D3D12_MESSAGE_SEVERITY_CORRUPTION: + severityStr = "CORRUPTION"; + break; + case D3D12_MESSAGE_SEVERITY_ERROR: + severityStr = "ERROR"; + break; + case D3D12_MESSAGE_SEVERITY_WARNING: + severityStr = "WARNING"; + break; + default: + break; + } + + out << "[" << severityStr << "] ID=" << static_cast(message->ID) + << " : " << (message->pDescription ? message->pDescription : "") + << "\n"; + } + + infoQueue->ClearStoredMessages(); +} + +// Log D3D12 and DXGI InfoQueue messages to the runtime log to aid debugging. +// This is used in error paths such as PSO creation failures and device +// removal checks. It is intentionally tolerant of missing debug components +// (dxgidebug.dll, Graphics Tools, etc.) and will simply emit nothing if the +// queues are unavailable. +void logInfoQueuesForDevice(ID3D12Device* device, const char* context) { + if (!device) { + return; + } + + // First, log any messages from the D3D12 device's InfoQueue. 
+ ComPtr infoQueue; + if (SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + const UINT64 numMessages = infoQueue->GetNumStoredMessages(); + IGL_LOG_ERROR(" [%s] D3D12 Info Queue has %llu messages:\n", + context, + static_cast(numMessages)); + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + if (FAILED(infoQueue->GetMessage(i, nullptr, &messageLength)) || + messageLength == 0) { + continue; + } + + std::vector buffer(messageLength); + auto* message = reinterpret_cast(buffer.data()); + if (FAILED(infoQueue->GetMessage(i, message, &messageLength))) { + continue; + } + + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case D3D12_MESSAGE_SEVERITY_CORRUPTION: severityStr = "CORRUPTION"; break; + case D3D12_MESSAGE_SEVERITY_ERROR: severityStr = "ERROR"; break; + case D3D12_MESSAGE_SEVERITY_WARNING: severityStr = "WARNING"; break; + case D3D12_MESSAGE_SEVERITY_INFO: severityStr = "INFO"; break; + case D3D12_MESSAGE_SEVERITY_MESSAGE: severityStr = "MESSAGE"; break; + default: break; + } + IGL_LOG_ERROR(" [D3D12][%s] %s (ID=%u)\n", + severityStr, + message->pDescription ? message->pDescription : "", + static_cast(message->ID)); + } + infoQueue->ClearStoredMessages(); + } + + // Next, attempt to log messages from the global DXGI InfoQueue via + // dxgidebug.dll, if present. This can surface diagnostics that are not + // routed through the per-device D3D12 queue (e.g. swap-chain errors or + // certain shader validation issues). 
+ HMODULE dxgiDebugModule = LoadLibraryA("dxgidebug.dll"); + if (!dxgiDebugModule) { + return; + } + + using PFN_DXGIGetDebugInterface = HRESULT(WINAPI *)(REFIID, void**); + auto dxgiGetDebugInterface = + reinterpret_cast(GetProcAddress(dxgiDebugModule, "DXGIGetDebugInterface")); + + if (dxgiGetDebugInterface) { + ComPtr dxgiInfoQueue; + if (SUCCEEDED(dxgiGetDebugInterface(IID_PPV_ARGS(dxgiInfoQueue.GetAddressOf())))) { + const DXGI_DEBUG_ID producers[] = {DXGI_DEBUG_DXGI, DXGI_DEBUG_DX, DXGI_DEBUG_APP}; + const char* producerNames[] = {"DXGI", "DX", "APP"}; + for (size_t p = 0; p < std::size(producers); ++p) { + const DXGI_DEBUG_ID producer = producers[p]; + const UINT64 numMessages = dxgiInfoQueue->GetNumStoredMessages(producer); + if (numMessages == 0) { + continue; + } + IGL_LOG_ERROR(" [%s] DXGI InfoQueue (%s) has %llu messages:\n", + context, + producerNames[p], + static_cast(numMessages)); + for (UINT64 i = 0; i < numMessages; ++i) { + SIZE_T messageLength = 0; + if (FAILED(dxgiInfoQueue->GetMessage(producer, i, nullptr, &messageLength)) || + messageLength == 0) { + continue; + } + std::vector buffer(messageLength); + auto* message = + reinterpret_cast(buffer.data()); + if (FAILED(dxgiInfoQueue->GetMessage(producer, i, message, &messageLength))) { + continue; + } + const char* severityStr = "UNKNOWN"; + switch (message->Severity) { + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_CORRUPTION: severityStr = "CORRUPTION"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_ERROR: severityStr = "ERROR"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_WARNING: severityStr = "WARNING"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_INFO: severityStr = "INFO"; break; + case DXGI_INFO_QUEUE_MESSAGE_SEVERITY_MESSAGE: severityStr = "MESSAGE"; break; + default: break; + } + IGL_LOG_ERROR(" [DXGI/%s][%s] %s (ID=%u)\n", + producerNames[p], + severityStr, + message->pDescription ? 
message->pDescription : "", + static_cast(message->ID)); + } + dxgiInfoQueue->ClearStoredMessages(producer); + } + // Also clear any remaining global messages so subsequent calls only + // report new diagnostics. + dxgiInfoQueue->ClearStoredMessages(DXGI_DEBUG_ALL); + } + } + + FreeLibrary(dxgiDebugModule); +} + +// Use std::hash for deduplication (implemented in igl/SamplerState.cpp). +} // namespace + +// Helper: Calculate root signature cost in DWORDs +// Root signature limit: 64 DWORDs +// Cost formula (per Microsoft documentation): +// - Root constants: 1 DWORD per 32-bit value +// - Root descriptors (CBV/SRV/UAV): 2 DWORDs each +// - Descriptor tables: 1 DWORD each (regardless of table size) +// - Static samplers: 0 DWORDs (free) +// Reference: https://learn.microsoft.com/en-us/windows/win32/direct3d12/root-signature-limits +static uint32_t calculateRootSignatureCost(const D3D12_ROOT_SIGNATURE_DESC& desc) { + uint32_t totalCost = 0; + + for (uint32_t i = 0; i < desc.NumParameters; ++i) { + const auto& param = desc.pParameters[i]; + + switch (param.ParameterType) { + case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: + totalCost += param.Constants.Num32BitValues; + IGL_D3D12_LOG_VERBOSE(" [%u] Root constants (b%u): %u DWORDs\n", + i, param.Constants.ShaderRegister, param.Constants.Num32BitValues); + break; + + case D3D12_ROOT_PARAMETER_TYPE_CBV: + totalCost += 2; + IGL_D3D12_LOG_VERBOSE(" [%u] Root CBV (b%u): 2 DWORDs\n", + i, param.Descriptor.ShaderRegister); + break; + + case D3D12_ROOT_PARAMETER_TYPE_SRV: + totalCost += 2; + IGL_D3D12_LOG_VERBOSE(" [%u] Root SRV (t%u): 2 DWORDs\n", + i, param.Descriptor.ShaderRegister); + break; + + case D3D12_ROOT_PARAMETER_TYPE_UAV: + totalCost += 2; + IGL_D3D12_LOG_VERBOSE(" [%u] Root UAV (u%u): 2 DWORDs\n", + i, param.Descriptor.ShaderRegister); + break; + + case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: + totalCost += 1; + const char* tableType = "Unknown"; + if (param.DescriptorTable.NumDescriptorRanges > 0) { + switch 
(param.DescriptorTable.pDescriptorRanges[0].RangeType) { + case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: tableType = "CBV"; break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: tableType = "SRV"; break; + case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: tableType = "UAV"; break; + case D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER: tableType = "Sampler"; break; + } + } + IGL_D3D12_LOG_VERBOSE(" [%u] Descriptor table (%s): 1 DWORD\n", i, tableType); + break; + } + } + + if (desc.NumStaticSamplers > 0) { + IGL_D3D12_LOG_VERBOSE(" Static samplers: 0 DWORDs (free, count=%u)\n", desc.NumStaticSamplers); + } + + return totalCost; +} + +// Optional debug helper: validate that the shader input/output signatures are +// consistent with the input layout and render target configuration we build +// for a graphics PSO. This is intended purely for diagnostics and has no +// effect on runtime behaviour. +static void validateShaderBindingsAndLayout(const RenderPipelineDesc& desc, + const D3D12_GRAPHICS_PIPELINE_STATE_DESC& psoDesc, + const std::vector& inputElements, + ID3D12ShaderReflection* IGL_NULLABLE vsRefl, + ID3D12ShaderReflection* IGL_NULLABLE psRefl) { + // Environment toggle: IGL_D3D12_VALIDATE_SHADER_BINDINGS=0 disables validation. + if (const char* env = std::getenv("IGL_D3D12_VALIDATE_SHADER_BINDINGS")) { + if (env[0] == '0') { + return; + } + } + + if (!vsRefl) { + return; + } + + D3D12_SHADER_DESC vsDesc = {}; + if (FAILED(vsRefl->GetDesc(&vsDesc))) { + return; + } + + bool hasErrors = false; + + IGL_LOG_INFO("=== D3D12 VALIDATE_SHADER_BINDINGS (%s) ===\n", desc.debugName.c_str()); + + // Helper for case-insensitive string compare. + auto iequals = [](const char* a, const char* b) -> bool { + if (!a || !b) { + return false; + } + while (*a && *b) { + const int ca = std::tolower(static_cast(*a)); + const int cb = std::tolower(static_cast(*b)); + if (ca != cb) { + return false; + } + ++a; + ++b; + } + return *a == '\0' && *b == '\0'; + }; + + // Validate vertex shader inputs against the input layout. 
+ IGL_LOG_INFO(" [VS] InputParameters=%u, InputLayout.Elements=%u\n", + vsDesc.InputParameters, + static_cast(inputElements.size())); + + for (UINT i = 0; i < vsDesc.InputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC paramDesc = {}; + if (FAILED(vsRefl->GetInputParameterDesc(i, ¶mDesc))) { + continue; + } + + // Skip system-value semantics (SV_*), which do not come from the input layout. + if (paramDesc.SemanticName && + paramDesc.SemanticName[0] == 'S' && paramDesc.SemanticName[1] == 'V' && + paramDesc.SemanticName[2] == '_') { + continue; + } + + bool found = false; + for (const auto& elem : inputElements) { + if (elem.SemanticName && + iequals(elem.SemanticName, paramDesc.SemanticName) && + elem.SemanticIndex == paramDesc.SemanticIndex) { + found = true; + break; + } + } + + if (!found) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] VS input '%s%u' has no matching input layout element " + "(debugName='%s').\n", + paramDesc.SemanticName ? paramDesc.SemanticName : "", + paramDesc.SemanticIndex, + desc.debugName.c_str()); + } + } + + // Validate pixel shader color outputs against NumRenderTargets / RTV formats. + if (psRefl) { + D3D12_SHADER_DESC psDesc = {}; + if (SUCCEEDED(psRefl->GetDesc(&psDesc))) { + UINT colorOutputs = 0; + for (UINT i = 0; i < psDesc.OutputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC paramDesc = {}; + if (FAILED(psRefl->GetOutputParameterDesc(i, ¶mDesc))) { + continue; + } + if (paramDesc.SemanticName && iequals(paramDesc.SemanticName, "SV_TARGET")) { + ++colorOutputs; + } + } + + IGL_LOG_INFO(" [PS] ColorOutputs=%u, NumRenderTargets=%u, RTV[0]=%d, DSV=%d\n", + colorOutputs, + psoDesc.NumRenderTargets, + psoDesc.NumRenderTargets > 0 ? 
static_cast(psoDesc.RTVFormats[0]) + : static_cast(DXGI_FORMAT_UNKNOWN), + static_cast(psoDesc.DSVFormat)); + + if (colorOutputs == 0 && psoDesc.NumRenderTargets > 0) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] PS writes no color outputs but PSO has " + "NumRenderTargets=%u (debugName='%s').\n", + psoDesc.NumRenderTargets, + desc.debugName.c_str()); + } else if (colorOutputs > 0 && psoDesc.NumRenderTargets == 0) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] PS writes %u color outputs but PSO has " + "NumRenderTargets=0 (debugName='%s').\n", + colorOutputs, + desc.debugName.c_str()); + } else if (colorOutputs > psoDesc.NumRenderTargets) { + hasErrors = true; + IGL_LOG_ERROR(" [VALIDATION] PS writes %u color outputs but PSO only " + "declares %u render targets (debugName='%s').\n", + colorOutputs, + psoDesc.NumRenderTargets, + desc.debugName.c_str()); + } + } + } + + if (!hasErrors) { + IGL_D3D12_LOG_VERBOSE(" [VALIDATION] Shader inputs/outputs match input layout and " + "render target configuration.\n"); + } + + IGL_LOG_INFO("=== END D3D12 VALIDATE_SHADER_BINDINGS ===\n"); +} + +Device::Device(std::unique_ptr ctx) : ctx_(std::move(ctx)) { + platformDevice_ = std::make_unique(*this); + + // Validate device limits against actual device capabilities. + capabilities_.initialize(*ctx_); + + // Initialize upload infrastructure (allocator pool and upload helpers). + allocatorPool_.initialize(*ctx_, this); + + auto* device = ctx_->getDevice(); + if (device) { + // Pre-compile mipmap generation shaders at device initialization. + // This avoids runtime compilation overhead in Texture::generateMipmap(). 
+ { + // HLSL shader sources (identical to those in Texture.cpp) + static const char* kVS = R"( +struct VSOut { float4 pos: SV_POSITION; float2 uv: TEXCOORD0; }; +VSOut main(uint id: SV_VertexID) { + float2 p = float2((id << 1) & 2, id & 2); + VSOut o; o.pos = float4(p*float2(2,-2)+float2(-1,1), 0, 1); o.uv = p; return o; +} +)"; + static const char* kPS = R"( +Texture2D tex0 : register(t0); +SamplerState smp : register(s0); +float4 main(float4 pos:SV_POSITION, float2 uv:TEXCOORD0) : SV_TARGET { return tex0.SampleLevel(smp, uv, 0); } +)"; + + // Initialize DXC compiler + DXCCompiler dxcCompiler; + Result initResult = dxcCompiler.initialize(); + if (!initResult.isOk()) { + IGL_LOG_ERROR("Device::Device: Failed to initialize DXC for mipmap shader compilation: %s\n", + initResult.message.c_str()); + IGL_LOG_ERROR(" Mipmap generation will be unavailable\n"); + return; // Early exit - don't attempt compilation without DXC + } + + // Get shader model from context (minimum SM 6.0 for DXC) + D3D_SHADER_MODEL shaderModel = ctx_->getMaxShaderModel(); + std::string vsTarget = getShaderTarget(shaderModel, ShaderStage::Vertex); + std::string psTarget = getShaderTarget(shaderModel, ShaderStage::Fragment); + + // Compile vertex shader + std::string vsErrors; + Result vsResult = dxcCompiler.compile(kVS, + strlen(kVS), + "main", + vsTarget.c_str(), + "MipmapGenerationVS", + 0, + pipelineCache_.mipmapVSBytecode_, + vsErrors); + if (!vsResult.isOk()) { + IGL_LOG_ERROR("Device::Device: Failed to pre-compile mipmap VS: %s\n%s\n", + vsResult.message.c_str(), vsErrors.c_str()); + pipelineCache_.mipmapVSBytecode_.clear(); + return; // Early exit - can't proceed without VS + } + + // Compile pixel shader + std::string psErrors; + Result psResult = dxcCompiler.compile(kPS, + strlen(kPS), + "main", + psTarget.c_str(), + "MipmapGenerationPS", + 0, + pipelineCache_.mipmapPSBytecode_, + psErrors); + if (!psResult.isOk()) { + IGL_LOG_ERROR("Device::Device: Failed to pre-compile mipmap PS: 
%s\n%s\n", + psResult.message.c_str(), psErrors.c_str()); + pipelineCache_.mipmapPSBytecode_.clear(); + pipelineCache_.mipmapVSBytecode_.clear(); // Clear VS too for consistency + return; // Early exit - can't proceed without PS + } + + // Create root signature for mipmap generation + D3D12_DESCRIPTOR_RANGE ranges[2] = {}; + ranges[0].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + ranges[0].NumDescriptors = 1; + ranges[0].BaseShaderRegister = 0; + ranges[0].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + ranges[1].RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + ranges[1].NumDescriptors = 1; + ranges[1].BaseShaderRegister = 0; + ranges[1].OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + D3D12_ROOT_PARAMETER params[2] = {}; + params[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + params[0].DescriptorTable.NumDescriptorRanges = 1; + params[0].DescriptorTable.pDescriptorRanges = &ranges[0]; + params[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + params[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + params[1].DescriptorTable.NumDescriptorRanges = 1; + params[1].DescriptorTable.pDescriptorRanges = &ranges[1]; + params[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + D3D12_ROOT_SIGNATURE_DESC rsDesc = {}; + rsDesc.NumParameters = 2; + rsDesc.pParameters = params; + rsDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT; + + igl::d3d12::ComPtr sig, err; + if (FAILED(D3D12SerializeRootSignature(&rsDesc, D3D_ROOT_SIGNATURE_VERSION_1, + sig.GetAddressOf(), err.GetAddressOf()))) { + IGL_LOG_ERROR("Device::Device: Failed to serialize mipmap root signature\n"); + if (err && err->GetBufferPointer()) { + IGL_LOG_ERROR(" D3D12 error: %s\n", static_cast(err->GetBufferPointer())); + } + pipelineCache_.mipmapVSBytecode_.clear(); + pipelineCache_.mipmapPSBytecode_.clear(); + return; + } + + if (FAILED(device->CreateRootSignature(0, + sig->GetBufferPointer(), + 
sig->GetBufferSize(), + IID_PPV_ARGS( + pipelineCache_.mipmapRootSignature_.GetAddressOf())))) { + IGL_LOG_ERROR("Device::Device: Failed to create mipmap root signature\n"); + pipelineCache_.mipmapVSBytecode_.clear(); + pipelineCache_.mipmapPSBytecode_.clear(); + return; + } + + // Success! Mark mipmap shaders as available + pipelineCache_.mipmapShadersAvailable_ = true; + IGL_D3D12_LOG_VERBOSE("Device::Device: Mipmap shaders pre-compiled successfully (%zu bytes VS, %zu bytes PS)\n", + pipelineCache_.mipmapVSBytecode_.size(), + pipelineCache_.mipmapPSBytecode_.size()); + } + } +} + +Device::~Device() { + // Capture D3D12 validation messages for this device if enabled via environment. + if (ctx_) { + captureInfoQueueForDevice(ctx_->getDevice()); + } + + // No shared event to clean up; events are per-call in waitForUploadFence. + + // Ensure upload-related resources are released before destroying the device. + // D3D12Context destructor handles main queue fence waits via waitForGPU(). + pipelineCache_.clear(); + samplerCache_.clear(); + allocatorPool_.clearOnDeviceDestruction(); + + // Clear bind group pools to release texture and buffer shared_ptrs that keep resources alive. + bindGroupTexturesPool_.clear(); + bindGroupBuffersPool_.clear(); +} + +// Check for device removal and report detailed error. +Result Device::checkDeviceRemoval() const { + auto* device = ctx_->getDevice(); + if (!device) { + // Device not initialized is an invalid operation, not success. 
+ IGL_DEBUG_ASSERT(false, "Device::checkDeviceRemoval() called before device initialization"); + return Result(Result::Code::InvalidOperation, "Device not initialized"); + } + + // Early return if device already marked as lost (return cached reason for diagnostics) + if (deviceLost_) { + return Result(Result::Code::RuntimeError, + std::string("Device previously lost: ") + deviceLostReason_); + } + + HRESULT hr = device->GetDeviceRemovedReason(); + if (FAILED(hr)) { + const char* reason = "Unknown"; + switch (hr) { + case DXGI_ERROR_DEVICE_HUNG: + reason = "DEVICE_HUNG (GPU not responding)"; + break; + case DXGI_ERROR_DEVICE_REMOVED: + reason = "DEVICE_REMOVED (Driver crash or hardware failure)"; + break; + case DXGI_ERROR_DEVICE_RESET: + reason = "DEVICE_RESET (Driver update or TDR)"; + break; + case DXGI_ERROR_DRIVER_INTERNAL_ERROR: + reason = "DRIVER_INTERNAL_ERROR (Driver bug)"; + break; + case DXGI_ERROR_INVALID_CALL: + reason = "INVALID_CALL (API misuse detected)"; + break; + default: + break; + } + + // Cache the reason and mark device as lost for diagnostics. + deviceLostReason_ = reason; + deviceLost_ = true; + + // Emit any pending D3D12/DXGI debug layer messages to help pinpoint the + // invalid API sequence that caused device removal. + logInfoQueuesForDevice(device, "Device::checkDeviceRemoval"); + + IGL_LOG_ERROR("D3D12 Device Removal Detected: %s (HRESULT=0x%08X)\n", reason, hr); + IGL_DEBUG_ASSERT(false); + return Result(Result::Code::RuntimeError, std::string("D3D12 device removed: ") + reason); + } + + // On success (S_OK), device is healthy + return Result(); +} + +// Alignment validation methods. 
+ +bool Device::validateMSAAAlignment(const TextureDesc& desc, Result* IGL_NULLABLE outResult) const { + if (desc.numSamples <= 1) { + return true; // Not MSAA, no special alignment requirements + } + + // MSAA resources require 64KB alignment in D3D12 + // D3D12 CreateCommittedResource automatically handles this, but we validate dimensions + // to ensure resource won't exceed device limits + IGL_D3D12_LOG_VERBOSE("Device::validateMSAAAlignment: Validating MSAA texture (samples=%u, %ux%u)\n", + desc.numSamples, desc.width, desc.height); + + // Check if texture dimensions are reasonable for MSAA + // Large MSAA textures may fail due to memory constraints + const size_t pixelCount = static_cast(desc.width) * desc.height; + const size_t bytesPerPixel = 4; // Conservative estimate (RGBA8) + const size_t estimatedSize = pixelCount * bytesPerPixel * desc.numSamples; + + // Warn if MSAA texture is very large (> 256MB) + if (estimatedSize > 256 * 1024 * 1024) { + IGL_D3D12_LOG_VERBOSE("Device::validateMSAAAlignment: WARNING - Large MSAA texture detected (%zu MB). 
" + "May cause memory pressure.\n", estimatedSize / (1024 * 1024)); + } + + return true; +} + +bool Device::validateTextureAlignment(const D3D12_RESOURCE_DESC& resourceDesc, + uint32_t sampleCount, + Result* IGL_NULLABLE outResult) const { + // D3D12 texture alignment requirements: + // - MSAA textures (SampleDesc.Count > 1): 64KB alignment (automatic via CreateCommittedResource) + // - Regular textures: 64KB alignment (automatic via CreateCommittedResource) + // - Small textures (<= 64KB): May use 4KB alignment + + // This validation is informational - D3D12 handles alignment automatically + // We just verify parameters are within expected ranges + + if (sampleCount > 1) { + // MSAA texture - will use 64KB alignment + IGL_D3D12_LOG_VERBOSE("Device::validateTextureAlignment: MSAA texture will use 64KB alignment (samples=%u)\n", + sampleCount); + } + + // Validate resource dimensions don't exceed D3D12 limits + constexpr UINT64 kMaxTextureDimension2D = D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION; // 16384 + + if (resourceDesc.Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + if (resourceDesc.Width > kMaxTextureDimension2D || resourceDesc.Height > kMaxTextureDimension2D) { + IGL_LOG_ERROR("Device::validateTextureAlignment: Texture dimensions (%llux%u) exceed D3D12 limit (%llu)\n", + resourceDesc.Width, resourceDesc.Height, kMaxTextureDimension2D); + Result::setResult(outResult, Result::Code::ArgumentInvalid, + "Texture dimensions exceed D3D12 maximum (16384x16384)"); + return false; + } + } + + return true; +} + +bool Device::validateBufferAlignment(size_t bufferSize, bool isUniform) const { + // D3D12 buffer alignment requirements: + // - Constant buffers: 256 bytes (D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + // - Other buffers: No strict alignment requirement + + if (isUniform) { + // Uniform buffers must be 256-byte aligned + // This is already handled in createBuffer() by rounding up the size + if (bufferSize % BUFFER_ALIGNMENT != 0) { + 
IGL_D3D12_LOG_VERBOSE("Device::validateBufferAlignment: Uniform buffer size %zu will be rounded up to %zu\n", + bufferSize, (bufferSize + BUFFER_ALIGNMENT - 1) & ~(BUFFER_ALIGNMENT - 1)); + } + } + + return true; +} + +// BindGroups +Holder Device::createBindGroup( + const BindGroupTextureDesc& desc, + const IRenderPipelineState* IGL_NULLABLE /*compatiblePipeline*/, + Result* IGL_NULLABLE outResult) { + // Store bind group descriptor in pool for later use by encoder + BindGroupTextureDesc description(desc); + const auto handle = bindGroupTexturesPool_.create(std::move(description)); + Result::setResult(outResult, + handle.empty() ? Result(Result::Code::RuntimeError, "Cannot create bind group") + : Result()); + return {this, handle}; +} + +Holder Device::createBindGroup(const BindGroupBufferDesc& desc, + Result* IGL_NULLABLE outResult) { + // Store bind group descriptor in pool for later use by encoder + BindGroupBufferDesc description(desc); + const auto handle = bindGroupBuffersPool_.create(std::move(description)); + Result::setResult(outResult, + handle.empty() ? Result(Result::Code::RuntimeError, "Cannot create bind group") + : Result()); + return {this, handle}; +} + +void Device::destroy(BindGroupTextureHandle handle) { + if (handle.empty()) { + return; + } + bindGroupTexturesPool_.destroy(handle); +} + +void Device::destroy(BindGroupBufferHandle handle) { + if (handle.empty()) { + return; + } + bindGroupBuffersPool_.destroy(handle); +} + +void Device::destroy(SamplerHandle /*handle*/) { + // No-op: D3D12 backend doesn't use the SamplerHandle system. + // Samplers are created as shared_ptr and managed via ref-counting. + // Sampler descriptors are allocated transiently per command encoder at bind time, + // not persistently at sampler creation time, so there's nothing to deallocate here. 
+} + +// Command Queue +std::shared_ptr Device::createCommandQueue(const CommandQueueDesc& /*desc*/, + Result* IGL_NULLABLE + outResult) noexcept { + Result::setOk(outResult); + return std::make_shared(*this); +} + +// Resources +std::unique_ptr Device::createBuffer(const BufferDesc& desc, + Result* IGL_NULLABLE outResult) const noexcept { + // Single const_cast at the API boundary; all mutation happens in the non-const helper. + auto& self = const_cast(*this); + return self.createBufferImpl(desc, outResult); +} + +std::unique_ptr Device::createBufferImpl(const BufferDesc& desc, + Result* IGL_NULLABLE outResult) noexcept { + auto* device = ctx_->getDevice(); + if (!device) { + Result::setResult(outResult, Result::Code::RuntimeError, "D3D12 device is null"); + return nullptr; + } + + // Determine heap type and initial state based on storage + D3D12_HEAP_TYPE heapType; + D3D12_RESOURCE_STATES initialState; + + // CRITICAL: Storage buffers with UAV flags MUST use DEFAULT heap + // D3D12 does not allow UAV resources on UPLOAD heaps + const bool isStorageBuffer = (desc.type & BufferDesc::BufferTypeBits::Storage) != 0; + const bool forceDefaultHeap = isStorageBuffer; // Storage buffers need UAV, which requires DEFAULT heap + + if ((desc.storage == ResourceStorage::Shared || desc.storage == ResourceStorage::Managed) && !forceDefaultHeap) { + // CPU-writable upload heap (for non-storage buffers only) + heapType = D3D12_HEAP_TYPE_UPLOAD; + initialState = D3D12_RESOURCE_STATE_GENERIC_READ; + } else { + // GPU-only default heap (required for storage buffers with UAV) + heapType = D3D12_HEAP_TYPE_DEFAULT; + initialState = D3D12_RESOURCE_STATE_COMMON; + } + + // Create heap properties + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = heapType; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + // For uniform buffers, size must be aligned to 256 bytes 
(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + const bool isUniformBuffer = (desc.type & BufferDesc::BufferTypeBits::Uniform) != 0; + + // Validate buffer alignment requirements. + validateBufferAlignment(desc.length, isUniformBuffer); + + const UINT64 alignedSize = isUniformBuffer + ? AlignUp(desc.length, 256) // D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT + : desc.length; + + IGL_D3D12_LOG_VERBOSE("Device::createBuffer: type=%d, requested_size=%zu, aligned_size=%llu, isUniform=%d\n", + desc.type, desc.length, alignedSize, isUniformBuffer); + + // Create buffer description + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Alignment = 0; + bufferDesc.Width = alignedSize; + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + // Add UAV flag for storage buffers (used by compute shaders) + // isStorageBuffer already defined above for heap type determination + if (isStorageBuffer) { + bufferDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + IGL_D3D12_LOG_VERBOSE("Device::createBuffer: Storage buffer - adding UAV flag\n"); + } + + // Create the buffer resource + igl::d3d12::ComPtr buffer; + HRESULT hr = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &bufferDesc, + initialState, + nullptr, + IID_PPV_ARGS(buffer.GetAddressOf()) + ); + + if (FAILED(hr)) { + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), "Failed to create buffer: HRESULT = 0x%08X", static_cast(hr)); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + // Debug: Log GPU address for uniform buffers + if (isUniformBuffer) { + D3D12_GPU_VIRTUAL_ADDRESS gpuAddr = buffer->GetGPUVirtualAddress(); + 
IGL_D3D12_LOG_VERBOSE("Device::createBuffer: Uniform buffer created, GPU address=0x%llx\n", gpuAddr); + } + + // Upload initial data if provided + D3D12_RESOURCE_STATES finalState = initialState; + + if (heapType == D3D12_HEAP_TYPE_UPLOAD) { + finalState = D3D12_RESOURCE_STATE_GENERIC_READ; + } + + if (desc.data) { + if (heapType == D3D12_HEAP_TYPE_UPLOAD) { + void* mappedData = nullptr; + D3D12_RANGE readRange = {0, 0}; + hr = buffer->Map(0, &readRange, &mappedData); + + if (SUCCEEDED(hr)) { + std::memcpy(mappedData, desc.data, desc.length); + buffer->Unmap(0, nullptr); + } + } else if (heapType == D3D12_HEAP_TYPE_DEFAULT) { + // DEFAULT heap: stage through an UPLOAD buffer and copy + IGL_D3D12_LOG_VERBOSE("Device::createBuffer: Staging initial data via UPLOAD heap for DEFAULT buffer\n"); + + // Create upload buffer + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + uploadHeapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + uploadHeapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + + // Create upload buffer description WITHOUT UAV flag (UPLOAD heaps can't have UAV) + D3D12_RESOURCE_DESC uploadBufferDesc = bufferDesc; + uploadBufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; // Remove UAV flag for upload buffer + + igl::d3d12::ComPtr uploadBuffer; + HRESULT upHr = device->CreateCommittedResource(&uploadHeapProps, + D3D12_HEAP_FLAG_NONE, + &uploadBufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadBuffer.GetAddressOf())); + if (FAILED(upHr)) { + IGL_LOG_ERROR("Device::createBuffer: Failed to create upload buffer: 0x%08X\n", static_cast(upHr)); + } else { + // Map and copy data + void* mapped = nullptr; + D3D12_RANGE rr = {0, 0}; + if (SUCCEEDED(uploadBuffer->Map(0, &rr, &mapped)) && mapped) { + std::memcpy(mapped, desc.data, desc.length); + uploadBuffer->Unmap(0, nullptr); + + igl::d3d12::ComPtr allocator = getUploadCommandAllocator(); + if (!allocator.Get()) { + 
IGL_LOG_ERROR("Device::createBuffer: Failed to get command allocator from pool\n"); + } else { + igl::d3d12::ComPtr cmdList; + if (SUCCEEDED(device->CreateCommandList(0, + D3D12_COMMAND_LIST_TYPE_DIRECT, + allocator.Get(), + nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())))) { + // Transition default buffer to COPY_DEST + D3D12_RESOURCE_BARRIER toCopyDest = {}; + toCopyDest.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toCopyDest.Transition.pResource = buffer.Get(); + toCopyDest.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toCopyDest.Transition.StateBefore = initialState; // COMMON + toCopyDest.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + cmdList->ResourceBarrier(1, &toCopyDest); + + // Copy upload -> default + cmdList->CopyBufferRegion(buffer.Get(), 0, uploadBuffer.Get(), 0, alignedSize); + + // Transition to a likely-read state based on buffer type + D3D12_RESOURCE_STATES targetState = D3D12_RESOURCE_STATE_GENERIC_READ; + if (desc.type & BufferDesc::BufferTypeBits::Vertex) { + targetState = D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } else if (desc.type & BufferDesc::BufferTypeBits::Uniform) { + targetState = D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER; + } else if (desc.type & BufferDesc::BufferTypeBits::Index) { + targetState = D3D12_RESOURCE_STATE_INDEX_BUFFER; + } + D3D12_RESOURCE_BARRIER toTarget = {}; + toTarget.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + toTarget.Transition.pResource = buffer.Get(); + toTarget.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + toTarget.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + toTarget.Transition.StateAfter = targetState; + cmdList->ResourceBarrier(1, &toTarget); + + cmdList->Close(); + ID3D12CommandList* lists[] = {cmdList.Get()}; + ctx_->getCommandQueue()->ExecuteCommandLists(1, lists); + + // Use async fence signaling instead of synchronous waitForGPU(). + // Get fence value that will signal when this upload completes. 
+ UINT64 uploadFenceValue = getNextUploadFenceValue(); + + // Signal upload fence after copy completes + HRESULT hrSignal = + ctx_->getCommandQueue()->Signal(allocatorPool_.getUploadFence(), uploadFenceValue); + if (FAILED(hrSignal)) { + IGL_LOG_ERROR("Device::createBuffer: Failed to signal upload fence: 0x%08X\n", hrSignal); + // Return allocator with 0 to avoid blocking the pool + returnUploadCommandAllocator(allocator, 0); + } else { + // Return allocator to pool with fence value (will be reused after the fence is signaled). + returnUploadCommandAllocator(allocator, uploadFenceValue); + + // Track staging buffer for async cleanup with the associated fence value. + trackUploadBuffer(std::move(uploadBuffer), uploadFenceValue); + } + + finalState = targetState; + } else { + IGL_LOG_ERROR("Device::createBuffer: Failed to create command list\n"); + // Return allocator with 0 to avoid blocking the pool + returnUploadCommandAllocator(allocator, 0); + } + } + } + } + } + } + + Result::setOk(outResult); + return std::make_unique(const_cast(*this), std::move(buffer), desc, finalState); +} + +std::shared_ptr Device::createDepthStencilState( + const DepthStencilStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + Result::setOk(outResult); + return std::make_shared(desc); +} + +std::unique_ptr Device::createShaderStages(const ShaderStagesDesc& desc, + Result* IGL_NULLABLE + outResult) const { + Result::setOk(outResult); + return std::make_unique(desc); +} + +std::shared_ptr Device::createSamplerState(const SamplerStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + return samplerCache_.createSamplerState(desc, outResult); +} + +std::shared_ptr Device::createTexture(const TextureDesc& desc, + Result* IGL_NULLABLE outResult) const noexcept { + auto* device = ctx_->getDevice(); + + // Check for exportability - D3D12 doesn't support exportable textures + if (desc.exportability == TextureDesc::TextureExportability::Exportable) { + Result::setResult(outResult, 
Result::Code::Unimplemented, + "D3D12 does not support exportable textures"); + return nullptr; + } + + // Convert IGL texture format to DXGI format + DXGI_FORMAT dxgiFormat = textureFormatToDXGIFormat(desc.format); + IGL_D3D12_LOG_VERBOSE("Device::createTexture: IGL format=%d -> DXGI format=%d\n", (int)desc.format, (int)dxgiFormat); + if (dxgiFormat == DXGI_FORMAT_UNKNOWN) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Unsupported texture format"); + return nullptr; + } + + // Create texture resource description + D3D12_RESOURCE_DESC resourceDesc = {}; + + // Set dimension based on texture type + if (desc.type == TextureType::ThreeD) { + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE3D; + resourceDesc.DepthOrArraySize = static_cast(desc.depth); + } else if (desc.type == TextureType::Cube) { + // Cube textures are 2D textures with 6 array slices per layer (one per face). + // For cube arrays: numLayers * 6 faces. + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.DepthOrArraySize = static_cast(desc.numLayers * 6); + IGL_D3D12_LOG_VERBOSE("Device::createTexture: Cube texture with %u layers -> %u array slices\n", + desc.numLayers, resourceDesc.DepthOrArraySize); + } else { + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_TEXTURE2D; + resourceDesc.DepthOrArraySize = static_cast(desc.numLayers); + } + + const bool sampledUsage = + (desc.usage & TextureDesc::TextureUsageBits::Sampled) != 0; + const DXGI_FORMAT resourceFormat = + textureFormatToDXGIResourceFormat(desc.format, sampledUsage); + if (resourceFormat == DXGI_FORMAT_UNKNOWN) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Unsupported resource format"); + return nullptr; + } + resourceDesc.Alignment = 0; + resourceDesc.Width = desc.width; + resourceDesc.Height = desc.height; + resourceDesc.MipLevels = static_cast(desc.numMipLevels); + resourceDesc.Format = resourceFormat; + + // MSAA configuration + // D3D12 MSAA requirements: + // - 
Sample count must be 1, 2, 4, 8, or 16 (power of 2) + // - Quality level 0 is standard MSAA (higher quality levels are vendor-specific) + // - MSAA textures cannot have mipmaps (numMipLevels must be 1) + // - Not all formats support all sample counts - validation required + const uint32_t sampleCount = std::max(1u, desc.numSamples); + + // Validate MSAA alignment requirements before creating the resource. + if (sampleCount > 1) { + if (!validateMSAAAlignment(desc, outResult)) { + // Error already set by validation function + return nullptr; + } + } + + // Validate MSAA constraints. + if (sampleCount > 1) { + // MSAA textures cannot have mipmaps + if (desc.numMipLevels > 1) { + IGL_LOG_ERROR("Device::createTexture: MSAA textures cannot have mipmaps (numMipLevels=%u, numSamples=%u)\n", + desc.numMipLevels, sampleCount); + Result::setResult(outResult, Result::Code::ArgumentInvalid, + "MSAA textures cannot have mipmaps (numMipLevels must be 1)"); + return nullptr; + } + + // Validate that the requested MSAA sample count is supported for this format. + // NOTE: Applications should query DeviceFeatureLimits::MaxMultisampleCount proactively + // to avoid runtime errors. Use getMaxMSAASamplesForFormat() for format-specific queries. + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msqLevels = {}; + msqLevels.Format = dxgiFormat; + msqLevels.SampleCount = sampleCount; + msqLevels.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + + if (FAILED(device->CheckFeatureSupport(D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, &msqLevels, sizeof(msqLevels))) || + msqLevels.NumQualityLevels == 0) { + // Query maximum supported samples for better error messages. + const uint32_t maxSamples = getMaxMSAASamplesForFormat(desc.format); + + char errorMsg[512]; + snprintf(errorMsg, sizeof(errorMsg), + "Device::createTexture: Format %d does not support %u samples (max supported: %u). 
" + "Query DeviceFeatureLimits::MaxMultisampleCount before texture creation.", + static_cast(dxgiFormat), sampleCount, maxSamples); + IGL_LOG_ERROR("%s\n", errorMsg); + Result::setResult(outResult, Result::Code::Unsupported, errorMsg); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Device::createTexture: MSAA enabled - format=%d, samples=%u, quality levels=%u\n", + static_cast(dxgiFormat), sampleCount, msqLevels.NumQualityLevels); + } + + resourceDesc.SampleDesc.Count = sampleCount; + resourceDesc.SampleDesc.Quality = 0; // Standard MSAA quality (0 = default/standard) + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_UNKNOWN; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + // Set resource flags based on usage. + // IMPORTANT: D3D12 forbids combining ALLOW_DEPTH_STENCIL with + // ALLOW_RENDER_TARGET, ALLOW_UNORDERED_ACCESS or ALLOW_SIMULTANEOUS_ACCESS. + // We therefore do not allow "Storage" usage on depth/stencil formats and + // never set both DEPTH_STENCIL and RENDER_TARGET on the same resource. + const bool isDepthStencilFormat = + (desc.format >= TextureFormat::Z_UNorm16 && desc.format <= TextureFormat::S_UInt8); + + if (desc.usage & TextureDesc::TextureUsageBits::Sampled) { + // Shader resource - no special flags needed + } + + // Attachment usage becomes either a color render target or a depth/stencil + // target depending on the texture format. + if (desc.usage & TextureDesc::TextureUsageBits::Attachment) { + if (isDepthStencilFormat) { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL; + } else { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + } + } + + // Storage (unordered access) is only supported for non-depth/stencil + // formats. If requested on a depth/stencil texture, log and ignore it. + if (desc.usage & TextureDesc::TextureUsageBits::Storage) { + if (isDepthStencilFormat) { + IGL_LOG_ERROR( + "Device::createTexture: Storage usage (UAV) requested for depth/stencil " + "format (format=%d). 
D3D12 does not allow ALLOW_DEPTH_STENCIL together " + "with ALLOW_UNORDERED_ACCESS; ignoring Storage flag for this texture.\n", + static_cast(desc.format)); + } else { + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS; + } + } + + // Create heap properties + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + // Determine initial state + D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON; + + // Prepare optimized clear value for depth/stencil only. + // For color render targets we deliberately avoid passing an optimized clear + // value to CreateCommittedResource, because RenderPass clear colors are + // often dynamic. Passing a fixed optimized clear color while clearing to + // arbitrary colors triggers D3D12 WARNING ID=820 + // (ClearRenderTargetView clear values do not match resource creation). + D3D12_CLEAR_VALUE clearValue = {}; + D3D12_CLEAR_VALUE* pClearValue = nullptr; + + if (resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { + clearValue.Format = dxgiFormat; + clearValue.DepthStencil.Depth = 1.0f; // Default far plane + clearValue.DepthStencil.Stencil = 0; + pClearValue = &clearValue; + } + + // Validate texture alignment before creating the resource. 
+ if (!validateTextureAlignment(resourceDesc, sampleCount, outResult)) { + // Error already set by validation function + return nullptr; + } + + // Create the texture resource + igl::d3d12::ComPtr resource; + HRESULT hr = device->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + initialState, + pClearValue, // Optimized clear value for render targets/depth-stencil + IID_PPV_ARGS(resource.GetAddressOf())); + + if (FAILED(hr)) { + char errorMsg[512]; + if (hr == DXGI_ERROR_DEVICE_REMOVED) { + HRESULT removedReason = device->GetDeviceRemovedReason(); + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create texture resource. Device removed! HRESULT: 0x%08X, Removed reason: 0x%08X", + static_cast(hr), static_cast(removedReason)); + } else { + snprintf(errorMsg, sizeof(errorMsg), "Failed to create texture resource. HRESULT: 0x%08X", static_cast(hr)); + } + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + // Create IGL texture from D3D12 resource. 
+ auto texture = Texture::createFromResource( + resource.Get(), desc.format, desc, device, ctx_->getCommandQueue(), initialState, + const_cast(this)); + Result::setOk(outResult); + return texture; +} + +std::shared_ptr Device::createTextureView(std::shared_ptr texture, + const TextureViewDesc& desc, + Result* IGL_NULLABLE + outResult) const noexcept { + if (!texture) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Parent texture is null"); + return nullptr; + } + + // Cast to D3D12 texture + auto d3d12Texture = std::static_pointer_cast(texture); + if (!d3d12Texture) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Texture is not a D3D12 texture"); + return nullptr; + } + + // Create the texture view + auto view = Texture::createTextureView(d3d12Texture, desc); + if (!view) { + Result::setResult(outResult, Result::Code::RuntimeError, "Failed to create texture view"); + return nullptr; + } + + Result::setOk(outResult); + return view; +} + +std::shared_ptr Device::createTimer(Result* IGL_NULLABLE outResult) const noexcept { + auto timer = std::make_shared(*this); + Result::setOk(outResult); + return timer; +} + +std::shared_ptr Device::createVertexInputState( + const VertexInputStateDesc& desc, + Result* IGL_NULLABLE outResult) const { + Result::setOk(outResult); + return std::make_shared(desc); +} + +std::shared_ptr Device::createComputePipeline( + const ComputePipelineDesc& desc, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createComputePipeline() START - debugName='%s'\n", desc.debugName.c_str()); + + auto* device = ctx_->getDevice(); + if (!device) { + IGL_LOG_ERROR(" D3D12 device is null!\n"); + Result::setResult(outResult, Result::Code::InvalidOperation, "D3D12 device is null"); + return nullptr; + } + + if (!desc.shaderStages) { + IGL_LOG_ERROR(" Shader stages are required!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages are required"); + return nullptr; + } + + if 
(desc.shaderStages->getType() != ShaderStagesType::Compute) { + IGL_LOG_ERROR(" Shader stages must be compute type!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages must be compute type"); + return nullptr; + } + + // Get compute shader module + auto* computeModule = static_cast(desc.shaderStages->getComputeModule().get()); + if (!computeModule) { + IGL_LOG_ERROR(" Compute module is null!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Compute shader required"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Getting compute shader bytecode...\n"); + const auto& csBytecode = computeModule->getBytecode(); + IGL_D3D12_LOG_VERBOSE(" CS bytecode: %zu bytes\n", csBytecode.size()); + + // Create root signature for compute + // Root signature layout for compute: + // - Root parameter 0: Root Constants for b0 (Push Constants) + // - Root parameter 1: Descriptor table with unbounded UAVs (u0-uN) + // - Root parameter 2: Descriptor table with unbounded SRVs (t0-tN) + // - Root parameter 3: Descriptor table with unbounded CBVs (b1-bN) + // - Root parameter 4: Descriptor table with unbounded Samplers (s0-sN) + + // Query root signature capabilities to determine descriptor range bounds. + // Tier 1 devices require bounded descriptor ranges. + const D3D12_RESOURCE_BINDING_TIER bindingTier = ctx_->getResourceBindingTier(); + const bool needsBoundedRanges = (bindingTier == D3D12_RESOURCE_BINDING_TIER_1); + + // Conservative bounds for Tier 1 devices (based on actual usage in render sessions) + // These limits are sufficient for all current IGL usage patterns + const UINT uavBound = needsBoundedRanges ? 64 : UINT_MAX; + const UINT srvBound = needsBoundedRanges ? 128 : UINT_MAX; + const UINT cbvBound = needsBoundedRanges ? 64 : UINT_MAX; + const UINT samplerBound = needsBoundedRanges ? 
32 : UINT_MAX; // Samplers always bounded on Tier 1/2 + + if (needsBoundedRanges) { + IGL_D3D12_LOG_VERBOSE(" Using bounded descriptor ranges (Tier 1): UAV=%u, SRV=%u, CBV=%u, Sampler=%u\n", + uavBound, srvBound, cbvBound, samplerBound); + } else { + IGL_D3D12_LOG_VERBOSE(" Using unbounded descriptor ranges (Tier %u)\n", + bindingTier == D3D12_RESOURCE_BINDING_TIER_3 ? 3 : 2); + } + + // Descriptor range for UAVs (unordered access views - read/write buffers and textures). + D3D12_DESCRIPTOR_RANGE uavRange = {}; + uavRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; + uavRange.NumDescriptors = uavBound; + uavRange.BaseShaderRegister = 0; // Starting at u0 + uavRange.RegisterSpace = 0; + uavRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Descriptor range for SRVs (shader resource views - read-only textures and buffers) + D3D12_DESCRIPTOR_RANGE srvRange = {}; + srvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; + srvRange.NumDescriptors = srvBound; + srvRange.BaseShaderRegister = 0; // Starting at t0 + srvRange.RegisterSpace = 0; + srvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Descriptor range for CBVs (constant buffer views). + // Note: b0 will be used for root constants (push constants), so the CBV table starts at b1. + D3D12_DESCRIPTOR_RANGE cbvRange = {}; + cbvRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; + cbvRange.NumDescriptors = cbvBound; + cbvRange.BaseShaderRegister = 1; // Starting at b1 (b0 is root constants) + cbvRange.RegisterSpace = 0; + cbvRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Descriptor range for Samplers. 
+ D3D12_DESCRIPTOR_RANGE samplerRange = {}; + samplerRange.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER; + samplerRange.NumDescriptors = samplerBound; + samplerRange.BaseShaderRegister = 0; // Starting at s0 + samplerRange.RegisterSpace = 0; + samplerRange.OffsetInDescriptorsFromTableStart = D3D12_DESCRIPTOR_RANGE_OFFSET_APPEND; + + // Root parameters + D3D12_ROOT_PARAMETER rootParams[5] = {}; + + // Parameter 0: Root Constants for b0 (Push Constants) + // Increased from 16 to 32 DWORDs (64→128 bytes) to match Vulkan + // Using 32-bit constants for push constants in compute shaders + rootParams[0].ParameterType = D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS; + rootParams[0].Constants.ShaderRegister = 0; // b0 + rootParams[0].Constants.RegisterSpace = 0; + rootParams[0].Constants.Num32BitValues = 32; // 32 DWORDs = 128 bytes (matches Vulkan) + rootParams[0].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 1: Descriptor table for UAVs + rootParams[1].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[1].DescriptorTable.NumDescriptorRanges = 1; + rootParams[1].DescriptorTable.pDescriptorRanges = &uavRange; + rootParams[1].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 2: Descriptor table for SRVs + rootParams[2].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[2].DescriptorTable.NumDescriptorRanges = 1; + rootParams[2].DescriptorTable.pDescriptorRanges = &srvRange; + rootParams[2].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 3: Descriptor table for CBVs (b1+) + // Note: b0 is now root constants, this table starts at b1 + rootParams[3].ParameterType = D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[3].DescriptorTable.NumDescriptorRanges = 1; + rootParams[3].DescriptorTable.pDescriptorRanges = &cbvRange; + rootParams[3].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + // Parameter 4: Descriptor table for Samplers + rootParams[4].ParameterType = 
D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE; + rootParams[4].DescriptorTable.NumDescriptorRanges = 1; + rootParams[4].DescriptorTable.pDescriptorRanges = &samplerRange; + rootParams[4].ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; + + D3D12_ROOT_SIGNATURE_DESC rootSigDesc = {}; + rootSigDesc.NumParameters = 5; + rootSigDesc.pParameters = rootParams; + rootSigDesc.NumStaticSamplers = 0; + rootSigDesc.pStaticSamplers = nullptr; + rootSigDesc.Flags = D3D12_ROOT_SIGNATURE_FLAG_NONE; + + // CRITICAL: Validate root signature cost (64 DWORD hardware limit). + IGL_D3D12_LOG_VERBOSE(" Validating compute root signature cost:\n"); + const uint32_t cost = calculateRootSignatureCost(rootSigDesc); + IGL_D3D12_LOG_VERBOSE(" Total cost: %u / 64 DWORDs (%.1f%%)\n", cost, 100.0f * cost / 64.0f); + + // Warning threshold at 50% (32 DWORDs) + if (cost > 32) { + IGL_D3D12_LOG_VERBOSE(" WARNING: Root signature cost exceeds 50%% of limit: %u / 64 DWORDs\n", cost); + } + + // Hard limit enforcement + IGL_DEBUG_ASSERT(cost <= 64, "Root signature exceeds 64 DWORD limit!"); + if (cost > 64) { + IGL_LOG_ERROR(" ROOT SIGNATURE COST OVERFLOW: %u DWORDs (limit: 64)\n", cost); + Result::setResult(outResult, Result::Code::ArgumentOutOfRange, + "Root signature cost exceeds 64 DWORD hardware limit"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Creating compute root signature with Root Constants (b0)/UAVs/SRVs/CBVs/Samplers\n"); + + // Get or create cached root signature. 
+ igl::d3d12::ComPtr rootSignature = + pipelineCache_.getOrCreateRootSignature(ctx_->getDevice(), rootSigDesc, outResult); + if (!rootSignature.Get()) { + return nullptr; + } + + // Create compute pipeline state + D3D12_COMPUTE_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature.Get(); + psoDesc.CS.pShaderBytecode = csBytecode.data(); + psoDesc.CS.BytecodeLength = csBytecode.size(); + psoDesc.NodeMask = 0; + psoDesc.CachedPSO.pCachedBlob = nullptr; + psoDesc.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + // PSO cache lookup (thread-safe with double-checked locking). + const size_t psoHash = pipelineCache_.hashComputePipelineDesc(desc); + igl::d3d12::ComPtr pipelineState; + + // First check: Lock for cache lookup + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.computePSOCache_.find(psoHash); + if (psoIt != pipelineCache_.computePSOCache_.end()) { + // Cache hit - reuse existing PSO + pipelineCache_.computePSOCacheHits_++; + pipelineState = psoIt->second; // Assignment creates a ref-counted copy + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE HIT] Hash=0x%zx, hits=%zu, misses=%zu, hit rate=%.1f%%\n", + psoHash, + pipelineCache_.computePSOCacheHits_, + pipelineCache_.computePSOCacheMisses_, + 100.0 * pipelineCache_.computePSOCacheHits_ / + (pipelineCache_.computePSOCacheHits_ + + pipelineCache_.computePSOCacheMisses_)); + IGL_D3D12_LOG_VERBOSE("Device::createComputePipeline() SUCCESS (CACHED) - PSO=%p, RootSig=%p\n", + pipelineState.Get(), rootSignature.Get()); + Result::setOk(outResult); + // Create a copy of the root signature for the returned object + igl::d3d12::ComPtr rootSigCopy = rootSignature; + return std::make_shared(desc, std::move(pipelineState), std::move(rootSigCopy)); + } + } + + // Cache miss - create new PSO outside lock (expensive operation) + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE MISS] Hash=0x%zx\n", psoHash); + + IGL_D3D12_LOG_VERBOSE(" Creating compute 
pipeline state...\n"); + HRESULT hr = device->CreateComputePipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR(" CreateComputePipelineState FAILED: 0x%08X\n", static_cast(hr)); + + // Dump D3D12 + DXGI debug messages, if available, to help identify the + // invalid PSO configuration (shader bytecode, root signature, etc.). + logInfoQueuesForDevice(device, "CreateComputePipelineState"); + + Result::setResult(outResult, Result::Code::RuntimeError, "Failed to create compute pipeline state"); + return nullptr; + } + + // E-011: Set debug name on compute PSO for better debugging in PIX/RenderDoc + if (desc.shaderStages && desc.shaderStages->getComputeModule()) { + const std::string& psoName = desc.shaderStages->getComputeModule()->info().debugName; + if (!psoName.empty()) { + // Convert to wide string for D3D12 SetName API + std::wstring wideName(psoName.begin(), psoName.end()); + pipelineState->SetName(wideName.c_str()); + IGL_D3D12_LOG_VERBOSE(" Set compute PSO debug name: %s\n", psoName.c_str()); + } + } + + // Second check: Lock for cache insertion with double-check. + // Another thread may have created the PSO while we were creating ours + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.computePSOCache_.find(psoHash); + if (psoIt != pipelineCache_.computePSOCache_.end()) { + // Another thread beat us to it - use their PSO + pipelineCache_.computePSOCacheHits_++; + pipelineState = psoIt->second; + IGL_D3D12_LOG_VERBOSE(" [PSO DOUBLE-CHECK HIT] Another thread created PSO, using theirs. 
Hash=0x%zx\n", psoHash); + } else { + // We're the first to complete - cache our PSO + pipelineCache_.computePSOCacheMisses_++; + pipelineCache_.computePSOCache_[psoHash] = pipelineState; + IGL_D3D12_LOG_VERBOSE(" [PSO CACHED] Hash=0x%zx, hits=%zu, misses=%zu\n", + psoHash, + pipelineCache_.computePSOCacheHits_, + pipelineCache_.computePSOCacheMisses_); + } + } + + IGL_D3D12_LOG_VERBOSE("Device::createComputePipeline() SUCCESS - PSO=%p, RootSig=%p (hash=0x%zx)\n", + pipelineState.Get(), rootSignature.Get(), psoHash); + Result::setOk(outResult); + return std::make_shared(desc, std::move(pipelineState), std::move(rootSignature)); +} + +std::shared_ptr Device::createRenderPipeline( + const RenderPipelineDesc& desc, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createRenderPipeline() START - debugName='%s'\n", desc.debugName.c_str()); + + auto* device = ctx_->getDevice(); + if (!device) { + IGL_LOG_ERROR(" D3D12 device is null!\n"); + Result::setResult(outResult, Result::Code::InvalidOperation, "D3D12 device is null"); + return nullptr; + } + + if (!desc.shaderStages) { + IGL_LOG_ERROR(" Shader stages are required!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages are required"); + return nullptr; + } + + // Get shader modules + auto* vertexModule = static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = static_cast(desc.shaderStages->getFragmentModule().get()); + + if (!vertexModule || !fragmentModule) { + IGL_LOG_ERROR(" Vertex or fragment module is null!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Vertex and fragment shaders required"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Getting shader bytecode...\n"); + // Get shader bytecode first + const auto& vsBytecode = vertexModule->getBytecode(); + const auto& psBytecode = fragmentModule->getBytecode(); + IGL_D3D12_LOG_VERBOSE(" VS bytecode: %zu bytes, PS bytecode: %zu bytes\n", vsBytecode.size(), 
psBytecode.size()); + + // Extract shader reflection info for dynamic root signature creation + const auto& vsReflectionInfo = vertexModule->getReflectionInfo(); + const auto& psReflectionInfo = fragmentModule->getReflectionInfo(); + + // Create root signature key from shader reflection + D3D12RootSignatureKey rootSigKey = D3D12RootSignatureKey::fromShaderReflection(&vsReflectionInfo, &psReflectionInfo); + + // Query resource binding tier for descriptor range bounds + const D3D12_RESOURCE_BINDING_TIER bindingTier = ctx_->getResourceBindingTier(); + IGL_D3D12_LOG_VERBOSE(" Resource binding tier: %u\n", bindingTier); + + // Create root signature dynamically based on shader requirements + igl::d3d12::ComPtr rootSignature = + pipelineCache_.createRootSignatureFromKey(ctx_->getDevice(), rootSigKey, bindingTier, outResult); + if (!rootSignature.Get()) { + return nullptr; + } + + // Create PSO - zero-initialize all fields + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature.Get(); + + // Shader bytecode + psoDesc.VS = {vsBytecode.data(), vsBytecode.size()}; + psoDesc.PS = {psBytecode.data(), psBytecode.size()}; + // Explicitly zero unused shader stages + psoDesc.DS = {nullptr, 0}; + psoDesc.HS = {nullptr, 0}; + psoDesc.GS = {nullptr, 0}; + + // Rasterizer state - configure based on pipeline descriptor + // Fill mode (solid vs wireframe) + psoDesc.RasterizerState.FillMode = (desc.polygonFillMode == PolygonFillMode::Line) + ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + + // Cull mode configuration + switch (desc.cullMode) { + case CullMode::Back: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + case CullMode::Front: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CullMode::Disabled: + default: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + } + + // Front face winding order + psoDesc.RasterizerState.FrontCounterClockwise = + (desc.frontFaceWinding == WindingMode::CounterClockwise) ? TRUE : FALSE; + + // Depth bias (polygon offset) - baseline values set in PSO + // Note: IGL doesn't currently expose depth bias in RenderPipelineDesc + // Applications can dynamically adjust depth bias via RenderCommandEncoder::setDepthBias() + // These PSO values serve as the baseline which can be dynamically overridden + psoDesc.RasterizerState.DepthBias = 0; // Integer depth bias (default: no bias) + psoDesc.RasterizerState.DepthBiasClamp = 0.0f; // Max depth bias value (default: no clamp) + psoDesc.RasterizerState.SlopeScaledDepthBias = 0.0f; // Slope-scaled bias for angled surfaces + + psoDesc.RasterizerState.DepthClipEnable = TRUE; // Enable depth clipping + psoDesc.RasterizerState.MultisampleEnable = (desc.sampleCount > 1) ? TRUE : FALSE; + psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; + psoDesc.RasterizerState.ForcedSampleCount = 0; + psoDesc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + // Blend state - configure per render target based on pipeline descriptor + psoDesc.BlendState.AlphaToCoverageEnable = FALSE; + const size_t numColorAttachments = desc.targetDesc.colorAttachments.size(); + psoDesc.BlendState.IndependentBlendEnable = numColorAttachments > 1 ? 
TRUE : FALSE; + + // Helper to convert IGL blend factor to D3D12 + auto toD3D12Blend = [](BlendFactor f) { + switch (f) { + case BlendFactor::Zero: return D3D12_BLEND_ZERO; + case BlendFactor::One: return D3D12_BLEND_ONE; + case BlendFactor::SrcColor: return D3D12_BLEND_SRC_COLOR; + case BlendFactor::OneMinusSrcColor: return D3D12_BLEND_INV_SRC_COLOR; + case BlendFactor::SrcAlpha: return D3D12_BLEND_SRC_ALPHA; + case BlendFactor::OneMinusSrcAlpha: return D3D12_BLEND_INV_SRC_ALPHA; + case BlendFactor::DstColor: return D3D12_BLEND_DEST_COLOR; + case BlendFactor::OneMinusDstColor: return D3D12_BLEND_INV_DEST_COLOR; + case BlendFactor::DstAlpha: return D3D12_BLEND_DEST_ALPHA; + case BlendFactor::OneMinusDstAlpha: return D3D12_BLEND_INV_DEST_ALPHA; + case BlendFactor::SrcAlphaSaturated: return D3D12_BLEND_SRC_ALPHA_SAT; + case BlendFactor::BlendColor: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendColor: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::BlendAlpha: return D3D12_BLEND_BLEND_FACTOR; // D3D12 uses same constant for RGB and Alpha + case BlendFactor::OneMinusBlendAlpha: return D3D12_BLEND_INV_BLEND_FACTOR; // D3D12 uses same constant for RGB and Alpha + case BlendFactor::Src1Color: return D3D12_BLEND_SRC1_COLOR; // Dual-source blending + case BlendFactor::OneMinusSrc1Color: return D3D12_BLEND_INV_SRC1_COLOR; // Dual-source blending + case BlendFactor::Src1Alpha: return D3D12_BLEND_SRC1_ALPHA; // Dual-source blending + case BlendFactor::OneMinusSrc1Alpha: return D3D12_BLEND_INV_SRC1_ALPHA; // Dual-source blending + default: return D3D12_BLEND_ONE; + } + }; + + auto toD3D12BlendOp = [](BlendOp op) { + switch (op) { + case BlendOp::Add: return D3D12_BLEND_OP_ADD; + case BlendOp::Subtract: return D3D12_BLEND_OP_SUBTRACT; + case BlendOp::ReverseSubtract: return D3D12_BLEND_OP_REV_SUBTRACT; + case BlendOp::Min: return D3D12_BLEND_OP_MIN; + case BlendOp::Max: return D3D12_BLEND_OP_MAX; + default: return D3D12_BLEND_OP_ADD; + } + }; + + for 
(UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + if (i < desc.targetDesc.colorAttachments.size()) { + const auto& att = desc.targetDesc.colorAttachments[i]; + psoDesc.BlendState.RenderTarget[i].BlendEnable = att.blendEnabled ? TRUE : FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = toD3D12Blend(att.srcRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlend = toD3D12Blend(att.dstRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOp = toD3D12BlendOp(att.rgbBlendOp); + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = toD3D12Blend(att.srcAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = toD3D12Blend(att.dstAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = toD3D12BlendOp(att.alphaBlendOp); + + // Convert IGL color write mask to D3D12 + UINT8 writeMask = 0; + if (att.colorWriteMask & igl::kColorWriteBitsRed) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_RED; + } + if (att.colorWriteMask & igl::kColorWriteBitsGreen) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + } + if (att.colorWriteMask & igl::kColorWriteBitsBlue) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + } + if (att.colorWriteMask & igl::kColorWriteBitsAlpha) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + } + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = writeMask; + + IGL_D3D12_LOG_VERBOSE(" PSO RenderTarget[%u]: BlendEnable=%d, SrcBlend=%d, DstBlend=%d, WriteMask=0x%02X\n", + i, att.blendEnabled, psoDesc.BlendState.RenderTarget[i].SrcBlend, + psoDesc.BlendState.RenderTarget[i].DestBlend, writeMask); + } else { + // Default blend state for unused render targets + psoDesc.BlendState.RenderTarget[i].BlendEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = D3D12_BLEND_ONE; + psoDesc.BlendState.RenderTarget[i].DestBlend = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; + 
psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + // Logic operations support (bitwise blend operations) + // Query hardware support for logic operations + // Note: LogicOp is currently disabled as IGL doesn't expose logic operation settings in RenderPipelineDesc + // To enable in the future: + // 1. Add LogicOp enum and logicOpEnabled/logicOp fields to RenderPipelineDesc::ColorAttachment + // 2. Query D3D12_FEATURE_D3D12_OPTIONS.OutputMergerLogicOp at device initialization + // 3. Set LogicOpEnable = TRUE and LogicOp = convertLogicOp(att.logicOp) when enabled + psoDesc.BlendState.RenderTarget[i].LogicOpEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + } + + // Helper to convert IGL stencil operation to D3D12 + auto toD3D12StencilOp = [](StencilOperation op) { + switch (op) { + case StencilOperation::Keep: return D3D12_STENCIL_OP_KEEP; + case StencilOperation::Zero: return D3D12_STENCIL_OP_ZERO; + case StencilOperation::Replace: return D3D12_STENCIL_OP_REPLACE; + case StencilOperation::IncrementClamp: return D3D12_STENCIL_OP_INCR_SAT; + case StencilOperation::DecrementClamp: return D3D12_STENCIL_OP_DECR_SAT; + case StencilOperation::Invert: return D3D12_STENCIL_OP_INVERT; + case StencilOperation::IncrementWrap: return D3D12_STENCIL_OP_INCR; + case StencilOperation::DecrementWrap: return D3D12_STENCIL_OP_DECR; + default: return D3D12_STENCIL_OP_KEEP; + } + }; + + // Helper to convert IGL compare function to D3D12 + auto toD3D12CompareFunc = [](CompareFunction func) { + switch (func) { + case CompareFunction::Never: return D3D12_COMPARISON_FUNC_NEVER; + case CompareFunction::Less: return D3D12_COMPARISON_FUNC_LESS; + case CompareFunction::Equal: return D3D12_COMPARISON_FUNC_EQUAL; + case CompareFunction::LessEqual: return 
D3D12_COMPARISON_FUNC_LESS_EQUAL; + case CompareFunction::Greater: return D3D12_COMPARISON_FUNC_GREATER; + case CompareFunction::NotEqual: return D3D12_COMPARISON_FUNC_NOT_EQUAL; + case CompareFunction::GreaterEqual: return D3D12_COMPARISON_FUNC_GREATER_EQUAL; + case CompareFunction::AlwaysPass: return D3D12_COMPARISON_FUNC_ALWAYS; + default: return D3D12_COMPARISON_FUNC_LESS; + } + }; + + // Depth stencil state - check if we have a depth or stencil attachment + const bool hasDepth = (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid); + const bool hasStencil = (desc.targetDesc.stencilAttachmentFormat != TextureFormat::Invalid); + + if (hasDepth) { + psoDesc.DepthStencilState.DepthEnable = TRUE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + // Use LESS_EQUAL to allow Z=0 to pass when depth buffer is cleared to 0 + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + } else { + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + } + + // Configure stencil state (can be used with or without depth) + if (hasStencil) { + // Note: In D3D12/IGL, stencil state is configured via DepthStencilState binding + // For now, we set up basic stencil configuration in the PSO + // Default: stencil disabled unless explicitly configured by DepthStencilState + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + psoDesc.DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + + // Front face stencil operations (defaults) + psoDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + 
psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + + // Back face stencil operations (defaults, same as front) + psoDesc.DepthStencilState.BackFace = psoDesc.DepthStencilState.FrontFace; + + IGL_D3D12_LOG_VERBOSE(" PSO Stencil configured: format=%d\n", (int)desc.targetDesc.stencilAttachmentFormat); + } else { + psoDesc.DepthStencilState.StencilEnable = FALSE; + } + + // Render target formats: support multiple render targets (MRT) + if (!desc.targetDesc.colorAttachments.empty()) { + const UINT n = static_cast(std::min(desc.targetDesc.colorAttachments.size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc.NumRenderTargets = n; + IGL_D3D12_LOG_VERBOSE(" PSO NumRenderTargets = %u (color attachments = %zu)\n", n, desc.targetDesc.colorAttachments.size()); + for (UINT i = 0; i < n; ++i) { + // CRITICAL: Extract value to avoid MSVC debug iterator bounds check in function call + const auto textureFormat = desc.targetDesc.colorAttachments[i].textureFormat; + psoDesc.RTVFormats[i] = textureFormatToDXGIFormat(textureFormat); + IGL_D3D12_LOG_VERBOSE(" PSO RTVFormats[%u] = %d (IGL format %d)\n", i, psoDesc.RTVFormats[i], textureFormat); + } + } else { + psoDesc.NumRenderTargets = 0; + IGL_D3D12_LOG_VERBOSE(" PSO NumRenderTargets = 0 (no color attachments)\n"); + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + psoDesc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; + } + } + if (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid) { + psoDesc.DSVFormat = textureFormatToDXGIFormat(desc.targetDesc.depthAttachmentFormat); + } else { + psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; + } + + // Sample settings + psoDesc.SampleMask = UINT_MAX; + psoDesc.SampleDesc.Count = 1; + psoDesc.SampleDesc.Quality = 0; // Must be 0 for Count=1 + + // Primitive topology - convert from IGL topology enum + if (desc.topology == igl::PrimitiveType::Point) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + 
IGL_D3D12_LOG_VERBOSE(" Setting PSO topology type to POINT\n"); + } else if (desc.topology == igl::PrimitiveType::Line || + desc.topology == igl::PrimitiveType::LineStrip) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + IGL_D3D12_LOG_VERBOSE(" Setting PSO topology type to LINE\n"); + } else { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + IGL_D3D12_LOG_VERBOSE(" Setting PSO topology type to TRIANGLE\n"); + } + psoDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; + + // Additional required fields + psoDesc.NodeMask = 0; // Single GPU operation + psoDesc.CachedPSO.pCachedBlob = nullptr; + psoDesc.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + // Input layout. + std::vector inputElements; + std::vector semanticNames; // Keep semantic name strings alive + + if (desc.vertexInputState) { + // Convert IGL vertex input state to D3D12 input layout + auto* d3d12VertexInput = static_cast(desc.vertexInputState.get()); + const auto& vertexDesc = d3d12VertexInput->getDesc(); + + // Pre-reserve space to prevent reallocation (which would invalidate c_str() pointers) + semanticNames.reserve(vertexDesc.numAttributes); + + IGL_D3D12_LOG_VERBOSE(" Processing vertex input state: %zu attributes\n", vertexDesc.numAttributes); + for (size_t i = 0; i < vertexDesc.numAttributes; ++i) { + const auto& attr = vertexDesc.attributes[i]; + IGL_D3D12_LOG_VERBOSE(" Attribute %zu: name='%s', format=%d, offset=%zu, bufferIndex=%u\n", + i, attr.name.c_str(), static_cast(attr.format), attr.offset, attr.bufferIndex); + + // Map IGL attribute names to D3D12 HLSL semantic names + // IMPORTANT: Semantic names must NOT end with numbers - use SemanticIndex field instead + std::string semanticName; + // Case-insensitive helpers + auto toLower = [](std::string s){ for (auto& c : s) c = static_cast(tolower(c)); return s; }; + const std::string nlow = toLower(attr.name); + auto startsWith = 
[&](const char* p){ return nlow.rfind(p, 0) == 0; }; + auto contains = [&](const char* p){ return nlow.find(p) != std::string::npos; }; + + if (startsWith("pos") || startsWith("position") || contains("position")) { + semanticName = "POSITION"; + } else if (startsWith("col") || startsWith("color")) { + semanticName = "COLOR"; + } else if (startsWith("st") || startsWith("uv") || startsWith("tex") || contains("texcoord") || startsWith("offset")) { + semanticName = "TEXCOORD"; + } else if (startsWith("norm") || startsWith("normal")) { + semanticName = "NORMAL"; + } else if (startsWith("tangent")) { + semanticName = "TANGENT"; + } else { + // Fallback: POSITION for first attribute, TEXCOORD for second, COLOR otherwise + if (i == 0) semanticName = "POSITION"; + else if (i == 1) semanticName = "TEXCOORD"; + else semanticName = "COLOR"; + } + semanticNames.push_back(semanticName); + IGL_D3D12_LOG_VERBOSE(" Mapped '%s' -> '%s'\n", attr.name.c_str(), semanticName.c_str()); + + D3D12_INPUT_ELEMENT_DESC element = {}; + element.SemanticName = semanticNames.back().c_str(); + element.SemanticIndex = 0; + element.AlignedByteOffset = static_cast(attr.offset); + element.InputSlot = attr.bufferIndex; + // Check if this buffer binding uses per-instance data + // Note: inputBindings array may be sparse (bufferIndex >= numInputBindings), so check bounds with MAX + const bool isInstanceData = (attr.bufferIndex < IGL_BUFFER_BINDINGS_MAX && + vertexDesc.inputBindings[attr.bufferIndex].sampleFunction == + VertexSampleFunction::Instance); + element.InputSlotClass = isInstanceData ? D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA + : D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + element.InstanceDataStepRate = isInstanceData ? 
1 : 0; + IGL_D3D12_LOG_VERBOSE(" bufferIndex=%u, isInstance=%d, sampleFunc=%d, InputSlotClass=%d, StepRate=%u\n", + attr.bufferIndex, isInstanceData, + (int)vertexDesc.inputBindings[attr.bufferIndex].sampleFunction, + (int)element.InputSlotClass, element.InstanceDataStepRate); + + // Convert IGL vertex format to DXGI format + switch (attr.format) { + case VertexAttributeFormat::Float1: + element.Format = DXGI_FORMAT_R32_FLOAT; + break; + case VertexAttributeFormat::Float2: + element.Format = DXGI_FORMAT_R32G32_FLOAT; + break; + case VertexAttributeFormat::Float3: + element.Format = DXGI_FORMAT_R32G32B32_FLOAT; + break; + case VertexAttributeFormat::Float4: + element.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; + break; + case VertexAttributeFormat::Byte1: + element.Format = DXGI_FORMAT_R8_UINT; + break; + case VertexAttributeFormat::Byte2: + element.Format = DXGI_FORMAT_R8G8_UINT; + break; + case VertexAttributeFormat::Byte4: + element.Format = DXGI_FORMAT_R8G8B8A8_UINT; + break; + case VertexAttributeFormat::UByte4Norm: + element.Format = DXGI_FORMAT_R8G8B8A8_UNORM; + break; + case VertexAttributeFormat::HalfFloat1: + element.Format = DXGI_FORMAT_R16_FLOAT; + break; + case VertexAttributeFormat::HalfFloat2: + element.Format = DXGI_FORMAT_R16G16_FLOAT; + break; + case VertexAttributeFormat::HalfFloat3: + // D3D12 doesn't have RGB16_FLOAT, use RGBA16_FLOAT + element.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case VertexAttributeFormat::HalfFloat4: + element.Format = DXGI_FORMAT_R16G16B16A16_FLOAT; + break; + case VertexAttributeFormat::Int1: + element.Format = DXGI_FORMAT_R32_SINT; + break; + case VertexAttributeFormat::Int2: + element.Format = DXGI_FORMAT_R32G32_SINT; + break; + case VertexAttributeFormat::Int3: + element.Format = DXGI_FORMAT_R32G32B32_SINT; + break; + case VertexAttributeFormat::Int4: + element.Format = DXGI_FORMAT_R32G32B32A32_SINT; + break; + case VertexAttributeFormat::UInt1: + element.Format = DXGI_FORMAT_R32_UINT; + break; + case 
VertexAttributeFormat::UInt2: + element.Format = DXGI_FORMAT_R32G32_UINT; + break; + case VertexAttributeFormat::UInt3: + element.Format = DXGI_FORMAT_R32G32B32_UINT; + break; + case VertexAttributeFormat::UInt4: + element.Format = DXGI_FORMAT_R32G32B32A32_UINT; + break; + case VertexAttributeFormat::Int_2_10_10_10_REV: + // Use an unsigned 10:10:10:2 format and decode SNORM manually in the shader. + element.Format = DXGI_FORMAT_R10G10B10A2_UINT; + break; + default: + element.Format = DXGI_FORMAT_R32G32B32A32_FLOAT; // fallback + IGL_LOG_ERROR(" Unsupported vertex attribute format: %d (using fallback RGBA32_FLOAT)\n", static_cast(attr.format)); + break; + } + + inputElements.push_back(element); + } + } else { + // Default simple triangle layout: position (float3) + color (float4) + inputElements.resize(2); + inputElements[0] = {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, + D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; + inputElements[1] = {"COLOR", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 12, + D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA, 0}; + } + psoDesc.InputLayout = {inputElements.data(), static_cast(inputElements.size())}; + + IGL_D3D12_LOG_VERBOSE(" Final input layout: %u elements\n", static_cast(inputElements.size())); + for (size_t i = 0; i < inputElements.size(); ++i) { + IGL_D3D12_LOG_VERBOSE(" [%zu]: %s (index %u), format %d, slot %u, offset %u\n", + i, inputElements[i].SemanticName, inputElements[i].SemanticIndex, + static_cast(inputElements[i].Format), + inputElements[i].InputSlot, inputElements[i].AlignedByteOffset); + } + + // Use shader reflection to verify input signature matches input layout + IGL_D3D12_LOG_VERBOSE(" Reflecting vertex shader to verify input signature...\n"); + igl::d3d12::ComPtr vsReflection; + HRESULT hr = D3DReflect(vsBytecode.data(), vsBytecode.size(), IID_PPV_ARGS(vsReflection.GetAddressOf())); + if (SUCCEEDED(hr)) { + D3D12_SHADER_DESC shaderDesc = {}; + vsReflection->GetDesc(&shaderDesc); + IGL_D3D12_LOG_VERBOSE(" Shader 
expects %u input parameters:\n", shaderDesc.InputParameters); + for (UINT i = 0; i < shaderDesc.InputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC paramDesc = {}; + vsReflection->GetInputParameterDesc(i, ¶mDesc); + IGL_D3D12_LOG_VERBOSE(" [%u]: %s%u (semantic index %u), mask 0x%02X\n", + i, paramDesc.SemanticName, paramDesc.SemanticIndex, + paramDesc.SemanticIndex, paramDesc.Mask); + } + } else { + IGL_D3D12_LOG_VERBOSE(" Shader reflection unavailable: 0x%08X (non-critical - pipeline will still be created)\n", static_cast(hr)); + } + + // PSO cache lookup (thread-safe with double-checked locking). + const size_t psoHash = pipelineCache_.hashRenderPipelineDesc(desc); + igl::d3d12::ComPtr pipelineState; + + // First check: Lock for cache lookup + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.graphicsPSOCache_.find(psoHash); + if (psoIt != pipelineCache_.graphicsPSOCache_.end()) { + // Cache hit - reuse existing PSO + pipelineCache_.graphicsPSOCacheHits_++; + pipelineState = psoIt->second; // Assignment creates a ref-counted copy + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE HIT] Hash=0x%zx, hits=%zu, misses=%zu, hit rate=%.1f%%\n", + psoHash, + pipelineCache_.graphicsPSOCacheHits_, + pipelineCache_.graphicsPSOCacheMisses_, + 100.0 * pipelineCache_.graphicsPSOCacheHits_ / + (pipelineCache_.graphicsPSOCacheHits_ + + pipelineCache_.graphicsPSOCacheMisses_)); + IGL_D3D12_LOG_VERBOSE("Device::createRenderPipeline() SUCCESS (CACHED) - PSO=%p, RootSig=%p\n", + pipelineState.Get(), rootSignature.Get()); + Result::setOk(outResult); + // Create a copy of the root signature for the returned object + igl::d3d12::ComPtr rootSigCopy = rootSignature; + auto renderPipeline = std::make_shared(desc, std::move(pipelineState), std::move(rootSigCopy)); + + // Compute root parameter layout from shader reflection key (same as cache miss path) + UINT paramIndex = 0; + + if (rootSigKey.hasPushConstants) { + 
renderPipeline->shaderReflection_.pushConstantRootParamIndex = paramIndex++; + } + + if (!rootSigKey.usedCBVSlots.empty()) { + renderPipeline->rootParamLayout_.cbvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.cbvDescriptorCount = rootSigKey.maxCBVSlot + 1; + } + + if (!rootSigKey.usedSRVSlots.empty()) { + renderPipeline->rootParamLayout_.srvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.srvDescriptorCount = rootSigKey.maxSRVSlot + 1; + } + + if (!rootSigKey.usedSamplerSlots.empty()) { + renderPipeline->rootParamLayout_.samplerTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.samplerDescriptorCount = rootSigKey.maxSamplerSlot + 1; + } + + if (!rootSigKey.usedUAVSlots.empty()) { + renderPipeline->rootParamLayout_.uavTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.uavDescriptorCount = rootSigKey.maxUAVSlot + 1; + } + + return renderPipeline; + } + } + + // Cache miss - create new PSO outside lock (expensive operation) + IGL_D3D12_LOG_VERBOSE(" [PSO CACHE MISS] Hash=0x%zx\n", psoHash); + + IGL_D3D12_LOG_VERBOSE(" Creating pipeline state (this may take a moment)...\n"); + + // Optional: a more detailed validation pass (validateShaderBindingsAndLayout) can be + // re-enabled here if needed for diagnostics. It was previously wired to shader reflection + // and emitted verbose logs on every cache miss; for normal runs we rely on the D3D12 + // debug layer instead. + + hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + // Dump D3D12 + DXGI debug messages if available so that any invalid + // PSO configuration (bytecode/root signature/RT formats) is visible. + logInfoQueuesForDevice(device, "CreateGraphicsPipelineState"); + + char errorMsg[512]; + snprintf(errorMsg, + sizeof(errorMsg), + "Failed to create pipeline state. 
HRESULT: 0x%08X\n" + " VS size: %zu, PS size: %zu\n" + " Input elements: %u\n" + " NumRenderTargets: %u, RTV[0]: %d, DSV: %d\n" + " SampleDesc: Count=%u, Quality=%u\n" + " PrimitiveTopologyType: %d\n", + static_cast(hr), + psoDesc.VS.BytecodeLength, + psoDesc.PS.BytecodeLength, + psoDesc.InputLayout.NumElements, + psoDesc.NumRenderTargets, + static_cast(psoDesc.RTVFormats[0]), + static_cast(psoDesc.DSVFormat), + psoDesc.SampleDesc.Count, + psoDesc.SampleDesc.Quality, + static_cast(psoDesc.PrimitiveTopologyType)); + IGL_LOG_ERROR(errorMsg); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + // E-011: Set debug name on PSO for better debugging in PIX/RenderDoc + std::string psoName; + if (desc.shaderStages->getVertexModule()) { + psoName += desc.shaderStages->getVertexModule()->info().debugName; + } + if (desc.shaderStages->getFragmentModule()) { + if (!psoName.empty()) { + psoName += " + "; + } + psoName += desc.shaderStages->getFragmentModule()->info().debugName; + } + if (!psoName.empty()) { + // Convert to wide string for D3D12 SetName API + std::wstring wideName(psoName.begin(), psoName.end()); + pipelineState->SetName(wideName.c_str()); + IGL_D3D12_LOG_VERBOSE(" Set PSO debug name: %s\n", psoName.c_str()); + } + + // Second check: Lock for cache insertion with double-check. + // Another thread may have created the PSO while we were creating ours + { + std::lock_guard lock(pipelineCache_.psoCacheMutex_); + auto psoIt = pipelineCache_.graphicsPSOCache_.find(psoHash); + if (psoIt != pipelineCache_.graphicsPSOCache_.end()) { + // Another thread beat us to it - use their PSO + pipelineCache_.graphicsPSOCacheHits_++; + pipelineState = psoIt->second; + IGL_D3D12_LOG_VERBOSE(" [PSO DOUBLE-CHECK HIT] Another thread created PSO, using theirs. 
Hash=0x%zx\n", psoHash); + } else { + // We're the first to complete - cache our PSO + pipelineCache_.graphicsPSOCacheMisses_++; + pipelineCache_.graphicsPSOCache_[psoHash] = pipelineState; + IGL_D3D12_LOG_VERBOSE(" [PSO CACHED] Hash=0x%zx, hits=%zu, misses=%zu\n", + psoHash, + pipelineCache_.graphicsPSOCacheHits_, + pipelineCache_.graphicsPSOCacheMisses_); + } + } + + IGL_D3D12_LOG_VERBOSE("Device::createRenderPipeline() SUCCESS - PSO=%p, RootSig=%p (hash=0x%zx)\n", + pipelineState.Get(), rootSignature.Get(), psoHash); + + // Create the pipeline state object + auto renderPipeline = std::make_shared(desc, std::move(pipelineState), std::move(rootSignature)); + + // Compute root parameter layout from shader reflection key + // The layout order matches createRootSignatureFromKey(): + // 1. Push constants (if present) + // 2. CBV table (if shader uses CBVs) + // 3. SRV table (if shader uses SRVs) + // 4. Sampler table (if shader uses samplers) + // 5. UAV table (if shader uses UAVs) + UINT paramIndex = 0; + + if (rootSigKey.hasPushConstants) { + renderPipeline->shaderReflection_.pushConstantRootParamIndex = paramIndex++; + } + + if (!rootSigKey.usedCBVSlots.empty()) { + renderPipeline->rootParamLayout_.cbvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.cbvDescriptorCount = rootSigKey.maxCBVSlot + 1; + } + + if (!rootSigKey.usedSRVSlots.empty()) { + renderPipeline->rootParamLayout_.srvTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.srvDescriptorCount = rootSigKey.maxSRVSlot + 1; + } + + if (!rootSigKey.usedSamplerSlots.empty()) { + renderPipeline->rootParamLayout_.samplerTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.samplerDescriptorCount = rootSigKey.maxSamplerSlot + 1; + } + + if (!rootSigKey.usedUAVSlots.empty()) { + renderPipeline->rootParamLayout_.uavTableIndex = paramIndex++; + renderPipeline->rootParamLayout_.uavDescriptorCount = rootSigKey.maxUAVSlot + 1; + } + + Result::setOk(outResult); + return renderPipeline; +} + 
+// D3D12-specific: Create PSO variant with substituted render target formats +// This is called by RenderPipelineState::getPipelineState() for Vulkan-style dynamic PSO selection +igl::d3d12::ComPtr Device::createPipelineStateVariant( + const RenderPipelineDesc& desc, + ID3D12RootSignature* rootSignature, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createPipelineStateVariant() - Creating PSO variant for framebuffer formats\n"); + + auto* device = ctx_->getDevice(); + if (!device || !rootSignature) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Invalid device or root signature"); + return nullptr; + } + + if (!desc.shaderStages) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader stages required"); + return nullptr; + } + + // Get shader bytecode + auto* vertexModule = static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = static_cast(desc.shaderStages->getFragmentModule().get()); + + if (!vertexModule || !fragmentModule) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Vertex and fragment shaders required"); + return nullptr; + } + + const auto& vsBytecode = vertexModule->getBytecode(); + const auto& psBytecode = fragmentModule->getBytecode(); + + // Build D3D12_GRAPHICS_PIPELINE_STATE_DESC from RenderPipelineDesc + // This mirrors the logic in createRenderPipeline() but without caching + D3D12_GRAPHICS_PIPELINE_STATE_DESC psoDesc = {}; + psoDesc.pRootSignature = rootSignature; + + // Shader bytecode + psoDesc.VS = {vsBytecode.data(), vsBytecode.size()}; + psoDesc.PS = {psBytecode.data(), psBytecode.size()}; + psoDesc.DS = {nullptr, 0}; + psoDesc.HS = {nullptr, 0}; + psoDesc.GS = {nullptr, 0}; + + // Rasterizer state + psoDesc.RasterizerState.FillMode = (desc.polygonFillMode == PolygonFillMode::Line) + ? 
D3D12_FILL_MODE_WIREFRAME : D3D12_FILL_MODE_SOLID; + + switch (desc.cullMode) { + case CullMode::Back: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_BACK; + break; + case CullMode::Front: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_FRONT; + break; + case CullMode::Disabled: + default: + psoDesc.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + break; + } + + psoDesc.RasterizerState.FrontCounterClockwise = + (desc.frontFaceWinding == WindingMode::CounterClockwise) ? TRUE : FALSE; + psoDesc.RasterizerState.DepthBias = 0; + psoDesc.RasterizerState.DepthBiasClamp = 0.0f; + psoDesc.RasterizerState.SlopeScaledDepthBias = 0.0f; + psoDesc.RasterizerState.DepthClipEnable = TRUE; + psoDesc.RasterizerState.MultisampleEnable = (desc.sampleCount > 1) ? TRUE : FALSE; + psoDesc.RasterizerState.AntialiasedLineEnable = FALSE; + psoDesc.RasterizerState.ForcedSampleCount = 0; + psoDesc.RasterizerState.ConservativeRaster = D3D12_CONSERVATIVE_RASTERIZATION_MODE_OFF; + + // Blend state + psoDesc.BlendState.AlphaToCoverageEnable = FALSE; + const size_t numColorAttachments = desc.targetDesc.colorAttachments.size(); + psoDesc.BlendState.IndependentBlendEnable = numColorAttachments > 1 ? 
TRUE : FALSE; + + auto toD3D12Blend = [](BlendFactor f) { + switch (f) { + case BlendFactor::Zero: return D3D12_BLEND_ZERO; + case BlendFactor::One: return D3D12_BLEND_ONE; + case BlendFactor::SrcColor: return D3D12_BLEND_SRC_COLOR; + case BlendFactor::OneMinusSrcColor: return D3D12_BLEND_INV_SRC_COLOR; + case BlendFactor::SrcAlpha: return D3D12_BLEND_SRC_ALPHA; + case BlendFactor::OneMinusSrcAlpha: return D3D12_BLEND_INV_SRC_ALPHA; + case BlendFactor::DstColor: return D3D12_BLEND_DEST_COLOR; + case BlendFactor::OneMinusDstColor: return D3D12_BLEND_INV_DEST_COLOR; + case BlendFactor::DstAlpha: return D3D12_BLEND_DEST_ALPHA; + case BlendFactor::OneMinusDstAlpha: return D3D12_BLEND_INV_DEST_ALPHA; + case BlendFactor::SrcAlphaSaturated: return D3D12_BLEND_SRC_ALPHA_SAT; + case BlendFactor::BlendColor: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendColor: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::BlendAlpha: return D3D12_BLEND_BLEND_FACTOR; + case BlendFactor::OneMinusBlendAlpha: return D3D12_BLEND_INV_BLEND_FACTOR; + case BlendFactor::Src1Color: return D3D12_BLEND_SRC1_COLOR; + case BlendFactor::OneMinusSrc1Color: return D3D12_BLEND_INV_SRC1_COLOR; + case BlendFactor::Src1Alpha: return D3D12_BLEND_SRC1_ALPHA; + case BlendFactor::OneMinusSrc1Alpha: return D3D12_BLEND_INV_SRC1_ALPHA; + default: return D3D12_BLEND_ONE; + } + }; + + auto toD3D12BlendOp = [](BlendOp op) { + switch (op) { + case BlendOp::Add: return D3D12_BLEND_OP_ADD; + case BlendOp::Subtract: return D3D12_BLEND_OP_SUBTRACT; + case BlendOp::ReverseSubtract: return D3D12_BLEND_OP_REV_SUBTRACT; + case BlendOp::Min: return D3D12_BLEND_OP_MIN; + case BlendOp::Max: return D3D12_BLEND_OP_MAX; + default: return D3D12_BLEND_OP_ADD; + } + }; + + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + if (i < desc.targetDesc.colorAttachments.size()) { + const auto& att = desc.targetDesc.colorAttachments[i]; + psoDesc.BlendState.RenderTarget[i].BlendEnable = 
att.blendEnabled ? TRUE : FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = toD3D12Blend(att.srcRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlend = toD3D12Blend(att.dstRGBBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOp = toD3D12BlendOp(att.rgbBlendOp); + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = toD3D12Blend(att.srcAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = toD3D12Blend(att.dstAlphaBlendFactor); + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = toD3D12BlendOp(att.alphaBlendOp); + + UINT8 writeMask = 0; + if (att.colorWriteMask & igl::kColorWriteBitsRed) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_RED; + } + if (att.colorWriteMask & igl::kColorWriteBitsGreen) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_GREEN; + } + if (att.colorWriteMask & igl::kColorWriteBitsBlue) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_BLUE; + } + if (att.colorWriteMask & igl::kColorWriteBitsAlpha) { + writeMask |= D3D12_COLOR_WRITE_ENABLE_ALPHA; + } + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = writeMask; + } else { + psoDesc.BlendState.RenderTarget[i].BlendEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].SrcBlend = D3D12_BLEND_ONE; + psoDesc.BlendState.RenderTarget[i].DestBlend = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOp = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].SrcBlendAlpha = D3D12_BLEND_ONE; + psoDesc.BlendState.RenderTarget[i].DestBlendAlpha = D3D12_BLEND_ZERO; + psoDesc.BlendState.RenderTarget[i].BlendOpAlpha = D3D12_BLEND_OP_ADD; + psoDesc.BlendState.RenderTarget[i].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + } + psoDesc.BlendState.RenderTarget[i].LogicOpEnable = FALSE; + psoDesc.BlendState.RenderTarget[i].LogicOp = D3D12_LOGIC_OP_NOOP; + } + + // Depth-stencil state + const bool hasDepth = (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid); + const bool hasStencil = (desc.targetDesc.stencilAttachmentFormat != TextureFormat::Invalid); + + 
if (hasDepth) { + psoDesc.DepthStencilState.DepthEnable = TRUE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ALL; + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_LESS_EQUAL; + } else { + psoDesc.DepthStencilState.DepthEnable = FALSE; + psoDesc.DepthStencilState.DepthWriteMask = D3D12_DEPTH_WRITE_MASK_ZERO; + psoDesc.DepthStencilState.DepthFunc = D3D12_COMPARISON_FUNC_ALWAYS; + } + + if (hasStencil) { + psoDesc.DepthStencilState.StencilEnable = FALSE; + psoDesc.DepthStencilState.StencilReadMask = D3D12_DEFAULT_STENCIL_READ_MASK; + psoDesc.DepthStencilState.StencilWriteMask = D3D12_DEFAULT_STENCIL_WRITE_MASK; + psoDesc.DepthStencilState.FrontFace.StencilFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilDepthFailOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilPassOp = D3D12_STENCIL_OP_KEEP; + psoDesc.DepthStencilState.FrontFace.StencilFunc = D3D12_COMPARISON_FUNC_ALWAYS; + psoDesc.DepthStencilState.BackFace = psoDesc.DepthStencilState.FrontFace; + } else { + psoDesc.DepthStencilState.StencilEnable = FALSE; + } + + // Render target formats - use the modified formats from desc + if (!desc.targetDesc.colorAttachments.empty()) { + const UINT n = static_cast(std::min(desc.targetDesc.colorAttachments.size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + psoDesc.NumRenderTargets = n; + for (UINT i = 0; i < n; ++i) { + const auto textureFormat = desc.targetDesc.colorAttachments[i].textureFormat; + psoDesc.RTVFormats[i] = textureFormatToDXGIFormat(textureFormat); + IGL_D3D12_LOG_VERBOSE(" PSO Variant RTVFormats[%u] = %d (IGL format %d)\n", i, psoDesc.RTVFormats[i], textureFormat); + } + } else { + psoDesc.NumRenderTargets = 0; + for (UINT i = 0; i < D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; ++i) { + psoDesc.RTVFormats[i] = DXGI_FORMAT_UNKNOWN; + } + } + + if (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid) { + psoDesc.DSVFormat = 
textureFormatToDXGIFormat(desc.targetDesc.depthAttachmentFormat); + } else { + psoDesc.DSVFormat = DXGI_FORMAT_UNKNOWN; + } + + // Sample settings + psoDesc.SampleMask = UINT_MAX; + psoDesc.SampleDesc.Count = 1; + psoDesc.SampleDesc.Quality = 0; + + // Primitive topology + if (desc.topology == igl::PrimitiveType::Point) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_POINT; + } else if (desc.topology == igl::PrimitiveType::Line || + desc.topology == igl::PrimitiveType::LineStrip) { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_LINE; + } else { + psoDesc.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + } + psoDesc.IBStripCutValue = D3D12_INDEX_BUFFER_STRIP_CUT_VALUE_DISABLED; + + psoDesc.NodeMask = 0; + psoDesc.CachedPSO.pCachedBlob = nullptr; + psoDesc.CachedPSO.CachedBlobSizeInBytes = 0; + psoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE; + + // Input layout + std::vector inputElements; + std::vector semanticNames; + + if (desc.vertexInputState) { + auto* d3d12VertexInput = static_cast(desc.vertexInputState.get()); + const auto& vertexDesc = d3d12VertexInput->getDesc(); + semanticNames.reserve(vertexDesc.numAttributes); + + for (size_t i = 0; i < vertexDesc.numAttributes; ++i) { + const auto& attr = vertexDesc.attributes[i]; + std::string semanticName; + auto toLower = [](std::string s){ for (auto& c : s) c = static_cast(tolower(c)); return s; }; + const std::string nlow = toLower(attr.name); + auto startsWith = [&](const char* p){ return nlow.rfind(p, 0) == 0; }; + auto contains = [&](const char* p){ return nlow.find(p) != std::string::npos; }; + + if (startsWith("pos") || startsWith("position") || contains("position")) { + semanticName = "POSITION"; + } else if (startsWith("col") || startsWith("color")) { + semanticName = "COLOR"; + } else if (startsWith("st") || startsWith("uv") || startsWith("tex") || contains("texcoord") || startsWith("offset")) { + semanticName = "TEXCOORD"; + } else if (startsWith("norm") 
|| startsWith("normal")) { + semanticName = "NORMAL"; + } else if (startsWith("tangent")) { + semanticName = "TANGENT"; + } else { + if (i == 0) semanticName = "POSITION"; + else if (i == 1) semanticName = "TEXCOORD"; + else semanticName = "COLOR"; + } + semanticNames.push_back(semanticName); + + D3D12_INPUT_ELEMENT_DESC element = {}; + element.SemanticName = semanticNames.back().c_str(); + element.SemanticIndex = 0; + element.AlignedByteOffset = static_cast(attr.offset); + element.InputSlot = attr.bufferIndex; + + bool isPerInstance = false; + if (attr.bufferIndex < vertexDesc.numInputBindings) { + isPerInstance = (vertexDesc.inputBindings[attr.bufferIndex].sampleFunction == igl::VertexSampleFunction::Instance); + } + element.InputSlotClass = isPerInstance ? D3D12_INPUT_CLASSIFICATION_PER_INSTANCE_DATA : D3D12_INPUT_CLASSIFICATION_PER_VERTEX_DATA; + element.InstanceDataStepRate = isPerInstance ? 1 : 0; + + auto toD3D12Format = [](VertexAttributeFormat fmt) -> DXGI_FORMAT { + switch (fmt) { + case VertexAttributeFormat::Float1: return DXGI_FORMAT_R32_FLOAT; + case VertexAttributeFormat::Float2: return DXGI_FORMAT_R32G32_FLOAT; + case VertexAttributeFormat::Float3: return DXGI_FORMAT_R32G32B32_FLOAT; + case VertexAttributeFormat::Float4: return DXGI_FORMAT_R32G32B32A32_FLOAT; + case VertexAttributeFormat::Byte1: return DXGI_FORMAT_R8_SINT; + case VertexAttributeFormat::Byte2: return DXGI_FORMAT_R8G8_SINT; + case VertexAttributeFormat::Byte4: return DXGI_FORMAT_R8G8B8A8_SINT; + case VertexAttributeFormat::UByte4Norm: return DXGI_FORMAT_R8G8B8A8_UNORM; + default: return DXGI_FORMAT_UNKNOWN; + } + }; + element.Format = toD3D12Format(attr.format); + inputElements.push_back(element); + } + } + psoDesc.InputLayout = {inputElements.data(), static_cast(inputElements.size())}; + + // Create the pipeline state + igl::d3d12::ComPtr pipelineState; + HRESULT hr = device->CreateGraphicsPipelineState(&psoDesc, IID_PPV_ARGS(pipelineState.GetAddressOf())); + if (FAILED(hr)) { + 
logInfoQueuesForDevice(device, "CreateGraphicsPipelineState (variant)"); + char errorMsg[256]; + snprintf(errorMsg, sizeof(errorMsg), + "Failed to create PSO variant. HRESULT: 0x%08X, RTV[0]: %d, DSV: %d", + static_cast(hr), + static_cast(psoDesc.RTVFormats[0]), + static_cast(psoDesc.DSVFormat)); + IGL_LOG_ERROR(errorMsg); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Device::createPipelineStateVariant() SUCCESS - PSO=%p\n", pipelineState.Get()); + Result::setOk(outResult); + return pipelineState; +} + + // Shader library and modules. +std::unique_ptr Device::createShaderLibrary(const ShaderLibraryDesc& desc, + Result* IGL_NULLABLE + outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createShaderLibrary() - moduleInfo count=%zu, debugName='%s'\n", + desc.moduleInfo.size(), desc.debugName.c_str()); + + if (desc.moduleInfo.empty()) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "ShaderLibrary requires at least one module"); + return nullptr; + } + + if (!desc.input.isValid()) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Invalid shader library input"); + return nullptr; + } + + std::vector> modules; + modules.reserve(desc.moduleInfo.size()); + + if (desc.input.type == ShaderInputType::Binary) { + // Binary input: share the same bytecode across all modules (Metal-style) + IGL_D3D12_LOG_VERBOSE(" Using binary input (%zu bytes) for all modules\n", desc.input.length); + std::vector bytecode(desc.input.length); + std::memcpy(bytecode.data(), desc.input.data, desc.input.length); + + for (const auto& info : desc.moduleInfo) { + // Create a copy of the bytecode for each module + std::vector moduleBytecode = bytecode; + modules.push_back(std::make_shared(info, std::move(moduleBytecode))); + } + } else if (desc.input.type == ShaderInputType::String) { + // String input: compile each module separately with its own entry point + if (!desc.input.source || 
!*desc.input.source) { + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader library source is empty"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" Compiling %zu modules from string input\n", desc.moduleInfo.size()); + + for (const auto& info : desc.moduleInfo) { + // Create a ShaderModuleDesc for this specific module + ShaderModuleDesc moduleDesc; + moduleDesc.info = info; + moduleDesc.input.type = ShaderInputType::String; + moduleDesc.input.source = desc.input.source; + moduleDesc.input.options = desc.input.options; + moduleDesc.debugName = desc.debugName + "_" + info.entryPoint; + + Result moduleResult; + auto module = createShaderModule(moduleDesc, &moduleResult); + if (!moduleResult.isOk()) { + IGL_LOG_ERROR(" Failed to compile module '%s': %s\n", + info.entryPoint.c_str(), moduleResult.message.c_str()); + Result::setResult(outResult, std::move(moduleResult)); + return nullptr; + } + modules.push_back(std::move(module)); + } + } else { + Result::setResult(outResult, Result::Code::Unsupported, "Unsupported shader library input type"); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE("Device::createShaderLibrary() SUCCESS - created %zu modules\n", modules.size()); + Result::setOk(outResult); + return std::make_unique(std::move(modules)); +} + +// Helper function: Compile HLSL shader using legacy FXC compiler (Shader Model 5.1) +// This is a fallback when DXC is unavailable or fails +namespace { +Result compileShaderFXC( + const char* source, + size_t sourceLength, + const char* entryPoint, + const char* target, + const char* debugName, + UINT compileFlags, + std::vector& outBytecode, + std::string& outErrors) { + + IGL_D3D12_LOG_VERBOSE("FXC: Compiling shader '%s' with target '%s' (%zu bytes source)\n", + debugName ? 
debugName : "unnamed", + target, + sourceLength); + + igl::d3d12::ComPtr bytecode; + igl::d3d12::ComPtr errors; + + // D3DCompile is the legacy FXC compiler API + // It's always available on Windows 10+ (via d3dcompiler_47.dll) + HRESULT hr = D3DCompile( + source, + sourceLength, + debugName, // Source name (for error messages) + nullptr, // Defines + D3D_COMPILE_STANDARD_FILE_INCLUDE, + entryPoint, + target, + compileFlags, + 0, // Effect flags (not used for shaders) + bytecode.GetAddressOf(), + errors.GetAddressOf() + ); + + if (FAILED(hr)) { + std::string errorMsg = "FXC compilation failed"; + if (errors.Get() && errors->GetBufferSize() > 0) { + outErrors = std::string( + static_cast(errors->GetBufferPointer()), + errors->GetBufferSize() + ); + errorMsg += ": " + outErrors; + IGL_LOG_ERROR("FXC: %s\n", outErrors.c_str()); + } + return Result(Result::Code::RuntimeError, errorMsg); + } + + // Log warnings if any + if (errors.Get() && errors->GetBufferSize() > 0) { + outErrors = std::string( + static_cast(errors->GetBufferPointer()), + errors->GetBufferSize() + ); + IGL_D3D12_LOG_VERBOSE("FXC: Compilation warnings:\n%s\n", outErrors.c_str()); + } + + // Copy bytecode to output + const uint8_t* data = static_cast(bytecode->GetBufferPointer()); + size_t size = bytecode->GetBufferSize(); + outBytecode.assign(data, data + size); + + IGL_D3D12_LOG_VERBOSE("FXC: Compilation successful (%zu bytes bytecode)\n", size); + + return Result(); +} +} // anonymous namespace + +// Note: getShaderTarget() helper moved to Common.h for shared use. 
+ +std::shared_ptr Device::createShaderModule(const ShaderModuleDesc& desc, + Result* IGL_NULLABLE outResult) const { + IGL_D3D12_LOG_VERBOSE("Device::createShaderModule() - stage=%d, entryPoint='%s', debugName='%s'\n", + static_cast(desc.info.stage), desc.info.entryPoint.c_str(), desc.debugName.c_str()); + + if (!desc.input.isValid()) { + IGL_LOG_ERROR(" Invalid shader input!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Invalid shader input"); + return nullptr; + } + + std::vector bytecode; + + if (desc.input.type == ShaderInputType::Binary) { + // Binary input - copy bytecode directly + IGL_D3D12_LOG_VERBOSE(" Using binary input (%zu bytes)\n", desc.input.length); + bytecode.resize(desc.input.length); + std::memcpy(bytecode.data(), desc.input.data, desc.input.length); + } else if (desc.input.type == ShaderInputType::String) { + // String input - compile HLSL at runtime using DXC (DirectX Shader Compiler) + // For string input, use desc.input.source (not data) and calculate length + if (!desc.input.source) { + IGL_LOG_ERROR(" Shader source is null!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Shader source is null"); + return nullptr; + } + + const size_t sourceLength = strlen(desc.input.source); + IGL_D3D12_LOG_VERBOSE(" Compiling HLSL from string (%zu bytes) using DXC...\n", sourceLength); + + // Initialize DXC compiler thread-safely using std::call_once. 
+ static DXCCompiler dxcCompiler; + static std::once_flag dxcInitFlag; + static bool dxcAvailable = false; + + std::call_once(dxcInitFlag, []() { + Result initResult = dxcCompiler.initialize(); + dxcAvailable = initResult.isOk(); + + if (dxcAvailable) { + IGL_D3D12_LOG_VERBOSE(" DXC compiler initialized successfully (Shader Model 6.0+ support)\n"); + } else { + IGL_D3D12_LOG_VERBOSE(" DXC compiler initialization failed: %s\n", initResult.message.c_str()); + IGL_D3D12_LOG_VERBOSE(" Falling back to FXC (Shader Model 5.1)\n"); + } + }); + + // Determine shader target based on stage + // Use SM 6.0 for DXC, SM 5.1 for FXC fallback + const char* targetDXC = nullptr; + const char* targetFXC = nullptr; + switch (desc.info.stage) { + case ShaderStage::Vertex: + targetDXC = "vs_6_0"; + targetFXC = "vs_5_1"; + break; + case ShaderStage::Fragment: + targetDXC = "ps_6_0"; + targetFXC = "ps_5_1"; + break; + case ShaderStage::Compute: + targetDXC = "cs_6_0"; + targetFXC = "cs_5_1"; + break; + default: + IGL_LOG_ERROR(" Unsupported shader stage!\n"); + Result::setResult(outResult, Result::Code::ArgumentInvalid, "Unsupported shader stage"); + return nullptr; + } + + // Compile flags (DXC uses D3DCOMPILE_* flags) + UINT compileFlags = D3DCOMPILE_ENABLE_STRICTNESS; + + // Enable shader debugging features + #ifdef _DEBUG + compileFlags |= D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION; + IGL_D3D12_LOG_VERBOSE(" DEBUG BUILD: Enabling shader debug info and disabling optimizations\n"); + #else + // In release builds, still enable debug info for PIX captures unless explicitly disabled + const char* disableDebugInfo = std::getenv("IGL_D3D12_DISABLE_SHADER_DEBUG"); + if (!disableDebugInfo || std::string(disableDebugInfo) != "1") { + compileFlags |= D3DCOMPILE_DEBUG; + IGL_D3D12_LOG_VERBOSE(" RELEASE BUILD: Enabling shader debug info (disable with IGL_D3D12_DISABLE_SHADER_DEBUG=1)\n"); + } + #endif + + // Optional: Enable warnings as errors for stricter validation + const char* 
warningsAsErrors = std::getenv("IGL_D3D12_SHADER_WARNINGS_AS_ERRORS"); + if (warningsAsErrors && std::string(warningsAsErrors) == "1") { + compileFlags |= D3DCOMPILE_WARNINGS_ARE_ERRORS; + IGL_D3D12_LOG_VERBOSE(" Treating shader warnings as errors\n"); + } + + // Try DXC first if available, fallback to FXC if DXC fails or unavailable + std::string errors; + Result compileResult; + bool compiledWithDXC = false; + + if (dxcAvailable) { + // Try DXC compilation (Shader Model 6.0) + IGL_D3D12_LOG_VERBOSE(" Attempting DXC compilation (Shader Model 6.0)...\n"); + compileResult = dxcCompiler.compile( + desc.input.source, + sourceLength, + desc.info.entryPoint.c_str(), + targetDXC, + desc.debugName.c_str(), + compileFlags, + bytecode, + errors + ); + + if (compileResult.isOk()) { + IGL_D3D12_LOG_VERBOSE(" DXC shader compiled successfully (%zu bytes DXIL bytecode)\n", bytecode.size()); + compiledWithDXC = true; + } else { + IGL_D3D12_LOG_VERBOSE(" DXC compilation failed: %s\n", compileResult.message.c_str()); + if (!errors.empty()) { + IGL_D3D12_LOG_VERBOSE(" DXC errors: %s\n", errors.c_str()); + } + IGL_D3D12_LOG_VERBOSE(" Falling back to FXC (Shader Model 5.1)...\n"); + } + } + + // Use FXC if DXC is unavailable or failed + if (!compiledWithDXC) { + errors.clear(); + compileResult = compileShaderFXC( + desc.input.source, + sourceLength, + desc.info.entryPoint.c_str(), + targetFXC, + desc.debugName.c_str(), + compileFlags, + bytecode, + errors + ); + + if (!compileResult.isOk()) { + // Both DXC and FXC failed - report error + std::string errorMsg; + const char* stageStr = ""; + switch (desc.info.stage) { + case ShaderStage::Vertex: stageStr = "VERTEX"; break; + case ShaderStage::Fragment: stageStr = "FRAGMENT/PIXEL"; break; + case ShaderStage::Compute: stageStr = "COMPUTE"; break; + default: stageStr = "UNKNOWN"; break; + } + + errorMsg = "Shader compilation FAILED (both DXC and FXC)\n"; + errorMsg += " Stage: " + std::string(stageStr) + "\n"; + errorMsg += " Entry Point: 
" + desc.info.entryPoint + "\n"; + errorMsg += " Target (FXC): " + std::string(targetFXC) + "\n"; + errorMsg += " Debug Name: " + desc.debugName + "\n"; + + if (!errors.empty()) { + errorMsg += "\n=== FXC COMPILER ERRORS ===\n"; + errorMsg += errors; + errorMsg += "\n===========================\n"; + } else { + errorMsg += " Error: " + compileResult.message + "\n"; + } + + IGL_LOG_ERROR("%s", errorMsg.c_str()); + Result::setResult(outResult, Result::Code::RuntimeError, errorMsg.c_str()); + return nullptr; + } + + IGL_D3D12_LOG_VERBOSE(" FXC shader compiled successfully (%zu bytes bytecode)\n", bytecode.size()); + } + } else { + Result::setResult(outResult, Result::Code::Unsupported, "Unsupported shader input type"); + return nullptr; + } + + // Create shader module with bytecode + auto module = std::make_shared(desc.info, std::move(bytecode)); + + // Create shader reflection from DXIL bytecode. + // This allows runtime queries of shader resources, bindings, and constant buffers. + IGL_D3D12_LOG_VERBOSE(" Attempting to create shader reflection (bytecode size=%zu)...\n", + module->getBytecode().size()); + if (!module->getBytecode().empty()) { + // Create IDxcUtils for reflection + igl::d3d12::ComPtr dxcUtils; + IGL_D3D12_LOG_VERBOSE(" Creating IDxcUtils for reflection...\n"); + HRESULT hr = DxcCreateInstance(CLSID_DxcUtils, IID_PPV_ARGS(dxcUtils.GetAddressOf())); + IGL_D3D12_LOG_VERBOSE(" DxcCreateInstance result: 0x%08X\n", hr); + + if (SUCCEEDED(hr)) { + // Prepare buffer for reflection + DxcBuffer reflectionBuffer = {}; + reflectionBuffer.Ptr = module->getBytecode().data(); + reflectionBuffer.Size = module->getBytecode().size(); + reflectionBuffer.Encoding = 0; + + // Create reflection interface + igl::d3d12::ComPtr reflection; + hr = dxcUtils->CreateReflection(&reflectionBuffer, IID_PPV_ARGS(reflection.GetAddressOf())); + + if (SUCCEEDED(hr)) { + module->setReflection(reflection); + IGL_D3D12_LOG_VERBOSE(" Shader reflection created successfully (DXIL 
reflection)\n"); + + // Emit a concise reflection dump by default to help diagnose + // resource-binding issues. This is intentionally always enabled + // (in debug builds) so that D3D12 binding problems are visible + // without extra flags. + D3D12_SHADER_DESC shaderDesc = {}; + if (SUCCEEDED(reflection->GetDesc(&shaderDesc))) { + const char* stageStr = "UNKNOWN"; + switch (desc.info.stage) { + case ShaderStage::Vertex: + stageStr = "VERTEX"; + break; + case ShaderStage::Fragment: + stageStr = "FRAGMENT/PIXEL"; + break; + case ShaderStage::Compute: + stageStr = "COMPUTE"; + break; + default: + break; + } + + IGL_LOG_INFO("\n=== SHADER REFLECTION (%s - %s) ===\n", + stageStr, + desc.info.entryPoint.c_str()); + IGL_LOG_INFO(" Bound Resources: %u\n", shaderDesc.BoundResources); + for (UINT i = 0; i < shaderDesc.BoundResources; ++i) { + D3D12_SHADER_INPUT_BIND_DESC bindDesc = {}; + if (SUCCEEDED(reflection->GetResourceBindingDesc(i, &bindDesc))) { + const char* typeStr = "Unknown"; + const char* registerPrefix = "?"; + switch (bindDesc.Type) { + case D3D_SIT_CBUFFER: + typeStr = "ConstantBuffer"; + registerPrefix = "b"; + break; + case D3D_SIT_TBUFFER: + typeStr = "TextureBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_TEXTURE: + typeStr = "Texture"; + registerPrefix = "t"; + break; + case D3D_SIT_SAMPLER: + typeStr = "Sampler"; + registerPrefix = "s"; + break; + case D3D_SIT_UAV_RWTYPED: + typeStr = "RWTexture"; + registerPrefix = "u"; + break; + case D3D_SIT_STRUCTURED: + typeStr = "StructuredBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_UAV_RWSTRUCTURED: + typeStr = "RWStructuredBuffer"; + registerPrefix = "u"; + break; + case D3D_SIT_BYTEADDRESS: + typeStr = "ByteAddressBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_UAV_RWBYTEADDRESS: + typeStr = "RWByteAddressBuffer"; + registerPrefix = "u"; + break; + default: + break; + } + + IGL_LOG_INFO(" [%u] %s '%s' at %s%u (space %u)\n", + i, + typeStr, + bindDesc.Name, + registerPrefix, + 
bindDesc.BindPoint, + bindDesc.Space); + } + } + + IGL_LOG_INFO(" Constant Buffers: %u\n", shaderDesc.ConstantBuffers); + for (UINT i = 0; i < shaderDesc.ConstantBuffers; ++i) { + ID3D12ShaderReflectionConstantBuffer* cb = + reflection->GetConstantBufferByIndex(i); + D3D12_SHADER_BUFFER_DESC cbDesc = {}; + if (cb && SUCCEEDED(cb->GetDesc(&cbDesc))) { + IGL_LOG_INFO(" [%u] %s: %u bytes, %u variables\n", + i, + cbDesc.Name, + cbDesc.Size, + cbDesc.Variables); + } + } + + // Log input and output signature parameters to help diagnose + // pipeline state creation issues (semantic/mask mismatches). + IGL_LOG_INFO(" Input Parameters: %u\n", shaderDesc.InputParameters); + for (UINT i = 0; i < shaderDesc.InputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC p = {}; + if (SUCCEEDED(reflection->GetInputParameterDesc(i, &p))) { + IGL_LOG_INFO(" [In %u] %s%u: reg=%u, mask=0x%02X\n", + i, + p.SemanticName ? p.SemanticName : "", + p.SemanticIndex, + p.Register, + p.Mask); + } + } + + IGL_LOG_INFO(" Output Parameters: %u\n", shaderDesc.OutputParameters); + for (UINT i = 0; i < shaderDesc.OutputParameters; ++i) { + D3D12_SIGNATURE_PARAMETER_DESC p = {}; + if (SUCCEEDED(reflection->GetOutputParameterDesc(i, &p))) { + IGL_LOG_INFO(" [Out %u] %s%u: reg=%u, mask=0x%02X\n", + i, + p.SemanticName ? 
p.SemanticName : "", + p.SemanticIndex, + p.Register, + p.Mask); + } + } + IGL_LOG_INFO("================================\n\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE(" Failed to create DXIL reflection: 0x%08X, trying D3DReflect for DXBC bytecode...\n", hr); + + // Fallback to D3DReflect for DXBC bytecode (FXC-compiled shaders) + hr = D3DReflect(module->getBytecode().data(), module->getBytecode().size(), + IID_PPV_ARGS(reflection.GetAddressOf())); + + if (SUCCEEDED(hr)) { + module->setReflection(reflection); + IGL_D3D12_LOG_VERBOSE(" Shader reflection created successfully (DXBC reflection)\n"); + + // Emit reflection dump for DXBC shaders as well + D3D12_SHADER_DESC shaderDesc = {}; + if (SUCCEEDED(reflection->GetDesc(&shaderDesc))) { + const char* stageStr = "UNKNOWN"; + switch (desc.info.stage) { + case ShaderStage::Vertex: + stageStr = "VERTEX"; + break; + case ShaderStage::Fragment: + stageStr = "FRAGMENT/PIXEL"; + break; + case ShaderStage::Compute: + stageStr = "COMPUTE"; + break; + default: + break; + } + + IGL_LOG_INFO("\n=== SHADER REFLECTION (%s - %s) [DXBC] ===\n", + stageStr, + desc.info.entryPoint.c_str()); + IGL_LOG_INFO(" Bound Resources: %u\n", shaderDesc.BoundResources); + for (UINT i = 0; i < shaderDesc.BoundResources; ++i) { + D3D12_SHADER_INPUT_BIND_DESC bindDesc = {}; + if (SUCCEEDED(reflection->GetResourceBindingDesc(i, &bindDesc))) { + const char* typeStr = "Unknown"; + const char* registerPrefix = "?"; + switch (bindDesc.Type) { + case D3D_SIT_CBUFFER: + typeStr = "ConstantBuffer"; + registerPrefix = "b"; + break; + case D3D_SIT_TBUFFER: + typeStr = "TextureBuffer"; + registerPrefix = "t"; + break; + case D3D_SIT_TEXTURE: + typeStr = "Texture"; + registerPrefix = "t"; + break; + case D3D_SIT_SAMPLER: + typeStr = "Sampler"; + registerPrefix = "s"; + break; + case D3D_SIT_UAV_RWTYPED: + typeStr = "RWTexture/UAV"; + registerPrefix = "u"; + break; + default: + break; + } + + IGL_LOG_INFO(" [%u] %s '%s' at %s%u (space %u)\n", + i, + typeStr, + 
bindDesc.Name, + registerPrefix, + bindDesc.BindPoint, + bindDesc.Space); + } + } + IGL_LOG_INFO("================================\n\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE(" Failed to create reflection with both DXC and D3DReflect: 0x%08X (non-fatal)\n", hr); + } + } + } else { + IGL_D3D12_LOG_VERBOSE(" Failed to create DXC utils for reflection: 0x%08X (non-fatal)\n", hr); + } + } + + Result::setOk(outResult); + return module; +} + +// Framebuffer +std::shared_ptr Device::createFramebuffer(const FramebufferDesc& desc, + Result* IGL_NULLABLE outResult) { + Result::setOk(outResult); + return std::make_shared(desc); +} + +// Capabilities +const IPlatformDevice& Device::getPlatformDevice() const noexcept { + return *platformDevice_; +} + +bool Device::hasFeature(DeviceFeatures feature) const { + IGL_D3D12_LOG_VERBOSE("[D3D12] hasFeature query: %d\n", static_cast(feature)); + switch (feature) { + // Expected true in tests (non-OpenGL branch) + case DeviceFeatures::CopyBuffer: + case DeviceFeatures::DrawInstanced: + case DeviceFeatures::DrawFirstIndexFirstVertex: // D3D12 DrawIndexedInstanced supports first index/vertex + case DeviceFeatures::SRGB: + case DeviceFeatures::SRGBSwapchain: + case DeviceFeatures::UniformBlocks: + case DeviceFeatures::StandardDerivative: // ddx/ddy available in HLSL + case DeviceFeatures::TextureFloat: + case DeviceFeatures::TextureHalfFloat: + case DeviceFeatures::ReadWriteFramebuffer: + case DeviceFeatures::TextureNotPot: + case DeviceFeatures::ShaderTextureLod: + case DeviceFeatures::ExplicitBinding: + case DeviceFeatures::MapBufferRange: // UPLOAD/READBACK buffers support mapping + case DeviceFeatures::ShaderLibrary: // Support shader libraries in D3D12 + case DeviceFeatures::Texture3D: // D3D12 supports 3D textures (DIMENSION_TEXTURE3D). + case DeviceFeatures::TexturePartialMipChain: // D3D12 supports partial mip chains via custom SRVs. + case DeviceFeatures::TextureViews: // D3D12 supports createTextureView() via shared resources. 
+ return true; + case DeviceFeatures::MultipleRenderTargets: + return true; // D3D12 supports up to 8 simultaneous render targets. + case DeviceFeatures::Compute: + return true; // Compute shaders now supported with compute pipeline and dispatch + case DeviceFeatures::Texture2DArray: + IGL_D3D12_LOG_VERBOSE("[D3D12] hasFeature(Texture2DArray) returning TRUE\n"); + return true; // D3D12 supports 2D texture arrays via DepthOrArraySize in D3D12_RESOURCE_DESC + case DeviceFeatures::PushConstants: + return true; // Implemented via root constants at parameter 0 (shader register b2) + case DeviceFeatures::SRGBWriteControl: + case DeviceFeatures::TextureArrayExt: + case DeviceFeatures::TextureExternalImage: + case DeviceFeatures::Multiview: + case DeviceFeatures::BindBytes: // Not supported - use uniform buffers instead + case DeviceFeatures::BindUniform: + case DeviceFeatures::BufferRing: + case DeviceFeatures::BufferNoCopy: + case DeviceFeatures::BufferDeviceAddress: + case DeviceFeatures::ShaderTextureLodExt: + case DeviceFeatures::StandardDerivativeExt: + case DeviceFeatures::SamplerMinMaxLod: + case DeviceFeatures::DrawIndexedIndirect: + case DeviceFeatures::ExplicitBindingExt: + case DeviceFeatures::TextureFormatRG: + case DeviceFeatures::ValidationLayersEnabled: + case DeviceFeatures::ExternalMemoryObjects: + return false; + default: + return false; + } +} + +bool Device::hasRequirement(DeviceRequirement /*requirement*/) const { + return false; +} + +bool Device::getFeatureLimits(DeviceFeatureLimits featureLimits, size_t& result) const { + // Compile-time validation: IGL constant must not exceed D3D12 API limit + static_assert(IGL_VERTEX_ATTRIBUTES_MAX <= D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT, + "IGL_VERTEX_ATTRIBUTES_MAX exceeds D3D12 vertex input limit"); + + switch (featureLimits) { + case DeviceFeatureLimits::BufferAlignment: + // D3D12 buffer alignment requirements vary by buffer type: + // - Constant buffers: 256 bytes 
(D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT) + // - Storage buffers: 4 bytes (see ShaderStorageBufferOffsetAlignment) + // - Vertex/index buffers: 4 bytes (DWORD alignment) + // This returns the most restrictive alignment (constant buffers). + // See: https://learn.microsoft.com/en-us/windows/win32/direct3d12/constants + result = D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT; // 256 bytes + return true; + + case DeviceFeatureLimits::BufferNoCopyAlignment: + // D3D12 doesn't support no-copy buffers in the same way as Metal + result = 0; + return true; + + case DeviceFeatureLimits::MaxBindBytesBytes: + // bind-bytes (like Metal setVertexBytes) not supported on D3D12 + result = 0; + return true; + + case DeviceFeatureLimits::MaxCubeMapDimension: + // D3D12 cube map dimension limits (Feature Level 11_0+: 16384) + result = 16384; // D3D12_REQ_TEXTURECUBE_DIMENSION + return true; + + case DeviceFeatureLimits::MaxFragmentUniformVectors: + // D3D12 allows 64KB constant buffers, each vec4 is 16 bytes + // 64KB / 16 bytes = 4096 vec4s + result = 4096; + return true; + + case DeviceFeatureLimits::MaxMultisampleCount: { + // Query the maximum MSAA sample count supported by the device. + // Test common sample counts (1, 2, 4, 8, 16) for RGBA8 (most widely supported format). + // This provides a conservative estimate; actual support varies by format. + // Applications should use getMaxMSAASamplesForFormat() for format-specific queries. 
+ auto* device = ctx_->getDevice(); + if (!device) { + result = 1; // No MSAA support if device unavailable + return false; + } + + // Use RGBA8 as reference format (most widely supported) + const DXGI_FORMAT referenceFormat = DXGI_FORMAT_R8G8B8A8_UNORM; + + // Test sample counts in descending order: 16, 8, 4, 2, 1 + const uint32_t testCounts[] = {16, 8, 4, 2, 1}; + + for (uint32_t sampleCount : testCounts) { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msqLevels = {}; + msqLevels.Format = referenceFormat; + msqLevels.SampleCount = sampleCount; + msqLevels.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + + HRESULT hr = device->CheckFeatureSupport( + D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &msqLevels, + sizeof(msqLevels)); + + if (SUCCEEDED(hr) && msqLevels.NumQualityLevels > 0) { + result = sampleCount; + return true; + } + } + + // Fallback to 1x (no MSAA) + result = 1; + return true; + } + + case DeviceFeatureLimits::MaxPushConstantBytes: + // D3D12 root constants: each root constant is 4 bytes (DWORD) + // D3D12 root signature limit is 64 DWORDs total, but not all for constants + // Conservative limit: 256 bytes (64 DWORDs) + result = 256; + return true; + + case DeviceFeatureLimits::MaxTextureDimension1D2D: + // D3D12 Feature Level 11_0+: 16384 for 1D and 2D textures + // Feature Level 12+: still 16384 + result = 16384; // D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION + return true; + + case DeviceFeatureLimits::MaxStorageBufferBytes: + // D3D12 structured buffer max size: 128MB (2^27 bytes) + // UAV structured buffer limit + result = 128 * 1024 * 1024; // 128 MB + return true; + + case DeviceFeatureLimits::MaxUniformBufferBytes: + // D3D12 constant buffer size limit: 64KB (65536 bytes) + result = 64 * 1024; // D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16 + return true; + + case DeviceFeatureLimits::MaxVertexUniformVectors: + // Same as fragment uniform vectors for D3D12 + // 64KB / 16 bytes per vec4 = 4096 vec4s + result = 4096; + return true; + + case 
DeviceFeatureLimits::PushConstantsAlignment: + // Root constants are aligned to DWORD (4 bytes) + result = 4; + return true; + + case DeviceFeatureLimits::ShaderStorageBufferOffsetAlignment: + // D3D12 storage buffer (UAV/structured buffer) alignment. + // D3D12 structured buffers require 4-byte (DWORD) alignment, unlike constant buffers (256 bytes) + // This matches Vulkan's typical minStorageBufferOffsetAlignment (often 16-64 bytes, device-dependent) + // See: https://learn.microsoft.com/en-us/windows/win32/direct3d12/alignment + result = 4; + return true; + + case DeviceFeatureLimits::MaxTextureDimension3D: + // D3D12 3D texture dimension limits (Feature Level 11_0+: 2048) + // Feature Level 10_0+: 2048 + result = 2048; // D3D12_REQ_TEXTURE3D_U_V_OR_W_DIMENSION + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + // D3D12 compute shader thread group limits + result = D3D12_CS_THREAD_GROUP_MAX_X; // 1024 + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + // D3D12 compute shader thread group limits + result = D3D12_CS_THREAD_GROUP_MAX_Y; // 1024 + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + // D3D12 compute shader thread group limits + result = D3D12_CS_THREAD_GROUP_MAX_Z; // 64 + return true; + + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + // D3D12 max threads per thread group + result = D3D12_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; // 1024 + return true; + + case DeviceFeatureLimits::MaxVertexInputAttributes: + // D3D12 max vertex input slots (32 per D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT) + result = D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT; // 32 + IGL_DEBUG_ASSERT(IGL_VERTEX_ATTRIBUTES_MAX <= result, + "IGL_VERTEX_ATTRIBUTES_MAX exceeds D3D12 reported limit"); + return true; + + case DeviceFeatureLimits::MaxColorAttachments: + // D3D12 max simultaneous render targets + result = D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT; // 8 + return true; + + // Descriptor heap size limits 
chosen for cross-platform compatibility. + case DeviceFeatureLimits::MaxDescriptorHeapCbvSrvUav: + // D3D12 shader-visible CBV/SRV/UAV descriptor heap size + // Hardware limit: 1,000,000+ descriptors + // Current implementation uses 4096 descriptors (see DescriptorHeapManager::Sizes) + // This reports the configured limit, not the hardware maximum + result = 4096; + return true; + + case DeviceFeatureLimits::MaxDescriptorHeapSamplers: + // D3D12 shader-visible sampler descriptor heap size + // Hardware limit: 2048 descriptors (D3D12 spec limit for sampler heaps) + // Current implementation uses 2048 descriptors (see DescriptorHeapManager::Sizes) + result = 2048; + return true; + + case DeviceFeatureLimits::MaxDescriptorHeapRtvs: + // D3D12 CPU-visible RTV descriptor heap size + // Hardware limit: 16,384 descriptors + // Current implementation uses 256 descriptors (see DescriptorHeapManager::Sizes) + result = 256; + return true; + + case DeviceFeatureLimits::MaxDescriptorHeapDsvs: + // D3D12 CPU-visible DSV descriptor heap size + // Hardware limit: 16,384 descriptors + // Current implementation uses 128 descriptors (see DescriptorHeapManager::Sizes) + result = 128; + return true; + } + + // Should never reach here - all cases handled + result = 0; + return false; +} + + +ICapabilities::TextureFormatCapabilities Device::getTextureFormatCapabilities(TextureFormat format) const { + using CapBits = ICapabilities::TextureFormatCapabilityBits; + uint8_t caps = 0; + + // Depth formats: guarantee they are sampleable in shaders for tests + switch (format) { + case TextureFormat::Z_UNorm16: + case TextureFormat::Z_UNorm24: + case TextureFormat::Z_UNorm32: + case TextureFormat::S8_UInt_Z24_UNorm: + case TextureFormat::S8_UInt_Z32_UNorm: + caps |= CapBits::Sampled; + return caps; + default: + break; + } + + // D3D12 does not support 3-channel RGB formats natively - they are mapped to RGBA formats + // However, 3-channel formats cannot be used as render targets because: + // 1. 
RGB_F16/RGB_F32 map to RGBA equivalents, but D3D12 expects RGBA data layout for RT + // 2. Rendering to these formats would require alpha channel handling that IGL doesn't expose + // 3. Other backends (OpenGL, Metal) also don't support RGB formats as render targets + // See also: OpenGL's DeviceFeatureSet.cpp line 1271 "RGB floating point textures are NOT renderable" + const bool isThreeChannelRgbFormat = + format == TextureFormat::RGB_F16 || + format == TextureFormat::RGB_F32; + + auto* dev = ctx_->getDevice(); + if (!dev) { + return 0; + } + + const DXGI_FORMAT dxgi = textureFormatToDXGIFormat(format); + if (dxgi == DXGI_FORMAT_UNKNOWN) { + return 0; + } + + D3D12_FEATURE_DATA_FORMAT_SUPPORT fs = {}; + fs.Format = dxgi; + if (FAILED(dev->CheckFeatureSupport(D3D12_FEATURE_FORMAT_SUPPORT, &fs, sizeof(fs)))) { + return 0; + } + + const auto s1 = fs.Support1; + const auto s2 = fs.Support2; + + const auto props = TextureFormatProperties::fromTextureFormat(format); + + // Enhanced D3D12 format capability mapping. 
+ // Map D3D12_FORMAT_SUPPORT1 flags to IGL capabilities + + // Sampled: Can be used with texture sampling instructions + if (s1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) { + caps |= CapBits::Sampled; + } + + // SampledFiltered: Supports linear filtering (only for non-integer color formats) + // Also check D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON for depth formats + if (props.hasColor() && !props.isInteger()) { + if (s1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE) { + caps |= CapBits::SampledFiltered; + } + } else if (props.hasDepth() || props.hasStencil()) { + // Depth formats: check for comparison filtering support + if (s1 & D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON) { + caps |= CapBits::SampledFiltered; + } + } + + // Attachment: Can be used as render target or depth/stencil attachment + // Also consider D3D12_FORMAT_SUPPORT1_BLENDABLE and D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET + // Don't report Attachment capability for 3-channel RGB formats even if D3D12 reports the + // underlying RGBA format as renderable - using them as render targets causes device removal + if (!isThreeChannelRgbFormat) { + if ((s1 & D3D12_FORMAT_SUPPORT1_RENDER_TARGET) || (s1 & D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL)) { + caps |= CapBits::Attachment; + } + } + + // Storage: Can be used with unordered access (UAV) + // Check for typed UAV load/store, or atomic operations + // Enhanced UAV capability detection. 
+ const bool hasUAVTypedOps = (s2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD) && + (s2 & D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE); + const bool hasUAVAtomicOps = (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX) || + (s2 & D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX); + + if (hasFeature(DeviceFeatures::Compute) && (hasUAVTypedOps || hasUAVAtomicOps)) { + caps |= CapBits::Storage; + } + + // SampledAttachment: Can be both sampled and used as attachment + if ((caps & CapBits::Sampled) && (caps & CapBits::Attachment)) { + caps |= CapBits::SampledAttachment; + } + +#if IGL_DEBUG || defined(IGL_FORCE_ENABLE_LOGS) + // Debug logging for unmapped D3D12 capabilities. + // This helps identify format capabilities that D3D12 supports but IGL doesn't expose + uint32_t unmappedS1 = 0; + uint32_t unmappedS2 = 0; + + // Check unmapped D3D12_FORMAT_SUPPORT1 flags + const uint32_t mappedS1 = D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE | + D3D12_FORMAT_SUPPORT1_SHADER_SAMPLE_COMPARISON | + D3D12_FORMAT_SUPPORT1_RENDER_TARGET | + D3D12_FORMAT_SUPPORT1_DEPTH_STENCIL | + D3D12_FORMAT_SUPPORT1_BLENDABLE | + D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RENDERTARGET; + unmappedS1 = s1 & ~mappedS1; + + // Check unmapped D3D12_FORMAT_SUPPORT2 flags + const uint32_t mappedS2 = D3D12_FORMAT_SUPPORT2_UAV_TYPED_LOAD | + D3D12_FORMAT_SUPPORT2_UAV_TYPED_STORE | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_ADD | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_BITWISE_OPS | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_COMPARE_STORE_OR_COMPARE_EXCHANGE | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_EXCHANGE | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_SIGNED_MIN_OR_MAX | + D3D12_FORMAT_SUPPORT2_UAV_ATOMIC_UNSIGNED_MIN_OR_MAX; + unmappedS2 = s2 & ~mappedS2; + + if (unmappedS1 != 0 || unmappedS2 != 0) { + 
IGL_D3D12_LOG_VERBOSE("Format %d (DXGI %d) has unmapped D3D12 capabilities:\n", + static_cast(format), static_cast(dxgi)); + if (unmappedS1 != 0) { + IGL_D3D12_LOG_VERBOSE(" Support1 unmapped flags: 0x%08X\n", unmappedS1); + // Log specific unmapped flags that might be useful + // Note: Some flags may not be defined in older Windows SDK versions + const uint32_t MIP_AUTOGEN = 0x800; // D3D12_FORMAT_SUPPORT1_MIP_AUTOGEN + const uint32_t MULTISAMPLE_RESOLVE = 0x40; // D3D12_FORMAT_SUPPORT1_MULTISAMPLE_RESOLVE + const uint32_t MULTISAMPLE_LOAD = 0x100000; // D3D12_FORMAT_SUPPORT1_MULTISAMPLE_LOAD + + if (unmappedS1 & MIP_AUTOGEN) { + IGL_D3D12_LOG_VERBOSE(" - MIP_AUTOGEN (0x800)\n"); + } + if (unmappedS1 & MULTISAMPLE_RESOLVE) { + IGL_D3D12_LOG_VERBOSE(" - MULTISAMPLE_RESOLVE (0x40)\n"); + } + if (unmappedS1 & MULTISAMPLE_LOAD) { + IGL_D3D12_LOG_VERBOSE(" - MULTISAMPLE_LOAD (0x100000)\n"); + } + } + if (unmappedS2 != 0) { + IGL_D3D12_LOG_VERBOSE(" Support2 unmapped flags: 0x%08X\n", unmappedS2); + const uint32_t OUTPUT_MERGER_LOGIC_OP = 0x2; // D3D12_FORMAT_SUPPORT2_OUTPUT_MERGER_LOGIC_OP + if (unmappedS2 & OUTPUT_MERGER_LOGIC_OP) { + IGL_D3D12_LOG_VERBOSE(" - OUTPUT_MERGER_LOGIC_OP (0x2)\n"); + } + } + } +#endif + + return caps; +} + +ShaderVersion Device::getShaderVersion() const { + // Report HLSL SM 6.0 if DXC is available; otherwise SM 5.0 (D3DCompile fallback) + bool dxcAvailable = false; +#if IGL_PLATFORM_WINDOWS + HMODULE h = GetModuleHandleA("dxcompiler.dll"); + if (!h) { + h = LoadLibraryA("dxcompiler.dll"); + } + if (h) { + FARPROC proc = GetProcAddress(h, "DxcCreateInstance"); + dxcAvailable = (proc != nullptr); + } +#endif + if (dxcAvailable) { + return ShaderVersion{ShaderFamily::Hlsl, 6, 0, 0}; + } + return ShaderVersion{ShaderFamily::Hlsl, 5, 0, 0}; +} + +BackendVersion Device::getBackendVersion() const { + // Query highest supported feature level to report backend version + auto* dev = ctx_->getDevice(); + if (!dev) { + return 
BackendVersion{BackendFlavor::D3D12, 0, 0}; + } + + static const D3D_FEATURE_LEVEL kLevels[] = { + D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + }; + D3D12_FEATURE_DATA_FEATURE_LEVELS fls = {}; + fls.NumFeatureLevels = static_cast(sizeof(kLevels) / sizeof(kLevels[0])); + fls.pFeatureLevelsRequested = kLevels; + fls.MaxSupportedFeatureLevel = D3D_FEATURE_LEVEL_11_0; + + if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_FEATURE_LEVELS, &fls, sizeof(fls)))) { + switch (fls.MaxSupportedFeatureLevel) { + case D3D_FEATURE_LEVEL_12_2: + return BackendVersion{BackendFlavor::D3D12, 12, 2}; + case D3D_FEATURE_LEVEL_12_1: + return BackendVersion{BackendFlavor::D3D12, 12, 1}; + case D3D_FEATURE_LEVEL_12_0: + return BackendVersion{BackendFlavor::D3D12, 12, 0}; + case D3D_FEATURE_LEVEL_11_1: + return BackendVersion{BackendFlavor::D3D12, 11, 1}; + case D3D_FEATURE_LEVEL_11_0: + default: + return BackendVersion{BackendFlavor::D3D12, 11, 0}; + } + } + + // Fallback if CheckFeatureSupport fails + return BackendVersion{BackendFlavor::D3D12, 11, 0}; +} + +BackendType Device::getBackendType() const { + return BackendType::D3D12; +} + +// Get sampler cache statistics for telemetry and debugging. +SamplerCacheStats Device::getSamplerCacheStats() const { + return samplerCache_.getStats(); +} + +// Query maximum MSAA sample count for a specific format. 
+uint32_t Device::getMaxMSAASamplesForFormat(TextureFormat format) const { + auto* device = ctx_->getDevice(); + if (!device) { + return 1; + } + + // Convert IGL format to DXGI format + const DXGI_FORMAT dxgiFormat = textureFormatToDXGIFormat(format); + if (dxgiFormat == DXGI_FORMAT_UNKNOWN) { + IGL_LOG_ERROR("Device::getMaxMSAASamplesForFormat: Unknown format %d\n", static_cast(format)); + return 1; + } + + // Test sample counts in descending order: 16, 8, 4, 2, 1 + const uint32_t testCounts[] = {16, 8, 4, 2, 1}; + + for (uint32_t sampleCount : testCounts) { + D3D12_FEATURE_DATA_MULTISAMPLE_QUALITY_LEVELS msqLevels = {}; + msqLevels.Format = dxgiFormat; + msqLevels.SampleCount = sampleCount; + msqLevels.Flags = D3D12_MULTISAMPLE_QUALITY_LEVELS_FLAG_NONE; + + HRESULT hr = device->CheckFeatureSupport( + D3D12_FEATURE_MULTISAMPLE_QUALITY_LEVELS, + &msqLevels, + sizeof(msqLevels)); + + if (SUCCEEDED(hr) && msqLevels.NumQualityLevels > 0) { + return sampleCount; + } + } + + return 1; // No MSAA support +} + +void Device::processCompletedUploads() { + allocatorPool_.processCompletedUploads(); +} + +Result Device::waitForUploadFence(UINT64 fenceValue) const { + return allocatorPool_.waitForUploadFence(*this, fenceValue); +} + +void Device::trackUploadBuffer(igl::d3d12::ComPtr buffer, UINT64 fenceValue) { + allocatorPool_.trackUploadBuffer(std::move(buffer), fenceValue); +} + +igl::d3d12::ComPtr Device::getUploadCommandAllocator() { + return allocatorPool_.getUploadCommandAllocator(*ctx_); +} + +void Device::returnUploadCommandAllocator(igl::d3d12::ComPtr allocator, + UINT64 fenceValue) { + allocatorPool_.returnUploadCommandAllocator(std::move(allocator), fenceValue); +} + +size_t Device::getCurrentDrawCount() const { + return telemetry_.getDrawCount(); +} + +size_t Device::getShaderCompilationCount() const { + return telemetry_.getShaderCompilationCount(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Device.h b/src/igl/d3d12/Device.h new file mode 100644 
index 0000000000..19e468ba32 --- /dev/null +++ b/src/igl/d3d12/Device.h @@ -0,0 +1,259 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include // For IFenceProvider interface. + +namespace igl::d3d12 { + +class PlatformDevice; +class UploadRingBuffer; +class SamplerState; // Forward declaration for sampler cache +class D3D12StagingDevice; // Forward declaration. + +/// @brief Implements the igl::IDevice interface for DirectX 12 +class Device final : public IDevice, public IFenceProvider { + public: + explicit Device(std::unique_ptr ctx); + ~Device() override; + + // BindGroups + [[nodiscard]] Holder createBindGroup( + const BindGroupTextureDesc& desc, + const IRenderPipelineState* IGL_NULLABLE compatiblePipeline, + Result* IGL_NULLABLE outResult) override; + [[nodiscard]] Holder createBindGroup( + const BindGroupBufferDesc& desc, + Result* IGL_NULLABLE outResult) override; + void destroy(BindGroupTextureHandle handle) override; + void destroy(BindGroupBufferHandle handle) override; + void destroy(SamplerHandle handle) override; + + // Command Queue + [[nodiscard]] std::shared_ptr createCommandQueue( + const CommandQueueDesc& desc, + Result* IGL_NULLABLE outResult) noexcept override; + + // Resources + [[nodiscard]] std::unique_ptr createBuffer(const BufferDesc& desc, + Result* IGL_NULLABLE + outResult) const noexcept override; + + // Non-const helper for createBuffer; handles upload operations that mutate internal state. 
+ [[nodiscard]] std::unique_ptr createBufferImpl(const BufferDesc& desc, + Result* IGL_NULLABLE outResult) noexcept; + + [[nodiscard]] std::shared_ptr createDepthStencilState( + const DepthStencilStateDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::unique_ptr createShaderStages( + const ShaderStagesDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createSamplerState( + const SamplerStateDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createTexture(const TextureDesc& desc, + Result* IGL_NULLABLE + outResult) const noexcept override; + + [[nodiscard]] std::shared_ptr createTextureView( + std::shared_ptr texture, + const TextureViewDesc& desc, + Result* IGL_NULLABLE outResult) const noexcept override; + + [[nodiscard]] std::shared_ptr createTimer( + Result* IGL_NULLABLE outResult) const noexcept override; + + [[nodiscard]] std::shared_ptr createVertexInputState( + const VertexInputStateDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + // Pipelines + [[nodiscard]] std::shared_ptr createComputePipeline( + const ComputePipelineDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createRenderPipeline( + const RenderPipelineDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + // D3D12-specific: Create PSO variant with substituted formats (for dynamic PSO selection) + // Called by RenderPipelineState::getPipelineState() to create format variants + [[nodiscard]] igl::d3d12::ComPtr createPipelineStateVariant( + const RenderPipelineDesc& desc, + ID3D12RootSignature* rootSignature, + Result* IGL_NULLABLE outResult) const; + + // Shader library and modules + [[nodiscard]] std::unique_ptr createShaderLibrary( + const ShaderLibraryDesc& desc, + Result* IGL_NULLABLE outResult) const override; + + [[nodiscard]] std::shared_ptr createShaderModule( + const ShaderModuleDesc& desc, + 
Result* IGL_NULLABLE outResult) const override; + + // Framebuffer + [[nodiscard]] std::shared_ptr createFramebuffer( + const FramebufferDesc& desc, + Result* IGL_NULLABLE outResult) override; + + // Capabilities + [[nodiscard]] const IPlatformDevice& getPlatformDevice() const noexcept override; + + [[nodiscard]] bool hasFeature(DeviceFeatures feature) const override; + [[nodiscard]] bool hasRequirement(DeviceRequirement requirement) const override; + [[nodiscard]] bool getFeatureLimits(DeviceFeatureLimits featureLimits, + size_t& result) const override; + [[nodiscard]] TextureFormatCapabilities getTextureFormatCapabilities( + TextureFormat format) const override; + [[nodiscard]] ShaderVersion getShaderVersion() const override; + [[nodiscard]] BackendVersion getBackendVersion() const override; + + [[nodiscard]] BackendType getBackendType() const override; + + [[nodiscard]] size_t getCurrentDrawCount() const override; + [[nodiscard]] size_t getShaderCompilationCount() const override; + + void incrementDrawCount(size_t n) { telemetry_.incrementDrawCount(n); } + + D3D12Context& getD3D12Context() { + return *ctx_; + } + [[nodiscard]] const D3D12Context& getD3D12Context() const { + return *ctx_; + } + + // Bind group accessors for RenderCommandEncoder + [[nodiscard]] const BindGroupTextureDesc* getBindGroupTextureDesc(BindGroupTextureHandle handle) const { + return bindGroupTexturesPool_.get(handle); + } + [[nodiscard]] const BindGroupBufferDesc* getBindGroupBufferDesc(BindGroupBufferHandle handle) const { + return bindGroupBuffersPool_.get(handle); + } + + // Device capabilities accessors. 
+ [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS& getDeviceOptions() const { + return capabilities_.getOptions(); + } + [[nodiscard]] const D3D12_FEATURE_DATA_D3D12_OPTIONS1& getDeviceOptions1() const { + return capabilities_.getOptions1(); + } + [[nodiscard]] D3D12_RESOURCE_BINDING_TIER getResourceBindingTier() const { + return capabilities_.getResourceBindingTier(); + } + + void processCompletedUploads(); + void trackUploadBuffer(igl::d3d12::ComPtr buffer, UINT64 fenceValue); + + // Command allocator pool access for upload operations. + igl::d3d12::ComPtr getUploadCommandAllocator(); + void returnUploadCommandAllocator(igl::d3d12::ComPtr allocator, + UINT64 fenceValue); + ID3D12Fence* getUploadFence() const { return allocatorPool_.getUploadFence(); } + UINT64 getNextUploadFenceValue() { return allocatorPool_.getNextUploadFenceValue(); } + Result waitForUploadFence(UINT64 fenceValue) const; + + // IFenceProvider implementation (shared fence timeline). + uint64_t getNextFenceValue() override { return getNextUploadFenceValue(); } + + // Upload ring buffer access. + UploadRingBuffer* getUploadRingBuffer() const { return allocatorPool_.getUploadRingBuffer(); } + + // Check for device removal and return error Result if detected. + [[nodiscard]] Result checkDeviceRemoval() const; + + // Query if device has been lost. + [[nodiscard]] bool isDeviceLost() const { return deviceLost_; } + + // Sampler cache statistics. + [[nodiscard]] SamplerCacheStats getSamplerCacheStats() const; + + // Query maximum MSAA sample count for a specific format. + // Returns 1 if the format does not support MSAA. + [[nodiscard]] uint32_t getMaxMSAASamplesForFormat(TextureFormat format) const; + + private: + // Alignment validation helpers. 
+ bool validateMSAAAlignment(const TextureDesc& desc, Result* IGL_NULLABLE outResult) const; + bool validateTextureAlignment(const D3D12_RESOURCE_DESC& resourceDesc, + uint32_t sampleCount, Result* IGL_NULLABLE outResult) const; + bool validateBufferAlignment(size_t bufferSize, bool isUniform) const; + + // Alignment constants. + static constexpr size_t MSAA_ALIGNMENT = 65536; // 64KB for MSAA textures + static constexpr size_t BUFFER_ALIGNMENT = 256; // 256 bytes for constant buffers + static constexpr size_t DEFAULT_TEXTURE_ALIGNMENT = 65536; // 64KB default for textures + + D3D12DeviceCapabilities capabilities_; + + std::unique_ptr ctx_; + std::unique_ptr platformDevice_; + D3D12Telemetry telemetry_; + + // Bind group pools + Pool bindGroupTexturesPool_; + Pool bindGroupBuffersPool_; + + // Upload tracking state (non-mutable, mutated only from non-const paths). + // Modified by createBufferImpl, Buffer::upload, Texture::upload via non-const Device references + // and synchronized via pendingUploadsMutex_ for thread-safe access. + D3D12AllocatorPool allocatorPool_; + D3D12PipelineCache pipelineCache_; + D3D12SamplerCache samplerCache_; + + // Device lost flag and reason for fatal error handling (atomic for thread-safe access). + mutable std::atomic deviceLost_{false}; + mutable std::string deviceLostReason_; // Cached reason for diagnostics + + public: + // Shared staging infrastructure for upload/readback operations. + // Used by Buffer, Texture, Framebuffer, CommandBuffer for centralized resource management. + [[nodiscard]] D3D12ImmediateCommands* getImmediateCommands() const { + return allocatorPool_.getImmediateCommands(); + } + [[nodiscard]] D3D12StagingDevice* getStagingDevice() const { + return allocatorPool_.getStagingDevice(); + } + + // Access pre-compiled mipmap shaders. 
+ [[nodiscard]] bool areMipmapShadersAvailable() const { + return pipelineCache_.mipmapShadersAvailable_; + } + [[nodiscard]] const std::vector& getMipmapVSBytecode() const { + return pipelineCache_.mipmapVSBytecode_; + } + [[nodiscard]] const std::vector& getMipmapPSBytecode() const { + return pipelineCache_.mipmapPSBytecode_; + } + [[nodiscard]] ID3D12RootSignature* getMipmapRootSignature() const { + return pipelineCache_.mipmapRootSignature_.Get(); + } +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Framebuffer.cpp b/src/igl/d3d12/Framebuffer.cpp new file mode 100644 index 0000000000..ba71b007ca --- /dev/null +++ b/src/igl/d3d12/Framebuffer.cpp @@ -0,0 +1,781 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +Framebuffer::Framebuffer(const FramebufferDesc& desc) : desc_(desc) {} + +Framebuffer::~Framebuffer() { + // FenceWaiter RAII handles event cleanup automatically +} + +std::vector Framebuffer::getColorAttachmentIndices() const { + std::vector indices; + for (size_t i = 0; i < IGL_COLOR_ATTACHMENTS_MAX; ++i) { + if (desc_.colorAttachments[i].texture) { + indices.push_back(i); + } + } + return indices; +} + +std::shared_ptr Framebuffer::getColorAttachment(size_t index) const { + if (index < IGL_COLOR_ATTACHMENTS_MAX) { + return desc_.colorAttachments[index].texture; + } + return nullptr; +} + +std::shared_ptr Framebuffer::getResolveColorAttachment(size_t index) const { + if (index < IGL_COLOR_ATTACHMENTS_MAX) { + return desc_.colorAttachments[index].resolveTexture; + } + return nullptr; +} + +std::shared_ptr Framebuffer::getDepthAttachment() const { + 
return desc_.depthAttachment.texture; +} + +std::shared_ptr Framebuffer::getResolveDepthAttachment() const { + return desc_.depthAttachment.resolveTexture; +} + +std::shared_ptr Framebuffer::getStencilAttachment() const { + return desc_.stencilAttachment.texture; +} + +FramebufferMode Framebuffer::getMode() const { + return desc_.mode; +} + +bool Framebuffer::isSwapchainBound() const { + return false; +} + +void Framebuffer::copyBytesColorAttachment(ICommandQueue& cmdQueue, + size_t index, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const { + if (!pixelBytes || index >= IGL_COLOR_ATTACHMENTS_MAX) { + return; + } + + auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto& ctx = iglDevice.getD3D12Context(); + auto* device = ctx.getDevice(); + if (!device) { + return; + } + + // Get shared infrastructure used for readback. + auto* immediateCommands = iglDevice.getImmediateCommands(); + auto* stagingDevice = iglDevice.getStagingDevice(); + if (!immediateCommands || !stagingDevice) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Shared infrastructure not available\n"); + return; + } + + auto srcTex = std::static_pointer_cast(desc_.colorAttachments[index].texture); + if (!srcTex) { + return; + } + + ID3D12Resource* srcRes = srcTex->getResource(); + if (!srcRes) { + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t copyLayer = (srcTex->getType() == TextureType::Cube) ? 
range.face : range.layer; + const uint32_t subresourceIndex = srcTex->calcSubresourceIndex(mipLevel, copyLayer); + + const auto texDims = srcTex->getDimensions(); + const uint32_t mipWidth = std::max(1u, texDims.width >> mipLevel); + const uint32_t mipHeight = std::max(1u, texDims.height >> mipLevel); + + const UINT64 frameFenceValue = ctx.getFenceValue(); + + auto& cache = readbackCache_[index]; + + const auto fmtProps = TextureFormatProperties::fromTextureFormat(srcTex->getFormat()); + const size_t bytesPerPixel = std::max(fmtProps.bytesPerBlock, 1); + const size_t fullRowBytes = static_cast(mipWidth) * bytesPerPixel; + + bool cacheUpToDate = cache.cacheValid && + cache.cachedFrameFenceValue == frameFenceValue && + cache.cachedMipLevel == mipLevel && + cache.cachedLayer == copyLayer && + cache.cachedWidth == mipWidth && + cache.cachedHeight == mipHeight && + cache.cachedBytesPerPixel == bytesPerPixel; + + if (!cacheUpToDate) { + const auto refreshStart = std::chrono::high_resolution_clock::now(); + D3D12_RESOURCE_DESC srcDesc = srcRes->GetDesc(); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint{}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + device->GetCopyableFootprints( + &srcDesc, subresourceIndex, 1, 0, &footprint, &numRows, &rowSizeInBytes, &totalBytes); + + if (totalBytes == 0) { + return; + } + + // Use D3D12StagingDevice for readback buffer allocation. + auto stagingBuffer = stagingDevice->allocateReadback(totalBytes); + if (!stagingBuffer.valid || !stagingBuffer.buffer.Get()) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to allocate readback buffer\n"); + cache.cacheValid = false; + return; + } + + // Use D3D12ImmediateCommands for the copy operation. 
+ Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + cache.cacheValid = false; + return; + } + + const auto previousState = srcTex->getSubresourceState(mipLevel, copyLayer); + srcTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, copyLayer); + + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = stagingBuffer.buffer.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = srcRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = subresourceIndex; + + D3D12_BOX srcBox{}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.back = 1; + cmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); + + srcTex->transitionTo(cmdList, previousState, mipLevel, copyLayer); + + // Submit and wait using the shared fence. 
+ uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + cache.cacheValid = false; + return; + } + + // Map and read the readback buffer + void* mapped = nullptr; + D3D12_RANGE readRange{0, totalBytes}; + if (FAILED(stagingBuffer.buffer->Map(0, &readRange, &mapped))) { + IGL_LOG_ERROR("Framebuffer::copyBytesColorAttachment - Failed to map readback buffer\n"); + stagingDevice->free(stagingBuffer, fenceValue); + cache.cacheValid = false; + return; + } + + const uint8_t* srcPtr = static_cast(mapped) + footprint.Offset; + const size_t srcRowPitch = footprint.Footprint.RowPitch; + const size_t copyRowBytes = fullRowBytes; + + cache.cachedRowPitch = static_cast(fullRowBytes); + cache.cachedData.resize(static_cast(cache.cachedRowPitch) * + static_cast(mipHeight)); + + // Direct copy with vertical flip only; no channel swap needed. + // DXGI_FORMAT_R8G8B8A8_UNORM has R,G,B,A byte order matching IGL expectations. + for (uint32_t row = 0; row < mipHeight; ++row) { + const uint8_t* s = srcPtr + static_cast(row) * srcRowPitch; + uint8_t* d = + cache.cachedData.data() + + static_cast(mipHeight - 1 - row) * static_cast(cache.cachedRowPitch); + + std::memcpy(d, s, copyRowBytes); + } + + stagingBuffer.buffer->Unmap(0, nullptr); + + // Free the staging buffer back to the pool. 
+ stagingDevice->free(stagingBuffer, fenceValue); + + cache.cachedWidth = mipWidth; + cache.cachedHeight = mipHeight; + cache.cachedBytesPerPixel = bytesPerPixel; + cache.cachedMipLevel = mipLevel; + cache.cachedLayer = copyLayer; + cache.cachedFrameFenceValue = frameFenceValue; + cache.cacheValid = true; + + const auto refreshEnd = std::chrono::high_resolution_clock::now(); + const double refreshMs = + std::chrono::duration(refreshEnd - refreshStart).count(); + IGL_D3D12_LOG_VERBOSE("copyBytesColorAttachment: refreshed subresource (mip=%u, layer=%u) in %.2f ms (%ux%u)\n", + mipLevel, + copyLayer, + refreshMs, + mipWidth, + mipHeight); + } + + if (!cache.cacheValid) { + return; + } + + if (range.width == 0 || range.height == 0 || + range.x + range.width > cache.cachedWidth || + range.y + range.height > cache.cachedHeight) { + return; + } + + const size_t copyRowBytes = + static_cast(range.width) * cache.cachedBytesPerPixel; + const size_t dstRowPitch = bytesPerRow ? bytesPerRow : copyRowBytes; + uint8_t* dstPtr = static_cast(pixelBytes); + + for (uint32_t destRow = 0; destRow < range.height; ++destRow) { + const uint32_t gpuRow = range.y + (range.height - 1 - destRow); + if (gpuRow >= cache.cachedHeight) { + return; + } + const uint32_t cachedRow = cache.cachedHeight - 1 - gpuRow; + const uint8_t* src = + cache.cachedData.data() + + static_cast(cachedRow) * static_cast(cache.cachedRowPitch) + + static_cast(range.x) * cache.cachedBytesPerPixel; + std::memcpy(dstPtr + static_cast(destRow) * dstRowPitch, src, copyRowBytes); + } +} + +void Framebuffer::copyBytesDepthAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const { + // Depth attachment readback. 
+ if (!pixelBytes) { + return; + } + + auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto& ctx = iglDevice.getD3D12Context(); + auto* device = ctx.getDevice(); + if (!device) { + return; + } + + // Get shared staging infrastructure. + auto* immediateCommands = iglDevice.getImmediateCommands(); + auto* stagingDevice = iglDevice.getStagingDevice(); + if (!immediateCommands || !stagingDevice) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Shared infrastructure not available\n"); + return; + } + + auto depthTex = std::static_pointer_cast(desc_.depthAttachment.texture); + if (!depthTex) { + return; + } + + ID3D12Resource* depthRes = depthTex->getResource(); + if (!depthRes) { + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t copyLayer = (depthTex->getType() == TextureType::Cube) ? range.face : range.layer; + const uint32_t subresourceIndex = depthTex->calcSubresourceIndex(mipLevel, copyLayer); + + const auto texDims = depthTex->getDimensions(); + const uint32_t mipWidth = std::max(1u, texDims.width >> mipLevel); + const uint32_t mipHeight = std::max(1u, texDims.height >> mipLevel); + + // Get footprint for the depth resource + D3D12_RESOURCE_DESC depthDesc = depthRes->GetDesc(); + + // Validate and log depth format to clarify raw-bits vs converted-float behavior. + const DXGI_FORMAT depthFormat = depthDesc.Format; + const bool isD32Float = (depthFormat == DXGI_FORMAT_D32_FLOAT || + depthFormat == DXGI_FORMAT_D32_FLOAT_S8X24_UINT); + + if (!isD32Float) { + IGL_D3D12_LOG_VERBOSE("Framebuffer::copyBytesDepthAttachment - Format 0x%X is not D32_FLOAT; " + "returning raw GPU bits (not normalized [0,1] floats). 
" + "For UNORM formats, caller must convert manually.\n", + static_cast(depthFormat)); + } + + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint{}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + device->GetCopyableFootprints( + &depthDesc, subresourceIndex, 1, 0, &footprint, &numRows, &rowSizeInBytes, &totalBytes); + + if (totalBytes == 0) { + return; + } + + // Allocate readback buffer from the staging device. + auto stagingBuffer = stagingDevice->allocateReadback(totalBytes); + if (!stagingBuffer.valid || !stagingBuffer.buffer.Get()) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to allocate readback buffer\n"); + return; + } + + // Begin immediate command recording. + Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Transition depth texture to copy source + const auto previousState = depthTex->getSubresourceState(mipLevel, copyLayer); + depthTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, copyLayer); + + // Set up copy locations + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = stagingBuffer.buffer.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = depthRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = subresourceIndex; + + D3D12_BOX srcBox{}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.back = 1; + + // Copy depth data + cmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); + + // Transition back to previous state + depthTex->transitionTo(cmdList, previousState, mipLevel, copyLayer); + + // 
Submit and wait using the shared fence. + uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Map readback buffer and copy data + void* mapped = nullptr; + D3D12_RANGE readRange{0, totalBytes}; + if (FAILED(stagingBuffer.buffer->Map(0, &readRange, &mapped))) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Failed to map readback buffer\n"); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + // Validate range bounds before copying. + if (range.width == 0 || range.height == 0 || + range.x + range.width > mipWidth || + range.y + range.height > mipHeight) { + IGL_LOG_ERROR("Framebuffer::copyBytesDepthAttachment - Invalid range: [%u,%u %ux%u] exceeds mip size %ux%u\n", + range.x, range.y, range.width, range.height, mipWidth, mipHeight); + stagingBuffer.buffer->Unmap(0, nullptr); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + const uint8_t* srcPtr = static_cast(mapped) + footprint.Offset; + const size_t srcRowPitch = footprint.Footprint.RowPitch; + + // Depth readback contract: callers (tests) provide a float-per-pixel buffer. + // Use 4 bytes per destination pixel regardless of the underlying DXGI format, + // and only copy that many bytes from the GPU data to avoid overrunning the + // caller's buffer (e.g., for combined depth-stencil formats like D32_S8). + // + // LIMITATION: This implementation assumes a "raw bits" contract - it copies + // the native GPU representation without format conversion. This is correct for + // D32_FLOAT (which is already IEEE 754 float), but for normalized integer depth + // formats (D16_UNORM, D24_UNORM_S8_UINT), the copied data is raw bits, not + // converted [0,1] floats. 
Callers expecting normalized depth values from non-float + // formats will receive unconverted data. Future work should add format detection + // and explicit UNORM-to-float conversion for broader compatibility. + constexpr size_t kDstBytesPerPixel = sizeof(float); + + // Derive the native bytes-per-pixel for the copied subresource using the + // rowSizeInBytes returned by GetCopyableFootprints when possible. + size_t nativeBytesPerPixel = 0; + if (mipWidth > 0 && rowSizeInBytes > 0) { + nativeBytesPerPixel = static_cast(rowSizeInBytes) / static_cast(mipWidth); + } + + const size_t copyRowBytes = static_cast(range.width) * kDstBytesPerPixel; + const size_t dstRowPitch = bytesPerRow ? bytesPerRow : copyRowBytes; + uint8_t* dstPtr = static_cast(pixelBytes); + + for (uint32_t destRow = 0; destRow < range.height; ++destRow) { + const uint32_t gpuRow = range.y + (range.height - 1 - destRow); + if (gpuRow >= mipHeight) { + break; + } + const uint32_t srcRow = mipHeight - 1 - gpuRow; + const uint8_t* src = + srcPtr + static_cast(srcRow) * srcRowPitch + + static_cast(range.x) * (nativeBytesPerPixel > 0 ? nativeBytesPerPixel : kDstBytesPerPixel); + + std::memcpy(dstPtr + static_cast(destRow) * dstRowPitch, src, copyRowBytes); + } + + stagingBuffer.buffer->Unmap(0, nullptr); + + // Free staging buffer back to the pool. + stagingDevice->free(stagingBuffer, fenceValue); +} + +void Framebuffer::copyBytesStencilAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const { + // Stencil attachment readback. + if (!pixelBytes) { + return; + } + + auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto& ctx = iglDevice.getD3D12Context(); + auto* device = ctx.getDevice(); + if (!device) { + return; + } + + // Get shared infrastructure. 
+ auto* immediateCommands = iglDevice.getImmediateCommands(); + auto* stagingDevice = iglDevice.getStagingDevice(); + if (!immediateCommands || !stagingDevice) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Shared infrastructure not available\n"); + return; + } + + auto stencilTex = std::static_pointer_cast(desc_.stencilAttachment.texture); + if (!stencilTex) { + return; + } + + ID3D12Resource* stencilRes = stencilTex->getResource(); + if (!stencilRes) { + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t copyLayer = (stencilTex->getType() == TextureType::Cube) ? range.face : range.layer; + + // Detect stencil format and select the appropriate plane slice. + D3D12_RESOURCE_DESC stencilDesc = stencilRes->GetDesc(); + const DXGI_FORMAT stencilFormat = stencilDesc.Format; + + // Determine plane slice based on format: + // - Planar depth-stencil formats: stencil is in plane 1 + // - Pure stencil formats: plane 0 + UINT planeSlice = 0; // Default for non-planar + + if (stencilFormat == DXGI_FORMAT_D24_UNORM_S8_UINT || + stencilFormat == DXGI_FORMAT_D32_FLOAT_S8X24_UINT || + stencilFormat == DXGI_FORMAT_R24G8_TYPELESS || + stencilFormat == DXGI_FORMAT_R32G8X24_TYPELESS) { + // Planar depth-stencil: Plane 0 = depth, Plane 1 = stencil + planeSlice = 1; + } else if (stencilFormat == DXGI_FORMAT_R8_TYPELESS) { + // Pure stencil formats: Plane 0 + planeSlice = 0; + } else { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Unsupported stencil format 0x%X; " + "assuming plane 0. 
May fail for planar formats.\n", + static_cast(stencilFormat)); + IGL_DEBUG_ASSERT(false, "Unsupported stencil format - add to known format list"); + planeSlice = 0; + } + + const UINT numMipLevels = stencilTex->getNumMipLevels(); + const UINT numLayers = stencilTex->getNumLayers(); + const uint32_t subresourceIndex = D3D12CalcSubresource(mipLevel, copyLayer, planeSlice, numMipLevels, numLayers); + + const auto texDims = stencilTex->getDimensions(); + const uint32_t mipWidth = std::max(1u, texDims.width >> mipLevel); + const uint32_t mipHeight = std::max(1u, texDims.height >> mipLevel); + + // Get footprint for the stencil plane (reuse stencilDesc from above) + D3D12_PLACED_SUBRESOURCE_FOOTPRINT footprint{}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + device->GetCopyableFootprints( + &stencilDesc, subresourceIndex, 1, 0, &footprint, &numRows, &rowSizeInBytes, &totalBytes); + + if (totalBytes == 0) { + return; + } + + // Allocate readback buffer from the staging device. + auto stagingBuffer = stagingDevice->allocateReadback(totalBytes); + if (!stagingBuffer.valid || !stagingBuffer.buffer.Get()) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to allocate readback buffer\n"); + return; + } + + // Begin immediate command recording. 
+ Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Transition stencil texture to copy source + const auto previousState = stencilTex->getSubresourceState(mipLevel, copyLayer); + stencilTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, copyLayer); + + // Set up copy locations for stencil plane + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = stagingBuffer.buffer.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLoc.PlacedFootprint = footprint; + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = stencilRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = subresourceIndex; + + D3D12_BOX srcBox{}; + srcBox.left = 0; + srcBox.top = 0; + srcBox.front = 0; + srcBox.right = mipWidth; + srcBox.bottom = mipHeight; + srcBox.back = 1; + + // Copy stencil data + cmdList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, &srcBox); + + // Transition back to previous state + stencilTex->transitionTo(cmdList, previousState, mipLevel, copyLayer); + + // Submit and wait using the shared fence. 
+ uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + stagingDevice->free(stagingBuffer, 0); + return; + } + + // Map readback buffer and copy data + void* mapped = nullptr; + D3D12_RANGE readRange{0, totalBytes}; + if (FAILED(stagingBuffer.buffer->Map(0, &readRange, &mapped))) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Failed to map readback buffer\n"); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + // Validate range bounds before copying. + if (range.width == 0 || range.height == 0 || + range.x + range.width > mipWidth || + range.y + range.height > mipHeight) { + IGL_LOG_ERROR("Framebuffer::copyBytesStencilAttachment - Invalid range: [%u,%u %ux%u] exceeds mip size %ux%u\n", + range.x, range.y, range.width, range.height, mipWidth, mipHeight); + stagingBuffer.buffer->Unmap(0, nullptr); + stagingDevice->free(stagingBuffer, fenceValue); + return; + } + + const uint8_t* srcPtr = static_cast(mapped) + footprint.Offset; + const size_t srcRowPitch = footprint.Footprint.RowPitch; + + // Stencil is always 8-bit (1 byte per pixel) + const size_t bytesPerPixel = 1; + + // Copy with vertical flip (D3D12 textures are top-down, IGL expects bottom-up) + const size_t copyRowBytes = static_cast(range.width) * bytesPerPixel; + const size_t dstRowPitch = bytesPerRow ? 
bytesPerRow : copyRowBytes; + uint8_t* dstPtr = static_cast(pixelBytes); + + for (uint32_t destRow = 0; destRow < range.height; ++destRow) { + const uint32_t gpuRow = range.y + (range.height - 1 - destRow); + if (gpuRow >= mipHeight) { + break; + } + const uint32_t srcRow = mipHeight - 1 - gpuRow; + const uint8_t* src = srcPtr + static_cast(srcRow) * srcRowPitch + + static_cast(range.x) * bytesPerPixel; + std::memcpy(dstPtr + static_cast(destRow) * dstRowPitch, src, copyRowBytes); + } + + stagingBuffer.buffer->Unmap(0, nullptr); + + // Free staging buffer back to the pool. + stagingDevice->free(stagingBuffer, fenceValue); +} + +void Framebuffer::copyTextureColorAttachment(ICommandQueue& cmdQueue, + size_t index, + std::shared_ptr destTexture, + const TextureRangeDesc& range) const { + // Bounds check for index parameter + if (index >= IGL_COLOR_ATTACHMENTS_MAX) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment: index %zu out of bounds (max %u)\n", + index, IGL_COLOR_ATTACHMENTS_MAX); + return; + } + + // Get device and shared infrastructure directly (avoid transient CommandBuffer). 
+ auto* d3dQueueWrapper = dynamic_cast(&cmdQueue); + if (!d3dQueueWrapper) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Invalid command queue\n"); + IGL_DEBUG_ASSERT(false, "D3D12 Framebuffer used with non-D3D12 command queue"); + return; + } + + auto& iglDevice = d3dQueueWrapper->getDevice(); + auto* immediateCommands = iglDevice.getImmediateCommands(); + if (!immediateCommands) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Immediate commands not available\n"); + IGL_DEBUG_ASSERT(false, "D3D12ImmediateCommands not initialized"); + return; + } + + auto srcTex = std::static_pointer_cast(desc_.colorAttachments[index].texture); + auto dstTex = std::static_pointer_cast(destTexture); + if (!srcTex || !dstTex) { + return; + } + ID3D12Resource* srcRes = srcTex->getResource(); + ID3D12Resource* dstRes = dstTex->getResource(); + if (!srcRes || !dstRes) { + return; + } + Result result; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&result); + if (!cmdList || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Failed to begin command list: %s\n", + result.message.c_str()); + return; + } + + const uint32_t mipLevel = range.mipLevel; + const uint32_t layer = range.layer; + const auto srcPrevState = srcTex->getSubresourceState(mipLevel, layer); + srcTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, layer); + dstTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_DEST, mipLevel, layer); + + // Calculate proper subresource indices for array textures and cubemaps + // D3D12CalcSubresource(MipSlice, ArraySlice, PlaneSlice, MipLevels, ArraySize) + const UINT srcMipLevels = srcTex->getNumMipLevels(); + const UINT dstMipLevels = dstTex->getNumMipLevels(); + const UINT srcArraySize = srcTex->getNumLayers(); + const UINT dstArraySize = dstTex->getNumLayers(); + + D3D12_TEXTURE_COPY_LOCATION dstLoc{}; + dstLoc.pResource = dstRes; + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + 
dstLoc.SubresourceIndex = D3D12CalcSubresource(mipLevel, layer, 0, dstMipLevels, dstArraySize); + + D3D12_TEXTURE_COPY_LOCATION srcLoc{}; + srcLoc.pResource = srcRes; + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = D3D12CalcSubresource(mipLevel, layer, 0, srcMipLevels, srcArraySize); + + D3D12_BOX srcBox{}; + srcBox.left = range.x; + srcBox.top = range.y; + srcBox.front = 0; + srcBox.right = range.x + range.width; + srcBox.bottom = range.y + range.height; + srcBox.back = 1; + cmdList->CopyTextureRegion(&dstLoc, range.x, range.y, 0, &srcLoc, &srcBox); + + // Transition dest to shader resource for sampling. Source back to its previous state. + srcTex->transitionTo(cmdList, srcPrevState, mipLevel, layer); + dstTex->transitionTo(cmdList, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, mipLevel, layer); + + // Submit and wait using the shared fence (replaces manual CreateEvent/WaitForSingleObject). + uint64_t fenceValue = immediateCommands->submit(true, &result); + if (fenceValue == 0 || !result.isOk()) { + IGL_LOG_ERROR("Framebuffer::copyTextureColorAttachment - Failed to submit command list: %s\n", + result.message.c_str()); + return; + } +} + +void Framebuffer::updateDrawable(std::shared_ptr texture) { + desc_.colorAttachments[0].texture = std::move(texture); +} + +void Framebuffer::updateDrawable(SurfaceTextures surfaceTextures) { + desc_.colorAttachments[0].texture = std::move(surfaceTextures.color); + desc_.depthAttachment.texture = surfaceTextures.depth; + // Depth and stencil typically share the same texture + desc_.stencilAttachment.texture = std::move(surfaceTextures.depth); +} + +void Framebuffer::updateResolveAttachment(std::shared_ptr texture) { + desc_.colorAttachments[0].resolveTexture = std::move(texture); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Framebuffer.h b/src/igl/d3d12/Framebuffer.h new file mode 100644 index 0000000000..a01649658e --- /dev/null +++ b/src/igl/d3d12/Framebuffer.h @@ -0,0 +1,72 @@ 
+/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class Framebuffer final : public IFramebuffer { + public: + Framebuffer(const FramebufferDesc& desc); + ~Framebuffer() override; + + std::vector getColorAttachmentIndices() const override; + std::shared_ptr getColorAttachment(size_t index) const override; + std::shared_ptr getResolveColorAttachment(size_t index) const override; + std::shared_ptr getDepthAttachment() const override; + std::shared_ptr getResolveDepthAttachment() const override; + std::shared_ptr getStencilAttachment() const override; + FramebufferMode getMode() const override; + bool isSwapchainBound() const override; + + void copyBytesColorAttachment(ICommandQueue& cmdQueue, + size_t index, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const override; + void copyBytesDepthAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const override; + void copyBytesStencilAttachment(ICommandQueue& cmdQueue, + void* pixelBytes, + const TextureRangeDesc& range, + size_t bytesPerRow) const override; + void copyTextureColorAttachment(ICommandQueue& cmdQueue, + size_t index, + std::shared_ptr destTexture, + const TextureRangeDesc& range) const override; + void updateDrawable(std::shared_ptr texture) override; + void updateDrawable(SurfaceTextures surfaceTextures) override; + void updateResolveAttachment(std::shared_ptr texture) override; + + private: + // Simplified readback resources (removed per-attachment allocator/fence; use shared infrastructure). 
+ struct ReadbackResources { + // Cached data for repeated reads from same region + std::vector cachedData; + uint32_t cachedWidth = 0; + uint32_t cachedHeight = 0; + uint32_t cachedMipLevel = 0; + uint32_t cachedLayer = 0; + uint64_t cachedRowPitch = 0; + size_t cachedBytesPerPixel = 0; + UINT64 cachedFrameFenceValue = std::numeric_limits::max(); + bool cacheValid = false; + }; + + mutable std::array readbackCache_{}; + FramebufferDesc desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/HeadlessContext.cpp b/src/igl/d3d12/HeadlessContext.cpp new file mode 100644 index 0000000000..824b9cf267 --- /dev/null +++ b/src/igl/d3d12/HeadlessContext.cpp @@ -0,0 +1,387 @@ +/* + * Minimal headless D3D12 context for unit tests (no swapchain / no HWND). + */ + +#include +#include +#include + +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +HeadlessD3D12Context::~HeadlessD3D12Context() = default; + +Result HeadlessD3D12Context::initializeHeadless(uint32_t width, uint32_t height, + const D3D12ContextConfig& config) { + width_ = width; + height_ = height; + + // Store and validate configuration. + config_ = config; + config_.validate(); + + // Headless mode: No swapchain, so use kMaxFramesInFlight as buffer count (T43) + swapchainBufferCount_ = kMaxFramesInFlight; + renderTargets_.resize(swapchainBufferCount_); + frameContexts_.resize(swapchainBufferCount_); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Initialized with %u frame buffers (no swapchain)\n", + swapchainBufferCount_); + + // Initialize DXGI factory flags and debug configuration (mirrors windowed D3D12Context). 
+ auto getEnvBool = [](const char* name, bool defaultValue) -> bool { + const char* value = std::getenv(name); + if (!value) { + return defaultValue; + } + return (std::string(value) == "1") || (std::string(value) == "true"); + }; + + bool enableDebugLayer = getEnvBool("IGL_D3D12_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableGPUValidation = getEnvBool("IGL_D3D12_GPU_VALIDATION", false); + bool enableDRED = getEnvBool("IGL_D3D12_DRED", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + bool enableDXGIDebug = getEnvBool("IGL_DXGI_DEBUG", +#ifdef _DEBUG + true // Default ON in debug builds +#else + false // Default OFF in release builds +#endif + ); + + IGL_D3D12_LOG_VERBOSE("=== Headless D3D12 Debug Configuration ===\n"); + IGL_D3D12_LOG_VERBOSE(" Debug Layer: %s\n", enableDebugLayer ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" GPU Validation: %s\n", enableGPUValidation ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DRED: %s\n", enableDRED ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE(" DXGI Debug: %s\n", enableDXGIDebug ? "ENABLED" : "DISABLED"); + IGL_D3D12_LOG_VERBOSE("=========================================\n"); + + UINT dxgiFactoryFlags = 0; + + // Enable debug layer (and GPU-based validation) if configured. 
+ if (enableDebugLayer) { + igl::d3d12::ComPtr debugController; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(debugController.GetAddressOf())))) { + debugController->EnableDebugLayer(); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Debug layer ENABLED\n"); + + if (enableDXGIDebug) { + dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: DXGI debug layer ENABLED\n"); + } + + if (enableGPUValidation) { + igl::d3d12::ComPtr debugController1; + if (SUCCEEDED(debugController->QueryInterface(IID_PPV_ARGS(debugController1.GetAddressOf())))) { + debugController1->SetEnableGPUBasedValidation(TRUE); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: GPU-Based Validation ENABLED\n"); + } else { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to enable GPU-Based Validation (requires ID3D12Debug1)\n"); + } + } + } else { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to get D3D12 debug interface - Graphics Tools may not be installed\n"); + } + } else { + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Debug layer DISABLED\n"); + } + + // Enable DRED if configured (Device Removed Extended Data for better crash diagnostics). 
+ if (enableDRED) { + igl::d3d12::ComPtr dredSettings1; + if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(dredSettings1.GetAddressOf())))) { + dredSettings1->SetAutoBreadcrumbsEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetPageFaultEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + dredSettings1->SetBreadcrumbContextEnablement(D3D12_DRED_ENABLEMENT_FORCED_ON); + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: DRED 1.2 fully configured\n"); + } else { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to configure DRED (requires Windows 10 19041+)\n"); + } + } + + // Enable experimental features for headless contexts (unit tests) + // This allows unsigned DXIL shaders to run + // NOTE: This is ONLY called in headless mode (unit tests), NOT in windowed render sessions + { + UUID experimentalFeatures[] = {D3D12ExperimentalShaderModels}; + HRESULT hr = D3D12EnableExperimentalFeatures(1, experimentalFeatures, nullptr, nullptr); + if (SUCCEEDED(hr)) { + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Experimental shader models enabled (allows unsigned DXIL)\n"); + } else { + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Failed to enable experimental features (0x%08X) - signed DXIL required\n", static_cast(hr)); + } + } + + // Create DXGI factory with debug flag in debug builds. 
+ HRESULT hr = CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(dxgiFactory_.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create DXGI factory"); + } + + // Helper function to try creating device with progressive feature level fallback (A-004) + auto tryCreateDeviceWithFallback = + [](IDXGIAdapter1* adapter, D3D_FEATURE_LEVEL& outFeatureLevel) -> igl::d3d12::ComPtr { + const D3D_FEATURE_LEVEL featureLevels[] = { + D3D_FEATURE_LEVEL_12_2, + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + }; + + igl::d3d12::ComPtr device; + for (D3D_FEATURE_LEVEL fl : featureLevels) { + HRESULT hr = D3D12CreateDevice(adapter, fl, IID_PPV_ARGS(device.GetAddressOf())); + if (SUCCEEDED(hr)) { + outFeatureLevel = fl; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Device created with Feature Level %d.%d\n", + (fl >> 12) & 0xF, (fl >> 8) & 0xF); + return device; + } + } + outFeatureLevel = static_cast(0); + return nullptr; + }; + + auto featureLevelToString = [](D3D_FEATURE_LEVEL level) -> const char* { + switch (level) { + case D3D_FEATURE_LEVEL_12_2: return "12.2"; + case D3D_FEATURE_LEVEL_12_1: return "12.1"; + case D3D_FEATURE_LEVEL_12_0: return "12.0"; + case D3D_FEATURE_LEVEL_11_1: return "11.1"; + case D3D_FEATURE_LEVEL_11_0: return "11.0"; + default: return "Unknown"; + } + }; + + igl::d3d12::ComPtr factory6; + (void)dxgiFactory_->QueryInterface(IID_PPV_ARGS(factory6.GetAddressOf())); + + bool created = false; + D3D_FEATURE_LEVEL selectedFeatureLevel = D3D_FEATURE_LEVEL_11_0; + + if (factory6.Get()) { + for (UINT i = 0;; ++i) { + igl::d3d12::ComPtr adapter; + if (FAILED(factory6->EnumAdapterByGpuPreference(i, DXGI_GPU_PREFERENCE_HIGH_PERFORMANCE, + IID_PPV_ARGS(adapter.GetAddressOf())))) { + break; + } + DXGI_ADAPTER_DESC1 desc{}; + adapter->GetDesc1(&desc); + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + D3D_FEATURE_LEVEL featureLevel = static_cast(0); + 
auto device = tryCreateDeviceWithFallback(adapter.Get(), featureLevel); + if (device.Get() != nullptr) { + device_ = device; + created = true; + selectedFeatureLevel = featureLevel; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Selected HW adapter (FL %s)\n", + featureLevelToString(featureLevel)); + break; + } + } + } + if (!created) { + for (UINT i = 0;; ++i) { + igl::d3d12::ComPtr adapter; + if (dxgiFactory_->EnumAdapters1(i, adapter.GetAddressOf()) == DXGI_ERROR_NOT_FOUND) { + break; + } + DXGI_ADAPTER_DESC1 desc{}; + adapter->GetDesc1(&desc); + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) { + continue; + } + + D3D_FEATURE_LEVEL featureLevel = static_cast(0); + auto device = tryCreateDeviceWithFallback(adapter.Get(), featureLevel); + if (device.Get() != nullptr) { + device_ = device; + created = true; + selectedFeatureLevel = featureLevel; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Selected HW adapter via EnumAdapters1 (FL %s)\n", + featureLevelToString(featureLevel)); + break; + } + } + } + if (!created) { + igl::d3d12::ComPtr warp; + if (SUCCEEDED(dxgiFactory_->EnumWarpAdapter(IID_PPV_ARGS(warp.GetAddressOf())))) { + igl::d3d12::ComPtr warp1; + warp->QueryInterface(IID_PPV_ARGS(warp1.GetAddressOf())); + if (warp1.Get()) { + D3D_FEATURE_LEVEL featureLevel = static_cast(0); + auto device = tryCreateDeviceWithFallback(warp1.Get(), featureLevel); + if (device.Get() != nullptr) { + device_ = device; + created = true; + selectedFeatureLevel = featureLevel; + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Using WARP adapter (FL %s)\n", + featureLevelToString(featureLevel)); + } + } + } + } + if (!created) { + return Result(Result::Code::RuntimeError, "Failed to create any D3D12 device"); + } + + // Store selected feature level (A-004) + selectedFeatureLevel_ = selectedFeatureLevel; + +#ifdef _DEBUG + { + igl::d3d12::ComPtr infoQueue; + if (SUCCEEDED(device_->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + 
infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_CORRUPTION, FALSE);
+ infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_ERROR, FALSE);
+ infoQueue->SetBreakOnSeverity(D3D12_MESSAGE_SEVERITY_WARNING, FALSE);
+ }
+ }
+#endif
+
+ // Create command queue
+ D3D12_COMMAND_QUEUE_DESC queueDesc = {};
+ queueDesc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
+ queueDesc.Flags = D3D12_COMMAND_QUEUE_FLAG_NONE;
+ hr = device_->CreateCommandQueue(&queueDesc, IID_PPV_ARGS(commandQueue_.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create command queue");
+ }
+
+ // Create per-frame descriptor heaps (consistent with windowed D3D12Context)
+ // Allow override via env vars for headless tests
+ UINT cbvSrvUavHeapSize = 1024; // default matching Microsoft MiniEngine
+ {
+ char buf[32] = {};
+ const DWORD n = GetEnvironmentVariableA("IGL_D3D12_CBV_SRV_UAV_HEAP_SIZE", buf, sizeof(buf));
+ if (n > 0) {
+ cbvSrvUavHeapSize = std::max<UINT>(256, static_cast<UINT>(strtoul(buf, nullptr, 10)));
+ }
+ }
+
+ UINT samplerHeapSize = kMaxSamplers; // Match D3D12_MAX_SHADER_VISIBLE_SAMPLER_HEAP_SIZE (2048)
+ {
+ char buf[32] = {};
+ const DWORD n = GetEnvironmentVariableA("IGL_D3D12_SAMPLER_HEAP_SIZE", buf, sizeof(buf));
+ if (n > 0) {
+ samplerHeapSize = std::max<UINT>(16, static_cast<UINT>(strtoul(buf, nullptr, 10)));
+ }
+ }
+
+ // Cache descriptor sizes
+ cbvSrvUavDescriptorSize_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV);
+ samplerDescriptorSize_ = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER);
+
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Creating per-frame descriptor heaps (CBV/SRV/UAV=%u, Samplers=%u)...\n",
+ cbvSrvUavHeapSize, samplerHeapSize);
+
+ // Create per-frame shader-visible descriptor heaps and an initial page for each frame. 
+
+ for (UINT i = 0; i < swapchainBufferCount_; i++) {
+ // CBV/SRV/UAV heap per frame - create initial page
+ igl::d3d12::ComPtr<ID3D12DescriptorHeap> initialHeap;
+ D3D12_DESCRIPTOR_HEAP_DESC desc = {};
+ desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
+ desc.NumDescriptors = cbvSrvUavHeapSize;
+ desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+ desc.NodeMask = 0;
+ hr = device_->CreateDescriptorHeap(&desc, IID_PPV_ARGS(initialHeap.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create per-frame CBV/SRV/UAV heap for frame " + std::to_string(i));
+ }
+
+ // Initialize page vector with first page
+ frameContexts_[i].cbvSrvUavHeapPages.clear();
+ frameContexts_[i].cbvSrvUavHeapPages.emplace_back(initialHeap, cbvSrvUavHeapSize);
+ frameContexts_[i].currentCbvSrvUavPageIndex = 0;
+
+ IGL_D3D12_LOG_VERBOSE(" Frame %u: Created CBV/SRV/UAV heap page (%u descriptors)\n", i, cbvSrvUavHeapSize);
+
+ // Sampler heap per frame
+ desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER;
+ desc.NumDescriptors = samplerHeapSize;
+ desc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE;
+ desc.NodeMask = 0;
+ hr = device_->CreateDescriptorHeap(&desc, IID_PPV_ARGS(frameContexts_[i].samplerHeap.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create per-frame Sampler heap for frame " + std::to_string(i));
+ }
+ IGL_D3D12_LOG_VERBOSE(" Frame %u: Created Sampler heap (%u descriptors)\n", i, samplerHeapSize);
+ }
+
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Per-frame descriptor heaps created successfully\n");
+
+ // Create per-frame command allocators (following Microsoft's D3D12HelloFrameBuffering pattern)
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Creating per-frame command allocators...\n");
+ for (UINT i = 0; i < swapchainBufferCount_; i++) {
+ hr = device_->CreateCommandAllocator(
+ D3D12_COMMAND_LIST_TYPE_DIRECT,
+ IID_PPV_ARGS(frameContexts_[i].allocator.GetAddressOf()));
+ if (FAILED(hr)) {
+ return 
Result(Result::Code::RuntimeError, "Failed to create command allocator for frame " + std::to_string(i));
+ }
+ IGL_D3D12_LOG_VERBOSE(" Frame %u: Created command allocator\n", i);
+ }
+ IGL_D3D12_LOG_VERBOSE("HeadlessContext: Per-frame command allocators created successfully\n");
+
+ // Fence for GPU synchronization
+ hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence_.GetAddressOf()));
+ if (FAILED(hr)) {
+ return Result(Result::Code::RuntimeError, "Failed to create fence");
+ }
+
+ // Create descriptor heap manager with the same sizes for consistency.
+ {
+ DescriptorHeapManager::Sizes sz{};
+ sz.cbvSrvUav = cbvSrvUavHeapSize;
+ sz.samplers = samplerHeapSize;
+ sz.rtvs = 64;
+ sz.dsvs = 32;
+ descriptorHeaps_ = std::make_unique<DescriptorHeapManager>();
+ const Result r = descriptorHeaps_->initialize(device_.Get(), sz);
+ if (!r.isOk()) {
+ IGL_LOG_ERROR("HeadlessD3D12Context: Failed to initialize descriptor heap manager: %s\n",
+ r.message.c_str());
+ // Non-fatal: continue without a dedicated manager.
+ descriptorHeaps_.reset();
+ }
+ // Expose manager to base context for consumers that only see D3D12Context
+ heapMgr_ = descriptorHeaps_.get();
+ }
+
+ // Create command signatures for indirect drawing. 
+ IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Creating command signatures...\n"); + Result commandSigResult = createCommandSignatures(); + if (!commandSigResult.isOk()) { + IGL_LOG_ERROR("HeadlessD3D12Context: Failed to create command signatures: %s\n", + commandSigResult.message.c_str()); + return commandSigResult; + } + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Command signatures created successfully\n"); + + IGL_D3D12_LOG_VERBOSE("HeadlessD3D12Context: Initialization complete\n"); + return Result(); +} + +} // namespace igl::d3d12 + diff --git a/src/igl/d3d12/HeadlessContext.h b/src/igl/d3d12/HeadlessContext.h new file mode 100644 index 0000000000..ae385523cb --- /dev/null +++ b/src/igl/d3d12/HeadlessContext.h @@ -0,0 +1,33 @@ +/* + * Minimal headless D3D12 context for unit tests (no swapchain / no HWND). + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class HeadlessD3D12Context final : public D3D12Context { + public: + HeadlessD3D12Context() = default; + ~HeadlessD3D12Context(); + + // Initialize a headless context with default dimensions used only for fallback viewports + // Accepts optional D3D12ContextConfig for configurable sizes. + // NOTE: Headless mode currently uses environment variable overrides and internal defaults + // for descriptor heap sizes. Config parameter is stored for base-class consistency and + // future extension but is not fully wired to all heap creation paths yet. 
+ Result initializeHeadless(uint32_t width = 256, uint32_t height = 256, + const D3D12ContextConfig& config = D3D12ContextConfig::defaultConfig()); + + // Access to descriptor heap manager for tests (may be null on failure) + [[nodiscard]] DescriptorHeapManager* getDescriptorHeapManager() const { return descriptorHeaps_.get(); } + + private: + std::unique_ptr descriptorHeaps_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/PlatformDevice.cpp b/src/igl/d3d12/PlatformDevice.cpp new file mode 100644 index 0000000000..9a4927baae --- /dev/null +++ b/src/igl/d3d12/PlatformDevice.cpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include + +namespace igl::d3d12 { + +PlatformDevice::PlatformDevice(Device& device) : device_(device) {} + +std::shared_ptr PlatformDevice::createTextureFromNativeDepth(uint32_t width, + uint32_t height, + Result* outResult) { + auto& ctx = device_.getD3D12Context(); + + // Create depth texture with D3D12 + TextureDesc depthDesc = TextureDesc::new2D(TextureFormat::Z_UNorm32, + width, + height, + TextureDesc::TextureUsageBits::Attachment, + "Swapchain Depth Texture"); + + // Allocate new depth texture if null or mismatches in size + if (!nativeDepthTexture_ || width != nativeDepthTexture_->getDimensions().width || + height != nativeDepthTexture_->getDimensions().height) { + nativeDepthTexture_ = device_.createTexture(depthDesc, outResult); + } + + Result::setResult(outResult, Result::Code::Ok); + return nativeDepthTexture_; +} + +std::shared_ptr PlatformDevice::createTextureFromNativeDrawable(Result* outResult) { + IGL_D3D12_LOG_VERBOSE("PlatformDevice::createTextureFromNativeDrawable() called\n"); + auto& ctx = device_.getD3D12Context(); + + // Get current back buffer from swapchain + uint32_t backBufferIndex = 
ctx.getCurrentBackBufferIndex(); + ID3D12Resource* backBuffer = ctx.getCurrentBackBuffer(); + + IGL_D3D12_LOG_VERBOSE(" backBufferIndex=%u, backBuffer=%p\n", backBufferIndex, backBuffer); + + if (!backBuffer) { + IGL_LOG_ERROR(" No back buffer available!\n"); + Result::setResult(outResult, Result::Code::RuntimeError, "No back buffer available"); + return nullptr; + } + + // Get back buffer description + D3D12_RESOURCE_DESC desc = backBuffer->GetDesc(); + const auto width = static_cast(desc.Width); + const auto height = static_cast(desc.Height); + + // Determine texture format based on DXGI format + // IMPORTANT: Use dxgiFormatToTextureFormat() to get the CORRECT IGL format + // from the actual D3D12 resource format. Do NOT hardcode RGBA_SRGB! + igl::TextureFormat iglFormat = dxgiFormatToTextureFormat(desc.Format); + if (iglFormat == igl::TextureFormat::Invalid) { + IGL_LOG_ERROR(" Unsupported DXGI format: %d\n", desc.Format); + Result::setResult(outResult, Result::Code::RuntimeError, "Unsupported swapchain DXGI format"); + return nullptr; + } + + // Ensure we have enough cached textures for swapchain images + while (nativeDrawableTextures_.size() <= backBufferIndex) { + nativeDrawableTextures_.push_back(nullptr); + } + + // Allocate new drawable texture if null or mismatches + if (!nativeDrawableTextures_[backBufferIndex] || + width != nativeDrawableTextures_[backBufferIndex]->getDimensions().width || + height != nativeDrawableTextures_[backBufferIndex]->getDimensions().height) { + + TextureDesc textureDesc; + textureDesc.type = TextureType::TwoD; + textureDesc.format = iglFormat; + textureDesc.width = width; + textureDesc.height = height; + textureDesc.depth = 1; + textureDesc.numLayers = 1; + textureDesc.numSamples = 1; + textureDesc.numMipLevels = 1; + textureDesc.usage = TextureDesc::TextureUsageBits::Attachment; + textureDesc.debugName = "Swapchain Back Buffer"; + + nativeDrawableTextures_[backBufferIndex] = Texture::createFromResource( + backBuffer, + 
iglFormat, + textureDesc, + ctx.getDevice(), + ctx.getCommandQueue(), + D3D12_RESOURCE_STATE_PRESENT); + + if (!nativeDrawableTextures_[backBufferIndex]) { + Result::setResult(outResult, Result::Code::RuntimeError, + "Failed to create texture from back buffer"); + return nullptr; + } + } + + Result::setResult(outResult, Result::Code::Ok); + return nativeDrawableTextures_[backBufferIndex]; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/PlatformDevice.h b/src/igl/d3d12/PlatformDevice.h new file mode 100644 index 0000000000..b3a3dd6b72 --- /dev/null +++ b/src/igl/d3d12/PlatformDevice.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class Device; + +/// @brief Implements the igl::IPlatformDevice interface for D3D12 +class PlatformDevice : public IPlatformDevice { + public: + static constexpr igl::PlatformDeviceType kType = igl::PlatformDeviceType::D3D12; + + explicit PlatformDevice(Device& device); + ~PlatformDevice() override = default; + + /// Creates a Depth Texture from the D3D12 swapchain + /// @param width Width of the depth texture + /// @param height Height of the depth texture + /// @param outResult optional result + /// @return pointer to generated Texture or nullptr + std::shared_ptr createTextureFromNativeDepth(uint32_t width, + uint32_t height, + Result* outResult); + + /// Creates a texture from the D3D12 swapchain back buffer + /// @param outResult optional result + /// @return pointer to generated Texture or nullptr + std::shared_ptr createTextureFromNativeDrawable(Result* outResult); + + /// Clear the cached textures + void clear() { + nativeDrawableTextures_.clear(); + nativeDepthTexture_ = nullptr; + } + + protected: + [[nodiscard]] bool isType(PlatformDeviceType t) const noexcept override { + return t == 
kType; + } + + private: + Device& device_; + std::vector> nativeDrawableTextures_; + std::shared_ptr nativeDepthTexture_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/README.md b/src/igl/d3d12/README.md new file mode 100644 index 0000000000..3c39ab9d95 --- /dev/null +++ b/src/igl/d3d12/README.md @@ -0,0 +1,89 @@ +# IGL DirectX 12 Backend + +This directory contains the DirectX 12 backend implementation for IGL (Intermediate Graphics Library). + +## Status + +**Current Phase:** Phase 0 - CMake Setup Complete +**Next Phase:** Phase 1 - Stub Infrastructure + +## Architecture + +The D3D12 backend follows the same architectural pattern as the Vulkan backend, leveraging the 87% similarity between the two APIs. + +### Core Components (To Be Implemented) + +``` +d3d12/ +├── Common.h/cpp - Common types, constants, utilities +├── D3D12Headers.h - D3D12/DXGI includes wrapper +├── Device.h/cpp - ID3D12Device wrapper +├── CommandQueue.h/cpp - ID3D12CommandQueue wrapper +├── CommandBuffer.h/cpp - ID3D12GraphicsCommandList wrapper +├── RenderCommandEncoder.h/cpp - Render command encoding +├── ComputeCommandEncoder.h/cpp - Compute command encoding +├── D3D12Context.h/cpp - Core D3D12 state management +├── RenderPipelineState.h/cpp - ID3D12PipelineState wrapper +├── ComputePipelineState.h/cpp - Compute pipeline state +├── Buffer.h/cpp - ID3D12Resource (buffers) +├── Texture.h/cpp - ID3D12Resource (textures) +├── Sampler.h/cpp - D3D12_SAMPLER_DESC +├── Framebuffer.h/cpp - RTV + DSV collection +├── ShaderModule.h/cpp - DXIL/DXBC bytecode +├── ShaderStages.h/cpp - Shader stage management +├── DescriptorHeapPool.h/cpp - Descriptor heap management +├── RootSignature.h/cpp - Root signature cache +├── DXGISwapchain.h/cpp - DXGI swapchain wrapper +└── D3D12Helpers.h/cpp - Utility functions +``` + +## Build Instructions + +### Prerequisites + +- Windows 10 1909+ or Windows 11 +- Visual Studio 2019 or later +- Windows SDK (10.0.19041.0 or later) +- DirectX Shader Compiler (DXC) 
- included with Windows SDK 10.0.20348.0+ + +### CMake Configuration + +```bash +cmake -DIGL_WITH_D3D12=ON -DIGL_WITH_VULKAN=OFF -DIGL_WITH_OPENGL=OFF .. +``` + +Or with other backends enabled: + +```bash +cmake -DIGL_WITH_D3D12=ON .. +``` + +### Build + +```bash +cmake --build . --config Release +``` + +## Implementation Plan + +See [DIRECTX12_MIGRATION_PLAN.md](../../../DIRECTX12_MIGRATION_PLAN.md) for the complete migration plan. + +### Progress + +- [x] Phase 0: CMake Setup + - [x] DirectX 12 Agility SDK headers + - [x] CMake configuration +- [ ] Phase 1: Stub Infrastructure (13 stub classes) +- [ ] Phase 2: EmptySession (Clear screen) +- [ ] Phase 3: TinyMeshSession (Triangle rendering) +- [ ] Phase 4: three-cubes (Full demo) + +## References + +- [DirectX 12 Programming Guide](https://learn.microsoft.com/en-us/windows/win32/direct3d12/directx-12-programming-guide) +- [DirectX Shader Compiler](https://github.com/microsoft/DirectXShaderCompiler) +- [DirectX-Headers](https://github.com/microsoft/DirectX-Headers) + +## License + +Licensed under the MIT License. See [LICENSE](../../../LICENSE.md) for details. diff --git a/src/igl/d3d12/RenderCommandEncoder.cpp b/src/igl/d3d12/RenderCommandEncoder.cpp new file mode 100644 index 0000000000..fef1c19489 --- /dev/null +++ b/src/igl/d3d12/RenderCommandEncoder.cpp @@ -0,0 +1,1827 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +RenderCommandEncoder::RenderCommandEncoder(CommandBuffer& commandBuffer, + const std::shared_ptr& framebuffer) + : IRenderCommandEncoder(nullptr), + commandBuffer_(commandBuffer), + commandList_(commandBuffer.getCommandList()), + resourcesBinder_(commandBuffer, false /* isCompute */), + framebuffer_(framebuffer) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::RenderCommandEncoder() - Lightweight initialization\n"); +} + +void RenderCommandEncoder::begin(const RenderPassDesc& renderPass) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::begin() - command list is closed or null\n"); + return; + } + // Enforce single-call semantics: begin() allocates descriptors and cannot be safely called twice. + IGL_DEBUG_ASSERT(!hasBegun_, "begin() called multiple times - this will cause resource leaks"); + hasBegun_ = true; + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::begin() - START\n"); + auto& context = commandBuffer_.getContext(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Got context\n"); + + // Set descriptor heaps for this command list. + // Must use per-frame heaps from D3D12Context, not DescriptorHeapManager. + // Per-frame heaps are isolated per frame to prevent descriptor conflicts. + DescriptorHeapManager* heapMgr = context.getDescriptorHeapManager(); + + // Use active heap from frame context, not the legacy accessor. + // This ensures we bind the currently active page, not hardcoded page 0. 
+ auto& frameCtx = context.getFrameContexts()[context.getCurrentFrameIndex()]; + cbvSrvUavHeap_ = frameCtx.activeCbvSrvUavHeap.Get(); + samplerHeap_ = frameCtx.samplerHeap.Get(); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Using active per-frame heap from FrameContext\n"); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: CBV/SRV/UAV heap (active) = %p\n", cbvSrvUavHeap_); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Sampler heap = %p\n", samplerHeap_); + + // Bind active heap (may be page 0 or a later page). + ID3D12DescriptorHeap* heaps[] = {cbvSrvUavHeap_, samplerHeap_}; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Setting descriptor heaps...\n"); + commandList_->SetDescriptorHeaps(2, heaps); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Descriptor heaps set\n"); + + // Create RTV from framebuffer if provided; otherwise fallback to swapchain RTV + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Setting up RTV...\n"); + D3D12_CPU_DESCRIPTOR_HANDLE rtv = {}; + std::vector rtvs; + rtvIndices_.clear(); + bool usedOffscreenRTV = false; + // Note: heapMgr already retrieved above for setting descriptor heaps + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: DescriptorHeapManager = %p\n", heapMgr); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Checking framebuffer_=%p\n", framebuffer_.get()); + // Only create offscreen RTV if we have DescriptorHeapManager AND it's not a swapchain texture + // Swapchain textures should use context.getCurrentRTV() directly + if (framebuffer_ && framebuffer_->getColorAttachment(0) && heapMgr) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Has framebuffer with color attachment AND DescriptorHeapManager\n"); + ID3D12Device* device = context.getDevice(); + if (device) { + // Create RTVs for each color attachment + const size_t count = std::min(framebuffer_->getColorAttachmentIndices().size(), D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT count = %zu (indices.size=%zu)\n", count, 
framebuffer_->getColorAttachmentIndices().size()); + for (size_t i = 0; i < count; ++i) { + auto tex = std::static_pointer_cast(framebuffer_->getColorAttachment(i)); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT loop i=%zu, tex=%p, resource=%p\n", i, tex.get(), tex ? tex->getResource() : nullptr); + if (!tex || !tex->getResource()) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT loop i=%zu SKIPPED (null tex or resource)\n", i); + continue; + } + const bool hasAttachmentDesc = (i < renderPass.colorAttachments.size()); + // CRITICAL: Extract values before using in expressions to avoid MSVC debug iterator checks + const uint32_t mipLevel = hasAttachmentDesc ? renderPass.colorAttachments[i].mipLevel : 0; + const uint32_t attachmentLayer = hasAttachmentDesc ? renderPass.colorAttachments[i].layer : 0; + const uint32_t attachmentFace = hasAttachmentDesc ? renderPass.colorAttachments[i].face : 0; + // Allocate RTV + uint32_t rtvIdx = heapMgr->allocateRTV(); + if (rtvIdx == UINT32_MAX) { + IGL_LOG_ERROR("RenderCommandEncoder: Failed to allocate RTV descriptor (heap exhausted)\n"); + continue; + } + // Check return value from getHandle. + D3D12_CPU_DESCRIPTOR_HANDLE rtvHandle; + if (!heapMgr->getRTVHandle(rtvIdx, &rtvHandle)) { + IGL_LOG_ERROR("RenderCommandEncoder: Failed to get RTV handle for index %u\n", rtvIdx); + heapMgr->freeRTV(rtvIdx); + continue; + } + rtvIndices_.push_back(rtvIdx); + // Create RTV view - use the resource's actual format to avoid SRGB/UNORM mismatches + D3D12_RESOURCE_DESC resourceDesc = tex->getResource()->GetDesc(); + D3D12_RENDER_TARGET_VIEW_DESC rdesc = {}; + rdesc.Format = resourceDesc.Format; // Use actual D3D12 resource format, not IGL format + + // Determine if this is a texture array or texture view. + // Cube textures are stored as 2D array resources (6 slices per cube). + const bool isView = tex->isView(); + const bool isCubeTexture = (tex->getType() == TextureType::Cube); + const uint32_t arraySliceOffset = isView ? 
tex->getArraySliceOffset() : 0; + const uint32_t totalArraySlices = + isView ? tex->getNumArraySlicesInView() : resourceDesc.DepthOrArraySize; + const bool isArrayTexture = !isCubeTexture && + ((isView && tex->getNumArraySlicesInView() > 0) || + (!isView && resourceDesc.DepthOrArraySize > 1)); + uint32_t targetArraySlice = attachmentLayer; + if (isCubeTexture) { + // Cube textures map faces onto 2D array slices. See Texture Subresources (D3D12). + const uint32_t clampedFace = std::min(attachmentFace, 5u); + const uint32_t cubesInView = (totalArraySlices + 5u) / 6u; + const uint32_t clampedCubeIndex = + std::min(attachmentLayer, (cubesInView == 0u) ? 0u : (cubesInView - 1u)); + const uint32_t baseSlice = arraySliceOffset + clampedCubeIndex * 6u; + const uint32_t maxSlice = + (totalArraySlices > 0u) ? (arraySliceOffset + totalArraySlices - 1u) + : arraySliceOffset; + targetArraySlice = std::min(baseSlice + clampedFace, maxSlice); + } + + // Set view dimension based on sample count (MSAA support) and array type + if (resourceDesc.SampleDesc.Count > 1) { + // MSAA texture + if (isCubeTexture) { + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + rdesc.Texture2DMSArray.FirstArraySlice = targetArraySlice; + rdesc.Texture2DMSArray.ArraySize = 1; + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Creating MSAA cube RTV with %u samples, face %u, cube index %u (array slice %u)\n", + resourceDesc.SampleDesc.Count, + attachmentFace, + attachmentLayer, + rdesc.Texture2DMSArray.FirstArraySlice); + } else if (isArrayTexture) { + // MSAA texture array - use TEXTURE2DMSARRAY view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMSARRAY; + if (isView) { + rdesc.Texture2DMSArray.FirstArraySlice = tex->getArraySliceOffset(); + rdesc.Texture2DMSArray.ArraySize = tex->getNumArraySlicesInView(); + } else { + rdesc.Texture2DMSArray.FirstArraySlice = attachmentLayer; + rdesc.Texture2DMSArray.ArraySize = 1; // Render to single layer + } + 
IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating MSAA array RTV with %u samples, layer %u\n", + resourceDesc.SampleDesc.Count, rdesc.Texture2DMSArray.FirstArraySlice); + } else { + // MSAA non-array texture - use TEXTURE2DMS view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DMS; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating MSAA RTV with %u samples\n", resourceDesc.SampleDesc.Count); + } + } else { + // Non-MSAA texture + if (isCubeTexture) { + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + rdesc.Texture2DArray.MipSlice = mipLevel; + rdesc.Texture2DArray.PlaneSlice = 0; + rdesc.Texture2DArray.FirstArraySlice = targetArraySlice; + rdesc.Texture2DArray.ArraySize = 1; + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Creating cube RTV, mip %u, face %u, cube index %u (array slice %u)\n", + mipLevel, + attachmentFace, + attachmentLayer, + rdesc.Texture2DArray.FirstArraySlice); + } else if (isArrayTexture) { + // Texture array - use TEXTURE2DARRAY view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2DARRAY; + // CRITICAL: Extract value before assignment to avoid MSVC debug iterator bounds check + const uint32_t mipSliceArray = (i < renderPass.colorAttachments.size()) ? 
renderPass.colorAttachments[i].mipLevel : 0; + rdesc.Texture2DArray.MipSlice = mipSliceArray; + rdesc.Texture2DArray.PlaneSlice = 0; + if (isView) { + rdesc.Texture2DArray.FirstArraySlice = tex->getArraySliceOffset(); + rdesc.Texture2DArray.ArraySize = tex->getNumArraySlicesInView(); + } else { + rdesc.Texture2DArray.FirstArraySlice = attachmentLayer; + rdesc.Texture2DArray.ArraySize = 1; // Render to single layer + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating array RTV, mip %u, layer %u\n", + rdesc.Texture2DArray.MipSlice, rdesc.Texture2DArray.FirstArraySlice); + } else { + // Non-array texture - use standard TEXTURE2D view dimension + rdesc.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + // CRITICAL: Extract value before assignment to avoid MSVC debug iterator bounds check + const uint32_t mipSlice2D = (i < renderPass.colorAttachments.size()) ? renderPass.colorAttachments[i].mipLevel : 0; + rdesc.Texture2D.MipSlice = mipSlice2D; + rdesc.Texture2D.PlaneSlice = 0; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating RTV, mip %u\n", rdesc.Texture2D.MipSlice); + } + } + // Pre-creation validation. + IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(tex->getResource() != nullptr, "Texture resource is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(rtvHandle.ptr != 0, "RTV descriptor handle is invalid"); + + device->CreateRenderTargetView(tex->getResource(), &rdesc, rtvHandle); + + // Transition to RENDER_TARGET + // IMPORTANT: For multi-frame rendering, offscreen targets may have been transitioned to + // PIXEL_SHADER_RESOURCE in the previous frame's endEncoding(). We MUST transition them + // back to RENDER_TARGET at the start of each render pass. + // The transitionTo() function checks current state and only transitions if needed. + const uint32_t transitionSlice = + isCubeTexture ? 
targetArraySlice : attachmentLayer; + tex->transitionTo( + commandList_, D3D12_RESOURCE_STATE_RENDER_TARGET, mipLevel, transitionSlice); + + // Clear if requested + if (hasAttachmentDesc && renderPass.colorAttachments[i].loadAction == LoadAction::Clear) { + const auto& clearColor = renderPass.colorAttachments[i].clearColor; + const float color[] = {clearColor.r, clearColor.g, clearColor.b, clearColor.a}; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Clearing MRT attachment %zu with color (%.2f, %.2f, %.2f, %.2f)\n", + i, color[0], color[1], color[2], color[3]); + commandList_->ClearRenderTargetView(rtvHandle, color, 0, nullptr); + } else { + // CRITICAL: Must extract value completely outside ternary to avoid MSVC debug iterator check + int loadActionDbg = -1; + if (i < renderPass.colorAttachments.size()) { + loadActionDbg = (int)renderPass.colorAttachments[i].loadAction; + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: NOT clearing MRT attachment %zu (loadAction=%d, hasAttachment=%d)\n", + i, loadActionDbg, i < renderPass.colorAttachments.size()); + } + rtvs.push_back(rtvHandle); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT Created RTV #%zu, total RTVs now=%zu\n", i, rtvs.size()); + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: MRT Total RTVs created: %zu\n", rtvs.size()); + if (!rtvs.empty()) { + rtv = rtvs[0]; + usedOffscreenRTV = true; + } + } + } + if (!usedOffscreenRTV) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Using swapchain back buffer\n"); + auto* backBuffer = context.getCurrentBackBuffer(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Got back buffer=%p\n", backBuffer); + if (!backBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder: No back buffer available\n"); + return; + } + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Transitioning back buffer to RENDER_TARGET\n"); + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + 
barrier.Transition.pResource = backBuffer; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_PRESENT; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_RENDER_TARGET; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + commandList_->ResourceBarrier(1, &barrier); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Resource barrier executed\n"); + + if (!renderPass.colorAttachments.empty() && + renderPass.colorAttachments[0].loadAction == LoadAction::Clear) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Clearing render target\n"); + const auto& cc = renderPass.colorAttachments[0].clearColor; + const float col[] = {cc.r, cc.g, cc.b, cc.a}; + D3D12_CPU_DESCRIPTOR_HANDLE swapRtv = context.getCurrentRTV(); + commandList_->ClearRenderTargetView(swapRtv, col, 0, nullptr); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Clear complete\n"); + } + rtv = context.getCurrentRTV(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Got RTV handle\n"); + } + + // Create/Bind depth-stencil view if we have a framebuffer with a depth attachment + const bool hasDepth = (framebuffer_ && framebuffer_->getDepthAttachment()); + if (hasDepth) { + auto depthTex = std::static_pointer_cast(framebuffer_->getDepthAttachment()); + ID3D12Device* device = context.getDevice(); + if (device && depthTex && depthTex->getResource()) { + if (heapMgr) { + dsvIndex_ = heapMgr->allocateDSV(); + // Check return value from getHandle. 
+ if (!heapMgr->getDSVHandle(dsvIndex_, &dsvHandle_)) { + IGL_LOG_ERROR("RenderCommandEncoder: Failed to get DSV handle for index %u\n", dsvIndex_); + heapMgr->freeDSV(dsvIndex_); + dsvIndex_ = UINT32_MAX; + return; + } + } else { + // Fallback: transient heap + igl::d3d12::ComPtr tmpHeap; + D3D12_DESCRIPTOR_HEAP_DESC dsvHeapDesc = {}; + dsvHeapDesc.NumDescriptors = 1; + dsvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_DSV; + dsvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_NONE; + device->CreateDescriptorHeap(&dsvHeapDesc, IID_PPV_ARGS(tmpHeap.GetAddressOf())); + dsvHandle_ = tmpHeap->GetCPUDescriptorHandleForHeapStart(); + } + + // Create DSV description + D3D12_DEPTH_STENCIL_VIEW_DESC dsvDesc = {}; + dsvDesc.Format = textureFormatToDXGIFormat(depthTex->getFormat()); + dsvDesc.Flags = D3D12_DSV_FLAG_NONE; + + // Set view dimension based on sample count (MSAA support) + D3D12_RESOURCE_DESC depthResourceDesc = depthTex->getResource()->GetDesc(); + if (depthResourceDesc.SampleDesc.Count > 1) { + // MSAA depth texture - use TEXTURE2DMS view dimension + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2DMS; + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Creating MSAA DSV with %u samples\n", depthResourceDesc.SampleDesc.Count); + } else { + // Non-MSAA depth texture - use standard TEXTURE2D view dimension + dsvDesc.ViewDimension = D3D12_DSV_DIMENSION_TEXTURE2D; + dsvDesc.Texture2D.MipSlice = renderPass.depthAttachment.mipLevel; + } + + // Transition the entire depth resource to DEPTH_WRITE before clearing. + // Some render paths (e.g. mipmapped depth) may have touched multiple + // subresources; using transitionAll ensures the clear sees a valid state + // for every subresource referenced by this DSV. + depthTex->transitionAll(commandList_, D3D12_RESOURCE_STATE_DEPTH_WRITE); + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateDepthStencilView"); + IGL_DEBUG_ASSERT(depthTex->getResource() != nullptr, "Depth texture resource is null"); + IGL_DEBUG_ASSERT(dsvHandle_.ptr != 0, "DSV descriptor handle is invalid"); + + device->CreateDepthStencilView(depthTex->getResource(), &dsvDesc, dsvHandle_); + + // Clear depth and/or stencil if requested + const bool clearDepth = (renderPass.depthAttachment.loadAction == LoadAction::Clear); + const bool clearStencil = (renderPass.stencilAttachment.loadAction == LoadAction::Clear); + + if (clearDepth || clearStencil) { + D3D12_CLEAR_FLAGS clearFlags = static_cast(0); + if (clearDepth) { + clearFlags = static_cast(clearFlags | D3D12_CLEAR_FLAG_DEPTH); + } + if (clearStencil) { + clearFlags = static_cast(clearFlags | D3D12_CLEAR_FLAG_STENCIL); + } + + const float depthClearValue = renderPass.depthAttachment.clearDepth; + const UINT8 stencilClearValue = static_cast(renderPass.stencilAttachment.clearStencil); + + commandList_->ClearDepthStencilView(dsvHandle_, clearFlags, depthClearValue, stencilClearValue, 0, nullptr); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Cleared depth-stencil (depth=%d, stencil=%d, depthVal=%.2f, stencilVal=%u)\n", + clearDepth, clearStencil, depthClearValue, stencilClearValue); + } + + // Bind RTV + DSV (or DSV-only for depth-only rendering) + if (!rtvs.empty()) { + // Multi-render target or offscreen rendering with color+depth + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with %zu RTVs + DSV\n", rtvs.size()); + commandList_->OMSetRenderTargets(static_cast(rtvs.size()), rtvs.data(), FALSE, &dsvHandle_); + } else if (usedOffscreenRTV) { + // Single offscreen render target with depth + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with 1 RTV + DSV\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, &dsvHandle_); + } else if (!framebuffer_->getColorAttachment(0)) { + // Depth-only rendering (no color attachments) - shadow 
mapping scenario + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Depth-only rendering - OMSetRenderTargets with 0 RTVs + DSV\n"); + commandList_->OMSetRenderTargets(0, nullptr, FALSE, &dsvHandle_); + } else { + // Swapchain backbuffer with depth + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with swapchain RTV + DSV\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, &dsvHandle_); + } + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Binding RTV without DSV (no resource)\n"); + if (!rtvs.empty()) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with %zu RTVs, no DSV\n", rtvs.size()); + commandList_->OMSetRenderTargets(static_cast(rtvs.size()), rtvs.data(), FALSE, nullptr); + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with 1 RTV, no DSV\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, nullptr); + } + } + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Binding RTV without DSV (no hasDepth)\n"); + if (!rtvs.empty()) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with %zu RTVs, no DSV (no hasDepth)\n", rtvs.size()); + commandList_->OMSetRenderTargets(static_cast(rtvs.size()), rtvs.data(), FALSE, nullptr); + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: OMSetRenderTargets with 1 RTV, no DSV (no hasDepth)\n"); + commandList_->OMSetRenderTargets(1, &rtv, FALSE, nullptr); + } + } + + // Set a default full-screen viewport/scissor if caller forgets. Prefer framebuffer attachments. + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Setting default viewport...\n"); + if (framebuffer_) { + // Prefer color attachment if present; otherwise fall back to depth attachment. 
+ auto colorTex = std::static_pointer_cast(framebuffer_->getColorAttachment(0)); + auto depthTex = std::static_pointer_cast(framebuffer_->getDepthAttachment()); + + Dimensions dims{}; + if (colorTex && colorTex->getResource()) { + dims = colorTex->getDimensions(); + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Using framebuffer color attachment dimensions: %ux%u\n", + dims.width, + dims.height); + } else if (depthTex && depthTex->getResource()) { + dims = depthTex->getDimensions(); + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Using framebuffer depth attachment dimensions: %ux%u\n", + dims.width, + dims.height); + } + + if (dims.width > 0 && dims.height > 0) { + D3D12_VIEWPORT vp{}; + vp.TopLeftX = 0; + vp.TopLeftY = 0; + vp.Width = static_cast(dims.width); + vp.Height = static_cast(dims.height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + commandList_->RSSetViewports(1, &vp); + + D3D12_RECT sc{}; + sc.left = 0; + sc.top = 0; + sc.right = static_cast(dims.width); + sc.bottom = static_cast(dims.height); + commandList_->RSSetScissorRects(1, &sc); + + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Set default viewport/scissor to %ux%u\n", + dims.width, + dims.height); + } else { + IGL_LOG_ERROR( + "RenderCommandEncoder: Framebuffer has no valid color or depth attachment dimensions; " + "falling back to back buffer viewport.\n"); + auto* backBufferRes = context.getCurrentBackBuffer(); + if (backBufferRes) { + D3D12_RESOURCE_DESC bbDesc = backBufferRes->GetDesc(); + D3D12_VIEWPORT vp{}; + vp.TopLeftX = 0; + vp.TopLeftY = 0; + vp.Width = static_cast(bbDesc.Width); + vp.Height = static_cast(bbDesc.Height); + vp.MinDepth = 0.0f; + vp.MaxDepth = 1.0f; + commandList_->RSSetViewports(1, &vp); + + D3D12_RECT scissor{}; + scissor.left = 0; + scissor.top = 0; + scissor.right = static_cast(bbDesc.Width); + scissor.bottom = static_cast(bbDesc.Height); + commandList_->RSSetScissorRects(1, &scissor); + + IGL_D3D12_LOG_VERBOSE( + "RenderCommandEncoder: Fallback 
viewport/scissor to back buffer %llux%u\n", + bbDesc.Width, + bbDesc.Height); + } else { + IGL_LOG_ERROR("RenderCommandEncoder: No back buffer available!\n"); + } + } + } else { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Using back buffer\n"); + auto* backBufferRes = context.getCurrentBackBuffer(); + if (backBufferRes) { + D3D12_RESOURCE_DESC bbDesc = backBufferRes->GetDesc(); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Back buffer dimensions: %llux%u\n", bbDesc.Width, bbDesc.Height); + D3D12_VIEWPORT vp = {}; vp.TopLeftX=0; vp.TopLeftY=0; vp.Width=(float)bbDesc.Width; vp.Height=(float)bbDesc.Height; vp.MinDepth=0; vp.MaxDepth=1; + commandList_->RSSetViewports(1, &vp); + D3D12_RECT scissor = {}; scissor.left=0; scissor.top=0; scissor.right=(LONG)bbDesc.Width; scissor.bottom=(LONG)bbDesc.Height; commandList_->RSSetScissorRects(1, &scissor); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Set default viewport/scissor to back buffer %llux%u\n", bbDesc.Width, bbDesc.Height); + } else { + IGL_LOG_ERROR("RenderCommandEncoder: No back buffer available!\n"); + } + } + + // Capture actual framebuffer formats for dynamic PSO selection (Vulkan-style pattern) + // This enables PSO variants to be selected at draw time based on framebuffer formats + dynamicState_ = D3D12RenderPipelineDynamicState(); // Reset to UNKNOWN + + // Capture RTV formats from actual framebuffer resources + // IMPORTANT: Use Texture::getFormat() not resource format - texture views may have different formats + if (framebuffer_) { + const size_t numColorAttachments = std::min( + framebuffer_->getColorAttachmentIndices().size(), + D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + for (size_t i = 0; i < numColorAttachments; ++i) { + auto colorTex = std::static_pointer_cast( + framebuffer_->getColorAttachment(i)); + if (colorTex) { + // Use getFormat() which returns the view format, not the resource format + dynamicState_.rtvFormats[i] = textureFormatToDXGIFormat(colorTex->getFormat()); + } + } + + // Capture 
DSV format + auto depthTex = std::static_pointer_cast( + framebuffer_->getDepthAttachment()); + if (depthTex) { + // Use getFormat() which returns the view format, not the resource format + dynamicState_.dsvFormat = textureFormatToDXGIFormat(depthTex->getFormat()); + } + } else { + // Fallback: swapchain back buffer + auto* backBuffer = context.getCurrentBackBuffer(); + if (backBuffer) { + D3D12_RESOURCE_DESC bbDesc = backBuffer->GetDesc(); + dynamicState_.rtvFormats[0] = bbDesc.Format; + } + } + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::begin() - Captured framebuffer formats: RTV[0]=%d, DSV=%d\n", + static_cast(dynamicState_.rtvFormats[0]), + static_cast(dynamicState_.dsvFormat)); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::begin() - Complete!\n"); +} + +void RenderCommandEncoder::endEncoding() { + auto& context2 = commandBuffer_.getContext(); + + // ========== MSAA RESOLVE OPERATION ========== + // Resolve MSAA textures to non-MSAA textures before transitioning resources + // This must happen AFTER rendering but BEFORE the final state transitions + if (framebuffer_) { + // Resolve color attachments + const auto indices = framebuffer_->getColorAttachmentIndices(); + for (size_t i : indices) { + auto msaaAttachment = std::static_pointer_cast(framebuffer_->getColorAttachment(i)); + auto resolveAttachment = std::static_pointer_cast(framebuffer_->getResolveColorAttachment(i)); + + // Check if both MSAA source and resolve target exist + if (msaaAttachment && resolveAttachment && + msaaAttachment->getResource() && resolveAttachment->getResource()) { + + // Verify MSAA source has samples > 1 and resolve target has samples == 1 + D3D12_RESOURCE_DESC msaaDesc = msaaAttachment->getResource()->GetDesc(); + D3D12_RESOURCE_DESC resolveDesc = resolveAttachment->getResource()->GetDesc(); + + if (msaaDesc.SampleDesc.Count > 1 && resolveDesc.SampleDesc.Count == 1) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - Resolving MSAA color attachment %zu (%u samples 
-> 1 sample)\n", + i, msaaDesc.SampleDesc.Count); + + // Transition MSAA texture to RESOLVE_SOURCE state + msaaAttachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + + // Transition resolve texture to RESOLVE_DEST state + resolveAttachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_RESOLVE_DEST); + + // Perform resolve operation: converts multi-sample texture to single-sample + // This averages all samples in the MSAA texture and writes to the resolve texture + commandList_->ResolveSubresource( + resolveAttachment->getResource(), // pDstResource (non-MSAA) + 0, // DstSubresource (mip 0, layer 0) + msaaAttachment->getResource(), // pSrcResource (MSAA) + 0, // SrcSubresource (mip 0, layer 0) + msaaDesc.Format // Format (must be compatible) + ); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - MSAA color resolve completed for attachment %zu\n", i); + + // Transition resolve texture to PIXEL_SHADER_RESOURCE for subsequent use + resolveAttachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + + // Resolve depth attachment if present + auto msaaDepth = std::static_pointer_cast(framebuffer_->getDepthAttachment()); + auto resolveDepth = std::static_pointer_cast(framebuffer_->getResolveDepthAttachment()); + + if (msaaDepth && resolveDepth && + msaaDepth->getResource() && resolveDepth->getResource()) { + + D3D12_RESOURCE_DESC msaaDesc = msaaDepth->getResource()->GetDesc(); + D3D12_RESOURCE_DESC resolveDesc = resolveDepth->getResource()->GetDesc(); + + if (msaaDesc.SampleDesc.Count > 1 && resolveDesc.SampleDesc.Count == 1) { + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - Resolving MSAA depth attachment (%u samples -> 1 sample)\n", + msaaDesc.SampleDesc.Count); + + // Transition depth textures to appropriate resolve states + msaaDepth->transitionAll(commandList_, D3D12_RESOURCE_STATE_RESOLVE_SOURCE); + resolveDepth->transitionAll(commandList_, 
D3D12_RESOURCE_STATE_RESOLVE_DEST); + + // Resolve depth buffer + commandList_->ResolveSubresource( + resolveDepth->getResource(), + 0, + msaaDepth->getResource(), + 0, + msaaDesc.Format + ); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::endEncoding - MSAA depth resolve completed\n"); + + // Transition resolved depth to shader resource for sampling + resolveDepth->transitionAll(commandList_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + // ========== END MSAA RESOLVE OPERATION ========== + + // For offscreen framebuffers (MRT targets), transition all attachments to PIXEL_SHADER_RESOURCE + // so they can be sampled in subsequent passes + if (framebuffer_ && framebuffer_->getColorAttachment(0)) { + auto swapColor = std::static_pointer_cast(framebuffer_->getColorAttachment(0)); + + // Check if this is the swapchain backbuffer + const bool isSwapchainTarget = (swapColor && swapColor->getResource() == context2.getCurrentBackBuffer()); + + if (isSwapchainTarget) { + // Swapchain framebuffer: transition to PRESENT + swapColor->transitionAll(commandList_, D3D12_RESOURCE_STATE_PRESENT); + } else { + // Offscreen framebuffer (e.g., MRT targets): transition all color attachments to PIXEL_SHADER_RESOURCE + // This allows the render targets to be sampled in subsequent rendering passes (multi-frame support) + const auto indices = framebuffer_->getColorAttachmentIndices(); + for (size_t i : indices) { + auto attachment = std::static_pointer_cast(framebuffer_->getColorAttachment(i)); + if (attachment && attachment->getResource()) { + attachment->transitionAll(commandList_, D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + } + } + } + } else { + // No framebuffer provided - using swapchain directly + auto* backBuffer = context2.getCurrentBackBuffer(); + if (backBuffer) { + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = backBuffer; + 
barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_RENDER_TARGET; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_PRESENT; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + commandList_->ResourceBarrier(1, &barrier); + } + } + + // G-001: Flush any remaining barriers before ending encoding + flushBarriers(); + + // Return RTV/DSV indices to the descriptor heap manager if used + if (auto* mgr = context2.getDescriptorHeapManager()) { + if (!rtvIndices_.empty()) { + for (auto idx : rtvIndices_) { + mgr->freeRTV(idx); + } + rtvIndices_.clear(); + } + if (dsvIndex_ != UINT32_MAX) { + mgr->freeDSV(dsvIndex_); + dsvIndex_ = UINT32_MAX; + } + } +} + +void RenderCommandEncoder::bindViewport(const Viewport& viewport) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindViewport called on closed command list\n"); + return; + } + IGL_D3D12_LOG_VERBOSE("bindViewport called: x=%.1f, y=%.1f, w=%.1f, h=%.1f\n", + viewport.x, viewport.y, viewport.width, viewport.height); + D3D12_VIEWPORT vp = {}; + vp.TopLeftX = viewport.x; + vp.TopLeftY = viewport.y; + vp.Width = viewport.width; + vp.Height = viewport.height; + vp.MinDepth = viewport.minDepth; + vp.MaxDepth = viewport.maxDepth; + commandList_->RSSetViewports(1, &vp); +} + +void RenderCommandEncoder::bindScissorRect(const ScissorRect& rect) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindScissorRect called on closed command list\n"); + return; + } + D3D12_RECT scissor = {}; + scissor.left = static_cast(rect.x); + scissor.top = static_cast(rect.y); + scissor.right = static_cast(rect.x + rect.width); + scissor.bottom = static_cast(rect.y + rect.height); + commandList_->RSSetScissorRects(1, &scissor); +} + +void RenderCommandEncoder::bindRenderPipelineState( + const std::shared_ptr& pipelineState) { + if (!commandBuffer_.isRecording() || !commandList_) { + 
IGL_LOG_ERROR("RenderCommandEncoder::bindRenderPipelineState called on closed command list\n"); + return; + } + if (!pipelineState) { + IGL_LOG_ERROR("bindRenderPipelineState: pipelineState is null!\n"); + return; + } + + auto* d3dPipelineState = static_cast(pipelineState.get()); + + // Cache pipeline state for dynamic PSO variant selection at draw time + currentRenderPipelineState_ = d3dPipelineState; + + // Get PSO variant matching actual framebuffer formats (Vulkan-style dynamic selection) + auto* pso = d3dPipelineState->getPipelineState(dynamicState_, commandBuffer_.getDevice()); + auto* rootSig = d3dPipelineState->getRootSignature(); + + if (!pso) { + IGL_LOG_ERROR("bindRenderPipelineState: PSO is null!\n"); + return; + } + if (!rootSig) { + IGL_LOG_ERROR("bindRenderPipelineState: Root signature is null!\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("bindRenderPipelineState: PSO=%p, RootSig=%p\n", pso, rootSig); + + commandList_->SetPipelineState(pso); + commandList_->SetGraphicsRootSignature(rootSig); + + // Set primitive topology from the pipeline state + D3D_PRIMITIVE_TOPOLOGY topology = d3dPipelineState->getPrimitiveTopology(); + IGL_D3D12_LOG_VERBOSE("bindRenderPipelineState: Setting topology=%d\n", (int)topology); + commandList_->IASetPrimitiveTopology(topology); + + // Cache vertex stride from pipeline (used when binding vertex buffers) + currentVertexStride_ = d3dPipelineState->getVertexStride(); + // Fill per-slot strides + for (size_t s = 0; s < IGL_BUFFER_BINDINGS_MAX; ++s) { + vertexStrides_[s] = d3dPipelineState->getVertexStride(s); + } +} + +void RenderCommandEncoder::bindDepthStencilState( + const std::shared_ptr& /*depthStencilState*/) {} + + +void RenderCommandEncoder::bindVertexBuffer(uint32_t index, + IBuffer& buffer, + size_t bufferOffset) { + IGL_D3D12_LOG_VERBOSE("bindVertexBuffer called: index=%u\n", index); + if (index >= IGL_BUFFER_BINDINGS_MAX) { + IGL_LOG_ERROR("bindVertexBuffer: index %u exceeds max %u\n", index, 
IGL_BUFFER_BINDINGS_MAX); + return; + } + + auto* d3dBuffer = static_cast(&buffer); + cachedVertexBuffers_[index].bufferLocation = d3dBuffer->gpuAddress(bufferOffset); + cachedVertexBuffers_[index].sizeInBytes = static_cast(d3dBuffer->getSizeInBytes() - bufferOffset); + cachedVertexBuffers_[index].bound = true; +} + +void RenderCommandEncoder::bindIndexBuffer(IBuffer& buffer, + IndexFormat format, + size_t bufferOffset) { + IGL_D3D12_LOG_VERBOSE("bindIndexBuffer called\n"); + auto* d3dBuffer = static_cast(&buffer); + cachedIndexBuffer_.bufferLocation = d3dBuffer->gpuAddress(bufferOffset); + cachedIndexBuffer_.sizeInBytes = static_cast(d3dBuffer->getSizeInBytes() - bufferOffset); + // D3D12 only supports 16-bit and 32-bit index formats (not 8-bit) + cachedIndexBuffer_.format = (format == IndexFormat::UInt16) ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R32_UINT; + cachedIndexBuffer_.bound = true; +} + +void RenderCommandEncoder::bindBytes(size_t /*index*/, + uint8_t /*target*/, + const void* /*data*/, + size_t /*length*/) { + // D3D12 backend does not support bindBytes + // Applications should use uniform buffers (bindBuffer) instead + // This is a no-op to maintain compatibility with cross-platform code + IGL_DEBUG_ASSERT_NOT_IMPLEMENTED(); + IGL_LOG_INFO_ONCE("bindBytes is not supported in D3D12 backend. 
Use bindBuffer with uniform buffers instead.\n"); +} +void RenderCommandEncoder::bindPushConstants(const void* data, + size_t length, + size_t offset) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindPushConstants called on closed command list\n"); + return; + } + if (!data || length == 0) { + return; + } + + // Query pipeline for dynamic root parameter index (eliminates hardcoded assumptions) + if (!currentRenderPipelineState_) { + IGL_LOG_ERROR("bindPushConstants called without bound pipeline state\n"); + return; + } + + if (!currentRenderPipelineState_->hasPushConstants()) { + IGL_LOG_ERROR("bindPushConstants called but pipeline has no push constants\n"); + return; + } + + const UINT rootParamIndex = currentRenderPipelineState_->getPushConstantRootParameterIndex(); + + // Offset and length are in bytes; convert to 32-bit units. + const UINT offset32 = static_cast(offset / sizeof(uint32_t)); + const UINT num32 = static_cast((length + sizeof(uint32_t) - 1) / sizeof(uint32_t)); + + // D3D12 permits up to 64 bytes (16 DWORDs) of root constants; enforce this + // conservatively to avoid exceeding the root signature declaration. + if (offset32 + num32 > 16) { + IGL_LOG_ERROR("bindPushConstants: push constant range (%u dwords at offset %u) exceeds 16 dword limit\n", + num32, + offset32); + return; + } + + commandList_->SetGraphicsRoot32BitConstants(rootParamIndex, num32, data, offset32); +} +void RenderCommandEncoder::bindSamplerState(size_t index, + uint8_t /*target*/, + ISamplerState* samplerState) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindSamplerState called on closed command list\n"); + return; + } + // Delegate to D3D12ResourcesBinder for centralized descriptor management. 
+ resourcesBinder_.bindSamplerState(static_cast(index), samplerState); + + // Clear bindBindGroup cache to switch from bindBindGroup path to bindSamplerState path + // This ensures draw() will call resourcesBinder_.updateBindings() instead of using cached handles + cachedTextureCount_ = 0; + cachedSamplerCount_ = 0; + usedBindGroup_ = false; +} +void RenderCommandEncoder::bindTexture(size_t index, + uint8_t /*target*/, + ITexture* texture) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindTexture called on closed command list\n"); + return; + } + // Delegate to single-argument version + bindTexture(index, texture); +} + +void RenderCommandEncoder::bindTexture(size_t index, ITexture* texture) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::bindTexture called on closed command list\n"); + return; + } + // Delegate to D3D12ResourcesBinder for centralized descriptor management. + resourcesBinder_.bindTexture(static_cast(index), texture); + + // Clear bindBindGroup cache to switch from bindBindGroup path to bindTexture path + // This ensures draw() will call resourcesBinder_.updateBindings() instead of using cached handles + cachedTextureCount_ = 0; + cachedSamplerCount_ = 0; + usedBindGroup_ = false; +} +void RenderCommandEncoder::bindUniform(const UniformDesc& /*uniformDesc*/, const void* /*data*/) {} + +void RenderCommandEncoder::draw(size_t vertexCount, + uint32_t instanceCount, + uint32_t firstVertex, + uint32_t baseInstance) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::draw called on closed command list\n"); + return; + } + // G-001: Flush any pending barriers before draw call + flushBarriers(); + + // Apply all resource bindings (textures, samplers, buffers) before draw. 
+ // Even when bindBindGroup() is used for CBV tables, textures/samplers may still be + // managed by D3D12ResourcesBinder, so always update bindings here. + { + Result bindResult; + if (!resourcesBinder_.updateBindings(currentRenderPipelineState_, &bindResult)) { + IGL_LOG_ERROR("draw: Failed to update resource bindings: %s\n", bindResult.message.c_str()); + return; + } + } + + // D3D12 requires ALL root parameters to be bound before drawing + // Hybrid render root signature layout (see Device::createRenderPipeline): + // - Root parameter 0: Root 32-bit constants for b2 (push constants) + // - Root parameter 1: Root CBV for b0 (legacy bindBuffer) + // - Root parameter 2: Root CBV for b1 (legacy bindBuffer) + // - Root parameter 3: CBV descriptor table for b3-b15 (bindBindGroup buffer table) + // - Root parameter 4: SRV descriptor table for t0-tN + // - Root parameter 5: Sampler descriptor table for s0-tN + // - Root parameter 6: UAV descriptor table for u0-uN (storage buffers) + + // Bind descriptor tables using dynamic root parameter indices from pipeline reflection + // The indices are computed based on which resources the shader actually uses + + // Apply vertex buffers. If the bound pipeline has no vertex input state + // (no attributes/bindings), skip IASetVertexBuffers entirely so that + // fullscreen / skybox style passes using SV_VertexID do not trigger + // validation errors when a previous pass left a vertex buffer bound. 
+ bool pipelineHasVertexInput = (currentVertexStride_ != 0); + if (!pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (vertexStrides_[i] != 0) { + pipelineHasVertexInput = true; + break; + } + } + } + + if (pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (!cachedVertexBuffers_[i].bound) { + continue; + } + UINT stride = vertexStrides_[i]; + if (stride == 0) { + if (currentVertexStride_ == 0) { + IGL_LOG_INFO_ONCE( + "Vertex buffer bound to slot %u but pipeline reports no stride; " + "using conservative fallback stride of 32 bytes\n", + i); + stride = 32; + } else { + stride = currentVertexStride_; + } + } + D3D12_VERTEX_BUFFER_VIEW vbView = {}; + vbView.BufferLocation = cachedVertexBuffers_[i].bufferLocation; + vbView.SizeInBytes = cachedVertexBuffers_[i].sizeInBytes; + vbView.StrideInBytes = stride; + IGL_D3D12_LOG_VERBOSE( + "draw: VB[%u] = GPU 0x%llx, size=%u, stride=%u\n", + i, + vbView.BufferLocation, + vbView.SizeInBytes, + vbView.StrideInBytes); + commandList_->IASetVertexBuffers(i, 1, &vbView); + } + } else { + // No vertex input expected for this pipeline; skip IASetVertexBuffers + // even if a previous pass bound a vertex buffer. 
+ IGL_D3D12_LOG_VERBOSE( + "draw: Pipeline has no vertex input layout; skipping IASetVertexBuffers for this draw\n"); + } + + commandBuffer_.incrementDrawCount(); + + IGL_D3D12_LOG_VERBOSE("draw: DrawInstanced(vertexCount=%zu, instanceCount=%u, firstVertex=%u, baseInstance=%u)\n", vertexCount, instanceCount, firstVertex, baseInstance); + commandList_->DrawInstanced(static_cast(vertexCount), + instanceCount, + firstVertex, + baseInstance); +} + +void RenderCommandEncoder::drawIndexed(size_t indexCount, + uint32_t instanceCount, + uint32_t firstIndex, + int32_t vertexOffset, + uint32_t baseInstance) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::drawIndexed called on closed command list\n"); + return; + } + // G-001: Flush any pending barriers before draw call + flushBarriers(); + + // Apply all resource bindings (textures, samplers, buffers) before draw. + // Even when bindBindGroup() is used for CBV tables, textures/samplers may still be + // managed by D3D12ResourcesBinder, so always update bindings here. 
+ { + Result bindResult; + if (!resourcesBinder_.updateBindings(currentRenderPipelineState_, &bindResult)) { + IGL_LOG_ERROR("drawIndexed: Failed to update resource bindings: %s\n", bindResult.message.c_str()); + return; + } + } + + // D3D12 requires ALL root parameters to be bound before drawing + // Hybrid render root signature layout (see Device::createRenderPipeline): + // - Root parameter 0: Root 32-bit constants for b2 (push constants) + // - Root parameter 1: Root CBV for b0 (legacy bindBuffer) + // - Root parameter 2: Root CBV for b1 (legacy bindBuffer) + // - Root parameter 3: CBV descriptor table for b3-b15 (bindBindGroup buffer table) + // - Root parameter 4: SRV descriptor table for t0-tN + // - Root parameter 5: Sampler descriptor table for s0-tN + // - Root parameter 6: UAV descriptor table for u0-uN (storage buffers) + + // Bind descriptor tables using dynamic root parameter indices from pipeline reflection + + // Descriptor tables (CBV/SRV/Sampler/UAV) are bound by D3D12ResourcesBinder::updateBindings() + // based on the current pipeline's reflection. No additional descriptor table binding is + // required here. + + // Apply cached vertex buffer bindings now that pipeline state is bound. + // If the current pipeline has no vertex input layout (no attributes or + // bindings), skip IASetVertexBuffers so that draws using SV_VertexID do + // not rely on stale vertex buffer state from previous passes. 
+ bool pipelineHasVertexInput = (currentVertexStride_ != 0); + if (!pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (vertexStrides_[i] != 0) { + pipelineHasVertexInput = true; + break; + } + } + } + + if (pipelineHasVertexInput) { + for (uint32_t i = 0; i < IGL_BUFFER_BINDINGS_MAX; ++i) { + if (!cachedVertexBuffers_[i].bound) { + continue; + } + UINT stride = vertexStrides_[i]; + if (stride == 0) { + if (currentVertexStride_ == 0) { + IGL_LOG_INFO_ONCE( + "Vertex buffer bound to slot %u but pipeline reports no stride; " + "using conservative fallback stride of 32 bytes\n", + i); + stride = 32; + } else { + stride = currentVertexStride_; + } + } + D3D12_VERTEX_BUFFER_VIEW vbView = {}; + vbView.BufferLocation = cachedVertexBuffers_[i].bufferLocation; + vbView.SizeInBytes = cachedVertexBuffers_[i].sizeInBytes; + vbView.StrideInBytes = stride; + commandList_->IASetVertexBuffers(i, 1, &vbView); + } + } else { + IGL_D3D12_LOG_VERBOSE( + "drawIndexed: Pipeline has no vertex input layout; skipping IASetVertexBuffers for this draw\n"); + } + + // Apply cached index buffer binding + if (cachedIndexBuffer_.bound) { + D3D12_INDEX_BUFFER_VIEW ibView = {}; + ibView.BufferLocation = cachedIndexBuffer_.bufferLocation; + ibView.SizeInBytes = cachedIndexBuffer_.sizeInBytes; + ibView.Format = cachedIndexBuffer_.format; + commandList_->IASetIndexBuffer(&ibView); + } + + // Track per-command-buffer draw count; CommandQueue aggregates into device on submit + commandBuffer_.incrementDrawCount(); + + commandList_->DrawIndexedInstanced(static_cast(indexCount), + instanceCount, + firstIndex, + vertexOffset, + baseInstance); + +#if IGL_DEBUG + static const bool kLogDrawErrors = []() { + const char* env = std::getenv("IGL_D3D12_LOG_DRAW_ERRORS"); + return env && (env[0] == '1'); + }(); + if (kLogDrawErrors) { + auto* device = commandBuffer_.getContext().getDevice(); + if (device) { + igl::d3d12::ComPtr infoQueue; + if 
(SUCCEEDED(device->QueryInterface(IID_PPV_ARGS(infoQueue.GetAddressOf())))) { + const UINT64 messageCount = infoQueue->GetNumStoredMessages(); + for (UINT64 i = 0; i < messageCount; ++i) { + SIZE_T length = 0; + if (FAILED(infoQueue->GetMessage(i, nullptr, &length)) || length == 0) { + continue; + } + auto* message = static_cast(malloc(length)); + if (message && SUCCEEDED(infoQueue->GetMessage(i, message, &length))) { + IGL_LOG_ERROR("[D3D12 Debug] %s\n", message->pDescription ? message->pDescription : ""); + } + free(message); + } + infoQueue->ClearStoredMessages(); + } + } + } +#endif +} + +void RenderCommandEncoder::drawMeshTasks(const Dimensions& /*threadgroupsPerGrid*/, + const Dimensions& /*threadsPerTaskThreadgroup*/, + const Dimensions& /*threadsPerMeshThreadgroup*/) { + IGL_LOG_ERROR("RenderCommandEncoder::drawMeshTasks is not implemented on D3D12\n"); +} +void RenderCommandEncoder::multiDrawIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: command list is closed or null\n"); + return; + } + + // Get D3D12 buffer resource + auto* d3dBuffer = static_cast(&indirectBuffer); + if (!d3dBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: indirectBuffer is null\n"); + return; + } + + ID3D12Resource* argBuffer = d3dBuffer->getResource(); + if (!argBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: argBuffer resource is null\n"); + return; + } + + // Get command signature from D3D12Context + auto& ctx = commandBuffer_.getContext(); + ID3D12CommandSignature* signature = ctx.getDrawIndirectSignature(); + if (!signature) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndirect: command signature is null\n"); + return; + } + + // Use default stride if not provided (sizeof D3D12_DRAW_ARGUMENTS = 16 bytes) + const UINT actualStride = stride ? 
stride : sizeof(D3D12_DRAW_ARGUMENTS); + + // ExecuteIndirect for multi-draw + // Parameters: signature, maxCommandCount, argumentBuffer, argumentBufferOffset, countBuffer, countBufferOffset + commandList_->ExecuteIndirect( + signature, + drawCount, + argBuffer, + static_cast(indirectBufferOffset), + nullptr, // No count buffer (exact draw count specified) + 0); + + // Track draw call count + commandBuffer_.incrementDrawCount(drawCount); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::multiDrawIndirect: Executed %u indirect draws (stride: %u)\n", + drawCount, actualStride); +} +void RenderCommandEncoder::multiDrawIndexedIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride) { + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: command list is closed or null\n"); + return; + } + + // Get D3D12 buffer resource + auto* d3dBuffer = static_cast(&indirectBuffer); + if (!d3dBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: indirectBuffer is null\n"); + return; + } + + ID3D12Resource* argBuffer = d3dBuffer->getResource(); + if (!argBuffer) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: argBuffer resource is null\n"); + return; + } + + // Get command signature from D3D12Context + auto& ctx = commandBuffer_.getContext(); + ID3D12CommandSignature* signature = ctx.getDrawIndexedIndirectSignature(); + if (!signature) { + IGL_LOG_ERROR("RenderCommandEncoder::multiDrawIndexedIndirect: command signature is null\n"); + return; + } + + // Use default stride if not provided (sizeof D3D12_DRAW_INDEXED_ARGUMENTS = 20 bytes) + const UINT actualStride = stride ? 
stride : sizeof(D3D12_DRAW_INDEXED_ARGUMENTS); + + // ExecuteIndirect for multi-draw indexed + // Parameters: signature, maxCommandCount, argumentBuffer, argumentBufferOffset, countBuffer, countBufferOffset + commandList_->ExecuteIndirect( + signature, + drawCount, + argBuffer, + static_cast(indirectBufferOffset), + nullptr, // No count buffer (exact draw count specified) + 0); + + // Track draw call count + commandBuffer_.incrementDrawCount(drawCount); + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder::multiDrawIndexedIndirect: Executed %u indirect indexed draws (stride: %u)\n", + drawCount, actualStride); +} + +void RenderCommandEncoder::setStencilReferenceValue(uint32_t value) { + if (!commandBuffer_.isRecording() || !commandList_) { + return; + } + // Set stencil reference value for stencil testing + commandList_->OMSetStencilRef(value); + IGL_D3D12_LOG_VERBOSE("setStencilReferenceValue: Set stencil ref to %u\n", value); +} + +void RenderCommandEncoder::setBlendColor(const Color& color) { + if (!commandBuffer_.isRecording() || !commandList_) { + return; + } + // Set blend factor constants for BlendFactor::BlendColor operations + // D3D12 uses RGBA float array, matching IGL Color structure + const float blendFactor[4] = {color.r, color.g, color.b, color.a}; + commandList_->OMSetBlendFactor(blendFactor); + IGL_D3D12_LOG_VERBOSE("setBlendColor: Set blend factor to (%.2f, %.2f, %.2f, %.2f)\n", + color.r, color.g, color.b, color.a); +} + +void RenderCommandEncoder::setDepthBias(float /*depthBias*/, float /*slopeScale*/, float /*clamp*/) { + // Note: Depth bias is configured in the pipeline state (RasterizerState) + // D3D12 does not support dynamic depth bias changes during rendering + // This would require rebuilding the PSO with different depth bias values +} + +void RenderCommandEncoder::pushDebugGroupLabel(const char* label, const Color& /*color*/) const { + if (!commandBuffer_.isRecording() || !commandList_ || !label) { + return; + } + const size_t len = 
strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList_->BeginEvent( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void RenderCommandEncoder::insertDebugEventLabel(const char* label, const Color& /*color*/) const { + if (!commandBuffer_.isRecording() || !commandList_ || !label) { + return; + } + const size_t len = strlen(label); + std::wstring wlabel(len, L' '); + std::mbstowcs(&wlabel[0], label, len); + commandList_->SetMarker( + 0, wlabel.c_str(), static_cast((wlabel.length() + 1) * sizeof(wchar_t))); +} + +void RenderCommandEncoder::popDebugGroupLabel() const { + if (!commandBuffer_.isRecording() || !commandList_) { + return; + } + commandList_->EndEvent(); +} + +void RenderCommandEncoder::bindBuffer(uint32_t index, + uint8_t /*target*/, + IBuffer* buffer, + size_t bufferOffset, + size_t bufferSize) { + // D3D12 does not differentiate shader stages for buffer binding in this path. + bindBuffer(index, buffer, bufferOffset, bufferSize); +} + +void RenderCommandEncoder::bindBuffer(uint32_t index, + IBuffer* buffer, + size_t offset, + size_t bufferSize) { + IGL_D3D12_LOG_VERBOSE("bindBuffer START: index=%u\n", index); + if (!buffer) { + IGL_D3D12_LOG_VERBOSE("bindBuffer: null buffer, returning\n"); + return; + } + + auto* d3dBuffer = static_cast(buffer); + + // Check if this is a storage buffer - needs SRV binding for shader reads + const bool isStorageBuffer = (d3dBuffer->getBufferType() & BufferDesc::BufferTypeBits::Storage) != 0; + + if (isStorageBuffer) { + // Storage buffer - create SRV for ByteAddressBuffer reads in pixel shader + IGL_D3D12_LOG_VERBOSE("bindBuffer: Storage buffer detected at index %u - creating SRV for pixel shader read\n", index); + + // For raw (ByteAddressBuffer) SRVs we treat the buffer as a sequence of 4-byte units. + // This matches HLSL ByteAddressBuffer / RWByteAddressBuffer semantics. 
+ if ((offset & 3) != 0) { + IGL_LOG_ERROR("bindBuffer: Storage buffer offset %zu is not 4-byte aligned (required for DXGI_FORMAT_R32_TYPELESS). " + "Raw buffer SRV FirstElement will be rounded down, which may cause incorrect data access.\n", offset); + // Continue but log warning - FirstElement below uses integer division + } + + auto& context = commandBuffer_.getContext(); + auto* device = context.getDevice(); + if (!device || cbvSrvUavHeap_ == nullptr) { + IGL_LOG_ERROR("bindBuffer: Missing device or per-frame CBV/SRV/UAV heap\n"); + return; + } + + // Allocate descriptor slot from command buffer's shared counter + // Uses Result-based allocation with dynamic heap growth. + uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBuffer: Failed to allocate descriptor: %s\n", allocResult.message.c_str()); + return; + } + IGL_D3D12_LOG_VERBOSE("bindBuffer: Allocated SRV descriptor slot %u for buffer at t%u\n", descriptorIndex, index); + + // Create SRV descriptor for ByteAddressBuffer (raw view) + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_R32_TYPELESS; // Raw buffer (ByteAddressBuffer) + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + // FirstElement/NumElements expressed in 32-bit units (4 bytes) + srvDesc.Buffer.FirstElement = static_cast(offset) / 4; // Offset in 32-bit elements + // NumElements must be (totalSize - offset) to avoid exceeding buffer bounds + srvDesc.Buffer.NumElements = + static_cast((buffer->getSizeInBytes() - offset) / 4); // Size in 32-bit elements + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_RAW; // Raw buffer access + + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + // Pre-creation 
validation. + IGL_DEBUG_ASSERT(device != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(d3dBuffer->getResource() != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "SRV descriptor handle is invalid"); + + device->CreateShaderResourceView(d3dBuffer->getResource(), &srvDesc, cpuHandle); + + IGL_D3D12_LOG_VERBOSE("bindBuffer: Created SRV at descriptor slot %u (FirstElement=%llu, NumElements=%u)\n", + descriptorIndex, srvDesc.Buffer.FirstElement, srvDesc.Buffer.NumElements); + + // Cache GPU handle for descriptor table binding in draw calls. + // SRVs are bound to root parameter 4 (render root signature SRV table). + cachedTextureGpuHandles_[index] = gpuHandle; + cachedTextureCount_ = std::max(cachedTextureCount_, static_cast(index + 1)); + + // For pipelines that declare SRVs but do not use the generic texture binding path + // (e.g., ComputeSession visualization using ByteAddressBuffer at t0), bind the SRV + // descriptor table directly to the SRV root parameter. This does not conflict with + // D3D12ResourcesBinder because updateTextureBindings() is a no-op when no textures + // are bound via bindTexture(). 
+ if (currentRenderPipelineState_ && commandList_) { + const UINT srvTableIndex = + currentRenderPipelineState_->getSRVTableRootParameterIndex(); + if (srvTableIndex != UINT_MAX) { + commandList_->SetGraphicsRootDescriptorTable(srvTableIndex, gpuHandle); + IGL_D3D12_LOG_VERBOSE( + "bindBuffer: Bound storage buffer SRV at slot %u (t%u) to SRV table root param %u " + "(GPU handle 0x%llx)\n", + index, + index, + srvTableIndex, + gpuHandle.ptr); + } + } + + IGL_D3D12_LOG_VERBOSE("bindBuffer: Storage buffer SRV binding complete\n"); + + // CRITICAL: Track the Buffer OBJECT (not just resource) to keep it alive until GPU finishes + // This prevents the Buffer destructor from releasing the resource while GPU commands reference it + // Use weak_from_this().lock() instead of shared_from_this() to avoid exception + std::shared_ptr sharedBuffer = d3dBuffer->weak_from_this().lock(); + if (sharedBuffer) { + static_cast(commandBuffer_).trackTransientBuffer(std::move(sharedBuffer)); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Tracking Buffer object (shared_ptr) for lifetime management\n"); + } else { + // Buffer not managed by shared_ptr (e.g., persistent buffer from member variable) + // Fall back to tracking just the resource (AddRef on ID3D12Resource) + static_cast(commandBuffer_).trackTransientResource(d3dBuffer->getResource()); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Buffer not shared_ptr-managed, tracking resource only\n"); + } + } else { + // Constant buffer (CBV) - delegate to resourcesBinder for reflection-based binding + IGL_D3D12_LOG_VERBOSE("bindBuffer: Constant buffer at index %u - delegating to resourcesBinder\n", index); + + // D3D12 requires constant buffer addresses to be 256-byte aligned + if ((offset & 255) != 0) { + IGL_LOG_ERROR("bindBuffer: ERROR - CBV offset %zu is not 256-byte aligned (required by D3D12). " + "Constant buffers must be created at aligned offsets. 
Ignoring bind request.\n", offset); + return; + } + + // CRITICAL: Track the Buffer OBJECT (not just resource) to keep it alive until GPU finishes + std::shared_ptr sharedBuffer = d3dBuffer->weak_from_this().lock(); + if (sharedBuffer) { + static_cast(commandBuffer_).trackTransientBuffer(std::move(sharedBuffer)); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Tracking Buffer object (shared_ptr) for lifetime management\n"); + } else { + static_cast(commandBuffer_).trackTransientResource(d3dBuffer->getResource()); + IGL_D3D12_LOG_VERBOSE("bindBuffer: Buffer not shared_ptr-managed, tracking resource only\n"); + } + + // Use bufferSize if provided; otherwise, bind the remaining bytes from offset. + // This matches the cross-backend contract: bufferSize == 0 means "remaining size". + const size_t fullSize = buffer->getSizeInBytes(); + const size_t size = (bufferSize != 0 && bufferSize <= fullSize) + ? bufferSize + : (offset < fullSize ? (fullSize - offset) : 0); + + // Delegate to resourcesBinder which caches the binding and marks dirty flag + // The actual binding will happen in resourcesBinder_.updateBindings() + resourcesBinder_.bindBuffer(index, buffer, offset, size, false, 0); + } + + IGL_D3D12_LOG_VERBOSE("bindBuffer END\n"); +} +void RenderCommandEncoder::bindBindGroup(BindGroupTextureHandle handle) { + IGL_D3D12_LOG_VERBOSE("bindBindGroup(texture): handle valid=%d\n", !handle.empty()); + + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("bindBindGroup(texture): command list is closed or null\n"); + return; + } + + // Get the bind group descriptor from the device + auto& device = commandBuffer_.getDevice(); + const auto* desc = device.getBindGroupTextureDesc(handle); + if (!desc) { + IGL_LOG_ERROR("bindBindGroup(texture): Invalid handle or descriptor not found\n"); + return; + } + + // Delegate actual descriptor allocation and binding to D3D12ResourcesBinder + // so that bindBindGroup(texture) behaves like a grouped bindTexture/bindSamplerState. 
+ for (uint32_t i = 0; i < IGL_TEXTURE_SAMPLERS_MAX; ++i) { + if (desc->textures[i]) { + resourcesBinder_.bindTexture(i, desc->textures[i].get()); + } + } + for (uint32_t i = 0; i < IGL_TEXTURE_SAMPLERS_MAX; ++i) { + if (desc->samplers[i]) { + resourcesBinder_.bindSamplerState(i, desc->samplers[i].get()); + } + } +} + +void RenderCommandEncoder::bindBindGroup(BindGroupBufferHandle handle, + uint32_t numDynamicOffsets, + const uint32_t* dynamicOffsets) { + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): handle valid=%d, dynCount=%u\n", !handle.empty(), numDynamicOffsets); + + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("bindBindGroup(buffer): command list is closed or null\n"); + return; + } + + auto& device = commandBuffer_.getDevice(); + const auto* desc = device.getBindGroupBufferDesc(handle); + if (!desc) { + IGL_LOG_ERROR("bindBindGroup(buffer): Invalid handle or descriptor not found\n"); + return; + } + + // NEW PATH: + // Delegate all buffer bindings to D3D12ResourcesBinder so that CBVs/SRVs/UAVs are created and + // bound through a single, reflection-aware path. BindGroup slots map directly to shader + // registers (bN / tN / uN), just like Vulkan bindings. + { + uint32_t dynIdx = 0; + for (uint32_t slot = 0; slot < IGL_UNIFORM_BLOCKS_BINDING_MAX; ++slot) { + auto& bufferHandle = desc->buffers[slot]; + size_t baseOffset = desc->offset[slot]; + size_t size = desc->size[slot]; + + if ((desc->isDynamicBufferMask & (1u << slot)) != 0) { + if (dynIdx < numDynamicOffsets && dynamicOffsets) { + baseOffset = dynamicOffsets[dynIdx++]; + } + } + + if (!bufferHandle) { + // Unbind any previous buffer/UAV at this slot. 
+ resourcesBinder_.bindBuffer(slot, nullptr, 0, 0, false, 0); + continue; + } + + auto* buf = static_cast(bufferHandle.get()); + const bool isUniform = + (buf->getBufferType() & BufferDesc::BufferTypeBits::Uniform) != 0; + const bool isStorage = + (buf->getBufferType() & BufferDesc::BufferTypeBits::Storage) != 0; + + // Track resource so its lifetime is tied to the command buffer. + commandBuffer_.trackTransientResource(buf->getResource()); + + if (isUniform) { + // For CBVs, size == 0 means "remaining bytes from offset". Respect explicit sizes when provided. + resourcesBinder_.bindBuffer(slot, buf, baseOffset, size, false, 0); + } else if (isStorage) { + // Storage buffer: delegate to UAV/SRV binding path in D3D12ResourcesBinder. + // Use the buffer's storage element stride when available; default to 4 bytes. + size_t elementStride = buf->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + resourcesBinder_.bindBuffer(slot, buf, baseOffset, size, true, elementStride); + } else { + IGL_LOG_ERROR( + "bindBindGroup(buffer): Buffer at slot %u is neither Uniform nor Storage\n", slot); + } + } + usedBindGroup_ = true; + return; + } + + auto* cmd = commandList_; + if (!cmd) { + IGL_LOG_ERROR("bindBindGroup(buffer): null command list\n"); + return; + } + + // CRITICAL: D3D12 descriptor tables MUST be contiguous in the descriptor heap. + // SetGraphicsRootDescriptorTable passes a GPU handle to the START of a contiguous block. + // D3D12 accesses descriptors using: baseHandle + tableOffset. 
+ // + // Example: If BindGroupBufferDesc has buffers at slots 3-6: + // - We need descriptors at heap indices [base+0] through [base+6] + // - Slots 0-2 get NULL CBVs, slots 3-6 get real CBVs + // - SetGraphicsRootDescriptorTable receives handle to heap[base+0] + // - Shader accessing b3 reads from heap[base+3] + // + // First pass: Determine highest slot index to calculate total descriptor count + uint32_t maxSlotUsed = 0; + for (uint32_t slot = 0; slot < IGL_UNIFORM_BLOCKS_BINDING_MAX; ++slot) { + if (desc->buffers[slot]) { + auto* buf = static_cast(desc->buffers[slot].get()); + const bool isUniform = (buf->getBufferType() & BufferDesc::BufferTypeBits::Uniform) != 0; + if (isUniform) { + maxSlotUsed = slot; + } + } + } + + if (maxSlotUsed == 0 && !desc->buffers[0]) { + // No uniform buffers to bind + return; + } + + cbvTableCount_ = maxSlotUsed + 1; + + // Allocate a CONTIGUOUS block of descriptors for the entire descriptor table + uint32_t baseDescriptorIndex = 0; + Result allocResult = commandBuffer_.allocateCbvSrvUavRange(static_cast(cbvTableCount_), &baseDescriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBindGroup(buffer): Failed to allocate contiguous CBV descriptor range (%zu descriptors): %s\n", + cbvTableCount_, allocResult.message.c_str()); + return; + } + + auto& context = commandBuffer_.getContext(); + auto* d3d12Device = context.getDevice(); + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Allocated contiguous CBV descriptor block: base=%u, count=%zu\n", + baseDescriptorIndex, cbvTableCount_); + + // Second pass: Create CBV descriptors in the contiguous block + uint32_t dynIdx = 0; + for (uint32_t slot = 0; slot < cbvTableCount_; ++slot) { + // Calculate descriptor index within the contiguous block + uint32_t descriptorIndex = baseDescriptorIndex + slot; + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + + if (!desc->buffers[slot]) { + // Create NULL CBV for empty slots + 
D3D12_CONSTANT_BUFFER_VIEW_DESC nullCbvDesc = {}; + nullCbvDesc.BufferLocation = 0; + nullCbvDesc.SizeInBytes = 256; // Minimum CBV alignment + d3d12Device->CreateConstantBufferView(&nullCbvDesc, cpuHandle); + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Created NULL CBV at heap[%u] for empty slot %u\n", + descriptorIndex, slot); + continue; + } + + auto* buf = static_cast(desc->buffers[slot].get()); + const bool isUniform = (buf->getBufferType() & BufferDesc::BufferTypeBits::Uniform) != 0; + const bool isStorage = (buf->getBufferType() & BufferDesc::BufferTypeBits::Storage) != 0; + + // Track buffer resource to prevent it from being deleted while GPU address is cached + commandBuffer_.trackTransientResource(buf->getResource()); + + size_t baseOffset = desc->offset[slot]; + if ((desc->isDynamicBufferMask & (1u << slot)) != 0) { + if (dynIdx < numDynamicOffsets && dynamicOffsets) { + baseOffset = dynamicOffsets[dynIdx++]; + } + } + + if (isUniform) { + // 256B alignment required for CBVs + const size_t aligned = (baseOffset + 255) & ~size_t(255); + D3D12_GPU_VIRTUAL_ADDRESS addr = buf->gpuAddress(aligned); + + if (slot < IGL_BUFFER_BINDINGS_MAX) { + // Respect requested buffer size and enforce the 64 KB limit. + // If size[slot] is 0, use remaining buffer size from offset + size_t requestedSize = desc->size[slot]; + if (requestedSize == 0) { + requestedSize = buf->getSizeInBytes() - aligned; + } + + // D3D12 spec: Constant buffers must be ≤ 64 KB + constexpr size_t kMaxCBVSize = 65536; // 64 KB + if (requestedSize > kMaxCBVSize) { + IGL_LOG_ERROR("bindBindGroup(buffer): Constant buffer size (%zu bytes) exceeds D3D12 64 KB limit at slot %u\n", + requestedSize, slot); + continue; // Skip this binding + } + + // Create CBV descriptor in the contiguous block + D3D12_CONSTANT_BUFFER_VIEW_DESC cbvDesc = {}; + cbvDesc.BufferLocation = addr; + cbvDesc.SizeInBytes = static_cast((requestedSize + 255) & ~255); // Must be 256-byte aligned + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(d3d12Device != nullptr, "Device is null before CreateConstantBufferView"); + IGL_DEBUG_ASSERT(addr != 0, "Buffer GPU address is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "CBV descriptor handle is invalid"); + IGL_DEBUG_ASSERT(cbvDesc.SizeInBytes <= kMaxCBVSize, "CBV size exceeds 64 KB after alignment"); + + d3d12Device->CreateConstantBufferView(&cbvDesc, cpuHandle); + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Created CBV at heap[%u] for slot %u (addr=0x%llx, size=%u)\n", + descriptorIndex, slot, addr, cbvDesc.SizeInBytes); + } else { + IGL_LOG_ERROR("bindBindGroup(buffer): BindGroupBufferDesc slot %u exceeds maximum (%u)\n", slot, IGL_BUFFER_BINDINGS_MAX); + } + } else if (isStorage) { + // Implement storage buffer binding via UAV/SRV descriptors. + auto& storageContext = commandBuffer_.getContext(); + auto* d3dDevice = storageContext.getDevice(); + ID3D12Resource* resource = buf->getResource(); + + // Determine if buffer is read-write (UAV) or read-only (SRV) + // D3D12 storage buffers with UAV flag are read-write by default + // Private/Shared storage indicates read-write access, Managed indicates read-only + const bool isReadWrite = (buf->storage() == ResourceStorage::Private || + buf->storage() == ResourceStorage::Shared); + + if (isReadWrite) { + // Create UAV for read-write storage buffer + // Uses Result-based allocation with dynamic heap growth. + uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBindGroup(buffer): Failed to allocate UAV descriptor: %s\n", allocResult.message.c_str()); + continue; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create UAV descriptor for structured buffer + // Use the storage stride from BufferDesc when available; default to 4 bytes otherwise. 
+ size_t elementStride = buf->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + + // Validate baseOffset doesn't exceed buffer size + const size_t bufferSizeBytes = buf->getSizeInBytes(); + if (baseOffset > bufferSizeBytes) { + IGL_LOG_ERROR("bindBindGroup(buffer): baseOffset %zu exceeds buffer size %zu; skipping UAV binding\n", + baseOffset, bufferSizeBytes); + continue; + } + + if (baseOffset % elementStride != 0) { + IGL_LOG_ERROR("bindBindGroup(buffer): Storage buffer baseOffset %zu is not aligned to " + "element stride (%zu bytes). UAV FirstElement will be truncated (offset/stride).\n", + baseOffset, elementStride); + } + + const size_t remaining = bufferSizeBytes - baseOffset; + + // Check for undersized buffer (would create empty or partial view) + if (remaining < elementStride) { + IGL_LOG_ERROR("bindBindGroup(buffer): Remaining buffer size %zu is less than element stride %zu; " + "UAV will have NumElements=0 (empty view). Check buffer size and offset.\n", + remaining, elementStride); + // Continue to create the descriptor, but it will be empty (NumElements=0) + } + + D3D12_UNORDERED_ACCESS_VIEW_DESC uavDesc = {}; + uavDesc.Format = DXGI_FORMAT_UNKNOWN; + uavDesc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER; + uavDesc.Buffer.FirstElement = static_cast(baseOffset / elementStride); + // CRITICAL: NumElements must be (size - offset) / stride, not total size / stride + uavDesc.Buffer.NumElements = static_cast(remaining / elementStride); + uavDesc.Buffer.StructureByteStride = static_cast(elementStride); + uavDesc.Buffer.CounterOffsetInBytes = 0; + uavDesc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE; + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(d3dDevice != nullptr, "Device is null before CreateUnorderedAccessView"); + IGL_DEBUG_ASSERT(resource != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "UAV descriptor handle is invalid"); + + d3dDevice->CreateUnorderedAccessView(resource, nullptr, &uavDesc, cpuHandle); + + // Bind UAV descriptor table using dynamic root parameter index from pipeline + const UINT uavTableIndex = currentRenderPipelineState_->getUAVTableRootParameterIndex(); + if (uavTableIndex != UINT_MAX) { + commandList_->SetGraphicsRootDescriptorTable(uavTableIndex, gpuHandle); + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): bound read-write storage buffer at slot %u (UAV u%u, root param %u, GPU handle 0x%llx)\n", + slot, slot, uavTableIndex, gpuHandle.ptr); + } else { + IGL_LOG_ERROR("bindBindGroup(buffer): Pipeline has no UAV table root parameter for storage buffer binding\n"); + } + } else { + // Create SRV for read-only storage buffer + // Uses Result-based allocation with dynamic heap growth. 
+ uint32_t descriptorIndex = 0; + Result allocResult = commandBuffer_.getNextCbvSrvUavDescriptor(&descriptorIndex); + if (!allocResult.isOk()) { + IGL_LOG_ERROR("bindBindGroup(buffer): Failed to allocate SRV descriptor: %s\n", allocResult.message.c_str()); + continue; + } + D3D12_CPU_DESCRIPTOR_HANDLE cpuHandle = context.getCbvSrvUavCpuHandle(descriptorIndex); + D3D12_GPU_DESCRIPTOR_HANDLE gpuHandle = context.getCbvSrvUavGpuHandle(descriptorIndex); + + // Create SRV descriptor for structured buffer + size_t elementStride = buf->getStorageElementStride(); + if (elementStride == 0) { + elementStride = 4; + } + + // Validate baseOffset doesn't exceed buffer size + const size_t bufferSizeBytes = buf->getSizeInBytes(); + if (baseOffset > bufferSizeBytes) { + IGL_LOG_ERROR("bindBindGroup(buffer): baseOffset %zu exceeds buffer size %zu; skipping SRV binding\n", + baseOffset, bufferSizeBytes); + continue; + } + + if (baseOffset % elementStride != 0) { + IGL_LOG_ERROR("bindBindGroup(buffer): Storage buffer baseOffset %zu is not aligned to " + "element stride (%zu bytes). SRV FirstElement will be truncated (offset/stride).\n", + baseOffset, elementStride); + } + + const size_t remaining = bufferSizeBytes - baseOffset; + + // Check for undersized buffer (would create empty or partial view) + if (remaining < elementStride) { + IGL_LOG_ERROR("bindBindGroup(buffer): Remaining buffer size %zu is less than element stride %zu; " + "SRV will have NumElements=0 (empty view). 
Check buffer size and offset.\n", + remaining, elementStride); + // Continue to create the descriptor, but it will be empty (NumElements=0) + } + + D3D12_SHADER_RESOURCE_VIEW_DESC srvDesc = {}; + srvDesc.Format = DXGI_FORMAT_UNKNOWN; + srvDesc.ViewDimension = D3D12_SRV_DIMENSION_BUFFER; + srvDesc.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srvDesc.Buffer.FirstElement = static_cast(baseOffset / elementStride); + // CRITICAL: NumElements must be (size - offset) / stride, not total size / stride + srvDesc.Buffer.NumElements = static_cast(remaining / elementStride); + srvDesc.Buffer.StructureByteStride = static_cast(elementStride); + srvDesc.Buffer.Flags = D3D12_BUFFER_SRV_FLAG_NONE; + + // Pre-creation validation. + IGL_DEBUG_ASSERT(d3dDevice != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(resource != nullptr, "Buffer resource is null"); + IGL_DEBUG_ASSERT(cpuHandle.ptr != 0, "SRV descriptor handle is invalid"); + + d3dDevice->CreateShaderResourceView(resource, &srvDesc, cpuHandle); + + // Bind SRV descriptor table (graphics root parameter 4: SRV table) + // Note: This shares the texture SRV table; storage buffers and textures will be bound together. + // PRECEDENCE: Storage buffer SRVs bound here will override any previous texture SRVs bound via + // Bind SRV descriptor table using dynamic root parameter index from pipeline + // This may rebind the SRV table that was previously set by bindBindGroup(texture). The last + // SetGraphicsRootDescriptorTable call wins - storage buffer bindings take precedence. 
+ const UINT srvTableIndex = currentRenderPipelineState_->getSRVTableRootParameterIndex(); + if (srvTableIndex != UINT_MAX) { + commandList_->SetGraphicsRootDescriptorTable(srvTableIndex, gpuHandle); + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): bound read-only storage buffer at slot %u (SRV t%u, root param %u, GPU handle 0x%llx)\n", + slot, slot, srvTableIndex, gpuHandle.ptr); + } else { + IGL_LOG_ERROR("bindBindGroup(buffer): Pipeline has no SRV table root parameter for storage buffer binding\n"); + } + } + } + } + + // Store the GPU handle of the FIRST descriptor in the contiguous block. + // SetGraphicsRootDescriptorTable will use this handle, and D3D12 will access + // subsequent descriptors using: baseHandle + tableOffset. + D3D12_GPU_DESCRIPTOR_HANDLE baseGpuHandle = context.getCbvSrvUavGpuHandle(baseDescriptorIndex); + cachedCbvTableGpuHandles_[0] = baseGpuHandle; + + IGL_D3D12_LOG_VERBOSE("bindBindGroup(buffer): Stored base GPU handle 0x%llx for CBV table (spans heap[%u] to heap[%u])\n", + baseGpuHandle.ptr, baseDescriptorIndex, baseDescriptorIndex + cbvTableCount_ - 1); + + // Mark that bindBindGroup was used (vs storage buffer SRV or binder paths). 
+ usedBindGroup_ = true; +} + +// G-001: Barrier batching implementation +void RenderCommandEncoder::flushBarriers() { + if (pendingBarriers_.empty()) { + return; + } + if (!commandBuffer_.isRecording() || !commandList_) { + IGL_LOG_ERROR("RenderCommandEncoder::flushBarriers called on closed command list; clearing pending barriers\n"); + pendingBarriers_.clear(); + return; + } + + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Flushing %zu batched resource barriers\n", + pendingBarriers_.size()); + + // Submit all pending barriers in a single API call + commandList_->ResourceBarrier(static_cast(pendingBarriers_.size()), + pendingBarriers_.data()); + + // Clear the pending barrier queue + pendingBarriers_.clear(); +} + +void RenderCommandEncoder::queueBarrier(const D3D12_RESOURCE_BARRIER& barrier) { + pendingBarriers_.push_back(barrier); + IGL_D3D12_LOG_VERBOSE("RenderCommandEncoder: Queued barrier (total pending: %zu)\n", + pendingBarriers_.size()); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/RenderCommandEncoder.h b/src/igl/d3d12/RenderCommandEncoder.h new file mode 100644 index 0000000000..3965098dea --- /dev/null +++ b/src/igl/d3d12/RenderCommandEncoder.h @@ -0,0 +1,213 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class CommandBuffer; + +/** + * @brief D3D12 implementation of render command encoder + * + * IMPORTANT BINDING PRECEDENCE NOTES: + * ==================================== + * This encoder supports multiple ways to bind shader resources (textures, buffers, samplers). + * Some binding methods share the same D3D12 root parameters, which means the LAST binding wins: + * + * 1. 
SRV Table (Root Parameter 4): + * - Textures bound via bindTexture() or D3D12ResourcesBinder + * - Storage buffers (read-only) bound via bindBindGroup(BindGroupBufferHandle) + * - If you bind BOTH textures and storage buffer SRVs, the last binding before draw() wins + * - Application code must coordinate which binding method to use per draw call + * + * 2. Sampler Table (Root Parameter 5): + * - Samplers bound via bindSamplerState() or D3D12ResourcesBinder + * + * 3. CBV Table (Root Parameter 3): + * - Constant buffers b3-b15 bound via bindBindGroup(BindGroupBufferHandle) + * + * See individual binding method documentation for details. + */ +class RenderCommandEncoder final : public IRenderCommandEncoder { + public: + RenderCommandEncoder(CommandBuffer& commandBuffer, + const std::shared_ptr& framebuffer); + ~RenderCommandEncoder() override = default; + + // Initialize encoder and setup render targets + // IMPORTANT: Must be called exactly once after construction by CommandBuffer::createRenderCommandEncoder. + // Calling multiple times will result in resource leaks and undefined behavior. + // Debug builds will assert if called more than once. 
+ void begin(const RenderPassDesc& renderPass); + + void endEncoding() override; + + void bindViewport(const Viewport& viewport) override; + void bindScissorRect(const ScissorRect& rect) override; + void bindRenderPipelineState(const std::shared_ptr& pipelineState) override; + void bindDepthStencilState(const std::shared_ptr& depthStencilState) override; + + void bindVertexBuffer(uint32_t index, IBuffer& buffer, size_t bufferOffset = 0) override; + void bindIndexBuffer(IBuffer& buffer, IndexFormat format, size_t bufferOffset = 0) override; + + void bindBytes(size_t index, uint8_t target, const void* data, size_t length) override; + void bindPushConstants(const void* data, size_t length, size_t offset = 0) override; + void bindSamplerState(size_t index, uint8_t target, ISamplerState* samplerState) override; + void bindTexture(size_t index, uint8_t target, ITexture* texture) override; + void bindTexture(size_t index, ITexture* texture) override; + void bindUniform(const UniformDesc& uniformDesc, const void* data) override; + + void draw(size_t vertexCount, + uint32_t instanceCount = 1, + uint32_t firstVertex = 0, + uint32_t baseInstance = 0) override; + void drawIndexed(size_t indexCount, + uint32_t instanceCount = 1, + uint32_t firstIndex = 0, + int32_t vertexOffset = 0, + uint32_t baseInstance = 0) override; + void drawMeshTasks(const Dimensions& threadgroupsPerGrid, + const Dimensions& threadsPerTaskThreadgroup, + const Dimensions& threadsPerMeshThreadgroup) override; + void multiDrawIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride = 0) override; + void multiDrawIndexedIndirect(IBuffer& indirectBuffer, + size_t indirectBufferOffset, + uint32_t drawCount, + uint32_t stride = 0) override; + + void setStencilReferenceValue(uint32_t value) override; + void setBlendColor(const Color& color) override; + void setDepthBias(float depthBias, float slopeScale, float clamp) override; + + // ICommandEncoder interface + 
void pushDebugGroupLabel(const char* label, const Color& color) const override; + void insertDebugEventLabel(const char* label, const Color& color) const override; + void popDebugGroupLabel() const override; + + // Additional IRenderCommandEncoder interface + void bindBuffer(uint32_t index, + uint8_t target, + IBuffer* buffer, + size_t bufferOffset = 0, + size_t bufferSize = 0) override; + void bindBuffer(uint32_t index, IBuffer* buffer, size_t offset, size_t bufferSize) override; + void bindBindGroup(BindGroupTextureHandle handle) override; + void bindBindGroup(BindGroupBufferHandle handle, + uint32_t numDynamicOffsets, + const uint32_t* dynamicOffsets) override; + + private: + CommandBuffer& commandBuffer_; + ID3D12GraphicsCommandList* commandList_; + + // Centralized resource binding management. + D3D12ResourcesBinder resourcesBinder_; + + // Guard against multiple begin() calls. + // begin() allocates RTV/DSV descriptors and sets up state that should only happen once + bool hasBegun_ = false; + + // Cache current vertex stride from bound pipeline's input layout + UINT currentVertexStride_ = 0; + // Optional per-slot strides fetched from pipeline + UINT vertexStrides_[IGL_BUFFER_BINDINGS_MAX] = {}; + + // Offscreen RTV/DSV support + std::shared_ptr framebuffer_; + // If DescriptorHeapManager is available, we borrow indices from its heaps. + // Otherwise, we fall back to small ad-hoc heaps (constructor local scope). 
+ std::vector rtvIndices_; + uint32_t dsvIndex_ = UINT32_MAX; + D3D12_CPU_DESCRIPTOR_HANDLE dsvHandle_{}; + + // Per-frame descriptor heaps (set in constructor from D3D12Context) + // CRITICAL: These MUST be per-frame isolated heaps, NOT shared DescriptorHeapManager heaps + ID3D12DescriptorHeap* cbvSrvUavHeap_ = nullptr; + ID3D12DescriptorHeap* samplerHeap_ = nullptr; + + // Cached descriptor table GPU handles + // These are set by bindTexture/bindSamplerState and used in drawIndexed + // to avoid invalidation by multiple SetDescriptorHeaps calls + // IMPORTANT: Bindings must be DENSE and start at slot 0 for each table. + // SetGraphicsRootDescriptorTable always uses cachedTextureGpuHandles_[0] as the base, + // so binding only higher slots (e.g., slot 1 without slot 0) will fail. + D3D12_GPU_DESCRIPTOR_HANDLE cachedTextureGpuHandle_{}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSamplerGpuHandle_{}; + // Support up to IGL_TEXTURE_SAMPLERS_MAX textures/samplers (t0-t15, s0-s15) + D3D12_GPU_DESCRIPTOR_HANDLE cachedTextureGpuHandles_[IGL_TEXTURE_SAMPLERS_MAX] = {}; + D3D12_GPU_DESCRIPTOR_HANDLE cachedSamplerGpuHandles_[IGL_TEXTURE_SAMPLERS_MAX] = {}; + size_t cachedTextureCount_ = 0; + size_t cachedSamplerCount_ = 0; + + // Track whether bindBindGroup was explicitly called (vs storage buffer SRV or binder paths) + // This decouples bindBindGroup usage from cachedTextureCount_/cachedSamplerCount_ + bool usedBindGroup_ = false; + + // Cached vertex buffer bindings + // Store binding info and apply in draw calls after pipeline state is bound + struct CachedVertexBuffer { + D3D12_GPU_VIRTUAL_ADDRESS bufferLocation = 0; + UINT sizeInBytes = 0; + bool bound = false; + }; + CachedVertexBuffer cachedVertexBuffers_[IGL_BUFFER_BINDINGS_MAX] = {}; + + // Cached index buffer binding + struct CachedIndexBuffer { + D3D12_GPU_VIRTUAL_ADDRESS bufferLocation = 0; + UINT sizeInBytes = 0; + DXGI_FORMAT format = DXGI_FORMAT_UNKNOWN; + bool bound = false; + }; + CachedIndexBuffer 
cachedIndexBuffer_ = {}; + + // Track which constant buffer root parameters have been bound + // D3D12 requires all root parameters to be set before drawing + // Root parameter 1 = b0 (UniformsPerFrame) - root descriptor + // Root parameter 2 = b1 (UniformsPerObject) - root descriptor + D3D12_GPU_VIRTUAL_ADDRESS cachedConstantBuffers_[2] = {0, 0}; // b0, b1 + bool constantBufferBound_[2] = {false, false}; + + // Cached CBV descriptor table for b2-b15 (root parameter 3) + // Supports up to 14 additional uniform buffers via descriptor table + D3D12_GPU_DESCRIPTOR_HANDLE cachedCbvTableGpuHandles_[IGL_BUFFER_BINDINGS_MAX] = {}; + bool cbvTableBound_[IGL_BUFFER_BINDINGS_MAX] = {}; + size_t cbvTableCount_ = 0; + + // G-001: Barrier batching infrastructure + // Accumulates resource barriers and flushes them before draw/dispatch calls + // This reduces D3D12 API overhead and allows driver optimization + std::vector pendingBarriers_; + + // Flushes all pending barriers to the command list + void flushBarriers(); + + // Queue a barrier for batched submission + void queueBarrier(const D3D12_RESOURCE_BARRIER& barrier); + + // Dynamic PSO selection (Vulkan-style pattern) + // Stores actual framebuffer formats captured in begin() + // Used to select correct PSO variant at draw time + D3D12RenderPipelineDynamicState dynamicState_; + + // Cached render pipeline state for dynamic PSO variant selection + const RenderPipelineState* currentRenderPipelineState_ = nullptr; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/RenderPipelineState.cpp b/src/igl/d3d12/RenderPipelineState.cpp new file mode 100644 index 0000000000..8927277118 --- /dev/null +++ b/src/igl/d3d12/RenderPipelineState.cpp @@ -0,0 +1,338 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +RenderPipelineState::RenderPipelineState(const RenderPipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature) + : IRenderPipelineState(desc), + pipelineState_(std::move(pipelineState)), + rootSignature_(std::move(rootSignature)) { + // Set D3D12 object names for PIX debugging + const std::string& debugName = desc.debugName.toString(); + if (pipelineState_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + pipelineState_->SetName((L"PSO_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set PIX debug name 'PSO_%s'\n", debugName.c_str()); + } + if (rootSignature_.Get() && !debugName.empty()) { + std::wstring wideName(debugName.begin(), debugName.end()); + rootSignature_->SetName((L"RootSig_" + wideName).c_str()); + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set PIX root signature name 'RootSig_%s'\n", debugName.c_str()); + } + + // Extract shader reflection info for future dynamic binding support + if (desc.shaderStages) { + auto* vertexModule = static_cast(desc.shaderStages->getVertexModule().get()); + auto* fragmentModule = static_cast(desc.shaderStages->getFragmentModule().get()); + + // Prefer vertex shader for push constants if both define them + if (vertexModule) { + const auto& vsReflection = vertexModule->getReflectionInfo(); + if (vsReflection.hasPushConstants) { + shaderReflection_.hasPushConstants = true; + shaderReflection_.pushConstantSlot = vsReflection.pushConstantSlot; + shaderReflection_.pushConstantSize = vsReflection.pushConstantSize; + shaderReflection_.pushConstantRootParamIndex = 0; // Push constants are always root parameter 0 + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: VS push constants at b%u (%u DWORDs, root param %u)\n", + vsReflection.pushConstantSlot, vsReflection.pushConstantSize, + 
shaderReflection_.pushConstantRootParamIndex); + } + } + + // Use fragment shader push constants if vertex shader doesn't have them + if (!shaderReflection_.hasPushConstants && fragmentModule) { + const auto& psReflection = fragmentModule->getReflectionInfo(); + if (psReflection.hasPushConstants) { + shaderReflection_.hasPushConstants = true; + shaderReflection_.pushConstantSlot = psReflection.pushConstantSlot; + shaderReflection_.pushConstantSize = psReflection.pushConstantSize; + shaderReflection_.pushConstantRootParamIndex = 0; // Push constants are always root parameter 0 + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: PS push constants at b%u (%u DWORDs, root param %u)\n", + psReflection.pushConstantSlot, psReflection.pushConstantSize, + shaderReflection_.pushConstantRootParamIndex); + } + } + } + + // Convert IGL primitive topology to D3D12 primitive topology + switch (desc.topology) { + case PrimitiveType::Point: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to POINTLIST\n"); + break; + case PrimitiveType::Line: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_LINELIST; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to LINELIST\n"); + break; + case PrimitiveType::LineStrip: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_LINESTRIP; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to LINESTRIP\n"); + break; + case PrimitiveType::Triangle: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to TRIANGLELIST\n"); + break; + case PrimitiveType::TriangleStrip: + primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP; + IGL_D3D12_LOG_VERBOSE("RenderPipelineState: Set topology to TRIANGLESTRIP\n"); + break; + } + + // Cache the vertex stride from the vertex input state binding (slot 0) if available + const auto& vis = desc.vertexInputState; + if (vis) { + // Try backend downcast to extract VertexInputStateDesc + 
if (auto* d3dVis = dynamic_cast(vis.get())) { + const auto& d = d3dVis->getDesc(); + if (d.numInputBindings > 0) { + vertexStride_ = static_cast(d.inputBindings[0].stride); + // Cache per-slot strides + for (size_t s = 0; s < d.numInputBindings && s < IGL_BUFFER_BINDINGS_MAX; ++s) { + vertexStrides_[s] = static_cast(d.inputBindings[s].stride); + } + // If attributes reference slots beyond numInputBindings or strides are zero, + // derive reasonable defaults so sessions that bind to slot 1 still work. + size_t maxSlot = 0; + for (size_t i = 0; i < d.numAttributes; ++i) { + if (d.attributes[i].bufferIndex > maxSlot) { + maxSlot = d.attributes[i].bufferIndex; + } + } + // Helper to compute a minimal stride per slot from attributes (max end offset among attrs in that slot) + auto computeStrideForSlot = [&](size_t slot) -> uint32_t { + size_t maxEnd = 0; + for (size_t i = 0; i < d.numAttributes; ++i) { + const auto& a = d.attributes[i]; + if (a.bufferIndex != slot) continue; + size_t compSize = 0; + switch (a.format) { + case VertexAttributeFormat::Float1: compSize = 4; break; + case VertexAttributeFormat::Float2: compSize = 8; break; + case VertexAttributeFormat::Float3: compSize = 12; break; + case VertexAttributeFormat::Float4: compSize = 16; break; + case VertexAttributeFormat::Byte1: compSize = 1; break; + case VertexAttributeFormat::Byte2: compSize = 2; break; + case VertexAttributeFormat::Byte4: compSize = 4; break; + case VertexAttributeFormat::UByte4Norm: compSize = 4; break; + default: compSize = 0; break; + } + maxEnd = std::max(maxEnd, a.offset + compSize); + } + // Fallback to slot0 stride if present + if (maxEnd == 0 && d.numInputBindings > 0) { + return static_cast(d.inputBindings[0].stride); + } + return static_cast(maxEnd); + }; + for (size_t s = 0; s <= maxSlot && s < IGL_BUFFER_BINDINGS_MAX; ++s) { + if (vertexStrides_[s] == 0) { + vertexStrides_[s] = computeStrideForSlot(s); + } + } + if (vertexStride_ == 0) { + vertexStride_ = vertexStrides_[0]; + } 
+ } + } + } +} + +std::shared_ptr RenderPipelineState::renderPipelineReflection() { + if (reflection_) { + return reflection_; + } + + struct ReflectionImpl final : public IRenderPipelineReflection { + std::vector ubs; + std::vector samplers; + std::vector textures; + const std::vector& allUniformBuffers() const override { return ubs; } + const std::vector& allSamplers() const override { return samplers; } + const std::vector& allTextures() const override { return textures; } + }; + + auto out = std::make_shared(); + + auto reflectShader = [&](const std::shared_ptr& mod, ShaderStage stage) { + if (!mod) return; + auto* d3dMod = dynamic_cast(mod.get()); + if (!d3dMod) return; + const auto& bc = d3dMod->getBytecode(); + if (bc.empty()) return; + igl::d3d12::ComPtr refl; + if (FAILED(D3DReflect(bc.data(), bc.size(), IID_PPV_ARGS(refl.GetAddressOf())))) return; + D3D12_SHADER_DESC sd{}; + if (FAILED(refl->GetDesc(&sd))) return; + + // Constant buffers + for (UINT i = 0; i < sd.ConstantBuffers; ++i) { + auto* cb = refl->GetConstantBufferByIndex(i); + D3D12_SHADER_BUFFER_DESC cbd{}; if (FAILED(cb->GetDesc(&cbd))) continue; + int bufferIndex = -1; + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (SUCCEEDED(refl->GetResourceBindingDesc(r, &bind))) { + if (bind.Type == D3D_SIT_CBUFFER && std::string(bind.Name) == std::string(cbd.Name)) { + bufferIndex = static_cast(bind.BindPoint); + break; + } + } + } + BufferArgDesc ub; + ub.name = igl::genNameHandle(cbd.Name ? 
cbd.Name : ""); + ub.bufferAlignment = 256; + ub.bufferDataSize = cbd.Size; + ub.bufferIndex = bufferIndex; + ub.shaderStage = stage; + ub.isUniformBlock = true; + for (UINT v = 0; v < cbd.Variables; ++v) { + auto* var = cb->GetVariableByIndex(v); + D3D12_SHADER_VARIABLE_DESC vd{}; if (FAILED(var->GetDesc(&vd))) continue; + auto* t = var->GetType(); if (!t) continue; + D3D12_SHADER_TYPE_DESC td{}; if (FAILED(t->GetDesc(&td))) continue; + BufferArgDesc::BufferMemberDesc m; + m.name = igl::genNameHandle(vd.Name ? vd.Name : ""); + m.type = ReflectionUtils::mapUniformType(td); + m.offset = vd.StartOffset; + m.arrayLength = td.Elements ? td.Elements : 1; + ub.members.push_back(std::move(m)); + } + out->ubs.push_back(std::move(ub)); + } + + // Textures and samplers + for (UINT r = 0; r < sd.BoundResources; ++r) { + D3D12_SHADER_INPUT_BIND_DESC bind{}; + if (FAILED(refl->GetResourceBindingDesc(r, &bind))) continue; + if (bind.Type == D3D_SIT_TEXTURE) { + TextureArgDesc t; t.name = bind.Name ? bind.Name : ""; t.type = TextureType::TwoD; t.textureIndex = bind.BindPoint; t.shaderStage = stage; out->textures.push_back(std::move(t)); + } else if (bind.Type == D3D_SIT_SAMPLER) { + SamplerArgDesc s; s.name = bind.Name ? 
bind.Name : ""; s.samplerIndex = bind.BindPoint; s.shaderStage = stage; out->samplers.push_back(std::move(s)); + } + } + }; + + if (auto stages = getRenderPipelineDesc().shaderStages) { + reflectShader(stages->getVertexModule(), ShaderStage::Vertex); + reflectShader(stages->getFragmentModule(), ShaderStage::Fragment); + } + + reflection_ = out; + return reflection_; +} + +void RenderPipelineState::setRenderPipelineReflection( + const IRenderPipelineReflection& /*renderPipelineReflection*/) {} + +int RenderPipelineState::getIndexByName(const igl::NameHandle& /*name*/, + ShaderStage /*stage*/) const { + return -1; +} + +int RenderPipelineState::getIndexByName(const std::string& /*name*/, + ShaderStage /*stage*/) const { + return -1; +} + +ID3D12PipelineState* RenderPipelineState::getPipelineState( + const D3D12RenderPipelineDynamicState& dynamicState, + Device& device) const { + // Fast path: Check if dynamic state matches base PSO + // This happens when pipeline was created with same formats as framebuffer + const auto& desc = getRenderPipelineDesc(); + bool matchesBasePSO = true; + + // Check render target formats + const UINT numRTs = static_cast( + std::min(desc.targetDesc.colorAttachments.size(), + D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT)); + for (UINT i = 0; i < numRTs; ++i) { + if (dynamicState.rtvFormats[i] != + textureFormatToDXGIFormat(desc.targetDesc.colorAttachments[i].textureFormat)) { + matchesBasePSO = false; + break; + } + } + + // Check depth-stencil format + if (matchesBasePSO) { + const DXGI_FORMAT baseDSVFormat = + (desc.targetDesc.depthAttachmentFormat != TextureFormat::Invalid) + ? 
textureFormatToDXGIFormat(desc.targetDesc.depthAttachmentFormat) + : DXGI_FORMAT_UNKNOWN; + if (dynamicState.dsvFormat != baseDSVFormat) { + matchesBasePSO = false; + } + } + + // Return base PSO if formats match + if (matchesBasePSO) { + return pipelineState_.Get(); + } + + // Check variant cache + auto it = psoVariants_.find(dynamicState); + if (it != psoVariants_.end()) { + return it->second.Get(); + } + + // Create PSO variant with substituted formats (Vulkan-style on-demand creation) + IGL_LOG_INFO("Creating PSO variant: RTV[0]=%d (base) -> %d (framebuffer)\n", + textureFormatToDXGIFormat(desc.targetDesc.colorAttachments[0].textureFormat), + dynamicState.rtvFormats[0]); + + // Following Vulkan's approach: create modified RenderPipelineDesc with substituted formats + // Create a modified descriptor with framebuffer formats substituted + RenderPipelineDesc variantDesc = desc; // Copy all state + + // Substitute RT formats from actual framebuffer + for (UINT i = 0; i < numRTs; ++i) { + if (dynamicState.rtvFormats[i] != DXGI_FORMAT_UNKNOWN) { + // Convert DXGI format back to IGL TextureFormat + variantDesc.targetDesc.colorAttachments[i].textureFormat = + dxgiFormatToTextureFormat(dynamicState.rtvFormats[i]); + IGL_LOG_INFO(" RTV[%u]: substituted format %d\n", i, dynamicState.rtvFormats[i]); + } + } + + // Substitute DSV format if present + if (dynamicState.dsvFormat != DXGI_FORMAT_UNKNOWN) { + variantDesc.targetDesc.depthAttachmentFormat = + dxgiFormatToTextureFormat(dynamicState.dsvFormat); + } + + // Call Device::createPipelineStateVariant() to create PSO with modified formats + Result variantResult; + auto variantPSO = device.createPipelineStateVariant( + variantDesc, rootSignature_.Get(), &variantResult); + + if (!variantPSO.Get()) { + IGL_LOG_ERROR("PSO variant creation failed: %s\n", variantResult.message.c_str()); + IGL_LOG_ERROR("Falling back to base PSO (this will cause D3D12 validation errors!)\n"); + return pipelineState_.Get(); // Fallback to base PSO 
+ } + + // Cache the variant for future use + psoVariants_[dynamicState] = variantPSO; + IGL_LOG_INFO("PSO variant created and cached successfully: PSO=%p\n", variantPSO.Get()); + + return variantPSO.Get(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/RenderPipelineState.h b/src/igl/d3d12/RenderPipelineState.h new file mode 100644 index 0000000000..768dff68e6 --- /dev/null +++ b/src/igl/d3d12/RenderPipelineState.h @@ -0,0 +1,159 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::d3d12 { + +class Device; // Forward declaration + +/** + * @brief Encapsulates dynamic render state that affects PSO selection + * + * Following Vulkan's RenderPipelineDynamicState pattern, this structure serves as a hash key + * for PSO variant caching. D3D12 PSOs are immutable and must match the exact render target + * formats at draw time. 
+ * + * Key differences from Vulkan: + * - Vulkan: renderPassIndex_ encodes all render pass compatibility (formats + load/store ops) + * - D3D12: We only need render target formats (no render pass object exists) + * + * The structure is designed for efficient hashing and comparison: + * - Packed into fixed-size array for fast memcmp + * - Zero-initialized padding for consistent hashing + */ +struct D3D12RenderPipelineDynamicState { + // Render target formats (up to 8 MRT targets) + std::array rtvFormats; + // Depth-stencil format + DXGI_FORMAT dsvFormat; + + D3D12RenderPipelineDynamicState() { + rtvFormats.fill(DXGI_FORMAT_UNKNOWN); + dsvFormat = DXGI_FORMAT_UNKNOWN; + } + + bool operator==(const D3D12RenderPipelineDynamicState& other) const { + return rtvFormats == other.rtvFormats && dsvFormat == other.dsvFormat; + } + + struct HashFunction { + size_t operator()(const D3D12RenderPipelineDynamicState& s) const { + size_t hash = 0; + for (const auto& fmt : s.rtvFormats) { + hash ^= std::hash{}(fmt) + 0x9e3779b9 + (hash << 6) + (hash >> 2); + } + hash ^= std::hash{}(s.dsvFormat) + 0x9e3779b9 + (hash << 6) + (hash >> 2); + return hash; + } + }; +}; + +class RenderPipelineState final : public IRenderPipelineState { + public: + RenderPipelineState(const RenderPipelineDesc& desc, + igl::d3d12::ComPtr pipelineState, + igl::d3d12::ComPtr rootSignature); + ~RenderPipelineState() override = default; + + std::shared_ptr renderPipelineReflection() override; + void setRenderPipelineReflection( + const IRenderPipelineReflection& renderPipelineReflection) override; + int getIndexByName(const igl::NameHandle& name, ShaderStage stage) const override; + int getIndexByName(const std::string& name, ShaderStage stage) const override; + + // D3D12-specific accessors + ID3D12PipelineState* getPipelineState() const { return pipelineState_.Get(); } + + /** + * @brief Get PSO variant for specific render target formats (Vulkan-style dynamic PSO selection) + * + * This method follows 
Vulkan's getVkPipeline(dynamicState) pattern to create PSO variants + * on-demand based on actual framebuffer formats. D3D12 PSOs are immutable and must exactly + * match render target formats at creation time. + * + * @param dynamicState Contains actual framebuffer RTVformats and DSV format at draw time + * @param device IGL D3D12 device for PSO creation + * @return PSO variant matching the requested formats, or nullptr on error + */ + ID3D12PipelineState* getPipelineState(const D3D12RenderPipelineDynamicState& dynamicState, + Device& device) const; + + ID3D12RootSignature* getRootSignature() const { return rootSignature_.Get(); } + uint32_t getVertexStride() const { return vertexStride_; } + uint32_t getVertexStride(size_t slot) const { return (slot < IGL_BUFFER_BINDINGS_MAX) ? vertexStrides_[slot] : 0; } + D3D_PRIMITIVE_TOPOLOGY getPrimitiveTopology() const { return primitiveTopology_; } + + // Query push constant binding info from shader reflection + bool hasPushConstants() const { return shaderReflection_.hasPushConstants; } + UINT getPushConstantSlot() const { return shaderReflection_.pushConstantSlot; } + UINT getPushConstantRootParameterIndex() const { return shaderReflection_.pushConstantRootParamIndex; } + + // Query root parameter layout (dynamic based on shader reflection) + UINT getCBVTableRootParameterIndex() const { return rootParamLayout_.cbvTableIndex; } + UINT getSRVTableRootParameterIndex() const { return rootParamLayout_.srvTableIndex; } + UINT getSamplerTableRootParameterIndex() const { return rootParamLayout_.samplerTableIndex; } + UINT getUAVTableRootParameterIndex() const { return rootParamLayout_.uavTableIndex; } + + // Query descriptor range sizes (how many descriptors the root signature expects) + UINT getCBVDescriptorCount() const { return rootParamLayout_.cbvDescriptorCount; } + UINT getSRVDescriptorCount() const { return rootParamLayout_.srvDescriptorCount; } + UINT getSamplerDescriptorCount() const { return 
rootParamLayout_.samplerDescriptorCount; } + UINT getUAVDescriptorCount() const { return rootParamLayout_.uavDescriptorCount; } + + private: + friend class Device; // Device needs access to create PSO variants + + // Base PSO created from RenderPipelineDesc (may not match actual framebuffer formats) + igl::d3d12::ComPtr pipelineState_; + igl::d3d12::ComPtr rootSignature_; + std::shared_ptr reflection_; + uint32_t vertexStride_ = 0; + uint32_t vertexStrides_[IGL_BUFFER_BINDINGS_MAX] = {}; + D3D_PRIMITIVE_TOPOLOGY primitiveTopology_ = D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST; + + // PSO variant cache following Vulkan's pattern + // Maps framebuffer formats → PSO variant + mutable std::unordered_map, + D3D12RenderPipelineDynamicState::HashFunction> + psoVariants_; + + // Shader reflection info for dynamic resource binding + // Stores merged reflection data from vertex + fragment shaders + struct { + bool hasPushConstants = false; + UINT pushConstantSlot = UINT_MAX; + UINT pushConstantSize = 0; + UINT pushConstantRootParamIndex = 0; // Root parameter index for push constants in root signature + } shaderReflection_; + + // Root parameter layout (dynamically computed from shader reflection) + // These indices tell encoders which root parameter to use for each resource type + // Pure reflection-based approach - no hardcoded assumptions + struct { + UINT cbvTableIndex = UINT_MAX; // CBV descriptor table + UINT srvTableIndex = UINT_MAX; // SRV descriptor table + UINT samplerTableIndex = UINT_MAX; // Sampler descriptor table + UINT uavTableIndex = UINT_MAX; // UAV descriptor table + + // Descriptor range sizes (from root signature, 0 to maxSlot inclusive) + // These define how many descriptors the root signature expects in each table + // ResourcesBinder must allocate exactly these counts to match the root signature + UINT cbvDescriptorCount = 0; // Number of CBV descriptors (0 to maxCBVSlot) + UINT srvDescriptorCount = 0; // Number of SRV descriptors (0 to maxSRVSlot) + UINT 
samplerDescriptorCount = 0; // Number of sampler descriptors (0 to maxSamplerSlot) + UINT uavDescriptorCount = 0; // Number of UAV descriptors (0 to maxUAVSlot) + } rootParamLayout_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/SamplerState.cpp b/src/igl/d3d12/SamplerState.cpp new file mode 100644 index 0000000000..9e5757eafd --- /dev/null +++ b/src/igl/d3d12/SamplerState.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include + +namespace igl::d3d12 { + +size_t SamplerState::hash() const noexcept { + size_t h = 0; + + // Hash all D3D12_SAMPLER_DESC fields using the same technique as Device.cpp + // Magic constant 0x9e3779b9 is the golden ratio used for hash mixing + h ^= std::hash{}(static_cast(desc_.Filter)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.AddressU)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.AddressV)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.AddressW)) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.MipLODBias) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.MaxAnisotropy) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(static_cast(desc_.ComparisonFunc)) + 0x9e3779b9 + (h << 6) + (h >> 2); + + // Hash border color array + h ^= std::hash{}(desc_.BorderColor[0]) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.BorderColor[1]) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.BorderColor[2]) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.BorderColor[3]) + 0x9e3779b9 + (h << 6) + (h >> 2); + + h ^= std::hash{}(desc_.MinLOD) + 0x9e3779b9 + (h << 6) + (h >> 2); + h ^= std::hash{}(desc_.MaxLOD) + 0x9e3779b9 + (h << 6) + (h >> 2); + + return h; +} + +bool SamplerState::operator==(const SamplerState& rhs) 
const noexcept { + // Compare all D3D12_SAMPLER_DESC fields + return desc_.Filter == rhs.desc_.Filter && + desc_.AddressU == rhs.desc_.AddressU && + desc_.AddressV == rhs.desc_.AddressV && + desc_.AddressW == rhs.desc_.AddressW && + desc_.MipLODBias == rhs.desc_.MipLODBias && + desc_.MaxAnisotropy == rhs.desc_.MaxAnisotropy && + desc_.ComparisonFunc == rhs.desc_.ComparisonFunc && + desc_.BorderColor[0] == rhs.desc_.BorderColor[0] && + desc_.BorderColor[1] == rhs.desc_.BorderColor[1] && + desc_.BorderColor[2] == rhs.desc_.BorderColor[2] && + desc_.BorderColor[3] == rhs.desc_.BorderColor[3] && + desc_.MinLOD == rhs.desc_.MinLOD && + desc_.MaxLOD == rhs.desc_.MaxLOD; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/SamplerState.h b/src/igl/d3d12/SamplerState.h new file mode 100644 index 0000000000..44833bbb44 --- /dev/null +++ b/src/igl/d3d12/SamplerState.h @@ -0,0 +1,34 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class SamplerState final : public ISamplerState { + public: + explicit SamplerState(const D3D12_SAMPLER_DESC& desc) : desc_(desc) {} + ~SamplerState() override = default; + + bool isYUV() const noexcept override { return false; } + + const D3D12_SAMPLER_DESC& getDesc() const { return desc_; } + + /// Computes hash value based on D3D12_SAMPLER_DESC fields + size_t hash() const noexcept; + + /// Compares two SamplerState objects for equality + bool operator==(const SamplerState& rhs) const noexcept; + + private: + D3D12_SAMPLER_DESC desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ShaderModule.cpp b/src/igl/d3d12/ShaderModule.cpp new file mode 100644 index 0000000000..a872112f20 --- /dev/null +++ b/src/igl/d3d12/ShaderModule.cpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. 
+ * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include + +namespace igl::d3d12 { + +void ShaderModule::setReflection(igl::d3d12::ComPtr reflection) { + reflection_ = reflection; + if (reflection_.Get()) { + extractShaderMetadata(); + } +} + +void ShaderModule::extractShaderMetadata() { + if (!reflection_.Get()) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: reflection_ is NULL!\n"); + return; + } + + D3D12_SHADER_DESC shaderDesc = {}; + HRESULT hr = reflection_->GetDesc(&shaderDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get shader desc: 0x%08X\n", hr); + return; + } + + IGL_D3D12_LOG_VERBOSE("ShaderModule: Reflection extracted - %u constant buffers, %u bound resources, %u input params, %u output params\n", + shaderDesc.ConstantBuffers, + shaderDesc.BoundResources, + shaderDesc.InputParameters, + shaderDesc.OutputParameters); + + // Reset reflection info + reflectionInfo_ = ShaderReflectionInfo{}; + + // Extract resource bindings (textures, buffers, samplers, UAVs) + resourceBindings_.clear(); + for (UINT i = 0; i < shaderDesc.BoundResources; i++) { + D3D12_SHADER_INPUT_BIND_DESC bindDesc = {}; + hr = reflection_->GetResourceBindingDesc(i, &bindDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get resource binding %u: 0x%08X\n", i, hr); + continue; + } + + ResourceBinding binding; + binding.name = bindDesc.Name; + binding.type = bindDesc.Type; + binding.bindPoint = bindDesc.BindPoint; + binding.bindCount = bindDesc.BindCount; + binding.space = bindDesc.Space; + + resourceBindings_.push_back(binding); + + // Populate reflection info for root signature selection + if (bindDesc.Type == D3D_SIT_CBUFFER) { + reflectionInfo_.usedCBVSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxCBVSlot = std::max(reflectionInfo_.maxCBVSlot, bindDesc.BindPoint); + 
IGL_LOG_INFO(" Found CBV: '%s' at b%u\n", bindDesc.Name, bindDesc.BindPoint); + } else if (bindDesc.Type == D3D_SIT_TEXTURE || + bindDesc.Type == D3D_SIT_STRUCTURED || + bindDesc.Type == D3D_SIT_BYTEADDRESS) { + reflectionInfo_.usedSRVSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxSRVSlot = std::max(reflectionInfo_.maxSRVSlot, bindDesc.BindPoint); + IGL_LOG_INFO(" Found SRV: '%s' at t%u\n", bindDesc.Name, bindDesc.BindPoint); + } else if (bindDesc.Type == D3D_SIT_UAV_RWTYPED || + bindDesc.Type == D3D_SIT_UAV_RWSTRUCTURED || + bindDesc.Type == D3D_SIT_UAV_RWBYTEADDRESS || + bindDesc.Type == D3D_SIT_UAV_APPEND_STRUCTURED || + bindDesc.Type == D3D_SIT_UAV_CONSUME_STRUCTURED || + bindDesc.Type == D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER) { + reflectionInfo_.usedUAVSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxUAVSlot = std::max(reflectionInfo_.maxUAVSlot, bindDesc.BindPoint); + IGL_LOG_INFO(" Found UAV: '%s' at u%u\n", bindDesc.Name, bindDesc.BindPoint); + } else if (bindDesc.Type == D3D_SIT_SAMPLER) { + reflectionInfo_.usedSamplerSlots.push_back(bindDesc.BindPoint); + reflectionInfo_.maxSamplerSlot = std::max(reflectionInfo_.maxSamplerSlot, bindDesc.BindPoint); + IGL_LOG_INFO(" Found Sampler: '%s' at s%u\n", bindDesc.Name, bindDesc.BindPoint); + } + + const char* typeStr = "Unknown"; + switch (bindDesc.Type) { + case D3D_SIT_CBUFFER: typeStr = "CBV (Constant Buffer)"; break; + case D3D_SIT_TBUFFER: typeStr = "TBuffer"; break; + case D3D_SIT_TEXTURE: typeStr = "SRV (Texture)"; break; + case D3D_SIT_SAMPLER: typeStr = "Sampler"; break; + case D3D_SIT_UAV_RWTYPED: typeStr = "UAV (RW Typed)"; break; + case D3D_SIT_STRUCTURED: typeStr = "SRV (StructuredBuffer)"; break; + case D3D_SIT_UAV_RWSTRUCTURED: typeStr = "UAV (RWStructuredBuffer)"; break; + case D3D_SIT_BYTEADDRESS: typeStr = "SRV (ByteAddressBuffer)"; break; + case D3D_SIT_UAV_RWBYTEADDRESS: typeStr = "UAV (RWByteAddressBuffer)"; break; + case D3D_SIT_UAV_APPEND_STRUCTURED: typeStr = "UAV 
(AppendStructuredBuffer)"; break; + case D3D_SIT_UAV_CONSUME_STRUCTURED: typeStr = "UAV (ConsumeStructuredBuffer)"; break; + case D3D_SIT_UAV_RWSTRUCTURED_WITH_COUNTER: typeStr = "UAV (RWStructuredBuffer with counter)"; break; + default: break; + } + + IGL_LOG_DEBUG(" Resource [%u]: '%s' | Type: %s | Slot: t%u/b%u/s%u/u%u | Space: %u | Count: %u\n", + i, + bindDesc.Name, + typeStr, + bindDesc.Type == D3D_SIT_TEXTURE ? bindDesc.BindPoint : 0, + bindDesc.Type == D3D_SIT_CBUFFER ? bindDesc.BindPoint : 0, + bindDesc.Type == D3D_SIT_SAMPLER ? bindDesc.BindPoint : 0, + (bindDesc.Type == D3D_SIT_UAV_RWTYPED || bindDesc.Type == D3D_SIT_UAV_RWSTRUCTURED) ? bindDesc.BindPoint : 0, + bindDesc.Space, + bindDesc.BindCount); + } + + // Extract constant buffer information + constantBuffers_.clear(); + for (UINT i = 0; i < shaderDesc.ConstantBuffers; i++) { + ID3D12ShaderReflectionConstantBuffer* cb = reflection_->GetConstantBufferByIndex(i); + if (!cb) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get constant buffer %u\n", i); + continue; + } + + D3D12_SHADER_BUFFER_DESC bufferDesc = {}; + hr = cb->GetDesc(&bufferDesc); + if (FAILED(hr)) { + IGL_LOG_ERROR("ShaderModule::extractShaderMetadata: Failed to get CB desc %u: 0x%08X\n", i, hr); + continue; + } + + ConstantBufferInfo cbInfo; + cbInfo.name = bufferDesc.Name; + cbInfo.size = bufferDesc.Size; + cbInfo.numVariables = bufferDesc.Variables; + + constantBuffers_.push_back(cbInfo); + + IGL_LOG_DEBUG(" Constant Buffer [%u]: '%s' | Size: %u bytes | Variables: %u\n", + i, + bufferDesc.Name, + bufferDesc.Size, + bufferDesc.Variables); + + // Optionally log variable details for debugging + for (UINT v = 0; v < bufferDesc.Variables; v++) { + ID3D12ShaderReflectionVariable* var = cb->GetVariableByIndex(v); + if (var) { + D3D12_SHADER_VARIABLE_DESC varDesc = {}; + if (SUCCEEDED(var->GetDesc(&varDesc))) { + IGL_LOG_DEBUG(" Variable [%u]: '%s' | Offset: %u | Size: %u bytes\n", + v, + varDesc.Name, + 
varDesc.StartOffset, + varDesc.Size); + } + } + } + } + + // Detect push constants by name convention: cbuffer must be named "PushConstants" + // This allows distinguishing between push constants (used with bindBytes) and regular + // small uniform buffers (used with bindBuffer), since both may be small (≤64 bytes). + for (const auto& binding : resourceBindings_) { + if (binding.type == D3D_SIT_CBUFFER) { + // Find the corresponding constant buffer info to get size and name + for (const auto& cbInfo : constantBuffers_) { + if (cbInfo.name == binding.name) { + // Check if this is push constants by name (must contain "PushConstant") + if (cbInfo.name.find("PushConstant") != std::string::npos && cbInfo.size <= 64) { + reflectionInfo_.hasPushConstants = true; + reflectionInfo_.pushConstantSlot = binding.bindPoint; + reflectionInfo_.pushConstantSize = (cbInfo.size + 3) / 4; // Convert bytes to DWORDs + IGL_D3D12_LOG_VERBOSE(" Detected push constants: '%s' at b%u (%u DWORDs / %u bytes)\n", + cbInfo.name.c_str(), + binding.bindPoint, + reflectionInfo_.pushConstantSize, + cbInfo.size); + } + break; + } + } + } + } +} + +bool ShaderModule::hasResource(const std::string& name) const { + for (const auto& binding : resourceBindings_) { + if (binding.name == name) { + return true; + } + } + return false; +} + +UINT ShaderModule::getResourceBindPoint(const std::string& name) const { + for (const auto& binding : resourceBindings_) { + if (binding.name == name) { + return binding.bindPoint; + } + } + return UINT_MAX; // Not found +} + +size_t ShaderModule::getConstantBufferSize(const std::string& name) const { + for (const auto& cb : constantBuffers_) { + if (cb.name == name) { + return cb.size; + } + } + return 0; // Not found +} + +bool ShaderModule::validateBytecode() const { + // Check minimum size for signature + if (bytecode_.size() < 4) { + IGL_LOG_ERROR("Shader bytecode too small (< 4 bytes): %zu bytes\n", bytecode_.size()); + return false; + } + + const char* signature = 
reinterpret_cast(bytecode_.data()); + + // Valid signatures: "DXBC" (legacy D3D11/D3D12) or "DXIL" (modern D3D12) + if (std::memcmp(signature, "DXBC", 4) == 0) { + IGL_LOG_DEBUG("Shader bytecode validated: DXBC format (%zu bytes)\n", bytecode_.size()); + return true; // Valid DXBC shader + } + + if (std::memcmp(signature, "DXIL", 4) == 0) { + IGL_LOG_DEBUG("Shader bytecode validated: DXIL format (%zu bytes)\n", bytecode_.size()); + return true; // Valid DXIL shader + } + + // Log the invalid signature for debugging + IGL_LOG_ERROR("Invalid shader bytecode signature: 0x%02X%02X%02X%02X (expected 'DXBC' or 'DXIL')\n", + static_cast(signature[0]), + static_cast(signature[1]), + static_cast(signature[2]), + static_cast(signature[3])); + return false; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/ShaderModule.h b/src/igl/d3d12/ShaderModule.h new file mode 100644 index 0000000000..71e5ed7b04 --- /dev/null +++ b/src/igl/d3d12/ShaderModule.h @@ -0,0 +1,103 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace igl::d3d12 { + +class ShaderModule final : public IShaderModule { + public: + // Resource binding information extracted from shader reflection + struct ResourceBinding { + std::string name; + D3D_SHADER_INPUT_TYPE type; // CBV, SRV, UAV, Sampler + UINT bindPoint; + UINT bindCount; + UINT space; + }; + + // Constant buffer information from reflection + struct ConstantBufferInfo { + std::string name; + UINT size; + UINT numVariables; + }; + + // Shader resource usage summary for root signature selection + struct ShaderReflectionInfo { + // Push constants (inline root constants) + bool hasPushConstants = false; + UINT pushConstantSlot = UINT_MAX; // Which b# register + UINT pushConstantSize = 0; // Size in 32-bit values + + // Resource slot usage (for conflict detection) + std::vector usedCBVSlots; // Constant buffer slots (b#) + std::vector usedSRVSlots; // Shader resource view slots (t#) + std::vector usedUAVSlots; // Unordered access view slots (u#) + std::vector usedSamplerSlots; // Sampler slots (s#) + + // Maximum slot indices used (for root signature sizing) + UINT maxCBVSlot = 0; + UINT maxSRVSlot = 0; + UINT maxUAVSlot = 0; + UINT maxSamplerSlot = 0; + }; + + ShaderModule(ShaderModuleInfo info, std::vector bytecode) + : IShaderModule(info), bytecode_(std::move(bytecode)) { + if (!validateBytecode()) { + IGL_LOG_ERROR("ShaderModule: Created with invalid bytecode (validation failed)\n"); + } + } + ~ShaderModule() override = default; + + const std::vector& getBytecode() const { return bytecode_; } + + // Shader reflection API + void setReflection(igl::d3d12::ComPtr reflection); + const std::vector& getResourceBindings() const { return resourceBindings_; } + const std::vector& getConstantBuffers() const { return constantBuffers_; } + const ShaderReflectionInfo& getReflectionInfo() const { return reflectionInfo_; } + + bool hasResource(const std::string& name) const; + 
UINT getResourceBindPoint(const std::string& name) const; + size_t getConstantBufferSize(const std::string& name) const; + + // Bytecode validation + bool validateBytecode() const; + + private: + std::vector bytecode_; // DXIL bytecode + igl::d3d12::ComPtr reflection_; + std::vector resourceBindings_; + std::vector constantBuffers_; + ShaderReflectionInfo reflectionInfo_; + + void extractShaderMetadata(); +}; + +class ShaderStages final : public IShaderStages { + public: + ShaderStages(ShaderStagesDesc desc) : IShaderStages(desc) {} + ~ShaderStages() override = default; +}; + +class ShaderLibrary final : public IShaderLibrary { + public: + explicit ShaderLibrary(std::vector> modules) + : IShaderLibrary(std::move(modules)) {} + ~ShaderLibrary() override = default; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Texture.cpp b/src/igl/d3d12/Texture.cpp new file mode 100644 index 0000000000..7d06b0ee21 --- /dev/null +++ b/src/igl/d3d12/Texture.cpp @@ -0,0 +1,1323 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include +#include +#include +#include +#include +#include + +// No channel swap needed: DXGI_FORMAT_R8G8B8A8_UNORM matches IGL TextureFormat::RGBA_UNorm8 byte order. 
+ +namespace igl::d3d12 { + +namespace { +// Import ComPtr for readability +template +using ComPtr = igl::d3d12::ComPtr; +} // namespace + +std::shared_ptr Texture::createFromResource(ID3D12Resource* resource, + TextureFormat format, + const TextureDesc& desc, + ID3D12Device* device, + ID3D12CommandQueue* queue, + D3D12_RESOURCE_STATES initialState, + Device* iglDevice) { + if (!resource) { + IGL_LOG_ERROR("Texture::createFromResource - resource is NULL!\n"); + return nullptr; + } + + auto texture = std::make_shared(format); + + // Attach the resource to ComPtr (takes ownership, AddRefs) + resource->AddRef(); + texture->resource_.Attach(resource); + + texture->device_ = device; + texture->queue_ = queue; + texture->iglDevice_ = iglDevice; // Store igl Device for upload-related operations. + texture->format_ = format; + texture->dimensions_ = Dimensions{desc.width, desc.height, desc.depth}; + texture->type_ = desc.type; + texture->numLayers_ = desc.numLayers; + texture->numMipLevels_ = desc.numMipLevels; + texture->samples_ = desc.numSamples; + texture->usage_ = desc.usage; + + texture->initializeStateTracking(initialState); + + IGL_D3D12_LOG_VERBOSE("Texture::createFromResource - SUCCESS: %dx%d format=%d\n", + desc.width, desc.height, (int)format); + + return texture; +} + +std::shared_ptr Texture::createTextureView(std::shared_ptr parent, + const TextureViewDesc& desc) { + if (!parent) { + IGL_LOG_ERROR("Texture::createTextureView - parent is NULL!\n"); + return nullptr; + } + + // Determine the format to use for the view + TextureFormat viewFormat = (desc.format != TextureFormat::Invalid) ? 
desc.format : parent->format_; + + auto view = std::make_shared(viewFormat); + + // Share the D3D12 resource (don't create new one) + // ComPtr doesn't have copy assignment, so we need to use Attach() and AddRef() + auto* parentResource = parent->resource_.Get(); + if (parentResource) { + parentResource->AddRef(); + view->resource_.Attach(parentResource); + } + view->isView_ = true; + view->parentTexture_ = parent; + + // Defensive check: parent and view must share the same underlying D3D12 resource + IGL_DEBUG_ASSERT(parent->resource_.Get() == view->resource_.Get(), + "Parent and view must share the same D3D12 resource"); + + // Store view parameters (cumulative offsets for nested views) + view->mipLevelOffset_ = parent->mipLevelOffset_ + desc.mipLevel; + view->numMipLevelsInView_ = desc.numMipLevels; + + // CRITICAL FIX: D3D12 SRV descriptors require MipLevels >= 1 + // If numMipLevels is 0 (uninitialized), default to 1 to prevent invalid SRV creation + if (view->numMipLevelsInView_ == 0) { + IGL_LOG_ERROR("Texture::createTextureView - numMipLevels is 0, defaulting to 1 (SRV requires MipLevels >= 1)\n"); + view->numMipLevelsInView_ = 1; + } + + // Validate mip level bounds to prevent out-of-range access + const uint32_t parentMipCount = parent->getNumMipLevels(); + const uint32_t requestedMipEnd = desc.mipLevel + view->numMipLevelsInView_; + if (requestedMipEnd > parentMipCount) { + IGL_LOG_ERROR("Texture::createTextureView - mip range [%u, %u) exceeds parent mip count %u, clamping\n", + desc.mipLevel, requestedMipEnd, parentMipCount); + // Clamp to valid range + view->numMipLevelsInView_ = (parentMipCount > desc.mipLevel) ? (parentMipCount - desc.mipLevel) : 1; + } + + view->arraySliceOffset_ = parent->arraySliceOffset_ + desc.layer; + view->numArraySlicesInView_ = desc.numLayers; + + // Copy properties from parent + view->device_ = parent->device_; + view->queue_ = parent->queue_; + view->iglDevice_ = parent->iglDevice_; // Propagate igl Device pointer. 
+ view->format_ = viewFormat; + view->type_ = desc.type; + view->usage_ = parent->usage_; + view->samples_ = parent->samples_; + + // Calculate view dimensions based on mip level + const uint32_t mipDivisor = 1u << desc.mipLevel; + view->dimensions_ = Dimensions{ + std::max(1u, parent->dimensions_.width >> desc.mipLevel), + std::max(1u, parent->dimensions_.height >> desc.mipLevel), + std::max(1u, parent->dimensions_.depth >> desc.mipLevel) + }; + view->numLayers_ = desc.numLayers; + // Use the validated numMipLevelsInView_ value (which has been corrected if it was 0) + view->numMipLevels_ = view->numMipLevelsInView_; + + // Views delegate state tracking to the root texture and do not maintain separate state. + // State is accessed via getStateOwner(), which walks to the root for views. + // Views share the same D3D12 resource and subresourceStates_ tracking with their root. + + IGL_D3D12_LOG_VERBOSE("Texture::createTextureView - SUCCESS: view of %dx%d, mips %u-%u, layers %u-%u\n", + view->dimensions_.width, view->dimensions_.height, + desc.mipLevel, desc.mipLevel + desc.numMipLevels - 1, + desc.layer, desc.layer + desc.numLayers - 1); + + return view; +} + +Texture::~Texture() { + // Texture views share the parent's resource, so they don't own descriptors. + // Only free descriptors for non-view textures. + if (isView_) { + return; + } + + // Get descriptor heap manager from device. + // Note: in the current architecture, descriptors are allocated/freed by RenderCommandEncoder, + // not stored in Texture. This destructor is defensive in case descriptors become per-texture later. + if (!iglDevice_) { + return; + } + + // For now, descriptors are managed by RenderCommandEncoder and freed when the encoder is destroyed. + // The rtvIndices_, dsvIndices_, and srvIndex_ members are currently unused but reserved for future use. 
+} + +Result Texture::upload(const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow) const { + IGL_D3D12_LOG_VERBOSE("Texture::upload() - START: %dx%d\n", range.width, range.height); + + if (!device_ || !queue_ || !resource_.Get()) { + IGL_LOG_ERROR("Texture::upload() - FAILED: device, queue, or resource not available\n"); + return Result(Result::Code::RuntimeError, "Device, queue, or resource not available for upload"); + } + + if (!data) { + IGL_LOG_ERROR("Texture::upload() - FAILED: data is null\n"); + return Result(Result::Code::ArgumentInvalid, "Upload data is null"); + } + + IGL_D3D12_LOG_VERBOSE("Texture::upload() - Proceeding with upload\n"); + + // Calculate dimensions and data size + const uint32_t width = range.width > 0 ? range.width : dimensions_.width; + const uint32_t height = range.height > 0 ? range.height : dimensions_.height; + const uint32_t depth = range.depth > 0 ? range.depth : dimensions_.depth; + + const auto props = TextureFormatProperties::fromTextureFormat(format_); + const bool isBC7 = (format_ == TextureFormat::RGBA_BC7_UNORM_4x4 || + format_ == TextureFormat::RGBA_BC7_SRGB_4x4); + + // Calculate bytes per row if not provided. For block-compressed formats + // like BC7, rows are expressed in blocks, not texels, so use the number + // of blocks in X multiplied by bytesPerBlock. + if (bytesPerRow == 0) { + if (isBC7) { + const uint32_t blocksX = (width + 3u) / 4u; + bytesPerRow = static_cast(blocksX) * props.bytesPerBlock; + } else { + const size_t bpp = std::max(props.bytesPerBlock, 1); + bytesPerRow = static_cast(width) * bpp; + } + } + + // Get the resource description to calculate required size + D3D12_RESOURCE_DESC resourceDesc = resource_->GetDesc(); + + // Determine how many layers/faces and mip levels we need to upload + const uint32_t numSlicesToUpload = (type_ == TextureType::Cube) ? range.numFaces : range.numLayers; + const uint32_t baseSlice = (type_ == TextureType::Cube) ? 
range.face : range.layer; + const uint32_t numMipsToUpload = range.numMipLevels; + const uint32_t baseMip = range.mipLevel; + IGL_D3D12_LOG_VERBOSE("Texture::upload - type=%d, baseSlice=%u, numSlicesToUpload=%u, baseMip=%u, numMipsToUpload=%u\n", + (int)type_, baseSlice, numSlicesToUpload, baseMip, numMipsToUpload); + + // Calculate total staging buffer size for ALL subresources + UINT64 totalStagingSize = 0; + std::vector layouts; + std::vector numRowsArray; + std::vector rowSizesArray; + + for (uint32_t mipOffset = 0; mipOffset < numMipsToUpload; ++mipOffset) { + for (uint32_t sliceOffset = 0; sliceOffset < numSlicesToUpload; ++sliceOffset) { + const uint32_t subresource = calcSubresourceIndex(baseMip + mipOffset, baseSlice + sliceOffset); + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout = {}; + UINT numRows = 0; + UINT64 rowSize = 0; + UINT64 subresSize = 0; + device_->GetCopyableFootprints(&resourceDesc, subresource, 1, totalStagingSize, &layout, &numRows, &rowSize, &subresSize); + layouts.push_back(layout); + numRowsArray.push_back(numRows); + rowSizesArray.push_back(rowSize); + totalStagingSize += subresSize; + } + } + + // Try to allocate from upload ring buffer first. + UploadRingBuffer* ringBuffer = nullptr; + UploadRingBuffer::Allocation ringAllocation; + bool useRingBuffer = false; + UINT64 uploadFenceValue = 0; + + if (iglDevice_) { + // Reclaim completed upload buffers before allocating new ones. 
+ iglDevice_->processCompletedUploads(); + + ringBuffer = iglDevice_->getUploadRingBuffer(); + // Get fence value that will signal when this upload completes + uploadFenceValue = iglDevice_->getNextUploadFenceValue(); + + if (ringBuffer) { + // D3D12 requires 512-byte alignment for texture uploads (D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT) + constexpr uint64_t kTextureUploadAlignment = 512; + ringAllocation = ringBuffer->allocate(totalStagingSize, kTextureUploadAlignment, uploadFenceValue); + + if (ringAllocation.valid) { + useRingBuffer = true; + } + } + } + + // Fallback: Create temporary staging buffer if ring buffer allocation failed + igl::d3d12::ComPtr stagingBuffer; + void* mappedData = nullptr; + uint64_t stagingBaseOffset = 0; + HRESULT hr = S_OK; + + if (useRingBuffer) { + // Use ring buffer allocation + mappedData = ringAllocation.cpuAddress; + stagingBaseOffset = ringAllocation.offset; + } else { + // Create temporary staging buffer + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + uploadHeapProps.CreationNodeMask = 1; + uploadHeapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC stagingDesc = {}; + stagingDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + stagingDesc.Width = totalStagingSize; + stagingDesc.Height = 1; + stagingDesc.DepthOrArraySize = 1; + stagingDesc.MipLevels = 1; + stagingDesc.Format = DXGI_FORMAT_UNKNOWN; + stagingDesc.SampleDesc.Count = 1; + stagingDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + hr = device_->CreateCommittedResource(&uploadHeapProps, D3D12_HEAP_FLAG_NONE, &stagingDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, + IID_PPV_ARGS(stagingBuffer.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create staging buffer"); + } + + // Map staging buffer once + hr = stagingBuffer->Map(0, nullptr, &mappedData); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to map staging buffer"); + } + } + + // Copy all 
subresource data to the staging buffer. + // Direct copy: no channel swap needed for RGBA formats. + // DXGI_FORMAT_R8G8B8A8_UNORM has R,G,B,A byte order matching IGL TextureFormat::RGBA_UNorm8. + size_t srcDataOffset = 0; + size_t layoutIdx = 0; + + for (uint32_t mipOffset = 0; mipOffset < numMipsToUpload; ++mipOffset) { + const uint32_t mipWidth = std::max(width >> (baseMip + mipOffset), 1u); + const uint32_t mipHeight = std::max(height >> (baseMip + mipOffset), 1u); + const uint32_t mipDepth = std::max(depth >> (baseMip + mipOffset), 1u); + + size_t mipBytesPerRow = 0; + if (isBC7) { + const uint32_t blocksX = (mipWidth + 3u) / 4u; + mipBytesPerRow = static_cast(blocksX) * props.bytesPerBlock; + } else { + mipBytesPerRow = (bytesPerRow * mipWidth) / width; + } + + for (uint32_t sliceOffset = 0; sliceOffset < numSlicesToUpload; ++sliceOffset) { + const auto& layout = layouts[layoutIdx]; + const UINT numRows = numRowsArray[layoutIdx]; + const UINT64 rowSize = rowSizesArray[layoutIdx]; + layoutIdx++; + + const uint8_t* srcData = static_cast(data) + srcDataOffset; + uint8_t* dstData = static_cast(mappedData) + layout.Offset; + const size_t copyBytes = std::min(static_cast(rowSize), mipBytesPerRow); + + // For uncompressed formats, the source data is tightly packed by the + // requested region's height (mipHeight). For block-compressed formats + // (e.g. BC7), numRows represents the number of block rows returned by + // GetCopyableFootprints. Use mipHeight for uncompressed uploads and + // numRows for BC7 so that source layout matches the caller's data. + const UINT rowsToCopy = isBC7 ? 
numRows : mipHeight; + + const size_t srcDepthPitch = mipBytesPerRow * rowsToCopy; + const size_t dstDepthPitch = layout.Footprint.RowPitch * layout.Footprint.Height; + + for (UINT z = 0; z < mipDepth; ++z) { + const uint8_t* srcSlice = srcData + z * srcDepthPitch; + uint8_t* dstSlice = dstData + z * dstDepthPitch; + for (UINT row = 0; row < rowsToCopy; ++row) { + const uint8_t* srcRow = srcSlice + row * mipBytesPerRow; + uint8_t* dstRow = dstSlice + row * layout.Footprint.RowPitch; + memcpy(dstRow, srcRow, copyBytes); + } + } + + // Advance source pointer by the size of this subresource (all rows, all slices). + srcDataOffset += mipBytesPerRow * rowsToCopy * mipDepth; + } + } + + // Unmap temporary staging buffer (ring buffer stays persistently mapped) + if (!useRingBuffer && stagingBuffer.Get()) { + stagingBuffer->Unmap(0, nullptr); + } + + // Get command allocator from pool with fence tracking when an iglDevice is available. + igl::d3d12::ComPtr cmdAlloc; + if (iglDevice_) { + cmdAlloc = iglDevice_->getUploadCommandAllocator(); + if (!cmdAlloc.Get()) { + return Result(Result::Code::RuntimeError, "Failed to get command allocator from pool"); + } + } else { + // Fallback for textures created without Device* (shouldn't happen in normal flow) + hr = device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(cmdAlloc.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create command allocator"); + } + } + + igl::d3d12::ComPtr cmdList; + hr = device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, cmdAlloc.Get(), nullptr, + IID_PPV_ARGS(cmdList.GetAddressOf())); + if (FAILED(hr)) { + if (iglDevice_) { + // Return allocator to pool with fence value 0 (immediately available) + iglDevice_->returnUploadCommandAllocator(cmdAlloc, 0); + } + return Result(Result::Code::RuntimeError, "Failed to create command list"); + } + + // Record all copy commands + layoutIdx = 0; + for (uint32_t mipOffset = 0; mipOffset < 
numMipsToUpload; ++mipOffset) { + const uint32_t currentMip = baseMip + mipOffset; + const uint32_t mipWidth = std::max(width >> currentMip, 1u); + const uint32_t mipHeight = std::max(height >> currentMip, 1u); + const uint32_t mipDepth = std::max(depth >> currentMip, 1u); + + for (uint32_t sliceOffset = 0; sliceOffset < numSlicesToUpload; ++sliceOffset) { + const uint32_t currentSlice = baseSlice + sliceOffset; + const uint32_t subresource = calcSubresourceIndex(currentMip, currentSlice); + + // const_cast needed because upload is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->transitionTo(cmdList.Get(), D3D12_RESOURCE_STATE_COPY_DEST, currentMip, currentSlice); + + D3D12_TEXTURE_COPY_LOCATION dst = {}; + dst.pResource = resource_.Get(); + dst.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dst.SubresourceIndex = subresource; + + if (type_ == TextureType::Cube) { + IGL_D3D12_LOG_VERBOSE("CopyTextureRegion: Copying to CUBE subresource=%u (mip=%u, slice=%u)\n", + subresource, currentMip, currentSlice); + } + + D3D12_TEXTURE_COPY_LOCATION src = {}; + src.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + + // Use ring buffer or temporary staging buffer + if (useRingBuffer) { + src.pResource = ringBuffer->getUploadHeap(); + // Adjust layout offset to account for ring buffer base offset + D3D12_PLACED_SUBRESOURCE_FOOTPRINT adjustedLayout = layouts[layoutIdx]; + adjustedLayout.Offset += stagingBaseOffset; + src.PlacedFootprint = adjustedLayout; + } else { + src.pResource = stagingBuffer.Get(); + src.PlacedFootprint = layouts[layoutIdx]; + } + layoutIdx++; + + // For block-compressed formats like BC7, CopyTextureRegion requires the + // source box to be aligned to block boundaries. Small mips (e.g. 2x2) + // violate this if we specify an explicit box in texel units. Since the + // staging layout already matches the subresource footprint, simply copy + // the entire subresource by passing a null box for BC7. 
+ if (isBC7) { + cmdList->CopyTextureRegion(&dst, range.x, range.y, range.z, &src, nullptr); + } else { + D3D12_BOX srcBox = {0, 0, 0, mipWidth, mipHeight, mipDepth}; + cmdList->CopyTextureRegion(&dst, range.x, range.y, range.z, &src, &srcBox); + } + + // const_cast needed (see above) + const_cast(this)->transitionTo(cmdList.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, currentMip, currentSlice); + } + } + + cmdList->Close(); + + // Execute once and wait once + ID3D12CommandList* cmdLists[] = {cmdList.Get()}; + queue_->ExecuteCommandLists(1, cmdLists); + + // Use upload fence for command allocator synchronization. + // Use pre-allocated uploadFenceValue (already incremented for ring buffer). + if (iglDevice_) { + ID3D12Fence* uploadFence = iglDevice_->getUploadFence(); + + hr = queue_->Signal(uploadFence, uploadFenceValue); + if (FAILED(hr)) { + IGL_LOG_ERROR("Texture::upload: Failed to signal upload fence: 0x%08X\n", hr); + // Return allocator with 0 to avoid blocking the pool + iglDevice_->returnUploadCommandAllocator(cmdAlloc, 0); + return Result(Result::Code::RuntimeError, "Failed to signal fence"); + } + + // Return allocator to pool with fence value (will be reused after the fence is signaled). + iglDevice_->returnUploadCommandAllocator(cmdAlloc, uploadFenceValue); + + // Track staging buffer for async cleanup (no synchronous wait). + // Only track temporary staging buffers; ring buffer is persistent. + // Pass uploadFenceValue (already signaled above) to track with the correct fence. 
+ if (!useRingBuffer && stagingBuffer.Get()) { + iglDevice_->trackUploadBuffer(std::move(stagingBuffer), uploadFenceValue); + } + } else { + // Fallback for textures without iglDevice_ (shouldn't happen in normal flow) + // In this case, we need to wait synchronously since we can't track the buffer + igl::d3d12::ComPtr<ID3D12Fence> fence; + hr = device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())); + if (FAILED(hr)) { + return Result(Result::Code::RuntimeError, "Failed to create fence"); + } + + queue_->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + return waitResult; // Propagate detailed timeout/setup error + } + } + + return Result(); +} + +Result Texture::uploadCube(const TextureRangeDesc& range, + TextureCubeFace face, + const void* data, + size_t bytesPerRow) const { + // Cube textures are stored as texture arrays with 6 slices (one per face). + // The upload() method already handles cube textures correctly when face/numFaces are set. 
+ + // Validate this is a cube texture + if (type_ != TextureType::Cube) { + return Result(Result::Code::ArgumentInvalid, "uploadCube called on non-cube texture"); + } + + // Create a modified range with the correct face index + TextureRangeDesc cubeRange = range; + cubeRange.face = static_cast<uint32_t>(face); // Convert TextureCubeFace enum to face index (0-5) + cubeRange.numFaces = 1; // Upload single face + + // Delegate to upload() which handles cube texture subresource indexing correctly + return upload(cubeRange, data, bytesPerRow); +} + +Result Texture::uploadInternal(TextureType type, + const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow, + const uint32_t* mipLevelBytes) const { + if (!(type == TextureType::TwoD || type == TextureType::TwoDArray || type == TextureType::ThreeD || type == TextureType::Cube)) { + return Result(Result::Code::Unimplemented, "Upload not implemented for this texture type"); + } + + // Delegate to upload() which now handles multi-mip, multi-layer, and cube textures natively + return upload(range, data, bytesPerRow); +} + +Dimensions Texture::getDimensions() const { + return dimensions_; +} + +uint32_t Texture::getNumLayers() const { + return static_cast<uint32_t>(numLayers_); +} + +TextureType Texture::getType() const { + return type_; +} + +TextureDesc::TextureUsage Texture::getUsage() const { + return usage_; +} + +uint32_t Texture::getSamples() const { + return static_cast<uint32_t>(samples_); +} + +uint32_t Texture::getNumMipLevels() const { + return static_cast<uint32_t>(numMipLevels_); +} + +uint64_t Texture::getTextureId() const { + return reinterpret_cast<uint64_t>(resource_.Get()); +} + +TextureFormat Texture::getFormat() const { + return format_; +} + +bool Texture::isRequiredGenerateMipmap() const { + return false; +} + +void Texture::generateMipmap(ICommandQueue& /*cmdQueue*/, const TextureRangeDesc* /*range*/) const { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdQueue) - START: numMips=%u\n", numMipLevels_); + + if (!device_ || !queue_ || 
!resource_.Get() || numMipLevels_ < 2) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Skipping: device=%p queue=%p resource=%p numMips=%u\n", + device_, queue_, resource_.Get(), numMipLevels_); + return; + } + + D3D12_RESOURCE_DESC resourceDesc = resource_->GetDesc(); + + // Only support 2D textures for mipmap generation + if (resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Skipping: only 2D textures supported (dimension=%d)\n", + (int)resourceDesc.Dimension); + return; + } + + // Skip depth/stencil textures entirely. The current D3D12 mipmap path only + // supports color render-target textures; attempting to add ALLOW_RENDER_TARGET + // to a depth/stencil resource would violate D3D12's flag rules. + if (resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_DEPTH_STENCIL) { + IGL_D3D12_LOG_VERBOSE( + "Texture::generateMipmap() - Skipping: depth/stencil textures are not " + "handled by this mipmap path (Flags=0x%08X)\n", + resourceDesc.Flags); + return; + } + + // If texture wasn't created with a render-target-capable flag, skip mipmap + // generation gracefully on D3D12. The current implementation only supports + // color 2D textures with ALLOW_RENDER_TARGET; depth/stencil and other usage + // patterns rely on backend-specific paths or pre-generated mips. 
+ if (!(resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Recreating texture with RENDER_TARGET flag for mipmap generation\n"); + + // Save current resource using ComPtr for automatic reference counting + // Note: ComPtr copy is deleted, so we manually AddRef and Attach + ID3D12Resource* rawOldResource = resource_.Get(); + if (rawOldResource) { + rawOldResource->AddRef(); + } + igl::d3d12::ComPtr oldResource; + oldResource.Attach(rawOldResource); + + // Modify descriptor to add RENDER_TARGET flag + resourceDesc.Flags |= D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET; + + // Create new resource with RENDER_TARGET flag + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_DEFAULT; + + D3D12_CLEAR_VALUE clearValue = {}; + clearValue.Format = resourceDesc.Format; + + igl::d3d12::ComPtr newResource; + HRESULT hr = device_->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, + &clearValue, + IID_PPV_ARGS(newResource.GetAddressOf())); + + if (FAILED(hr)) { + IGL_D3D12_LOG_VERBOSE( + "Texture::generateMipmap() - Skipping: failed to recreate texture with " + "RENDER_TARGET flag (HRESULT=0x%08X)\n", + static_cast(hr)); + return; + } + + // Copy mip 0 from old resource to new resource + igl::d3d12::ComPtr copyAlloc; + igl::d3d12::ComPtr copyList; + if (FAILED(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(copyAlloc.GetAddressOf())))) { + IGL_LOG_ERROR("Texture::generateMipmap() - Failed to create copy command allocator\n"); + return; + } + if (FAILED(device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, copyAlloc.Get(), nullptr, IID_PPV_ARGS(copyList.GetAddressOf())))) { + IGL_LOG_ERROR("Texture::generateMipmap() - Failed to create copy command list\n"); + return; + } + + // Transition old resource to COPY_SOURCE + D3D12_RESOURCE_BARRIER barrierOld = {}; + barrierOld.Type = 
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrierOld.Transition.pResource = oldResource.Get(); + barrierOld.Transition.StateBefore = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + barrierOld.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_SOURCE; + barrierOld.Transition.Subresource = 0; + copyList->ResourceBarrier(1, &barrierOld); + + // Copy mip 0 + D3D12_TEXTURE_COPY_LOCATION srcLoc = {}; + srcLoc.pResource = oldResource.Get(); + srcLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLoc.SubresourceIndex = 0; + + D3D12_TEXTURE_COPY_LOCATION dstLoc = {}; + dstLoc.pResource = newResource.Get(); + dstLoc.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + dstLoc.SubresourceIndex = 0; + + copyList->CopyTextureRegion(&dstLoc, 0, 0, 0, &srcLoc, nullptr); + + // Transition the entire new resource to PIXEL_SHADER_RESOURCE for mipmap + // generation. The resource was created in COPY_DEST; only mip 0 was + // written by the copy above, but all mips will be consumed as SRVs/RTVs + // in the subsequent fullscreen-blit loop. Using ALL_SUBRESOURCES here + // ensures the debug layer's notion of the initial state matches our + // state tracking for every subresource (mip >= 1 included). 
+ D3D12_RESOURCE_BARRIER barrierNew = {}; + barrierNew.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrierNew.Transition.pResource = newResource.Get(); + barrierNew.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrierNew.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE; + barrierNew.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + copyList->ResourceBarrier(1, &barrierNew); + + copyList->Close(); + ID3D12CommandList* copyLists[] = {copyList.Get()}; + queue_->ExecuteCommandLists(1, copyLists); + + // Wait for copy to complete + igl::d3d12::ComPtr copyFence; + if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(copyFence.GetAddressOf())))) { + IGL_LOG_ERROR("Texture::generateMipmap() - Failed to create copy fence\n"); + return; + } + queue_->Signal(copyFence.Get(), 1); + + FenceWaiter waiter(copyFence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("Texture::generateMipmap() - Fence wait failed: %s\n", + waitResult.message.c_str()); + return; + } + + // oldResource will be automatically released by ComPtr destructor + + // Replace resource with new one (need const_cast since function is const) + auto& mutableResource = const_cast&>(resource_); + mutableResource.Reset(); + mutableResource = std::move(newResource); + + // Update state tracking for new resource - all mips are now in PIXEL_SHADER_RESOURCE + // const_cast needed because generateMipmap is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->initializeStateTracking(D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE); + + // Update resourceDesc for the rest of the function + resourceDesc = resource_->GetDesc(); + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Texture recreated successfully\n"); + } + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Proceeding with mipmap generation\n"); + + // Use pre-compiled shaders from Device instead 
of runtime compilation. + // Note: iglDevice_ should always be set in normal flow (see Texture::createFromResource) + // This check is defensive; if it triggers, it indicates a texture creation path that bypassed proper initialization + if (!iglDevice_) { + IGL_LOG_ERROR("Texture::generateMipmap() - No IGL device available (texture not properly initialized)\n"); + IGL_LOG_ERROR(" This is a programming error: textures must be created via Device methods to support mipmap generation\n"); + return; + } + + const auto& vsBytecode = iglDevice_->getMipmapVSBytecode(); + const auto& psBytecode = iglDevice_->getMipmapPSBytecode(); + ID3D12RootSignature* rootSig = iglDevice_->getMipmapRootSignature(); + + // Validate pre-compiled shaders are available + // This can fail if device initialization encountered DXC errors + if (vsBytecode.empty() || psBytecode.empty() || !rootSig) { + IGL_LOG_ERROR("Texture::generateMipmap() - Pre-compiled mipmap shaders unavailable\n"); + IGL_LOG_ERROR(" Device may not support mipmap generation (check Device initialization logs for DXC errors)\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap() - Using pre-compiled shaders (%zu bytes VS, %zu bytes PS)\n", + vsBytecode.size(), psBytecode.size()); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso = {}; + pso.pRootSignature = rootSig; + pso.VS = {vsBytecode.data(), vsBytecode.size()}; + pso.PS = {psBytecode.data(), psBytecode.size()}; + pso.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + pso.RasterizerState.DepthClipEnable = TRUE; + pso.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + pso.SampleMask = UINT_MAX; + pso.SampleDesc.Count = 1; + pso.NumRenderTargets = 1; + pso.RTVFormats[0] = resourceDesc.Format; + pso.DSVFormat = DXGI_FORMAT_UNKNOWN; + + igl::d3d12::ComPtr psoObj; + if 
(FAILED(device_->CreateGraphicsPipelineState(&pso, IID_PPV_ARGS(psoObj.GetAddressOf())))) { + return; + } + + // Create descriptor heap large enough for all mip levels + // We need one SRV descriptor per mip level (numMipLevels_ - 1 blits) + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.NumDescriptors = numMipLevels_ - 1; // One SRV per source mip level + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + igl::d3d12::ComPtr srvHeap; + if (FAILED(device_->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(srvHeap.GetAddressOf())))) return; + + D3D12_DESCRIPTOR_HEAP_DESC smpHeapDesc = {}; + smpHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + smpHeapDesc.NumDescriptors = 1; + smpHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + igl::d3d12::ComPtr smpHeap; + if (FAILED(device_->CreateDescriptorHeap(&smpHeapDesc, IID_PPV_ARGS(smpHeap.GetAddressOf())))) return; + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateSampler"); + IGL_DEBUG_ASSERT(smpHeap.Get() != nullptr, "Sampler heap is null"); + + // Fixed sampler + D3D12_SAMPLER_DESC samp = {}; + samp.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samp.AddressU = samp.AddressV = samp.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samp.MinLOD = 0; samp.MaxLOD = D3D12_FLOAT32_MAX; + + D3D12_CPU_DESCRIPTOR_HANDLE smpHandle = smpHeap->GetCPUDescriptorHandleForHeapStart(); + IGL_DEBUG_ASSERT(smpHandle.ptr != 0, "Sampler descriptor handle is invalid"); + device_->CreateSampler(&samp, smpHandle); + + igl::d3d12::ComPtr<ID3D12CommandAllocator> alloc; + igl::d3d12::ComPtr<ID3D12GraphicsCommandList> list; + if (FAILED(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(alloc.GetAddressOf())))) return; + if (FAILED(device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, alloc.Get(), psoObj.Get(), IID_PPV_ARGS(list.GetAddressOf())))) return; + + ID3D12DescriptorHeap* heaps[] = {srvHeap.Get(), smpHeap.Get()}; + list->SetDescriptorHeaps(2, heaps); + list->SetPipelineState(psoObj.Get()); + list->SetGraphicsRootSignature(rootSig); + list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + // Get descriptor size for incrementing through the heap + const UINT srvDescriptorSize = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE srvCpuStart = srvHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE srvGpuStart = srvHeap->GetGPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE smpGpu = smpHeap->GetGPUDescriptorHandleForHeapStart(); + + // Create single RTV descriptor heap outside the loop (reused for all mip levels) + D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.NumDescriptors = 1; + igl::d3d12::ComPtr<ID3D12DescriptorHeap> rtvHeap; + if (FAILED(device_->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(rtvHeap.GetAddressOf())))) return; + 
D3D12_CPU_DESCRIPTOR_HANDLE rtvCpu = rtvHeap->GetCPUDescriptorHandleForHeapStart(); + + // Ensure mip 0 is in PIXEL_SHADER_RESOURCE state for first SRV read + // const_cast needed because generateMipmap is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, 0, 0); + + for (UINT mip = 0; mip + 1 < numMipLevels_; ++mip) { + // Calculate descriptor handle for this mip level + D3D12_CPU_DESCRIPTOR_HANDLE srvCpu = srvCpuStart; + srvCpu.ptr += mip * srvDescriptorSize; + D3D12_GPU_DESCRIPTOR_HANDLE srvGpu = srvGpuStart; + srvGpu.ptr += mip * srvDescriptorSize; + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(srvCpu.ptr != 0, "SRV descriptor handle is invalid"); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.Format = resourceDesc.Format; + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Texture2D.MostDetailedMip = mip; + srv.Texture2D.MipLevels = 1; + device_->CreateShaderResourceView(resource_.Get(), &srv, srvCpu); + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(rtvCpu.ptr != 0, "RTV descriptor handle is invalid"); + + D3D12_RENDER_TARGET_VIEW_DESC rtv = {}; + rtv.Format = resourceDesc.Format; + rtv.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtv.Texture2D.MipSlice = mip + 1; + + // Reuse the same RTV heap by recreating the view for each mip level + device_->CreateRenderTargetView(resource_.Get(), &rtv, rtvCpu); + + // Transition mip level to render target using state tracking + // const_cast needed (see above). + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, mip + 1, 0); + + list->OMSetRenderTargets(1, &rtvCpu, FALSE, nullptr); + const UINT w = std::max(1u, (UINT)(resourceDesc.Width >> (mip + 1))); + const UINT h = std::max(1u, (UINT)(resourceDesc.Height >> (mip + 1))); + D3D12_VIEWPORT vp{0.0f, 0.0f, (FLOAT)w, (FLOAT)h, 0.0f, 1.0f}; + D3D12_RECT sc{0, 0, (LONG)w, (LONG)h}; + list->RSSetViewports(1, &vp); + list->RSSetScissorRects(1, &sc); + + list->SetGraphicsRootDescriptorTable(0, srvGpu); + list->SetGraphicsRootDescriptorTable(1, smpGpu); + list->DrawInstanced(3, 1, 0, 0); + + // Transition mip level to shader resource for next iteration + // const_cast needed (see above). 
+ const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, mip + 1, 0); + } + + list->Close(); + ID3D12CommandList* lists[] = {list.Get()}; + queue_->ExecuteCommandLists(1, lists); + + igl::d3d12::ComPtr fence; + if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())))) return; + queue_->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("Texture::generateMipmap() - Fence wait failed: %s\n", + waitResult.message.c_str()); + } +} + +void Texture::generateMipmap(ICommandBuffer& /*cmdBuffer*/, const TextureRangeDesc* /*range*/) const { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - START: numMips=%u\n", numMipLevels_); + + if (!device_ || !queue_ || !resource_.Get() || numMipLevels_ < 2) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Skipping: device=%p queue=%p resource=%p numMips=%u\n", + device_, queue_, resource_.Get(), numMipLevels_); + return; + } + + D3D12_RESOURCE_DESC resourceDesc = resource_->GetDesc(); + + // Only support 2D textures for mipmap generation + if (resourceDesc.Dimension != D3D12_RESOURCE_DIMENSION_TEXTURE2D) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Skipping: only 2D textures supported\n"); + return; + } + + // Check if texture was created with RENDER_TARGET flag (required for mipmap generation) + if (!(resourceDesc.Flags & D3D12_RESOURCE_FLAG_ALLOW_RENDER_TARGET)) { + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Skipping: texture not created with RENDER_TARGET usage\n"); + IGL_D3D12_LOG_VERBOSE(" To enable mipmap generation, create texture with TextureDesc::TextureUsageBits::Attachment\n"); + return; + } + + // Use pre-compiled shaders from Device instead of runtime compilation. 
+ // Note: iglDevice_ should always be set in normal flow (see Texture::createFromResource) + // This check is defensive; if it triggers, it indicates a texture creation path that bypassed proper initialization + if (!iglDevice_) { + IGL_LOG_ERROR("Texture::generateMipmap(cmdBuffer) - No IGL device available (texture not properly initialized)\n"); + IGL_LOG_ERROR(" This is a programming error: textures must be created via Device methods to support mipmap generation\n"); + return; + } + + const auto& vsBytecode = iglDevice_->getMipmapVSBytecode(); + const auto& psBytecode = iglDevice_->getMipmapPSBytecode(); + ID3D12RootSignature* rootSig = iglDevice_->getMipmapRootSignature(); + + // Validate pre-compiled shaders are available + // This can fail if device initialization encountered DXC errors + if (vsBytecode.empty() || psBytecode.empty() || !rootSig) { + IGL_LOG_ERROR("Texture::generateMipmap(cmdBuffer) - Pre-compiled mipmap shaders unavailable\n"); + IGL_LOG_ERROR(" Device may not support mipmap generation (check Device initialization logs for DXC errors)\n"); + return; + } + + IGL_D3D12_LOG_VERBOSE("Texture::generateMipmap(cmdBuffer) - Using pre-compiled shaders (%zu bytes VS, %zu bytes PS)\n", + vsBytecode.size(), psBytecode.size()); + + D3D12_GRAPHICS_PIPELINE_STATE_DESC pso = {}; + pso.pRootSignature = rootSig; + pso.VS = {vsBytecode.data(), vsBytecode.size()}; + pso.PS = {psBytecode.data(), psBytecode.size()}; + pso.PrimitiveTopologyType = D3D12_PRIMITIVE_TOPOLOGY_TYPE_TRIANGLE; + pso.RasterizerState.FillMode = D3D12_FILL_MODE_SOLID; + pso.RasterizerState.CullMode = D3D12_CULL_MODE_NONE; + pso.RasterizerState.DepthClipEnable = TRUE; + pso.BlendState.RenderTarget[0].RenderTargetWriteMask = D3D12_COLOR_WRITE_ENABLE_ALL; + pso.SampleMask = UINT_MAX; + pso.SampleDesc.Count = 1; + pso.NumRenderTargets = 1; + pso.RTVFormats[0] = resourceDesc.Format; + pso.DSVFormat = DXGI_FORMAT_UNKNOWN; + igl::d3d12::ComPtr psoObj; + if 
(FAILED(device_->CreateGraphicsPipelineState(&pso, IID_PPV_ARGS(psoObj.GetAddressOf())))) return; + // Create descriptor heap large enough for all mip levels + // We need one SRV descriptor per mip level (numMipLevels_ - 1 blits) + D3D12_DESCRIPTOR_HEAP_DESC srvHeapDesc = {}; + srvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV; + srvHeapDesc.NumDescriptors = numMipLevels_ - 1; // One SRV per source mip level + srvHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + igl::d3d12::ComPtr srvHeap; + if (FAILED(device_->CreateDescriptorHeap(&srvHeapDesc, IID_PPV_ARGS(srvHeap.GetAddressOf())))) return; + D3D12_DESCRIPTOR_HEAP_DESC smpHeapDesc = {}; + smpHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER; + smpHeapDesc.NumDescriptors = 1; + smpHeapDesc.Flags = D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE; + + igl::d3d12::ComPtr smpHeap; + if (FAILED(device_->CreateDescriptorHeap(&smpHeapDesc, IID_PPV_ARGS(smpHeap.GetAddressOf())))) return; + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateSampler"); + IGL_DEBUG_ASSERT(smpHeap.Get() != nullptr, "Sampler heap is null"); + + D3D12_SAMPLER_DESC samp = {}; + samp.Filter = D3D12_FILTER_MIN_MAG_MIP_LINEAR; + samp.AddressU = samp.AddressV = samp.AddressW = D3D12_TEXTURE_ADDRESS_MODE_CLAMP; + samp.MinLOD = 0; samp.MaxLOD = D3D12_FLOAT32_MAX; + + D3D12_CPU_DESCRIPTOR_HANDLE smpHandle = smpHeap->GetCPUDescriptorHandleForHeapStart(); + IGL_DEBUG_ASSERT(smpHandle.ptr != 0, "Sampler descriptor handle is invalid"); + device_->CreateSampler(&samp, smpHandle); + igl::d3d12::ComPtr alloc; + igl::d3d12::ComPtr list; + if (FAILED(device_->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(alloc.GetAddressOf())))) return; + if (FAILED(device_->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, alloc.Get(), psoObj.Get(), IID_PPV_ARGS(list.GetAddressOf())))) return; + ID3D12DescriptorHeap* heaps[] = {srvHeap.Get(), smpHeap.Get()}; + list->SetDescriptorHeaps(2, 
heaps); + list->SetPipelineState(psoObj.Get()); + list->SetGraphicsRootSignature(rootSig); + list->IASetPrimitiveTopology(D3D_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + // Get descriptor size for incrementing through the heap + const UINT srvDescriptorSize = device_->GetDescriptorHandleIncrementSize(D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV); + D3D12_CPU_DESCRIPTOR_HANDLE srvCpuStart = srvHeap->GetCPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE srvGpuStart = srvHeap->GetGPUDescriptorHandleForHeapStart(); + D3D12_GPU_DESCRIPTOR_HANDLE smpGpu = smpHeap->GetGPUDescriptorHandleForHeapStart(); + + // Create single RTV descriptor heap outside the loop (reused for all mip levels) + D3D12_DESCRIPTOR_HEAP_DESC rtvHeapDesc = {}; + rtvHeapDesc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_RTV; + rtvHeapDesc.NumDescriptors = 1; + igl::d3d12::ComPtr rtvHeap; + if (FAILED(device_->CreateDescriptorHeap(&rtvHeapDesc, IID_PPV_ARGS(rtvHeap.GetAddressOf())))) return; + D3D12_CPU_DESCRIPTOR_HANDLE rtvCpu = rtvHeap->GetCPUDescriptorHandleForHeapStart(); + + // Ensure mip 0 is in PIXEL_SHADER_RESOURCE state for first SRV read + // const_cast needed because generateMipmap is const (required by ITexture interface) + // but state tracking is non-const by design + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, 0, 0); + + for (UINT mip = 0; mip + 1 < numMipLevels_; ++mip) { + // Calculate descriptor handle for this mip level + D3D12_CPU_DESCRIPTOR_HANDLE srvCpu = srvCpuStart; + srvCpu.ptr += mip * srvDescriptorSize; + D3D12_GPU_DESCRIPTOR_HANDLE srvGpu = srvGpuStart; + srvGpu.ptr += mip * srvDescriptorSize; + + // Pre-creation validation. 
+ IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateShaderResourceView"); + IGL_DEBUG_ASSERT(srvCpu.ptr != 0, "SRV descriptor handle is invalid"); + + D3D12_SHADER_RESOURCE_VIEW_DESC srv = {}; + srv.Format = resourceDesc.Format; + srv.ViewDimension = D3D12_SRV_DIMENSION_TEXTURE2D; + srv.Shader4ComponentMapping = D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING; + srv.Texture2D.MostDetailedMip = mip; + srv.Texture2D.MipLevels = 1; + device_->CreateShaderResourceView(resource_.Get(), &srv, srvCpu); + + // Pre-creation validation. + IGL_DEBUG_ASSERT(device_ != nullptr, "Device is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(resource_.Get() != nullptr, "Resource is null before CreateRenderTargetView"); + IGL_DEBUG_ASSERT(rtvCpu.ptr != 0, "RTV descriptor handle is invalid"); + + D3D12_RENDER_TARGET_VIEW_DESC rtv = {}; + rtv.Format = resourceDesc.Format; + rtv.ViewDimension = D3D12_RTV_DIMENSION_TEXTURE2D; + rtv.Texture2D.MipSlice = mip + 1; + + // Reuse the same RTV heap by recreating the view for each mip level + device_->CreateRenderTargetView(resource_.Get(), &rtv, rtvCpu); + + // Transition mip level to render target using state tracking + // const_cast needed (see above). 
+ const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_RENDER_TARGET, mip + 1, 0); + + list->OMSetRenderTargets(1, &rtvCpu, FALSE, nullptr); + const UINT w = std::max(1u, (UINT)(resourceDesc.Width >> (mip + 1))); + const UINT h = std::max(1u, (UINT)(resourceDesc.Height >> (mip + 1))); + D3D12_VIEWPORT vp{0.0f, 0.0f, (FLOAT)w, (FLOAT)h, 0.0f, 1.0f}; + D3D12_RECT sc{0, 0, (LONG)w, (LONG)h}; + list->RSSetViewports(1, &vp); + list->RSSetScissorRects(1, &sc); + list->SetGraphicsRootDescriptorTable(0, srvGpu); + list->SetGraphicsRootDescriptorTable(1, smpGpu); + list->DrawInstanced(3, 1, 0, 0); + + // Transition mip level to shader resource for next iteration + // const_cast needed (see above). + const_cast(this)->transitionTo(list.Get(), D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE, mip + 1, 0); + } + list->Close(); + ID3D12CommandList* lists[] = {list.Get()}; + queue_->ExecuteCommandLists(1, lists); + igl::d3d12::ComPtr fence; + if (FAILED(device_->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(fence.GetAddressOf())))) return; + queue_->Signal(fence.Get(), 1); + + FenceWaiter waiter(fence.Get(), 1); + Result waitResult = waiter.wait(); + if (!waitResult.isOk()) { + IGL_LOG_ERROR("Texture::generateMipmap(cmdBuffer) - Fence wait failed: %s\n", + waitResult.message.c_str()); + } +} + +void Texture::initializeStateTracking(D3D12_RESOURCE_STATES initialState) { + // Simplified per-subresource state tracking: always use a vector (no dual-mode). 
+ if (!resource_.Get()) { + subresourceStates_.clear(); + return; + } + + const uint32_t mipLevels = static_cast(std::max(numMipLevels_, 1)); + uint32_t arraySize; + if (type_ == TextureType::ThreeD) { + arraySize = 1u; + } else if (type_ == TextureType::Cube) { + arraySize = static_cast(std::max(numLayers_, 1)) * 6u; + } else { + arraySize = static_cast(std::max(numLayers_, 1)); + } + const size_t numSubresources = static_cast(mipLevels) * arraySize; + subresourceStates_.assign(numSubresources, initialState); +} + +uint32_t Texture::calcSubresourceIndex(uint32_t mipLevel, uint32_t layer) const { + // For views, map view-local coordinates to resource coordinates. + // Note: mipLevelOffset_ and arraySliceOffset_ are resource-relative (accumulated at view creation for nested views). + const uint32_t resourceMip = isView_ ? (mipLevel + mipLevelOffset_) : mipLevel; + const uint32_t resourceLayer = isView_ ? (layer + arraySliceOffset_) : layer; + + // Use state owner's dimensions for subresource calculation + const Texture* owner = getStateOwner(); + IGL_DEBUG_ASSERT(owner != nullptr, "State owner must not be null"); + const uint32_t mipLevels = static_cast(std::max(owner->numMipLevels_, 1)); + uint32_t arraySize; + if (owner->type_ == TextureType::ThreeD) { + arraySize = 1u; + } else if (owner->type_ == TextureType::Cube) { + // Cube textures: 6 faces per layer + arraySize = static_cast(std::max(owner->numLayers_, 1)) * 6u; + } else { + arraySize = static_cast(std::max(owner->numLayers_, 1)); + } + const uint32_t clampedMip = std::min(resourceMip, mipLevels - 1); + const uint32_t clampedLayer = std::min(resourceLayer, arraySize - 1); + // D3D12CalcSubresource formula: MipSlice + (ArraySlice * MipLevels) + const uint32_t subresource = clampedMip + (clampedLayer * mipLevels); +#ifdef IGL_DEBUG + // Reduce log verbosity - only log in debug builds for views + if ((type_ == TextureType::Cube || type_ == TextureType::TwoDArray) && isView_) { + 
IGL_D3D12_LOG_VERBOSE("calcSubresourceIndex (view): type=%d, mip=%u, layer=%u -> resource mip=%u, layer=%u -> subresource=%u\n", + (int)type_, mipLevel, layer, resourceMip, resourceLayer, subresource); + } +#endif + return subresource; +} + +void Texture::transitionTo(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState, + uint32_t mipLevel, + uint32_t layer) { + // Simplified per-subresource state tracking. + Texture* owner = getStateOwner(); + if (!commandList || !owner || !owner->resource_.Get() || owner->subresourceStates_.empty()) { + return; + } + + // For depth-stencil textures, transition all subresources (both depth and stencil planes). + const auto props = getProperties(); + const bool isDepthStencil = + props.isDepthOrStencil() && + (props.hasStencil() || format_ == TextureFormat::Z_UNorm24); + + if (isDepthStencil) { + // Verify all subresources are in the same state before using ALL_SUBRESOURCES. + D3D12_RESOURCE_STATES firstState = owner->subresourceStates_[0]; + bool allSameState = true; + for (const auto& state : owner->subresourceStates_) { + if (state != firstState) { + allSameState = false; + IGL_LOG_ERROR("Depth-stencil texture has divergent subresource states - this violates invariant\n"); + break; + } + } + + if (firstState == newState) { + return; // All subresources already in target state + } + + // Safety check: If states have diverged, return early to avoid invalid ALL_SUBRESOURCES barrier. 
+ if (!allSameState) { + IGL_DEBUG_ASSERT(false, "Depth-stencil textures must have uniform state across all subresources"); + return; // Intentionally skip transition to avoid undefined behavior + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = firstState; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + // Update all subresource states + for (auto& state : owner->subresourceStates_) { + state = newState; + } + return; + } + + // Non-depth-stencil: transition single subresource + const uint32_t subresource = calcSubresourceIndex(mipLevel, layer); + if (subresource >= owner->subresourceStates_.size()) { + return; + } + + auto& currentState = owner->subresourceStates_[subresource]; + if (currentState == newState) { + return; + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = subresource; + barrier.Transition.StateBefore = currentState; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + currentState = newState; +} + +void Texture::transitionAll(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState) { + // Simplified per-subresource state tracking. + Texture* owner = getStateOwner(); + if (!commandList || !owner || !owner->resource_.Get() || owner->subresourceStates_.empty()) { + return; + } + + // For depth-stencil textures (multi-plane in D3D12), keep all planes and + // mips in a single coherent state by using an ALL_SUBRESOURCES barrier. 
+ // This avoids mismatches like depth in DEPTH_WRITE while stencil (plane 1) + // remains in COMMON/PRESENT, which triggers the D3D12 debug error + // INVALID_SUBRESOURCE_STATE on ClearDepthStencilView. + const auto props = getProperties(); + const bool isDepthStencil = + props.isDepthOrStencil() && + (props.hasStencil() || format_ == TextureFormat::Z_UNorm24); + + if (isDepthStencil) { + D3D12_RESOURCE_STATES firstState = owner->subresourceStates_[0]; + bool allSameState = true; + for (const auto& state : owner->subresourceStates_) { + if (state != firstState) { + allSameState = false; + IGL_LOG_ERROR( + "Texture::transitionAll - depth-stencil texture has divergent subresource states; " + "expected uniform state before ALL_SUBRESOURCES barrier\n"); + break; + } + } + + if (firstState == newState) { + // All subresources (planes/mips) already in the requested state. + return; + } + + if (!allSameState) { + // Safety: avoid issuing an ALL_SUBRESOURCES barrier with inconsistent + // tracking; this would make our internal state unreliable. 
+ IGL_DEBUG_ASSERT( + false, + "Texture::transitionAll - depth-stencil textures must have uniform state across all " + "subresources before ALL_SUBRESOURCES transition"); + return; + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = firstState; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + for (auto& state : owner->subresourceStates_) { + state = newState; + } + return; + } + + // Check if all subresources are already in the target state + bool allMatch = true; + for (const auto& state : owner->subresourceStates_) { + if (state != newState) { + allMatch = false; + break; + } + } + if (allMatch) { + return; + } + + // Transition each subresource individually + for (size_t i = 0; i < owner->subresourceStates_.size(); ++i) { + auto& state = owner->subresourceStates_[i]; + if (state == newState) { + continue; + } + + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Flags = D3D12_RESOURCE_BARRIER_FLAG_NONE; + barrier.Transition.pResource = owner->resource_.Get(); + barrier.Transition.Subresource = static_cast(i); + barrier.Transition.StateBefore = state; + barrier.Transition.StateAfter = newState; + commandList->ResourceBarrier(1, &barrier); + + state = newState; + } +} + +D3D12_RESOURCE_STATES Texture::getSubresourceState(uint32_t mipLevel, uint32_t layer) const { + // Simplified per-subresource state tracking. 
+ const Texture* owner = getStateOwner(); + if (owner->subresourceStates_.empty()) { + return D3D12_RESOURCE_STATE_COMMON; + } + + const uint32_t index = calcSubresourceIndex(mipLevel, layer); + if (index >= owner->subresourceStates_.size()) { + return D3D12_RESOURCE_STATE_COMMON; + } + + return owner->subresourceStates_[index]; +} + +} // namespace igl::d3d12 + + diff --git a/src/igl/d3d12/Texture.h b/src/igl/d3d12/Texture.h new file mode 100644 index 0000000000..a2589659a6 --- /dev/null +++ b/src/igl/d3d12/Texture.h @@ -0,0 +1,143 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Texture final : public ITexture { + public: + Texture() : ITexture(TextureFormat::Invalid), format_(TextureFormat::Invalid) {} + explicit Texture(TextureFormat format) : ITexture(format), format_(format) {} + + // Explicit destructor to free descriptor heap slots. 
+ ~Texture() override; + + // Factory method to create texture from existing D3D12 resource + static std::shared_ptr createFromResource( + ID3D12Resource* resource, + TextureFormat format, + const TextureDesc& desc, + ID3D12Device* device = nullptr, + ID3D12CommandQueue* queue = nullptr, + D3D12_RESOURCE_STATES initialState = D3D12_RESOURCE_STATE_COMMON, + class Device* iglDevice = nullptr); + + // Factory method to create texture view from parent texture + static std::shared_ptr createTextureView( + std::shared_ptr parent, + const TextureViewDesc& desc); + + // D3D12-specific upload methods (not part of ITexture interface) + Result upload(const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow = 0) const; + Result uploadCube(const TextureRangeDesc& range, + TextureCubeFace face, + const void* data, + size_t bytesPerRow = 0) const; + + Dimensions getDimensions() const override; + uint32_t getNumLayers() const override; + TextureType getType() const override; + TextureDesc::TextureUsage getUsage() const override; + uint32_t getSamples() const override; + uint32_t getNumMipLevels() const override; + uint64_t getTextureId() const override; + bool isRequiredGenerateMipmap() const override; + + void generateMipmap(ICommandQueue& cmdQueue, + const TextureRangeDesc* IGL_NULLABLE range = nullptr) const override; + void generateMipmap(ICommandBuffer& cmdBuffer, + const TextureRangeDesc* IGL_NULLABLE range = nullptr) const override; + + // D3D12-specific accessors (not part of ITexture interface) + TextureFormat getFormat() const; + ID3D12Resource* getResource() const { return resource_.Get(); } + // State transition methods are non-const (state updates not allowed in const methods) + void transitionTo(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState, + uint32_t mipLevel = 0, + uint32_t layer = 0); + void transitionAll(ID3D12GraphicsCommandList* commandList, + D3D12_RESOURCE_STATES newState); + D3D12_RESOURCE_STATES 
getSubresourceState(uint32_t mipLevel = 0, + uint32_t layer = 0) const; + + // Texture view support + bool isView() const { return isView_; } + uint32_t getMipLevelOffset() const { return mipLevelOffset_; } + uint32_t getNumMipLevelsInView() const { return numMipLevelsInView_; } + uint32_t getArraySliceOffset() const { return arraySliceOffset_; } + uint32_t getNumArraySlicesInView() const { return numArraySlicesInView_; } + + // Subresource calculation helper + uint32_t calcSubresourceIndex(uint32_t mipLevel, uint32_t layer) const; + + protected: + // Override the base class upload method + Result uploadInternal(TextureType type, + const TextureRangeDesc& range, + const void* data, + size_t bytesPerRow = 0, + const uint32_t* mipLevelBytes = nullptr) const override; + + private: + igl::d3d12::ComPtr resource_; + ID3D12Device* device_ = nullptr; // Non-owning pointer + ID3D12CommandQueue* queue_ = nullptr; // Non-owning pointer. + class Device* iglDevice_ = nullptr; // Non-owning pointer to igl::d3d12::Device for upload operations. + TextureFormat format_; + Dimensions dimensions_{0, 0, 0}; + TextureType type_ = TextureType::TwoD; + size_t numLayers_ = 1; + size_t numMipLevels_ = 1; + size_t samples_ = 1; + TextureDesc::TextureUsage usage_ = 0; + void initializeStateTracking(D3D12_RESOURCE_STATES initialState); + + // Simplified per-subresource state tracking. + // Views delegate state tracking to their root texture; only root textures maintain state. + // Always uses a per-subresource vector for simplicity (no dual-mode complexity). 
+ std::vector subresourceStates_; + + // Helper to get the texture that owns state tracking (walks to root for nested views) + Texture* getStateOwner() { + Texture* owner = this; + while (owner->isView_ && owner->parentTexture_) { + owner = owner->parentTexture_.get(); + } + return owner; + } + const Texture* getStateOwner() const { + const Texture* owner = this; + while (owner->isView_ && owner->parentTexture_) { + owner = owner->parentTexture_.get(); + } + return owner; + } + + // Texture view support + bool isView_ = false; + std::shared_ptr parentTexture_; // For views, reference to parent + uint32_t mipLevelOffset_ = 0; // MostDetailedMip for SRV + uint32_t numMipLevelsInView_ = 0; // MipLevels for SRV + uint32_t arraySliceOffset_ = 0; // FirstArraySlice for SRV + uint32_t numArraySlicesInView_ = 0; // ArraySize for SRV + + // Descriptor indices for cleanup in destructor. + // These descriptors are allocated from DescriptorHeapManager and must be freed. + std::vector rtvIndices_; // RTV descriptors (one per mip level) + std::vector dsvIndices_; // DSV descriptors (for depth/stencil textures) + uint32_t srvIndex_ = UINT32_MAX; // SRV descriptor (UINT32_MAX = not allocated) +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/TextureCopyUtils.cpp b/src/igl/d3d12/TextureCopyUtils.cpp new file mode 100644 index 0000000000..b12768fb95 --- /dev/null +++ b/src/igl/d3d12/TextureCopyUtils.cpp @@ -0,0 +1,279 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace igl::d3d12::TextureCopyUtils { + +Result executeCopyTextureToBuffer(D3D12Context& ctx, + Device& iglDevice, + Texture& srcTex, + Buffer& dstBuf, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer) { + ID3D12Resource* srcRes = srcTex.getResource(); + ID3D12Resource* dstRes = dstBuf.getResource(); + + if (!srcRes || !dstRes) { + return Result{Result::Code::ArgumentInvalid, "Invalid source or destination resource"}; + } + + ID3D12Device* device = ctx.getDevice(); + + if (!device) { + return Result{Result::Code::RuntimeError, "Device is null"}; + } + + // Get texture description for GetCopyableFootprints + D3D12_RESOURCE_DESC srcDesc = srcRes->GetDesc(); + + // Calculate subresource index + const uint32_t subresource = srcTex.calcSubresourceIndex(mipLevel, layer); + + // Get copyable footprint for this subresource + D3D12_PLACED_SUBRESOURCE_FOOTPRINT layout = {}; + UINT numRows = 0; + UINT64 rowSizeInBytes = 0; + UINT64 totalBytes = 0; + + device->GetCopyableFootprints(&srcDesc, + subresource, + 1, + destinationOffset, + &layout, + &numRows, + &rowSizeInBytes, + &totalBytes); + + // Calculate the unpacked texture data size (without D3D12 padding) + // rowSizeInBytes is the unpadded row size, so we can use it directly + const UINT64 unpackedDataSize = rowSizeInBytes * numRows * layout.Footprint.Depth; + + // Check if destination buffer is large enough for the unpacked data + if (destinationOffset + unpackedDataSize > dstBuf.getSizeInBytes()) { + return Result{Result::Code::ArgumentOutOfRange, "Destination buffer too small"}; + } + + // Use centralized staging device for readback buffer allocation. 
+ auto* stagingDevice = iglDevice.getStagingDevice(); + if (!stagingDevice) { + return Result{Result::Code::RuntimeError, "Staging device not available"}; + } + + // Allocate readback staging buffer (D3D12 requires row-pitch alignment) + auto staging = stagingDevice->allocateReadback(layout.Offset + totalBytes); + if (!staging.valid) { + return Result{Result::Code::RuntimeError, "Failed to allocate readback staging buffer"}; + } + + ID3D12Resource* readbackBuffer = staging.buffer.Get(); + ID3D12Resource* copyDestination = readbackBuffer; + + // Use centralized immediate commands instead of creating transient allocator/list. + auto* immediateCommands = iglDevice.getImmediateCommands(); + if (!immediateCommands) { + return Result{Result::Code::RuntimeError, "Immediate commands not available"}; + } + + Result cmdResult; + ID3D12GraphicsCommandList* cmdList = immediateCommands->begin(&cmdResult); + if (!cmdList || !cmdResult.isOk()) { + return Result{Result::Code::RuntimeError, "Failed to begin immediate command list"}; + } + + // Get current texture state (for restoration after the copy) + const D3D12_RESOURCE_STATES srcStateBefore = srcTex.getSubresourceState(mipLevel, layer); + + // Transition texture to COPY_SOURCE using centralized state tracking so + // that subsequent transitions observe a consistent state across all + // command lists and avoid BEFORE/AFTER mismatches. 
+ srcTex.transitionTo(cmdList, D3D12_RESOURCE_STATE_COPY_SOURCE, mipLevel, layer); + + // Setup source texture copy location + D3D12_TEXTURE_COPY_LOCATION srcLocation = {}; + srcLocation.pResource = srcRes; + srcLocation.Type = D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX; + srcLocation.SubresourceIndex = subresource; + + // Setup destination buffer copy location + D3D12_TEXTURE_COPY_LOCATION dstLocation = {}; + dstLocation.pResource = copyDestination; + dstLocation.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT; + dstLocation.PlacedFootprint = layout; + + // Perform the copy + cmdList->CopyTextureRegion(&dstLocation, 0, 0, 0, &srcLocation, nullptr); + + // Transition texture back to original state using the same tracking path. + srcTex.transitionTo(cmdList, srcStateBefore, mipLevel, layer); + + // Submit via immediate commands with synchronous wait. + Result submitResult; + const uint64_t fenceValue = immediateCommands->submit(true, &submitResult); + if (!submitResult.isOk() || fenceValue == 0) { + return Result{Result::Code::RuntimeError, + "Failed to submit immediate commands: " + submitResult.message}; + } + + // Copy from readback staging buffer to final destination + void* readbackData = nullptr; + // Map the readback buffer region containing the texture data + D3D12_RANGE readRange{static_cast(layout.Offset), + static_cast(layout.Offset + totalBytes)}; + + if (SUCCEEDED(readbackBuffer->Map(0, &readRange, &readbackData)) && readbackData) { + // Check if destination buffer is in DEFAULT heap (Storage buffers) + // We cannot call map() on DEFAULT heap buffers because Buffer::map() would + // create its own staging buffer and copy FROM (empty) DEFAULT buffer first + D3D12_HEAP_PROPERTIES heapProps; + dstRes->GetHeapProperties(&heapProps, nullptr); + const bool isDefaultHeap = (heapProps.Type == D3D12_HEAP_TYPE_DEFAULT); + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: Destination heap type = %d (1=DEFAULT, 2=UPLOAD, 3=READBACK), 
isDefaultHeap=%d\n", + heapProps.Type, isDefaultHeap); +#endif + + if (!isDefaultHeap) { + // Destination is CPU-mappable (UPLOAD/READBACK heap) - copy via CPU + // Copy row-by-row, removing D3D12's row pitch padding + Result mapResult; + void* dstData = dstBuf.map(BufferRange(unpackedDataSize, destinationOffset), &mapResult); + if (dstData && mapResult.isOk()) { + const uint8_t* src = static_cast(readbackData) + layout.Offset; + uint8_t* dst = static_cast(dstData); + const UINT64 srcRowPitch = layout.Footprint.RowPitch; + const UINT64 dstRowPitch = rowSizeInBytes; // Unpadded row size + + for (UINT z = 0; z < layout.Footprint.Depth; ++z) { + for (UINT row = 0; row < numRows; ++row) { + std::memcpy(dst, src, dstRowPitch); + src += srcRowPitch; + dst += dstRowPitch; + } + } + + dstBuf.unmap(); + } else { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to map destination buffer"}; + } + } else { + // Destination is NOT CPU-mappable (DEFAULT heap) - need GPU copy + // Create temporary UPLOAD buffer with unpacked data, then GPU copy to destination + D3D12_HEAP_PROPERTIES uploadHeap{}; + uploadHeap.Type = D3D12_HEAP_TYPE_UPLOAD; + + D3D12_RESOURCE_DESC uploadDesc{}; + uploadDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + uploadDesc.Width = unpackedDataSize; + uploadDesc.Height = 1; + uploadDesc.DepthOrArraySize = 1; + uploadDesc.MipLevels = 1; + uploadDesc.Format = DXGI_FORMAT_UNKNOWN; + uploadDesc.SampleDesc.Count = 1; + uploadDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + + igl::d3d12::ComPtr uploadBuffer; + HRESULT hr = device->CreateCommittedResource(&uploadHeap, + D3D12_HEAP_FLAG_NONE, + &uploadDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadBuffer.GetAddressOf())); + if (FAILED(hr)) { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to create upload buffer"}; + } + + // Map upload buffer and unpack data from readback + void* uploadData = nullptr; + if 
(SUCCEEDED(uploadBuffer->Map(0, nullptr, &uploadData)) && uploadData) { + const uint8_t* src = static_cast(readbackData) + layout.Offset; + uint8_t* dst = static_cast(uploadData); + const UINT64 srcRowPitch = layout.Footprint.RowPitch; + const UINT64 dstRowPitch = rowSizeInBytes; + + for (UINT z = 0; z < layout.Footprint.Depth; ++z) { + for (UINT row = 0; row < numRows; ++row) { + std::memcpy(dst, src, dstRowPitch); + src += srcRowPitch; + dst += dstRowPitch; + } + } + uploadBuffer->Unmap(0, nullptr); + + // GPU copy from upload buffer to destination DEFAULT buffer using immediate commands. + Result gpuCopyResult; + ID3D12GraphicsCommandList* copyList = immediateCommands->begin(&gpuCopyResult); + if (!copyList || !gpuCopyResult.isOk()) { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to begin immediate command list for GPU copy"}; + } + + // Transition destination buffer to COPY_DEST state + D3D12_RESOURCE_BARRIER barrier = {}; + barrier.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION; + barrier.Transition.pResource = dstRes; + barrier.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES; + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COMMON; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_COPY_DEST; + copyList->ResourceBarrier(1, &barrier); + + // Copy unpacked data to destination +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: GPU copy %llu bytes from upload buffer to DEFAULT buffer at offset %llu\n", + unpackedDataSize, destinationOffset); +#endif + copyList->CopyBufferRegion(dstRes, destinationOffset, uploadBuffer.Get(), 0, unpackedDataSize); + + // Transition destination buffer back to UAV state (Storage buffer) + barrier.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; + barrier.Transition.StateAfter = D3D12_RESOURCE_STATE_UNORDERED_ACCESS; + copyList->ResourceBarrier(1, &barrier); + + // Submit and wait for GPU copy. 
+ Result copySubmitResult; + const uint64_t copyFenceValue = immediateCommands->submit(true, ©SubmitResult); +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("copyTextureToBuffer: GPU copy complete!\n"); +#endif + if (!copySubmitResult.isOk() || copyFenceValue == 0) { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, + "Failed to submit GPU copy: " + copySubmitResult.message}; + } + } else { + readbackBuffer->Unmap(0, nullptr); + return Result{Result::Code::RuntimeError, "Failed to map upload buffer"}; + } + } + + readbackBuffer->Unmap(0, nullptr); + } else { + return Result{Result::Code::RuntimeError, "Failed to map readback buffer"}; + } + + // Return staging buffer to pool. + stagingDevice->free(staging, fenceValue); + + return Result{}; +} + +} // namespace igl::d3d12::TextureCopyUtils diff --git a/src/igl/d3d12/TextureCopyUtils.h b/src/igl/d3d12/TextureCopyUtils.h new file mode 100644 index 0000000000..d94a75c423 --- /dev/null +++ b/src/igl/d3d12/TextureCopyUtils.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +struct ID3D12Device; +struct ID3D12CommandQueue; +struct ID3D12Resource; + +namespace igl::d3d12 { + +class D3D12Context; +class Device; +class Texture; +class Buffer; + +namespace TextureCopyUtils { + +/** + * Executes a texture-to-buffer copy operation. + * Handles D3D12 row-pitch alignment, readback staging, and unpacking. 
+ * + * @param ctx D3D12 context for device/queue access + * @param iglDevice IGL device for command allocator pooling + * @param srcTex Source texture to copy from + * @param dstBuf Destination buffer to copy to + * @param destinationOffset Offset in bytes into destination buffer + * @param mipLevel Mipmap level to copy from source texture + * @param layer Array layer to copy from source texture + * @return Result indicating success or failure + */ +[[nodiscard]] Result executeCopyTextureToBuffer(D3D12Context& ctx, + Device& iglDevice, + Texture& srcTex, + Buffer& dstBuf, + uint64_t destinationOffset, + uint32_t mipLevel, + uint32_t layer); + +} // namespace TextureCopyUtils +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Timer.cpp b/src/igl/d3d12/Timer.cpp new file mode 100644 index 0000000000..7c922a1ced --- /dev/null +++ b/src/igl/d3d12/Timer.cpp @@ -0,0 +1,234 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include + +#include +#include + +namespace igl::d3d12 { + +Timer::Timer(const Device& device) { + auto& ctx = device.getD3D12Context(); + auto* d3dDevice = ctx.getDevice(); + auto* commandQueue = ctx.getCommandQueue(); + + // Query GPU timestamp frequency + // This returns the number of ticks per second for GPU timestamps + HRESULT hr = commandQueue->GetTimestampFrequency(×tampFrequency_); + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to get timestamp frequency (0x%08X). Timer disabled.\n", hr); + resourceCreationFailed_ = true; + timestampFrequency_ = 0; // Leave at 0 to indicate timer is disabled + return; + } + + // Create query heap for 2 timestamps (begin and end). + // Use D3D12_QUERY_HEAP_TYPE_TIMESTAMP for GPU timer queries. 
+ D3D12_QUERY_HEAP_DESC queryHeapDesc = {}; + queryHeapDesc.Type = D3D12_QUERY_HEAP_TYPE_TIMESTAMP; + queryHeapDesc.Count = 2; // Begin and end timestamps + queryHeapDesc.NodeMask = 0; // Single GPU + + hr = d3dDevice->CreateQueryHeap(&queryHeapDesc, IID_PPV_ARGS(queryHeap_.GetAddressOf())); + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to create query heap (0x%08X). Timer disabled.\n", hr); + resourceCreationFailed_ = true; + timestampFrequency_ = 0; + return; + } + + // Create readback buffer to hold query results + // Must use READBACK heap type for CPU access + D3D12_HEAP_PROPERTIES heapProps = {}; + heapProps.Type = D3D12_HEAP_TYPE_READBACK; + heapProps.CPUPageProperty = D3D12_CPU_PAGE_PROPERTY_UNKNOWN; + heapProps.MemoryPoolPreference = D3D12_MEMORY_POOL_UNKNOWN; + heapProps.CreationNodeMask = 1; + heapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC resourceDesc = {}; + resourceDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + resourceDesc.Alignment = 0; + resourceDesc.Width = 2 * sizeof(uint64_t); // Space for 2 timestamps + resourceDesc.Height = 1; + resourceDesc.DepthOrArraySize = 1; + resourceDesc.MipLevels = 1; + resourceDesc.Format = DXGI_FORMAT_UNKNOWN; + resourceDesc.SampleDesc.Count = 1; + resourceDesc.SampleDesc.Quality = 0; + resourceDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + resourceDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + hr = d3dDevice->CreateCommittedResource( + &heapProps, + D3D12_HEAP_FLAG_NONE, + &resourceDesc, + D3D12_RESOURCE_STATE_COPY_DEST, // Readback buffers must be in COPY_DEST state + nullptr, + IID_PPV_ARGS(readbackBuffer_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to create readback buffer (0x%08X). 
Timer disabled.\n", hr); + resourceCreationFailed_ = true; + timestampFrequency_ = 0; + queryHeap_.Reset(); // Clean up partially created resources + return; + } + +#ifdef IGL_DEBUG + IGL_D3D12_LOG_VERBOSE("Timer: Created successfully (frequency: %llu Hz)\n", timestampFrequency_); +#endif +} + +Timer::~Timer() { + // ComPtr handles cleanup automatically +} + +void Timer::begin(ID3D12GraphicsCommandList* commandList) { + if (resourceCreationFailed_ || timestampFrequency_ == 0) { + // Timer disabled due to resource creation or frequency query failure - silently no-op + return; + } + + if (!commandList) { + IGL_LOG_ERROR("Timer::begin() called with null command list\n"); + return; + } + + // Record begin timestamp (index 0) at the start of GPU work. + // This is a bottom-of-pipe operation that samples when the GPU finishes preceding work. + commandList->EndQuery(queryHeap_.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 0); +} + +void Timer::end(ID3D12GraphicsCommandList* commandList, ID3D12Fence* fence, uint64_t fenceValue) { + if (resourceCreationFailed_ || timestampFrequency_ == 0) { + // Timer disabled - silently no-op + return; + } + + if (!commandList) { + IGL_LOG_ERROR("Timer::end() called with null command list\n"); + return; + } + + if (!fence) { + IGL_LOG_ERROR("Timer::end() called with null fence\n"); + return; + } + + if (ended_.load(std::memory_order_acquire)) { + IGL_LOG_ERROR("Timer::end() called multiple times\n"); + return; + } + + // Record end timestamp (index 1) at the end of GPU work. + // Bottom-of-pipe operation: samples when the GPU finishes all preceding work. + commandList->EndQuery(queryHeap_.Get(), D3D12_QUERY_TYPE_TIMESTAMP, 1); + + // Resolve query data to the readback buffer. + // This GPU command copies timestamp values from the query heap to a CPU-readable buffer; + // the resolved data is only valid after the fence signals completion. 
+ commandList->ResolveQueryData( + queryHeap_.Get(), + D3D12_QUERY_TYPE_TIMESTAMP, + 0, // Start index + 2, // Count (begin + end) + readbackBuffer_.Get(), + 0 // Destination offset + ); + + // Store fence and fence value for later completion checking. + // Thread-safe: fence_ is written once; atomics ensure visibility. + fence_ = fence; + fenceValue_.store(fenceValue, std::memory_order_release); + ended_.store(true, std::memory_order_release); +} + +uint64_t Timer::getElapsedTimeNanos() const { + if (!readbackBuffer_.Get() || !ended_.load(std::memory_order_acquire)) { + return 0; + } + + // Check if the fence has signaled; results are only valid after GPU completes. + // Thread-safe: fence_ is set once before the ended_ flag, and memory ordering ensures visibility. + uint64_t fenceVal = fenceValue_.load(std::memory_order_acquire); + if (!fence_ || fence_->GetCompletedValue() < fenceVal) { + return 0; // GPU hasn't finished yet, return 0 + } + + // If we've already resolved and cached the result, return it. + // Thread-safe: resolved_ flag prevents multiple threads from mapping simultaneously. + if (resolved_.load(std::memory_order_acquire)) { + return cachedElapsedNanos_.load(std::memory_order_relaxed); + } + + // GPU has completed; it is now safe to read the query results. + // Map the readback buffer to read timestamp values. 
+ void* mappedData = nullptr; + D3D12_RANGE readRange{0, sizeof(uint64_t) * 2}; // Only read the 2 timestamps + HRESULT hr = readbackBuffer_->Map(0, &readRange, &mappedData); + if (FAILED(hr)) { + IGL_LOG_ERROR("Timer: Failed to map readback buffer: 0x%08X\n", hr); + return 0; + } + + // Read timestamp values + const auto* timestamps = static_cast(mappedData); + uint64_t beginTime = timestamps[0]; + uint64_t endTime = timestamps[1]; + + // Unmap buffer + D3D12_RANGE writeRange{0, 0}; // No writes + readbackBuffer_->Unmap(0, &writeRange); + + // Validate timestamp data + if (endTime <= beginTime) { +#ifdef IGL_DEBUG + IGL_LOG_ERROR("Timer: Invalid timestamp data (begin=%llu, end=%llu) - GPU work may not have executed\n", + beginTime, endTime); +#endif + return 0; + } + + if (timestampFrequency_ == 0) { +#ifdef IGL_DEBUG + IGL_LOG_ERROR("Timer: Invalid timestamp frequency (0 Hz) - timer disabled\n"); +#endif + return 0; + } + + // Calculate elapsed time in GPU ticks + uint64_t deltaTicks = endTime - beginTime; + + // Convert ticks to nanoseconds using floating-point math for accuracy, + // as recommended by Microsoft docs: nanoseconds = (ticks / frequency) * 1,000,000,000. + const double nanosPerSecond = 1000000000.0; + double elapsedNanos = (static_cast(deltaTicks) / static_cast(timestampFrequency_)) * nanosPerSecond; + + // Cache the result so we don't re-read from GPU. + // Thread-safe: store cached value before setting the resolved flag. + cachedElapsedNanos_.store(static_cast(elapsedNanos), std::memory_order_release); + resolved_.store(true, std::memory_order_release); + + return static_cast(elapsedNanos); +} + +bool Timer::resultsAvailable() const { + // Results are available only after the fence has signaled completion. + // This ensures we don't read uninitialized or garbage data from the query heap. + // Thread-safe: use atomic loads with proper memory ordering. 
+ if (!ended_.load(std::memory_order_acquire) || !fence_) { + return false; + } + + // Check if GPU has completed execution (fence signaled) + uint64_t fenceVal = fenceValue_.load(std::memory_order_acquire); + return fence_->GetCompletedValue() >= fenceVal; +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/Timer.h b/src/igl/d3d12/Timer.h new file mode 100644 index 0000000000..104f04d9e5 --- /dev/null +++ b/src/igl/d3d12/Timer.h @@ -0,0 +1,90 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include +#include + +namespace igl::d3d12 { + +class Device; + +/// @brief GPU timer implementation using D3D12 timestamp queries +/// @details Implements ITimer interface for D3D12 backend using query heaps. +/// +/// Lifecycle: +/// - Constructor creates query heap and readback buffer resources +/// - begin() called when command list is reset for recording (CommandBuffer::begin()) +/// - GPU work is encoded in the command list +/// - end() called during submission before command list is closed (CommandQueue::submit()) +/// - Query results are fence-synchronized and only read after GPU completes +/// +/// Cross-platform timestamp semantics +/// ---------------------------------- +/// All timestamps returned by getElapsedTimeNanos() are in nanoseconds, providing +/// cross-platform consistency with Vulkan and other backends. +/// +/// D3D12 GPU timestamps are automatically converted from hardware ticks to nanoseconds +/// using the GPU timestamp frequency (ID3D12CommandQueue::GetTimestampFrequency()). +/// +/// Formula: elapsedNanos = (endTicks - startTicks) * 1,000,000,000 / frequencyHz. +/// +/// This ensures consistent timing across all IGL backends regardless of hardware. 
+/// +/// The implementation measures GPU execution time via timestamp placement +/// and fence-synchronized readback, and is safe for cross-thread queries. +class Timer final : public ITimer { + public: + /// @brief Constructor - creates query heap and readback buffer, starts timer + /// @param device D3D12 device used to create resources + explicit Timer(const Device& device); + ~Timer() override; + + Timer(const Timer&) = delete; + Timer& operator=(const Timer&) = delete; + Timer(Timer&&) = delete; + Timer& operator=(Timer&&) = delete; + + /// @brief Record start timestamp in command list + /// @param commandList D3D12 command list to record start timestamp + void begin(ID3D12GraphicsCommandList* commandList); + + /// @brief Record end timestamp and associate with fence value + /// @param commandList D3D12 command list to record end timestamp and resolve queries + /// @param fence Fence to check for GPU completion + /// @param fenceValue Fence value that will be signaled when GPU completes + void end(ID3D12GraphicsCommandList* commandList, ID3D12Fence* fence, uint64_t fenceValue); + + /// @brief Returns elapsed GPU time in nanoseconds + /// @return Elapsed time in nanoseconds, or 0 if results not yet available + [[nodiscard]] uint64_t getElapsedTimeNanos() const override; + + /// @brief Check if timer results are available + /// @return true if results can be read without blocking (fence has signaled) + [[nodiscard]] bool resultsAvailable() const override; + + private: + igl::d3d12::ComPtr queryHeap_; + igl::d3d12::ComPtr readbackBuffer_; + uint64_t timestampFrequency_ = 0; // GPU timestamp frequency (ticks per second), 0 = timer disabled + bool resourceCreationFailed_ = false; // Track if constructor failed to create resources + + // Fence synchronization for accurate GPU timing. + // Thread-safe: use atomics to allow safe cross-thread queries. 
+ ID3D12Fence* fence_ = nullptr; // Fence to check completion (not owned, set once in end()) + std::atomic<uint64_t> fenceValue_{0}; // Fence value when timer ended + mutable std::atomic<bool> resolved_{false}; // Has query data been resolved and cached? (mutable for lazy resolution in const getter) + std::atomic<bool> ended_{false}; // Has end() been called? + + // Cached results to avoid re-reading from GPU. + // Thread-safe: only written once after the fence signals, then immutable (mutable for lazy resolution in const getter). + mutable std::atomic<uint64_t> cachedElapsedNanos_{0}; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/UploadRingBuffer.cpp b/src/igl/d3d12/UploadRingBuffer.cpp new file mode 100644 index 0000000000..52a97d3149 --- /dev/null +++ b/src/igl/d3d12/UploadRingBuffer.cpp @@ -0,0 +1,284 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#include <igl/d3d12/UploadRingBuffer.h> +#include <igl/d3d12/D3D12Context.h> + +namespace igl::d3d12 { + +UploadRingBuffer::UploadRingBuffer(ID3D12Device* device, uint64_t size) + : device_(device), size_(size) { + if (!device_) { + IGL_LOG_ERROR("UploadRingBuffer: Device is null\n"); + return; + } + + // Create large upload heap + D3D12_HEAP_PROPERTIES uploadHeapProps = {}; + uploadHeapProps.Type = D3D12_HEAP_TYPE_UPLOAD; + uploadHeapProps.CreationNodeMask = 1; + uploadHeapProps.VisibleNodeMask = 1; + + D3D12_RESOURCE_DESC bufferDesc = {}; + bufferDesc.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER; + bufferDesc.Alignment = 0; + bufferDesc.Width = size_; + bufferDesc.Height = 1; + bufferDesc.DepthOrArraySize = 1; + bufferDesc.MipLevels = 1; + bufferDesc.Format = DXGI_FORMAT_UNKNOWN; + bufferDesc.SampleDesc.Count = 1; + bufferDesc.SampleDesc.Quality = 0; + bufferDesc.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR; + bufferDesc.Flags = D3D12_RESOURCE_FLAG_NONE; + + HRESULT hr = device_->CreateCommittedResource( + &uploadHeapProps, + 
D3D12_HEAP_FLAG_NONE, + &bufferDesc, + D3D12_RESOURCE_STATE_GENERIC_READ, + nullptr, + IID_PPV_ARGS(uploadHeap_.GetAddressOf())); + + if (FAILED(hr)) { + IGL_LOG_ERROR("UploadRingBuffer: Failed to create upload heap (HRESULT=0x%08X)\n", hr); + return; + } + + // Map the entire buffer persistently + D3D12_RANGE readRange = {0, 0}; // Not reading from GPU + hr = uploadHeap_->Map(0, &readRange, &cpuBase_); + if (FAILED(hr)) { + IGL_LOG_ERROR("UploadRingBuffer: Failed to map upload heap (HRESULT=0x%08X)\n", hr); + cpuBase_ = nullptr; + return; + } + + gpuBase_ = uploadHeap_->GetGPUVirtualAddress(); + + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Created ring buffer (size=%llu MB, cpuBase=%p, gpuBase=0x%llX)\n", + size_ / (1024 * 1024), cpuBase_, gpuBase_); + + // Track resource creation + D3D12Context::trackResourceCreation("UploadRingBuffer", size_); +} + +UploadRingBuffer::~UploadRingBuffer() { + if (uploadHeap_.Get() && cpuBase_) { + uploadHeap_->Unmap(0, nullptr); + cpuBase_ = nullptr; + } + + if (uploadHeap_.Get()) { + // Track resource destruction + D3D12Context::trackResourceDestruction("UploadRingBuffer", size_); + } + + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Destroyed (allocations=%llu, failures=%llu)\n", + allocationCount_, failureCount_); +} + +UploadRingBuffer::Allocation UploadRingBuffer::allocate(uint64_t size, uint64_t alignment, uint64_t fenceValue) { + std::lock_guard lock(mutex_); + + if (!uploadHeap_.Get() || !cpuBase_) { + IGL_LOG_ERROR("UploadRingBuffer::allocate: Ring buffer not initialized\n"); + failureCount_++; + return Allocation{}; + } + + if (size == 0) { + IGL_LOG_ERROR("UploadRingBuffer::allocate: Size is zero\n"); + failureCount_++; + return Allocation{}; + } + + // Align size up for proper alignment of next allocation + const uint64_t alignedSize = alignUp(size, alignment); + + // Invariants (all protected by mutex_): + // - head_ is the next free offset where a new allocation can start + // - tail_ is the offset of the oldest in-flight 
allocation (or equals head_ when empty) + // - pendingAllocations_ is a queue of all in-flight allocations in submission order + // - When pendingAllocations_.empty(), the entire buffer is free: tail_ == head_ + const bool bufferEmpty = pendingAllocations_.empty(); + const uint64_t currentHead = head_; + const uint64_t currentTail = bufferEmpty ? currentHead : tail_; + + // Detect full ring: head == tail with in-flight allocations means buffer is completely occupied + const bool bufferFull = !bufferEmpty && (currentHead == currentTail); + + if (bufferFull) { + // Ring buffer is completely full - no free space available + failureCount_++; + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Ring buffer completely full (size=%llu)\n", size_); + return Allocation{}; + } + + // Align head to requested alignment + const uint64_t alignedHead = alignUp(currentHead, alignment); + + // Determine available free space based on buffer state + // When empty: entire buffer is available starting from head_ + // When head > tail: in-flight region spans [tail, head); free space is [head, size_) and [0, tail) + // When head < tail: in-flight region spans [tail, size_) + [0, head); free space is [head, tail) + + bool canFit = false; + uint64_t allocationOffset = alignedHead; + + if (bufferEmpty) { + // Entire buffer is free + if (alignedHead + alignedSize <= size_) { + canFit = true; + allocationOffset = alignedHead; + } else if (alignedSize <= size_) { + // Wrap to beginning + allocationOffset = 0; + canFit = true; + } + } else if (currentHead >= currentTail) { + // In-flight allocations have wrapped around: free regions are [head, size_) and [0, tail) + if (alignedHead + alignedSize <= size_) { + // Fits at current head position + canFit = true; + allocationOffset = alignedHead; + } else if (alignedSize <= currentTail) { + // Wrap around to beginning + allocationOffset = 0; + canFit = true; + } + } else { + // In-flight allocations have not wrapped: free space is [head, tail) + if 
(alignedHead + alignedSize <= currentTail) { + canFit = true; + allocationOffset = alignedHead; + } + } + + if (!canFit) { + // Not enough space - caller will fall back to dedicated staging buffer + // This is expected behavior when ring is full, not an error condition + // Note: failureCount_ tracks ring-full events as a diagnostic metric, not errors + failureCount_++; + IGL_D3D12_LOG_VERBOSE("UploadRingBuffer: Insufficient space (request=%llu, approx used=%llu/%llu)\n", + alignedSize, getUsedSizeUnlocked(), size_); + return Allocation{}; + } + + // Final validation: ensure allocation doesn't overlap with in-flight allocations + const uint64_t allocationEnd = allocationOffset + alignedSize; + +#ifdef _DEBUG + // Debug: verify allocation doesn't overlap with in-flight allocations + if (!bufferEmpty) { + if (allocationOffset == 0) { + // Wraparound case: ensure we don't exceed tail + IGL_DEBUG_ASSERT(allocationEnd <= currentTail, + "UploadRingBuffer: Allocation [0, %llu) would overlap tail at %llu", + allocationEnd, currentTail); + } else if (currentHead >= currentTail) { + // In-flight region wrapped: allocation should be in free region [head, size_) + IGL_DEBUG_ASSERT(allocationOffset >= currentHead && allocationEnd <= size_, + "UploadRingBuffer: Allocation [%llu, %llu) outside free region [%llu, %llu)", + allocationOffset, allocationEnd, currentHead, size_); + } else { + // In-flight region not wrapped: allocation should be in free region [head, tail) + IGL_DEBUG_ASSERT(allocationOffset >= currentHead && allocationEnd <= currentTail, + "UploadRingBuffer: Allocation [%llu, %llu) outside free region [%llu, %llu)", + allocationOffset, allocationEnd, currentHead, currentTail); + } + } +#endif + + // Create allocation. 
+ Allocation allocation; + allocation.buffer = uploadHeap_; + allocation.cpuAddress = static_cast<uint8_t*>(cpuBase_) + allocationOffset; + allocation.gpuAddress = gpuBase_ + allocationOffset; + allocation.offset = allocationOffset; + allocation.size = alignedSize; + allocation.valid = true; + + // Track pending allocation for retirement + pendingAllocations_.push({allocationOffset, alignedSize, fenceValue}); + + // Update head pointer + uint64_t newHead = allocationOffset + alignedSize; + if (newHead >= size_) { + newHead = 0; // Wrap around + } + head_ = newHead; + + // Update tail_ for first allocation when buffer transitions from empty + if (bufferEmpty) { + tail_ = allocationOffset; + } + + allocationCount_++; + +#ifdef _DEBUG + // Debug validation: ensure invariants hold after allocation + IGL_DEBUG_ASSERT(newHead <= size_, "Head exceeded buffer size!"); + IGL_DEBUG_ASSERT(!pendingAllocations_.empty() || head_ == tail_, + "Buffer should have pending allocations or head == tail"); + + // Validate that used size is reasonable (use unlocked helper since we hold mutex_) + const uint64_t usedSize = getUsedSizeUnlocked(); + IGL_DEBUG_ASSERT(usedSize <= size_, "Used size %llu exceeds buffer size %llu", usedSize, size_); +#endif + + return allocation; +} + +void UploadRingBuffer::retire(uint64_t completedFenceValue) { + std::lock_guard lock(mutex_); + + // Process all pending allocations that have completed + while (!pendingAllocations_.empty()) { + const auto& pending = pendingAllocations_.front(); + + if (pending.fenceValue > completedFenceValue) { + // This and all subsequent allocations are still pending + break; + } + + // This allocation has completed, reclaim the memory + pendingAllocations_.pop(); + + // Update tail_ to point to the next oldest allocation, or to head_ if buffer is now empty + if (!pendingAllocations_.empty()) { + tail_ = pendingAllocations_.front().offset; + } else { + // Buffer is now empty: reset tail to head to maintain invariant + tail_ = head_; + 
} + } + +#ifdef _DEBUG + // Validate invariant: when empty, tail == head + if (pendingAllocations_.empty()) { + IGL_DEBUG_ASSERT(tail_ == head_, + "Buffer empty but tail (%llu) != head (%llu)", tail_, head_); + } +#endif +} + +uint64_t UploadRingBuffer::getUsedSizeUnlocked() const { + // Note: Caller must hold mutex_ + if (head_ >= tail_) { + return head_ - tail_; + } else { + return (size_ - tail_) + head_; + } +} + +uint64_t UploadRingBuffer::getUsedSize() const { + std::lock_guard lock(mutex_); + return getUsedSizeUnlocked(); +} + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/UploadRingBuffer.h b/src/igl/d3d12/UploadRingBuffer.h new file mode 100644 index 0000000000..b3657f7217 --- /dev/null +++ b/src/igl/d3d12/UploadRingBuffer.h @@ -0,0 +1,155 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include <igl/d3d12/ComPtr.h> +#include <mutex> +#include <queue> + +namespace igl::d3d12 { + +/** + * @brief Upload ring buffer for streaming resources. + * + * Manages a large staging buffer (64-256MB) for efficient resource uploads. + * Implements a ring buffer pattern with fence-based memory retirement to + * reduce allocator churn and memory fragmentation. + * + * Key features: + * - Large pre-allocated upload heap + * - Linear sub-allocation with wraparound + * - Fence-based memory retirement and recycling + * - Thread-safe allocation + */ +class UploadRingBuffer { + public: + /** + * @brief Represents a sub-allocation from the ring buffer + */ + struct Allocation { + igl::d3d12::ComPtr<ID3D12Resource> buffer; // Underlying buffer resource. 
+ void* cpuAddress = nullptr; // CPU-visible mapped address + D3D12_GPU_VIRTUAL_ADDRESS gpuAddress = 0; // GPU virtual address + uint64_t offset = 0; // Offset within ring buffer + uint64_t size = 0; // Size of allocation + bool valid = false; // Whether allocation succeeded + }; + + /** + * @brief Constructs an upload ring buffer + * @param device D3D12 device for resource creation + * @param size Total size of ring buffer (default: 128MB) + * + * T14: Default value (128MB) matches D3D12ContextConfig::defaultConfig().uploadRingBufferSize. + * In production, call sites pass D3D12ContextConfig::uploadRingBufferSize explicitly so the + * ring size is driven by the active context configuration rather than this default. + */ + explicit UploadRingBuffer(ID3D12Device* device, uint64_t size = 128 * 1024 * 1024); + ~UploadRingBuffer(); + + // Non-copyable + UploadRingBuffer(const UploadRingBuffer&) = delete; + UploadRingBuffer& operator=(const UploadRingBuffer&) = delete; + + /** + * @brief Allocates staging memory from the ring buffer + * @param size Size in bytes to allocate + * @param alignment Alignment requirement (e.g., 256 for constant buffers) + * @param fenceValue Fence value when this allocation will be retired + * @return Allocation structure (check valid flag for success) + * + * Note: If allocation fails due to insufficient space, returns invalid allocation. + * Caller should fall back to creating a dedicated staging buffer. + */ + Allocation allocate(uint64_t size, uint64_t alignment, uint64_t fenceValue); + + /** + * @brief Retires allocations that have completed on GPU + * @param completedFenceValue Fence value that has been signaled by GPU + * + * Reclaims memory from allocations associated with fence values <= completedFenceValue. + * This allows the ring buffer to wrap around and reuse memory. 
+ */ + void retire(uint64_t completedFenceValue); + + /** + * @brief Gets total size of ring buffer + */ + uint64_t getTotalSize() const { return size_; } + + /** + * @brief Gets estimated used size based on head/tail distance (for diagnostics) + * + * Note: Returns approximate usage; does not account for internal alignment gaps. + * Returns 0 when buffer is empty (tail == head with no pending allocations). + * Also returns 0 when buffer is completely full (tail == head with pending allocations); + * use pendingAllocations or getFailureCount() to distinguish empty vs. full states. + */ + uint64_t getUsedSize() const; + + /** + * @brief Gets number of allocations made (for performance metrics) + */ + uint64_t getAllocationCount() const { return allocationCount_; } + + /** + * @brief Gets number of times allocation could not be satisfied from ring buffer (for metrics) + * + * Note: This counts ring-full events where callers fall back to dedicated staging buffers, + * not error conditions. It is a diagnostic metric for ring buffer utilization. 
+ */ + uint64_t getFailureCount() const { return failureCount_; } + + /** + * @brief Gets the underlying upload heap resource (for copy operations) + */ + ID3D12Resource* getUploadHeap() const { return uploadHeap_.Get(); } + + private: + /** + * @brief Represents a pending allocation waiting for GPU completion + */ + struct PendingAllocation { + uint64_t offset; // Start offset in ring buffer + uint64_t size; // Size of allocation + uint64_t fenceValue; // Fence value when allocation can be retired + }; + + /** + * @brief Aligns value up to specified alignment + */ + static uint64_t alignUp(uint64_t value, uint64_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); + } + + /** + * @brief Internal helper to compute used size without locking + * @note Caller must hold mutex_ + * @note Returns 0 when head == tail (both empty and full states) + */ + uint64_t getUsedSizeUnlocked() const; + + + ID3D12Device* device_ = nullptr; + igl::d3d12::ComPtr<ID3D12Resource> uploadHeap_; + void* cpuBase_ = nullptr; // CPU-mapped base address + D3D12_GPU_VIRTUAL_ADDRESS gpuBase_ = 0; // GPU base address + + uint64_t size_ = 0; // Total ring buffer size + uint64_t head_ = 0; // Next free offset for new allocations (protected by mutex_) + uint64_t tail_ = 0; // Offset of oldest in-flight allocation; equals head_ when empty (protected by mutex_) + + std::queue<PendingAllocation> pendingAllocations_; // Allocations waiting for GPU + mutable std::mutex mutex_; // Thread safety + + // Metrics + uint64_t allocationCount_ = 0; + uint64_t failureCount_ = 0; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/d3d12/VertexInputState.h b/src/igl/d3d12/VertexInputState.h new file mode 100644 index 0000000000..119ad4c22c --- /dev/null +++ b/src/igl/d3d12/VertexInputState.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include + +namespace igl::d3d12 { + +class VertexInputState final : public IVertexInputState { + public: + explicit VertexInputState(const VertexInputStateDesc& desc) : desc_(desc) {} + ~VertexInputState() override = default; + + const VertexInputStateDesc& getDesc() const { return desc_; } + + private: + VertexInputStateDesc desc_; +}; + +} // namespace igl::d3d12 diff --git a/src/igl/metal/DeviceFeatureSet.mm b/src/igl/metal/DeviceFeatureSet.mm index 2e9f7f112f..dfd8395085 100644 --- a/src/igl/metal/DeviceFeatureSet.mm +++ b/src/igl/metal/DeviceFeatureSet.mm @@ -286,6 +286,27 @@ static size_t getGPUFamily(id device) { case DeviceFeatureLimits::MaxBindBytesBytes: result = 4096; return true; + case DeviceFeatureLimits::MaxTextureDimension3D: +#if IGL_PLATFORM_IOS + result = (gpuFamily_ <= 2) ? 2048 : 2048; +#else + result = 2048; +#endif + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + result = 1024; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + result = 1024; + return true; + case DeviceFeatureLimits::MaxVertexInputAttributes: + result = 31; + return true; + case DeviceFeatureLimits::MaxColorAttachments: + result = 8; + return true; default: IGL_DEBUG_ABORT( "invalid feature limit query: feature limit query is not implemented or does not exist\n"); diff --git a/src/igl/opengl/DeviceFeatureSet.cpp b/src/igl/opengl/DeviceFeatureSet.cpp index b22b7bd553..952d0a8378 100644 --- a/src/igl/opengl/DeviceFeatureSet.cpp +++ b/src/igl/opengl/DeviceFeatureSet.cpp @@ -1133,6 +1133,50 @@ bool DeviceFeatureSet::getFeatureLimits(DeviceFeatureLimits featureLimits, size_ case DeviceFeatureLimits::MaxBindBytesBytes: result = 0; return true; + case DeviceFeatureLimits::MaxTextureDimension3D: + glContext_.getIntegerv(GL_MAX_3D_TEXTURE_SIZE, &tsize); + result = (size_t)tsize; + return true; + 
case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + if (hasFeature(DeviceFeatures::Compute)) { + // OpenGL ES 3.1+ and OpenGL 4.3+: use conservative value + result = 1024; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + if (hasFeature(DeviceFeatures::Compute)) { + // OpenGL ES 3.1+ and OpenGL 4.3+: use conservative value + result = 1024; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + if (hasFeature(DeviceFeatures::Compute)) { + // OpenGL ES 3.1+ and OpenGL 4.3+: use conservative value + result = 64; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + if (hasFeature(DeviceFeatures::Compute)) { + glContext_.getIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &tsize); + result = (size_t)tsize; + } else { + result = 0; + } + return true; + case DeviceFeatureLimits::MaxVertexInputAttributes: + glContext_.getIntegerv(GL_MAX_VERTEX_ATTRIBS, &tsize); + result = (size_t)tsize; + return true; + case DeviceFeatureLimits::MaxColorAttachments: + glContext_.getIntegerv(GL_MAX_COLOR_ATTACHMENTS, &tsize); + result = (size_t)tsize; + return true; default: IGL_DEBUG_ABORT( "invalid feature limit query: feature limit query is not implemented or does " diff --git a/src/igl/tests/CMakeLists.txt b/src/igl/tests/CMakeLists.txt index 797b18fbb5..2406b83a42 100644 --- a/src/igl/tests/CMakeLists.txt +++ b/src/igl/tests/CMakeLists.txt @@ -37,6 +37,13 @@ if(IGL_WITH_METAL) list(APPEND HEADER_FILES util/device/metal/TestDevice.h) endif() +if(IGL_WITH_D3D12) + file(GLOB D3D12_SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} d3d12/*.cpp) + list(APPEND SRC_FILES ${D3D12_SRC_FILES}) + list(APPEND SRC_FILES util/device/d3d12/TestDevice.cpp) + list(APPEND HEADER_FILES util/device/d3d12/TestDevice.h) +endif() + if(IGL_WITH_IGLU) file(GLOB IGLU_SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} 
iglu/*.cpp) file(GLOB IGLU_SRC_FILES LIST_DIRECTORIES false RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} iglu/texture_loader/*.cpp) @@ -51,6 +58,8 @@ endif() enable_testing() +# Add custom main to initialize COM and install signal handlers before gtest +list(APPEND SRC_FILES main.cpp) add_executable(IGLTests ${SRC_FILES} ${HEADER_FILES}) if(WIN32) @@ -60,6 +69,23 @@ if(WIN32) target_compile_definitions(IGLTests PRIVATE -DNOMINMAX) target_compile_definitions(IGLTests PRIVATE -DIGL_UNIT_TESTS_GLES_VERSION=3) + # Ensure Unicode matches across GoogleTest and our test binary on Windows + # Ensure tests are built without UNICODE to match GoogleTest defaults and avoid CRT mismatches + if(MSVC) + target_compile_options(IGLTests PRIVATE /UUNICODE /U_UNICODE) + if(TARGET gtest) + target_compile_options(gtest PRIVATE /UUNICODE /U_UNICODE) + endif() + if(TARGET gtest_main) + target_compile_options(gtest_main PRIVATE /UUNICODE /U_UNICODE) + endif() + if(TARGET gmock) + target_compile_options(gmock PRIVATE /UUNICODE /U_UNICODE) + endif() + if(TARGET gmock_main) + target_compile_options(gmock_main PRIVATE /UUNICODE /U_UNICODE) + endif() + endif() target_include_directories(IGLTests PRIVATE "${IGL_ROOT_DIR}/third-party/deps/src/glew/include") # IGL tests use EGL, here's a stub for Windows add_library( @@ -79,22 +105,46 @@ endif() igl_set_cxxstd(IGLTests 20) igl_set_folder(IGLTests "IGL") -# gtest +# gtest - FORCE static linkage to avoid DLL initialization issues +# NOTE: gmock is NOT needed - no tests use it # cmake-format: off -set(BUILD_GMOCK OFF CACHE BOOL "") -set(INSTALL_GTEST OFF CACHE BOOL "") -set(GTEST_HAS_ABSL OFF CACHE BOOL "") +set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE) +set(BUILD_GMOCK OFF CACHE BOOL "Build gmock" FORCE) +set(INSTALL_GTEST OFF CACHE BOOL "Install gtest" FORCE) +set(GTEST_HAS_ABSL OFF CACHE BOOL "Use Abseil" FORCE) +set(gtest_force_shared_crt OFF CACHE BOOL "Force shared CRT" FORCE) +# Explicitly disable shared library build 
+set(BUILD_SHARED_LIBS OFF) +# CRITICAL: Set these compile definitions globally BEFORE including gtest +add_compile_definitions(GTEST_LINKED_AS_SHARED_LIBRARY=0 GTEST_CREATE_SHARED_LIBRARY=0) # cmake-format: on -add_subdirectory(${IGL_ROOT_DIR}/third-party/deps/src/gtest "gtest") +add_subdirectory(${IGL_ROOT_DIR}/third-party/deps/src/gtest "gtest" EXCLUDE_FROM_ALL) + +# CRITICAL: Force static linking by removing any DLL export definitions +if(TARGET gtest) + target_compile_definitions(gtest PRIVATE GTEST_CREATE_SHARED_LIBRARY=0) + target_compile_definitions(gtest PUBLIC GTEST_LINKED_AS_SHARED_LIBRARY=0) +endif() igl_set_folder(gtest "third-party") igl_set_folder(gtest_main "third-party") +igl_set_cxxstd(gtest 17) +igl_set_cxxstd(gtest_main 17) target_link_libraries(IGLTests PUBLIC IGLLibrary) target_link_libraries(IGLTests PUBLIC gtest) -target_link_libraries(IGLTests PUBLIC gtest_main) +# Use our custom main.cpp instead of gtest_main +# NOTE: gmock is NOT linked - no tests use gmock, only gtest target_link_libraries(IGLTests PUBLIC IGLUmanagedUniformBuffer) target_link_libraries(IGLTests PUBLIC IGLUshaderCross) +# Add STB include directory for image utilities used by test artifacts +target_include_directories(IGLTests PRIVATE "${IGL_ROOT_DIR}/third-party/deps/src/stb") + +# Enable experimental features ONLY for unit tests (NOT for render sessions) +# This allows D3D12 headless tests to use unsigned DXIL shaders +if(IGL_WITH_D3D12) + target_compile_definitions(IGLTests PRIVATE IGL_ENABLE_EXPERIMENTAL_FEATURES_FOR_TESTS=1) +endif() if(TARGET glfw) target_link_libraries(IGLTests PRIVATE glfw) endif() @@ -108,14 +158,53 @@ if(IGL_WITH_IGLU) target_link_libraries(IGLTests PUBLIC IGLUuniform) endif() -if(IGL_WITH_VULKAN) +if(IGL_WITH_D3D12) + target_compile_definitions(IGLTests PUBLIC -DIGL_BACKEND_TYPE="d3d12") +elseif(IGL_WITH_VULKAN) target_compile_definitions(IGLTests PUBLIC -DIGL_BACKEND_TYPE="vulkan") elseif(IGL_WITH_OPENGL OR IGL_WITH_OPENGLES) 
target_compile_definitions(IGLTests PUBLIC -DIGL_BACKEND_TYPE="ogl") endif() +# Ensure gtest/gmock are linked statically +target_compile_definitions(IGLTests PRIVATE GTEST_LINKED_AS_SHARED_LIBRARY=0 GTEST_CREATE_SHARED_LIBRARY=0) + if(UNIX) if (CMAKE_C_COMPILER_ID STREQUAL "GNU") target_compile_options(IGLTests PUBLIC $<$:-Wno-volatile>) endif() endif() + +# Register tests with CTest so they can be discovered +include(GoogleTest) +gtest_discover_tests(IGLTests + DISCOVERY_TIMEOUT 60 + DISCOVERY_MODE PRE_TEST +) + +# Fallback: also register the test executable directly for manual runs +if (WIN32) + add_test(NAME IGLTests COMMAND ${CMAKE_CURRENT_BINARY_DIR}/$/IGLTests.exe) +else() + add_test(NAME IGLTests COMMAND ${CMAKE_CURRENT_BINARY_DIR}/IGLTests) +endif() + +# Copy dxil.dll for D3D12 tests (required for DXIL signing) +if(IGL_WITH_D3D12 AND WIN32 AND MSVC) + # Try to locate dxil.dll from the Windows SDK without relying on machine-specific SDK versions + find_file(DXIL_DLL_FOR_TESTS + NAMES dxil.dll + HINTS + "$ENV{WindowsSdkBinPath}" + "$ENV{WindowsSdkDir}/bin" + PATH_SUFFIXES x64 + ) + if(DXIL_DLL_FOR_TESTS) + add_custom_command(TARGET IGLTests POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${DXIL_DLL_FOR_TESTS}" + "$/" + COMMENT "Copying dxil.dll for IGLTests" + ) + endif() +endif() diff --git a/src/igl/tests/ComputeCommandEncoder.cpp b/src/igl/tests/ComputeCommandEncoder.cpp index 125860eeec..af1eeef086 100644 --- a/src/igl/tests/ComputeCommandEncoder.cpp +++ b/src/igl/tests/ComputeCommandEncoder.cpp @@ -52,8 +52,10 @@ class ComputeCommandEncoderTest : public ::testing::Test { BufferDesc::BufferTypeBits::Storage, dataIn.data(), sizeof(float) * dataIn.size()); bufferIn_ = iglDev_->createBuffer(vbInDesc, nullptr); ASSERT_TRUE(bufferIn_ != nullptr); + // Use ResourceStorage::Shared for output buffers so they can be mapped for reading const BufferDesc bufferOutDesc = - BufferDesc(BufferDesc::BufferTypeBits::Storage, nullptr, sizeof(float) * 
dataIn.size()); + BufferDesc(BufferDesc::BufferTypeBits::Storage, nullptr, sizeof(float) * dataIn.size(), + ResourceStorage::Shared); bufferOut0_ = iglDev_->createBuffer(bufferOutDesc, nullptr); ASSERT_TRUE(bufferOut0_ != nullptr); bufferOut1_ = iglDev_->createBuffer(bufferOutDesc, nullptr); @@ -73,6 +75,9 @@ class ComputeCommandEncoderTest : public ::testing::Test { } else if (iglDev_->getBackendType() == igl::BackendType::Metal) { source = igl::tests::data::shader::kMtlSimpleComputeShader; entryName = igl::tests::data::shader::kSimpleComputeFunc; + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = igl::tests::data::shader::kD3D12SimpleComputeShader; + entryName = igl::tests::data::shader::kSimpleComputeFunc; } else { IGL_DEBUG_ASSERT_NOT_REACHED(); } diff --git a/src/igl/tests/Device.cpp b/src/igl/tests/Device.cpp index c3daacdecf..25f9a7ad22 100644 --- a/src/igl/tests/Device.cpp +++ b/src/igl/tests/Device.cpp @@ -9,6 +9,10 @@ #include "data/VertexIndexData.h" #include "util/Common.h" +#if IGL_PLATFORM_WINDOWS +#include +#endif + #include #include #include @@ -31,7 +35,11 @@ namespace igl::tests { class DeviceTest : public ::testing::Test { public: DeviceTest() = default; - ~DeviceTest() override = default; + ~DeviceTest() override { + cmdBuf_.reset(); + cmdQueue_.reset(); + iglDev_.reset(); + } // Set up common resources. This will create a device and a command queue void SetUp() override { @@ -205,10 +213,13 @@ TEST_F(DeviceTest, GetBackendType) { ASSERT_EQ(backend_, util::kBackendOgl); } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { ASSERT_EQ(backend_, util::kBackendVul); + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + ASSERT_EQ(backend_, util::kBackendD3D12); } else { - // Unknow backend. Please add to this test. 
ASSERT_TRUE(0); } } } // namespace igl::tests + + diff --git a/src/igl/tests/DeviceFeatureSet.cpp b/src/igl/tests/DeviceFeatureSet.cpp index fd44f55f77..a3c55fe85f 100644 --- a/src/igl/tests/DeviceFeatureSet.cpp +++ b/src/igl/tests/DeviceFeatureSet.cpp @@ -235,30 +235,31 @@ TEST_F(DeviceFeatureSetTest, hasFeatureForMacOSOrWinOrAndroidTest) { EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ExternalMemoryObjects)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::PushConstants)); } else { - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::Texture2DArray)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::Texture3D)); + // D3D12 backend + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::Texture2DArray)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::Texture3D)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TextureArrayExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TextureExternalImage)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::Multiview)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BindUniform)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TexturePartialMipChain)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::TexturePartialMipChain)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BufferRing)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BufferNoCopy)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ShaderLibrary)); - EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::BindBytes)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::ShaderLibrary)); + EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BindBytes)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::BufferDeviceAddress)); EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::ShaderTextureLod)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ShaderTextureLodExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::StandardDerivativeExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::SamplerMinMaxLod)); 
EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::DrawIndexedIndirect)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::MultipleRenderTargets)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::MultipleRenderTargets)); EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::ExplicitBinding)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ExplicitBindingExt)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::TextureFormatRG)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ValidationLayersEnabled)); EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::ExternalMemoryObjects)); - EXPECT_FALSE(iglDev_->hasFeature(DeviceFeatures::PushConstants)); + EXPECT_TRUE(iglDev_->hasFeature(DeviceFeatures::PushConstants)); // D3D12 supports push constants via root constants (shader register b2) } } diff --git a/src/igl/tests/RenderCommandEncoder.cpp b/src/igl/tests/RenderCommandEncoder.cpp index a45137377f..0c560a8cfc 100644 --- a/src/igl/tests/RenderCommandEncoder.cpp +++ b/src/igl/tests/RenderCommandEncoder.cpp @@ -684,8 +684,9 @@ TEST_F(RenderCommandEncoderTest, shouldDrawTriangleStrip) { } TEST_F(RenderCommandEncoderTest, shouldDrawTriangleStripCopyTextureToBuffer) { - if (iglDev_->getBackendType() != igl::BackendType::Vulkan) { - GTEST_SKIP() << "Not implemented for non-Vulkan backends"; + if (iglDev_->getBackendType() != igl::BackendType::Vulkan && + iglDev_->getBackendType() != igl::BackendType::D3D12) { + GTEST_SKIP() << "Not implemented for this backend"; return; } @@ -865,8 +866,9 @@ TEST_F(RenderCommandEncoderTest, DepthBiasShouldDrawAPoint) { } TEST_F(RenderCommandEncoderTest, drawUsingBindPushConstants) { - if (iglDev_->getBackendType() != igl::BackendType::Vulkan) { - GTEST_SKIP() << "Push constants are only supported in Vulkan"; + if (iglDev_->getBackendType() != igl::BackendType::Vulkan && + iglDev_->getBackendType() != igl::BackendType::D3D12) { + GTEST_SKIP() << "Push constants are only supported in Vulkan and D3D12"; return; } @@ -878,12 +880,21 @@ 
TEST_F(RenderCommandEncoderTest, drawUsingBindPushConstants) { // Create new shader stages with push constant shaders std::unique_ptr pushConstantStages; - igl::tests::util::createShaderStages(iglDev_, - data::shader::kVulkanPushConstantVertShader, - igl::tests::data::shader::kShaderFunc, - data::shader::kVulkanPushConstantFragShader, - igl::tests::data::shader::kShaderFunc, - pushConstantStages); + if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + igl::tests::util::createShaderStages(iglDev_, + data::shader::kD3D12PushConstantVertShader, + std::string("main"), + data::shader::kD3D12PushConstantFragShader, + std::string("main"), + pushConstantStages); + } else { + igl::tests::util::createShaderStages(iglDev_, + data::shader::kVulkanPushConstantVertShader, + igl::tests::data::shader::kShaderFunc, + data::shader::kVulkanPushConstantFragShader, + igl::tests::data::shader::kShaderFunc, + pushConstantStages); + } ASSERT_TRUE(pushConstantStages); shaderStages_ = std::move(pushConstantStages); diff --git a/src/igl/tests/ShaderLibrary.cpp b/src/igl/tests/ShaderLibrary.cpp index a2fd84f5ce..c49f862f09 100644 --- a/src/igl/tests/ShaderLibrary.cpp +++ b/src/igl/tests/ShaderLibrary.cpp @@ -51,6 +51,8 @@ TEST_F(ShaderLibraryTest, CreateFromSource) { source = data::shader::kMtlSimpleShader.data(); } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = data::shader::kD3D12SimpleShader.data(); } else { IGL_DEBUG_ASSERT_NOT_REACHED(); } @@ -96,6 +98,8 @@ TEST_F(ShaderLibraryTest, CreateFromSourceMultipleModules) { } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { GTEST_SKIP() << "Vulkan does not support multiple modules from the same source code."; return; + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = data::shader::kD3D12SimpleShader.data(); } // Check if source is null 
before passing it to fromStringInput @@ -135,6 +139,8 @@ TEST_F(ShaderLibraryTest, CreateFromSourceNoResult) { source = data::shader::kMtlSimpleShader.data(); } else if (iglDev_->getBackendType() == igl::BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (iglDev_->getBackendType() == igl::BackendType::D3D12) { + source = data::shader::kD3D12SimpleShader.data(); } else { IGL_DEBUG_ASSERT_NOT_REACHED(); } diff --git a/src/igl/tests/ShaderModule.cpp b/src/igl/tests/ShaderModule.cpp index 479b225604..2505537004 100644 --- a/src/igl/tests/ShaderModule.cpp +++ b/src/igl/tests/ShaderModule.cpp @@ -77,36 +77,70 @@ TEST_F(ShaderModuleTest, CompileShaderModule) { Result ret; const char* source = nullptr; - if (backend_ == util::kBackendOgl) { + const auto be = iglDev_->getBackendType(); + if (be == BackendType::OpenGL) { source = data::shader::kOglSimpleVertShader.data(); - } else if (backend_ == util::kBackendMtl) { + } else if (be == BackendType::Metal) { source = data::shader::kMtlSimpleShader.data(); - } else if (backend_ == util::kBackendVul) { + } else if (be == BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (be == BackendType::D3D12) { + // Minimal HLSL vertex shader for D3D12 backend + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return vertexShader(i); } +)"; } else { - ASSERT_TRUE(0); + // Fallback: use D3D12-compatible HLSL to avoid backend string mismatches + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return 
vertexShader(i); } +)"; } auto shaderModule = ShaderModuleCreator::fromStringInput( - *iglDev_, source, {ShaderStage::Vertex, "vertexShader"}, "test", &ret); + *iglDev_, source, + {ShaderStage::Vertex, (be == BackendType::D3D12) ? std::string("main") : std::string("vertexShader")}, + "test", &ret); ASSERT_TRUE(ret.isOk()) << ret.message.c_str(); ASSERT_TRUE(shaderModule != nullptr); } TEST_F(ShaderModuleTest, CompileShaderModuleNoResult) { const char* source = nullptr; - if (backend_ == util::kBackendOgl) { + const auto be2 = iglDev_->getBackendType(); + if (be2 == BackendType::OpenGL) { source = data::shader::kOglSimpleVertShader.data(); - } else if (backend_ == util::kBackendMtl) { + } else if (be2 == BackendType::Metal) { source = data::shader::kMtlSimpleShader.data(); - } else if (backend_ == util::kBackendVul) { + } else if (be2 == BackendType::Vulkan) { source = data::shader::kVulkanSimpleVertShader.data(); + } else if (be2 == BackendType::D3D12) { + // Minimal HLSL vertex shader for D3D12 backend + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return vertexShader(i); } +)"; } else { - ASSERT_TRUE(0); + // Fallback to D3D12-compatible HLSL + source = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct VSOut { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +VSOut vertexShader(VSIn i) { VSOut o; o.position = i.position_in; o.uv = i.uv_in; return o; } +VSOut main(VSIn i) { return vertexShader(i); } +)"; } auto shaderModule = ShaderModuleCreator::fromStringInput( - *iglDev_, source, {ShaderStage::Vertex, "vertexShader"}, "test", nullptr); + *iglDev_, source, + {ShaderStage::Vertex, (be2 == BackendType::D3D12) ? 
std::string("main") : std::string("vertexShader")}, + "test", nullptr); ASSERT_TRUE(shaderModule != nullptr); } } // namespace igl::tests diff --git a/src/igl/tests/Texture.cpp b/src/igl/tests/Texture.cpp index 847ef102dc..d7db7e1d88 100644 --- a/src/igl/tests/Texture.cpp +++ b/src/igl/tests/Texture.cpp @@ -86,7 +86,9 @@ TEST_F(TextureTest, Passthrough) { cmds->bindRenderPipelineState(pipelineState); cmds->bindTexture(textureUnit_, BindTarget::kFragment, inputTexture_.get()); + IGL_LOG_INFO("TEST: About to bind sampler: textureUnit_=%zu, samp_=%p\n", textureUnit_, samp_.get()); cmds->bindSamplerState(textureUnit_, BindTarget::kFragment, samp_.get()); + IGL_LOG_INFO("TEST: After bind sampler\n"); cmds->bindIndexBuffer(*ib_, IndexFormat::UInt16); cmds->drawIndexed(6); diff --git a/src/igl/tests/TextureArray.cpp b/src/igl/tests/TextureArray.cpp index 2958937cd2..f1415fc39b 100644 --- a/src/igl/tests/TextureArray.cpp +++ b/src/igl/tests/TextureArray.cpp @@ -163,6 +163,13 @@ class TextureArrayTest : public ::testing::Test { igl::tests::data::shader::kSimpleVertFunc, igl::tests::data::shader::kSimpleFragFunc, stages); + } else if (iglDev_->getBackendType() == BackendType::D3D12) { + util::createShaderStages(iglDev_, + igl::tests::data::shader::kD3D12SimpleVertShaderTexArray, + igl::tests::data::shader::kShaderFunc, + igl::tests::data::shader::kD3D12SimpleFragShaderTexArray, + igl::tests::data::shader::kShaderFunc, + stages); } ASSERT_TRUE(stages != nullptr); diff --git a/src/igl/tests/TextureArrayFloat.cpp b/src/igl/tests/TextureArrayFloat.cpp index 02a42a996c..a7dbe6f00c 100644 --- a/src/igl/tests/TextureArrayFloat.cpp +++ b/src/igl/tests/TextureArrayFloat.cpp @@ -178,6 +178,13 @@ class TextureArrayFloatTest : public ::testing::Test { igl::tests::data::shader::kSimpleVertFunc, igl::tests::data::shader::kSimpleFragFunc, stages); + } else if (iglDev_->getBackendType() == BackendType::D3D12) { + util::createShaderStages(iglDev_, + 
igl::tests::data::shader::kD3D12SimpleVertShaderTex2dArray, + igl::tests::data::shader::kShaderFunc, + igl::tests::data::shader::kD3D12SimpleFragShaderTex2dArray, + igl::tests::data::shader::kShaderFunc, + stages); } ASSERT_TRUE(stages != nullptr); diff --git a/src/igl/tests/TextureCube.cpp b/src/igl/tests/TextureCube.cpp index b49fada132..6478945ab2 100644 --- a/src/igl/tests/TextureCube.cpp +++ b/src/igl/tests/TextureCube.cpp @@ -129,6 +129,13 @@ class TextureCubeTest : public ::testing::Test { igl::tests::data::shader::kVulkanSimpleFragShaderCube, igl::tests::data::shader::kShaderFunc, stages); + } else if (iglDev_->getBackendType() == BackendType::D3D12) { + util::createShaderStages(iglDev_, + igl::tests::data::shader::kD3D12SimpleVertShaderCube, + igl::tests::data::shader::kShaderFunc, + igl::tests::data::shader::kD3D12SimpleFragShaderCube, + igl::tests::data::shader::kShaderFunc, + stages); } else { ASSERT_TRUE(false); } diff --git a/src/igl/tests/data/ShaderData.h b/src/igl/tests/data/ShaderData.h index 895cb43b70..f1cfa61355 100644 --- a/src/igl/tests/data/ShaderData.h +++ b/src/igl/tests/data/ShaderData.h @@ -523,6 +523,33 @@ constexpr std::string_view kVulkanPushConstantFragShader = out_FragColor = tex * pushConstants.colorMultiplier; }); +// D3D12 HLSL push constant shaders +constexpr const char* kD3D12PushConstantVertShader = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +PSIn main(VSIn i) { + PSIn o; + o.position = i.position_in; + o.uv = i.uv_in; + return o; +} +)"; + +constexpr const char* kD3D12PushConstantFragShader = R"( +Texture2D inputImage : register(t0); +SamplerState samp0 : register(s0); + +cbuffer PushConstants : register(b2) { + float4 colorMultiplier; +}; + +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +float4 main(PSIn i) : SV_TARGET { + float4 tex = inputImage.Sample(samp0, i.uv); + return tex * 
colorMultiplier; +} +)"; + constexpr std::string_view kVulkanSimpleVertShaderTex2dArray = IGL_TO_STRING( layout(location = 0) in vec4 position_in; @@ -638,4 +665,217 @@ constexpr std::string_view kVulkanSimpleComputeShader = fOut[id] = fIn[id] * 2.0f; }); // clang-format on +//----------------------------------------------------------------------------- +// D3D12/HLSL Shaders +//----------------------------------------------------------------------------- + +// Simple D3D12 Shader with separate vertex and fragment functions +// This is used for ShaderLibrary tests where multiple entry points are in the same source +constexpr std::string_view kD3D12SimpleShader = + IGL_TO_STRING( + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + }; + + VSOut vertexShader(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + return output; + } + + Texture2D inputImage : register(t0); + SamplerState linearSampler : register(s0); + + float4 fragmentShader(VSOut input) : SV_TARGET { + return inputImage.Sample(linearSampler, input.uv); + } + ); + +// Simple D3D12 Vertex shader (standalone) +constexpr std::string_view kD3D12SimpleVertShader = + IGL_TO_STRING( + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + return output; + } + ); + +// Simple D3D12 Fragment shader (standalone) +constexpr std::string_view kD3D12SimpleFragShader = + IGL_TO_STRING( + struct PSIn { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + }; + + Texture2D inputImage : register(t0); + SamplerState linearSampler : register(s0); + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(linearSampler, input.uv); + } + ); + 
+// Simple D3D12 Compute shader +constexpr std::string_view kD3D12SimpleComputeShader = + IGL_TO_STRING( + RWStructuredBuffer floatsIn : register(u0); + RWStructuredBuffer floatsOut : register(u1); + + [numthreads(6, 1, 1)] + void doubleKernel(uint3 threadID : SV_DispatchThreadID) { + uint id = threadID.x; + floatsOut[id] = floatsIn[id] * 2.0; + } + ); + +// D3D12 Texture2DArray Vertex shader +constexpr std::string_view kD3D12SimpleVertShaderTexArray = + IGL_TO_STRING( + cbuffer VertexUniforms : register(b2) { + int layer; + }; + + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + nointerpolation uint layerOut : TEXCOORD1; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + output.layerOut = layer; + return output; + } + ); + +// D3D12 Texture2DArray Fragment shader +constexpr std::string_view kD3D12SimpleFragShaderTexArray = + IGL_TO_STRING( + Texture2DArray inputImage : register(t0); + SamplerState inputSampler : register(s0); + + struct PSIn { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + nointerpolation uint layerIn : TEXCOORD1; + }; + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(inputSampler, float3(input.uv, input.layerIn)); + } + ); + +// D3D12 TextureCube Vertex shader +constexpr std::string_view kD3D12SimpleVertShaderCube = + IGL_TO_STRING( + cbuffer VertexUniforms : register(b1) { + float4 view; + }; + + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float3 viewDir : TEXCOORD0; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.viewDir = view.xyz; + return output; + } + ); + +// D3D12 TextureCube Fragment shader +constexpr std::string_view kD3D12SimpleFragShaderCube = + IGL_TO_STRING( + TextureCube inputImage : 
register(t0); + SamplerState inputSampler : register(s0); + + struct PSIn { + float4 position : SV_POSITION; + float3 viewDir : TEXCOORD0; + }; + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(inputSampler, input.viewDir); + } + ); + +// D3D12 Texture2DArray Vertex shader +constexpr std::string_view kD3D12SimpleVertShaderTex2dArray = + IGL_TO_STRING( + cbuffer VertexUniforms : register(b2) { + int layer; + }; + + struct VSIn { + float4 position_in : POSITION; + float2 uv_in : TEXCOORD0; + }; + + struct VSOut { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + uint layer : TEXCOORD1; + }; + + VSOut main(VSIn input) { + VSOut output; + output.position = input.position_in; + output.uv = input.uv_in; + output.layer = uint(layer); + return output; + } + ); + +// D3D12 Texture2DArray Fragment shader +constexpr std::string_view kD3D12SimpleFragShaderTex2dArray = + IGL_TO_STRING( + Texture2DArray inputImage : register(t0); + SamplerState inputSampler : register(s0); + + struct PSIn { + float4 position : SV_POSITION; + float2 uv : TEXCOORD0; + uint layer : TEXCOORD1; + }; + + float4 main(PSIn input) : SV_TARGET { + return inputImage.Sample(inputSampler, float3(input.uv, input.layer)); + } + ); + +// clang-format on } // namespace igl::tests::data::shader diff --git a/src/igl/tests/main.cpp b/src/igl/tests/main.cpp new file mode 100644 index 0000000000..9bd4154570 --- /dev/null +++ b/src/igl/tests/main.cpp @@ -0,0 +1,38 @@ +/* + * Custom test entrypoint: initialize COM for D3D12 before running gtest. 
+ */ + +#include +#include +#include +#if defined(_WIN32) +#include +#include +#endif + +static void signalHandler(int signum) { + std::printf("CRASH: Signal %d caught\n", signum); + std::_Exit(signum); +} + +int main(int argc, char** argv) { + // Install basic signal handler for early crash diagnostics + std::signal(SIGSEGV, signalHandler); + + // Initialize COM in multithreaded mode for D3D12 usage (Windows only) +#if defined(_WIN32) + const HRESULT hr = CoInitializeEx(nullptr, COINIT_MULTITHREADED); + if (FAILED(hr)) { + std::printf("COM initialization failed: 0x%08X\n", static_cast(hr)); + return 1; + } +#endif + + ::testing::InitGoogleTest(&argc, argv); + const int result = RUN_ALL_TESTS(); + +#if defined(_WIN32) + CoUninitialize(); +#endif + return result; +} diff --git a/src/igl/tests/util/ArtifactUtils.cpp b/src/igl/tests/util/ArtifactUtils.cpp new file mode 100644 index 0000000000..c994c72098 --- /dev/null +++ b/src/igl/tests/util/ArtifactUtils.cpp @@ -0,0 +1,266 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#include "ArtifactUtils.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace igl::tests::util { + +namespace { + +constexpr std::array kInitialState = { + 0x6A09E667u, 0xBB67AE85u, 0x3C6EF372u, 0xA54FF53Au, + 0x510E527Fu, 0x9B05688Cu, 0x1F83D9ABu, 0x5BE0CD19u}; + +constexpr std::array kRoundConstants = { + 0x428A2F98u, 0x71374491u, 0xB5C0FBCFu, 0xE9B5DBA5u, 0x3956C25Bu, 0x59F111F1u, 0x923F82A4u, + 0xAB1C5ED5u, 0xD807AA98u, 0x12835B01u, 0x243185BEu, 0x550C7DC3u, 0x72BE5D74u, 0x80DEB1FEu, + 0x9BDC06A7u, 0xC19BF174u, 0xE49B69C1u, 0xEFBE4786u, 0x0FC19DC6u, 0x240CA1CCu, 0x2DE92C6Fu, + 0x4A7484AAu, 0x5CB0A9DCu, 0x76F988DAu, 0x983E5152u, 0xA831C66Du, 0xB00327C8u, 0xBF597FC7u, + 0xC6E00BF3u, 0xD5A79147u, 0x06CA6351u, 0x14292967u, 0x27B70A85u, 0x2E1B2138u, 0x4D2C6DFCu, + 0x53380D13u, 0x650A7354u, 0x766A0ABBu, 0x81C2C92Eu, 0x92722C85u, 0xA2BFE8A1u, 0xA81A664Bu, + 0xC24B8B70u, 0xC76C51A3u, 0xD192E819u, 0xD6990624u, 0xF40E3585u, 0x106AA070u, 0x19A4C116u, + 0x1E376C08u, 0x2748774Cu, 0x34B0BCB5u, 0x391C0CB3u, 0x4ED8AA4Au, 0x5B9CCA4Fu, 0x682E6FF3u, + 0x748F82EEu, 0x78A5636Fu, 0x84C87814u, 0x8CC70208u, 0x90BEFFFAu, 0xA4506CEBu, 0xBEF9A3F7u, + 0xC67178F2u}; + +inline std::uint32_t rotr(std::uint32_t value, std::uint32_t bits) { + return (value >> bits) | (value << (32u - bits)); +} + +class Sha256Context { + public: + Sha256Context() = default; + + void update(const std::uint8_t* data, std::size_t len) { + if (finalized_) { + throw std::logic_error("SHA256 update after finalization"); + } + + totalBits_ += static_cast(len) * 8u; + + while (len > 0) { + const auto space = 64u - bufferSize_; + const auto toCopy = std::min(len, space); + std::memcpy(buffer_.data() + bufferSize_, data, toCopy); + bufferSize_ += toCopy; + data += toCopy; + len -= toCopy; + + if (bufferSize_ == 64u) { + processBlock(buffer_.data()); + bufferSize_ = 0u; + } + } + } + + std::array finalize() { + if (!finalized_) { + 
finalizeInternal(); + } + return digest_; + } + + private: + void processBlock(const std::uint8_t* block) { + std::array w{}; + for (std::size_t i = 0; i != 16; ++i) { + const auto idx = i * 4; + w[i] = (static_cast(block[idx]) << 24u) | + (static_cast(block[idx + 1]) << 16u) | + (static_cast(block[idx + 2]) << 8u) | + static_cast(block[idx + 3]); + } + + for (std::size_t i = 16; i != 64; ++i) { + const auto s0 = rotr(w[i - 15], 7u) ^ rotr(w[i - 15], 18u) ^ (w[i - 15] >> 3u); + const auto s1 = rotr(w[i - 2], 17u) ^ rotr(w[i - 2], 19u) ^ (w[i - 2] >> 10u); + w[i] = w[i - 16] + s0 + w[i - 7] + s1; + } + + auto a = state_[0]; + auto b = state_[1]; + auto c = state_[2]; + auto d = state_[3]; + auto e = state_[4]; + auto f = state_[5]; + auto g = state_[6]; + auto h = state_[7]; + + for (std::size_t i = 0; i != 64; ++i) { + const auto S1 = rotr(e, 6u) ^ rotr(e, 11u) ^ rotr(e, 25u); + const auto ch = (e & f) ^ ((~e) & g); + const auto temp1 = h + S1 + ch + kRoundConstants[i] + w[i]; + const auto S0 = rotr(a, 2u) ^ rotr(a, 13u) ^ rotr(a, 22u); + const auto maj = (a & b) ^ (a & c) ^ (b & c); + const auto temp2 = S0 + maj; + + h = g; + g = f; + f = e; + e = d + temp1; + d = c; + c = b; + b = a; + a = temp1 + temp2; + } + + state_[0] += a; + state_[1] += b; + state_[2] += c; + state_[3] += d; + state_[4] += e; + state_[5] += f; + state_[6] += g; + state_[7] += h; + } + + void finalizeInternal() { + buffer_[bufferSize_] = 0x80u; + ++bufferSize_; + + if (bufferSize_ > 56u) { + std::fill(buffer_.begin() + bufferSize_, buffer_.end(), 0u); + processBlock(buffer_.data()); + bufferSize_ = 0u; + } + + std::fill(buffer_.begin() + bufferSize_, buffer_.begin() + 56u, 0u); + + for (int i = 0; i < 8; ++i) { + buffer_[56u + i] = static_cast((totalBits_ >> (56u - 8u * i)) & 0xFFu); + } + + processBlock(buffer_.data()); + + for (std::size_t i = 0; i != state_.size(); ++i) { + digest_[i * 4u + 0u] = static_cast((state_[i] >> 24u) & 0xFFu); + digest_[i * 4u + 1u] = static_cast((state_[i] >> 
16u) & 0xFFu); + digest_[i * 4u + 2u] = static_cast((state_[i] >> 8u) & 0xFFu); + digest_[i * 4u + 3u] = static_cast(state_[i] & 0xFFu); + } + + finalized_ = true; + } + + std::array state_ = kInitialState; + std::array buffer_{}; + std::array digest_{}; + std::uint64_t totalBits_ = 0u; + std::size_t bufferSize_ = 0u; + bool finalized_ = false; +}; + +} // namespace + +std::string currentBackend() { + return std::string(IGL_BACKEND_TYPE); +} + +std::filesystem::path artifactsRoot() { + if (const char* env = std::getenv("IGL_ARTIFACT_ROOT"); env && *env != '\0') { + return std::filesystem::path(env); + } + return std::filesystem::current_path() / "artifacts"; +} + +std::filesystem::path ensureArtifactDirectory(const std::string& relativeGroup, + const std::string& backend) { + std::filesystem::path base = artifactsRoot() / std::filesystem::path(relativeGroup) / backend; + std::filesystem::create_directories(base); + return base; +} + +ArtifactPaths makeArtifactPaths(const std::string& relativeGroup, + const std::string& backend, + const std::string& testName, + bool includeImage) { + ArtifactPaths paths; + auto base = ensureArtifactDirectory(relativeGroup, backend); + + paths.shaFile = base / (testName + ".sha256"); + + if (includeImage) { + auto imageDir = base / "640x360"; + std::filesystem::create_directories(imageDir); + paths.pngFile = imageDir / (testName + ".png"); + } + + return paths; +} + +void writeBinaryFile(const std::filesystem::path& path, std::span bytes) { + if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + std::ofstream out(path, std::ios::binary); + if (!out) { + throw std::runtime_error("Failed to open file for writing: " + path.string()); + } + out.write(reinterpret_cast(bytes.data()), + static_cast(bytes.size())); + if (!out) { + throw std::runtime_error("Failed to write all bytes to: " + path.string()); + } +} + +void writeTextFile(const std::filesystem::path& path, const std::string& text) { + 
if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + std::ofstream out(path, std::ios::binary); + if (!out) { + throw std::runtime_error("Failed to open file for writing: " + path.string()); + } + out << text; + if (!out) { + throw std::runtime_error("Failed to write text to: " + path.string()); + } +} + +std::string computeSha256(std::span bytes) { + Sha256Context ctx; + ctx.update(bytes.data(), bytes.size()); + const auto digest = ctx.finalize(); + + std::ostringstream oss; + oss << std::hex << std::setfill('0'); + for (auto byte : digest) { + oss << std::setw(2) << static_cast(byte); + } + return oss.str(); +} + +void writeSha256File(const std::filesystem::path& path, const std::string& hash) { + writeTextFile(path, hash + "\n"); +} + +void writePng(const std::filesystem::path& path, + const std::uint8_t* rgbaPixels, + std::uint32_t width, + std::uint32_t height) { + if (!path.parent_path().empty()) { + std::filesystem::create_directories(path.parent_path()); + } + if (stbi_write_png(path.string().c_str(), static_cast(width), static_cast(height), 4, + rgbaPixels, static_cast(width * 4u)) == 0) { + throw std::runtime_error("Failed to write PNG: " + path.string()); + } +} + +} // namespace igl::tests::util diff --git a/src/igl/tests/util/ArtifactUtils.h b/src/igl/tests/util/ArtifactUtils.h new file mode 100644 index 0000000000..4874e21e58 --- /dev/null +++ b/src/igl/tests/util/ArtifactUtils.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace igl::tests::util { + +struct ArtifactPaths { + std::filesystem::path shaFile; + std::filesystem::path pngFile; +}; + +std::string currentBackend(); + +std::filesystem::path artifactsRoot(); + +std::filesystem::path ensureArtifactDirectory(const std::string& relativeGroup, + const std::string& backend); + +ArtifactPaths makeArtifactPaths(const std::string& relativeGroup, + const std::string& backend, + const std::string& testName, + bool includeImage); + +void writeBinaryFile(const std::filesystem::path& path, std::span bytes); + +void writeTextFile(const std::filesystem::path& path, const std::string& text); + +std::string computeSha256(std::span bytes); + +void writeSha256File(const std::filesystem::path& path, const std::string& hash); + +void writePng(const std::filesystem::path& path, + const std::uint8_t* rgbaPixels, + std::uint32_t width, + std::uint32_t height); + +} // namespace igl::tests::util + diff --git a/src/igl/tests/util/Common.cpp b/src/igl/tests/util/Common.cpp index aa427e3e80..4f9b7b0b82 100644 --- a/src/igl/tests/util/Common.cpp +++ b/src/igl/tests/util/Common.cpp @@ -167,6 +167,25 @@ void createSimpleShaderStages(const std::shared_ptr& dev, fragShader, std::string(igl::tests::data::shader::kShaderFunc), stages); + } else if (backendVersion.flavor == igl::BackendFlavor::D3D12) { + // Minimal HLSL equivalent used for D3D12 tests + const char* vsHlsl = R"( +struct VSIn { float4 position_in : POSITION; float2 uv_in : TEXCOORD0; }; +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +PSIn main(VSIn i) { PSIn o; o.position = i.position_in; o.uv = i.uv_in; return o; } +)"; + const char* psHlsl = R"( +Texture2D inputImage : register(t0); +SamplerState samp0 : register(s0); +struct PSIn { float4 position : SV_POSITION; float2 uv : TEXCOORD0; }; +float4 main(PSIn i) : SV_TARGET { return inputImage.Sample(samp0, i.uv); } +)"; + createShaderStages(dev, + 
vsHlsl, + std::string("main"), + psHlsl, + std::string("main"), + stages); } else { ASSERT_TRUE(0); } diff --git a/src/igl/tests/util/Common.h b/src/igl/tests/util/Common.h index 510d930956..76169230d0 100644 --- a/src/igl/tests/util/Common.h +++ b/src/igl/tests/util/Common.h @@ -22,6 +22,7 @@ namespace igl::tests::util { constexpr std::string_view kBackendOgl("ogl"); constexpr std::string_view kBackendMtl("metal"); constexpr std::string_view kBackendVul("vulkan"); +constexpr std::string_view kBackendD3D12("d3d12"); // Creates an IGL device and a command queue void createDeviceAndQueue(std::shared_ptr& dev, std::shared_ptr& cq); diff --git a/src/igl/tests/util/TestDevice.cpp b/src/igl/tests/util/TestDevice.cpp index 854a0392ad..c465045ac0 100644 --- a/src/igl/tests/util/TestDevice.cpp +++ b/src/igl/tests/util/TestDevice.cpp @@ -37,6 +37,8 @@ std::shared_ptr createTestDevice() { return device::createTestDevice(::igl::BackendType::Metal); } else if (backend == "vulkan") { return device::createTestDevice(::igl::BackendType::Vulkan); + } else if (backend == "d3d12") { + return device::createTestDevice(::igl::BackendType::D3D12); // @fb-only // @fb-only } else { diff --git a/src/igl/tests/util/TextureFormatTestBase.cpp b/src/igl/tests/util/TextureFormatTestBase.cpp index de57c9e530..fcfcb36528 100644 --- a/src/igl/tests/util/TextureFormatTestBase.cpp +++ b/src/igl/tests/util/TextureFormatTestBase.cpp @@ -16,13 +16,35 @@ #include #include +#if IGL_PLATFORM_WINDOWS +#include +#include +#endif + namespace igl::tests::util { +#if IGL_PLATFORM_WINDOWS +namespace { +struct SehException : std::exception { + explicit SehException(unsigned int c) : code(c) {} + const char* what() const noexcept override { return "Structured exception"; } + unsigned int code; +}; + +void __cdecl sehTranslator(unsigned int code, EXCEPTION_POINTERS*) { + throw SehException(code); +} +} // namespace +#endif + #define OFFSCREEN_TEX_WIDTH 2 #define OFFSCREEN_TEX_HEIGHT 2 void 
TextureFormatTestBase::SetUp() { setDebugBreakEnabled(false); +#if IGL_PLATFORM_WINDOWS + _set_se_translator(sehTranslator); +#endif util::createDeviceAndQueue(iglDev_, cmdQueue_); ASSERT_TRUE(iglDev_ != nullptr); @@ -199,35 +221,54 @@ void TextureFormatTestBase::render(std::shared_ptr sampledTexture, Dependencies dep; dep.textures[0] = sampledTexture.get(); - Result result; - auto cmds = cmdBuf->createRenderCommandEncoder(renderPass_, framebuffer, dep, &result); - ASSERT_TRUE(result.isOk()); - cmds->bindVertexBuffer(data::shader::kSimplePosIndex, *vb_); - cmds->bindVertexBuffer(data::shader::kSimpleUvIndex, *uv_); - - // Create createFramebuffer fills in proper texture formats and shader stages in - // renderPipelineDesc_ - - auto pipelineState = iglDev_->createRenderPipeline(renderPipelineDesc_, &ret); - ASSERT_EQ(ret.code, Result::Code::Ok) << ret.message; - ASSERT_TRUE(pipelineState != nullptr); - - cmds->bindRenderPipelineState(pipelineState); - - cmds->bindTexture(textureUnit_, BindTarget::kFragment, sampledTexture.get()); - // Choose appropriate sampler. - cmds->bindSamplerState(textureUnit_, - BindTarget::kFragment, - (linearSampling ? 
linearSampler_ : nearestSampler_).get()); - - cmds->bindIndexBuffer(*ib_, IndexFormat::UInt16); - cmds->drawIndexed(6); - - cmds->endEncoding(); - - cmdQueue_->submit(*cmdBuf); - - cmdBuf->waitUntilCompleted(); + try { + Result result; + auto cmds = cmdBuf->createRenderCommandEncoder(renderPass_, framebuffer, dep, &result); + ASSERT_TRUE(result.isOk()); + cmds->bindVertexBuffer(data::shader::kSimplePosIndex, *vb_); + cmds->bindVertexBuffer(data::shader::kSimpleUvIndex, *uv_); + + // Create createFramebuffer fills in proper texture formats and shader stages in + // renderPipelineDesc_ + + auto pipelineState = iglDev_->createRenderPipeline(renderPipelineDesc_, &ret); + ASSERT_EQ(ret.code, Result::Code::Ok) << ret.message; + ASSERT_TRUE(pipelineState != nullptr); + + cmds->bindRenderPipelineState(pipelineState); + + cmds->bindTexture(textureUnit_, BindTarget::kFragment, sampledTexture.get()); + // Choose appropriate sampler. + cmds->bindSamplerState(textureUnit_, + BindTarget::kFragment, + (linearSampling ? 
linearSampler_ : nearestSampler_).get()); + + cmds->bindIndexBuffer(*ib_, IndexFormat::UInt16); + IGL_LOG_INFO("TextureFormatTestBase::render issuing draw for %s\n", testProperties.name); + cmds->drawIndexed(6); + + cmds->endEncoding(); + IGL_LOG_INFO("TextureFormatTestBase::render finished encoding for %s\n", testProperties.name); + + cmdQueue_->submit(*cmdBuf); + IGL_LOG_INFO("TextureFormatTestBase::render submitted work for %s\n", testProperties.name); + + cmdBuf->waitUntilCompleted(); +#if IGL_DEBUG + IGL_LOG_INFO("TextureFormatTestBase::render completed for format %s\n", testProperties.name); +#else + (void)testProperties; +#endif +#if IGL_PLATFORM_WINDOWS + } catch (const SehException& seh) { + IGL_LOG_ERROR("TextureFormatTestBase::render caught SEH exception 0x%08X", seh.code); + ADD_FAILURE() << "TextureFormatTestBase::render caught SEH exception 0x" << std::hex << seh.code; + return; +#endif + } catch (const std::exception& ex) { + ADD_FAILURE() << "TextureFormatTestBase::render threw std::exception: " << ex.what(); + return; + } } std::pair TextureFormatTestBase::checkSupport( diff --git a/src/igl/tests/util/TextureValidationHelpers.h b/src/igl/tests/util/TextureValidationHelpers.h index 83134a9f97..cf922a441a 100644 --- a/src/igl/tests/util/TextureValidationHelpers.h +++ b/src/igl/tests/util/TextureValidationHelpers.h @@ -130,9 +130,10 @@ inline void validateTextureRange(IDevice& device, fb->copyBytesColorAttachment(cmdQueue, 0, actualData.data(), range); if (!isRenderTarget && (device.getBackendType() == igl::BackendType::Metal || - device.getBackendType() == igl::BackendType::Vulkan)) { - // The Vulkan and Metal implementations of copyBytesColorAttachment flip the returned image - // vertically. 
This is the desired behavior for render targets, but for non-render target + device.getBackendType() == igl::BackendType::Vulkan || + device.getBackendType() == igl::BackendType::D3D12)) { + // The Vulkan, Metal, and D3D12 implementations of copyBytesColorAttachment flip the returned + // image vertically. This is the desired behavior for render targets, but for non-render target // textures, we want the unflipped data. This flips the output image again to get the unmodified // data. std::vector tmpData; diff --git a/src/igl/tests/util/device/TestDevice.cpp b/src/igl/tests/util/device/TestDevice.cpp index 48979d2bc3..dd0e5ccf43 100644 --- a/src/igl/tests/util/device/TestDevice.cpp +++ b/src/igl/tests/util/device/TestDevice.cpp @@ -27,6 +27,9 @@ #if IGL_VULKAN_SUPPORTED #include #endif +#if IGL_D3D12_SUPPORTED +#include +#endif // @fb-only // @fb-only // @fb-only @@ -45,6 +48,8 @@ bool isBackendTypeSupported(BackendType backendType) { return IGL_OPENGL_SUPPORTED; case ::igl::BackendType::Vulkan: return IGL_VULKAN_SUPPORTED; + case ::igl::BackendType::D3D12: + return IGL_D3D12_SUPPORTED; // @fb-only // @fb-only } @@ -71,6 +76,20 @@ std::unique_ptr createTestDevice(BackendType backendType, const TestDev return vulkan::createTestDevice(config.enableVulkanValidationLayers); #else return nullptr; +#endif + } + if (backendType == ::igl::BackendType::D3D12) { +#if IGL_D3D12_SUPPORTED + IGL_LOG_INFO("[Tests] Creating D3D12 test device (debug layer: enabled)\n"); + auto dev = d3d12::createTestDevice(true); + if (!dev) { + IGL_LOG_ERROR("[Tests] D3D12 test device creation failed\n"); + } else { + IGL_LOG_INFO("[Tests] D3D12 test device created OK\n"); + } + return dev; +#else + return nullptr; #endif } // @fb-only diff --git a/src/igl/tests/util/device/TestDevice.h b/src/igl/tests/util/device/TestDevice.h index 879516cad6..4fec188d05 100644 --- a/src/igl/tests/util/device/TestDevice.h +++ b/src/igl/tests/util/device/TestDevice.h @@ -32,6 +32,12 @@ #define IGL_VULKAN_SUPPORTED 
0 #endif +#if IGL_PLATFORM_WINDOWS && IGL_BACKEND_ENABLE_D3D12 && !defined(IGL_UNIT_TESTS_NO_D3D12) +#define IGL_D3D12_SUPPORTED 1 +#else +#define IGL_D3D12_SUPPORTED 0 +#endif + namespace igl::tests::util::device { struct TestDeviceConfig { @@ -44,7 +50,9 @@ struct TestDeviceConfig { */ bool isBackendTypeSupported(BackendType backendType); -#if IGL_OPENGL_SUPPORTED +#if IGL_D3D12_SUPPORTED +constexpr BackendType kDefaultBackendType = BackendType::D3D12; +#elif IGL_OPENGL_SUPPORTED constexpr BackendType kDefaultBackendType = BackendType::OpenGL; #elif IGL_VULKAN_SUPPORTED constexpr BackendType kDefaultBackendType = BackendType::Vulkan; diff --git a/src/igl/tests/util/device/d3d12/TestDevice.cpp b/src/igl/tests/util/device/d3d12/TestDevice.cpp new file mode 100644 index 0000000000..7b1cb7124b --- /dev/null +++ b/src/igl/tests/util/device/d3d12/TestDevice.cpp @@ -0,0 +1,28 @@ +/* Minimal D3D12 test device factory using a headless context. */ + +#include +#include +#include +#include +#include + +#include "TestDevice.h" + +namespace igl::tests::util::device::d3d12 { + +std::unique_ptr createTestDevice(bool enableDebugLayer) { + IGL_LOG_INFO("[Tests] D3D12 test device requested (debug layer: %s)\n", + enableDebugLayer ? "enabled" : "disabled"); + + // Enabling the debug layer happens inside D3D12Context::createDevice() when available. + // Build a headless context (no swapchain) suitable for unit tests. 
+ auto ctx = std::make_unique(); + auto res = ctx->initializeHeadless(256, 256); + if (res.code != Result::Code::Ok) { + IGL_LOG_ERROR("[Tests] D3D12 headless context init failed: %s\n", res.message.c_str()); + return nullptr; + } + return std::make_unique(std::move(ctx)); +} + +} // namespace igl::tests::util::device::d3d12 diff --git a/src/igl/tests/util/device/d3d12/TestDevice.h b/src/igl/tests/util/device/d3d12/TestDevice.h new file mode 100644 index 0000000000..edc9b6ca09 --- /dev/null +++ b/src/igl/tests/util/device/d3d12/TestDevice.h @@ -0,0 +1,21 @@ +/* + * Copyright (c) Meta Platforms, Inc. and affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +#pragma once + +#include +#include + +namespace igl::tests::util::device::d3d12 { + +/** + * Create and return an igl::d3d12::Device that is suitable for running tests against. + * This creates a headless device without a swapchain, suitable for unit testing. 
+ */ +std::unique_ptr createTestDevice(bool enableDebugLayer = true); + +} // namespace igl::tests::util::device::d3d12 diff --git a/src/igl/vulkan/Device.cpp b/src/igl/vulkan/Device.cpp index 7f26f23a36..7b00f9e6da 100644 --- a/src/igl/vulkan/Device.cpp +++ b/src/igl/vulkan/Device.cpp @@ -782,6 +782,27 @@ bool Device::getFeatureLimitsInternal(DeviceFeatureLimits featureLimits, size_t& case DeviceFeatureLimits::MaxBindBytesBytes: result = 0; return true; + case DeviceFeatureLimits::MaxTextureDimension3D: + result = limits.maxImageDimension3D; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeX: + result = limits.maxComputeWorkGroupSize[0]; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeY: + result = limits.maxComputeWorkGroupSize[1]; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupSizeZ: + result = limits.maxComputeWorkGroupSize[2]; + return true; + case DeviceFeatureLimits::MaxComputeWorkGroupInvocations: + result = limits.maxComputeWorkGroupInvocations; + return true; + case DeviceFeatureLimits::MaxVertexInputAttributes: + result = limits.maxVertexInputAttributes; + return true; + case DeviceFeatureLimits::MaxColorAttachments: + result = limits.maxColorAttachments; + return true; } IGL_DEBUG_ABORT("DeviceFeatureLimits value not handled: %d", (int)featureLimits); diff --git a/src/igl/vulkan/VulkanFeatures.cpp b/src/igl/vulkan/VulkanFeatures.cpp index 8ba5909077..ec0ec644d1 100644 --- a/src/igl/vulkan/VulkanFeatures.cpp +++ b/src/igl/vulkan/VulkanFeatures.cpp @@ -124,12 +124,12 @@ VulkanFeatures::VulkanFeatures(VulkanContextConfig config) noexcept : .taskShader = VK_TRUE, .meshShader = VK_TRUE, }), - config(config) { + config_(config) { extensions_.resize(kNumberOfExtensionTypes); enabledExtensions_.resize(kNumberOfExtensionTypes); // All the above get assembled into a feature chain - assembleFeatureChain(config); + assembleFeatureChain(config_); } void VulkanFeatures::populateWithAvailablePhysicalDeviceFeatures( @@ 
-185,7 +185,7 @@ Result VulkanFeatures::checkSelectedFeatures( #define ENABLE_FEATURE_1_1_EXT(requestedFeatureStruct, availableFeatureStruct, feature) \ ENABLE_VULKAN_FEATURE(requestedFeatureStruct, availableFeatureStruct, feature, "1.1 EXT") - if (config.enableDescriptorIndexing) { + if (config_.enableDescriptorIndexing) { ENABLE_FEATURE_1_1_EXT(featuresDescriptorIndexing, availableFeatures.featuresDescriptorIndexing, shaderSampledImageArrayNonUniformIndexing) @@ -250,7 +250,7 @@ Result VulkanFeatures::checkSelectedFeatures( return Result{}; } -void VulkanFeatures::assembleFeatureChain(const VulkanContextConfig& contextConfig) noexcept { +void VulkanFeatures::assembleFeatureChain(const VulkanContextConfig& config) noexcept { // Versions 1.0 and 1.1 are always present // Reset all pNext pointers. We might be copying the chain from another VulkanFeatures object, @@ -310,7 +310,7 @@ void VulkanFeatures::assembleFeatureChain(const VulkanContextConf if (hasExtension(VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME)) { ivkAddNext(&vkPhysicalDeviceFeatures2, &featuresUniformBufferStandardLayout); } - if (contextConfig.enableMultiviewPerViewViewports) { + if (config.enableMultiviewPerViewViewports) { if (hasExtension(VK_QCOM_MULTIVIEW_PER_VIEW_VIEWPORTS_EXTENSION_NAME)) { ivkAddNext(&vkPhysicalDeviceFeatures2, &featuresMultiviewPerViewViewports); } else { @@ -328,7 +328,7 @@ VulkanFeatures& VulkanFeatures::operator=(const VulkanFeatures& other) noexcept } const bool sameConfiguration = - config.enableDescriptorIndexing == other.config.enableDescriptorIndexing; + config_.enableDescriptorIndexing == other.config_.enableDescriptorIndexing; if (!sameConfiguration) { return *this; } @@ -357,7 +357,7 @@ VulkanFeatures& VulkanFeatures::operator=(const VulkanFeatures& other) noexcept enabledExtensions_ = other.enabledExtensions_; extensionProps_ = other.extensionProps_; - assembleFeatureChain(config); + assembleFeatureChain(config_); return *this; } @@ -422,7 
+422,7 @@ bool VulkanFeatures::enable(const char* extensionName, ExtensionType extensionTy return false; } -void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& contextConfig) { +void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& config) { enable(VK_KHR_SURFACE_EXTENSION_NAME, ExtensionType::Instance); enable(VK_EXT_DEBUG_UTILS_EXTENSION_NAME, ExtensionType::Instance); #if IGL_PLATFORM_WINDOWS @@ -443,7 +443,7 @@ void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& c #endif // IGL_PLATFORM_MACOSX #if !IGL_PLATFORM_ANDROID - if (contextConfig.enableValidation) { + if (config.enableValidation) { enable(VK_EXT_VALIDATION_FEATURES_EXTENSION_NAME, ExtensionType::Instance); } #endif // !IGL_PLATFORM_ANDROID @@ -454,12 +454,12 @@ void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& c has_VK_EXT_headless_surface = enable(VK_EXT_HEADLESS_SURFACE_EXTENSION_NAME, ExtensionType::Instance); - if (contextConfig.headless) { + if (config.headless) { if (!has_VK_EXT_headless_surface) { IGL_LOG_ERROR("VK_EXT_headless_surface extension not supported\n"); } } - if (contextConfig.swapChainColorSpace != igl::ColorSpace::SRGB_NONLINEAR) { + if (config.swapChainColorSpace != igl::ColorSpace::SRGB_NONLINEAR) { const bool enabledExtension = enable(VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME, ExtensionType::Instance); if (!enabledExtension) { @@ -468,7 +468,7 @@ void VulkanFeatures::enableCommonInstanceExtensions(const VulkanContextConfig& c } } -void VulkanFeatures::enableCommonDeviceExtensions(const VulkanContextConfig& contextConfig) { +void VulkanFeatures::enableCommonDeviceExtensions(const VulkanContextConfig& config) { enable(VK_KHR_DRIVER_PROPERTIES_EXTENSION_NAME, ExtensionType::Device); enable(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, ExtensionType::Device); enable(VK_KHR_SWAPCHAIN_EXTENSION_NAME, ExtensionType::Device); @@ -531,7 +531,7 @@ void 
VulkanFeatures::enableCommonDeviceExtensions(const VulkanContextConfig& con has_VK_EXT_fragment_density_map = enable(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME, ExtensionType::Device); - if (contextConfig.enableMultiviewPerViewViewports) { + if (config.enableMultiviewPerViewViewports) { has_VK_QCOM_multiview_per_view_viewports = enable(VK_QCOM_MULTIVIEW_PER_VIEW_VIEWPORTS_EXTENSION_NAME, ExtensionType::Device); IGL_SOFT_ASSERT(has_VK_QCOM_multiview_per_view_viewports, diff --git a/src/igl/vulkan/VulkanFeatures.h index 6743c34bbf..5ee3629733 100644 --- a/src/igl/vulkan/VulkanFeatures.h +++ b/src/igl/vulkan/VulkanFeatures.h @@ -97,15 +97,15 @@ class VulkanFeatures final { /// @param extensionType The type of the extensions /// @param validationEnabled Flag that informs the class whether the Validation Layer is /// enabled or not. - void enableCommonInstanceExtensions(const VulkanContextConfig& contextConfig); - void enableCommonDeviceExtensions(const VulkanContextConfig& contextConfig); + void enableCommonInstanceExtensions(const VulkanContextConfig& config); + void enableCommonDeviceExtensions(const VulkanContextConfig& config); public: friend class Device; friend class VulkanContext; // A copy of the config used by the VulkanContext - VulkanContextConfig config{}; + VulkanContextConfig config_{}; // NOLINTBEGIN(readability-identifier-naming) bool has_VK_EXT_descriptor_indexing = false; // promoted to Vulkan 1.2 @@ -139,7 +139,7 @@ class VulkanFeatures final { /// @brief Assembles the feature chain for the VkPhysicalDeviceFeatures2 structure by connecting /// the existing/required feature structures and their pNext chain. 
- void assembleFeatureChain(const VulkanContextConfig& contextConfig) noexcept; + void assembleFeatureChain(const VulkanContextConfig& config) noexcept; bool hasExtension(const char* ext) const; /// @brief Enables the extension with name `extensionName` of the type `extensionType` if the diff --git a/third-party/bootstrap-deps.json b/third-party/bootstrap-deps.json index 194729043a..c9f3094552 100644 --- a/third-party/bootstrap-deps.json +++ b/third-party/bootstrap-deps.json @@ -111,19 +111,27 @@ "revision": "v1.91.2" } }, -{ - "name": "volk", - "source": { - "type": "git", - "url": "https://github.com/zeux/volk", - "revision": "1.4.304" - } -}, -{ - "name": "vma", - "source": { - "type": "git", - "url": "https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git", +{ + "name": "volk", + "source": { + "type": "git", + "url": "https://github.com/zeux/volk", + "revision": "1.4.304" + } +}, +{ + "name": "DirectX-Headers", + "source": { + "type": "git", + "url": "https://github.com/microsoft/DirectX-Headers.git", + "revision": "v1.614.0" + } +}, +{ + "name": "vma", + "source": { + "type": "git", + "url": "https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git", "revision": "v3.2.0" } },