From 6e87f1ee86fc6647683c40c9ff423da5e5a5d216 Mon Sep 17 00:00:00 2001 From: Xaver Riedel Date: Sun, 30 Nov 2025 12:41:43 +0100 Subject: [PATCH 1/2] Shader Execution Reordering --- gpu/pipelines/GeneralPipelineDisk.cu | 38 ++++++++++++++++-------- gpu/pipelines/GeneralPipelineLine.cu | 38 ++++++++++++++++-------- gpu/pipelines/GeneralPipelineTriangle.cu | 36 ++++++++++++++-------- 3 files changed, 74 insertions(+), 38 deletions(-) diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index 8bc70c7..f5a3d1e 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -172,21 +172,33 @@ extern "C" __global__ void __raygen__() { // the values we store the PRD pointer in: uint32_t u0, u1; packPointer((void *)&prd, u0, u1); + unsigned int hintBitLength = 2; while (continueRay(launchParams, prd)) { - optixTrace(launchParams.traversable, // traversable GAS - make_float3(prd.pos[0], prd.pos[1], prd.pos[2]), // origin - make_float3(prd.dir[0], prd.dir[1], prd.dir[2]), // direction - 1e-4f, // tmin - 1e20f, // tmax - 0.0f, // rayTime - OptixVisibilityMask(255), - OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, - 0, // SBT offset - 1, // SBT stride - 0, // missSBTIndex - u0, u1); // Payload - prd.totalCount = 0; // Reset PerRayData + optixTraverse(launchParams.traversable, // traversable GAS + make_float3(prd.pos[0], prd.pos[1], prd.pos[2]), // origin + make_float3(prd.dir[0], prd.dir[1], prd.dir[2]), // direction + 1e-4f, // tmin + 1e20f, // tmax + 0.0f, // rayTime + OptixVisibilityMask(255), + OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, + 0, // SBT offset + 1, // SBT stride + 0, // missSBTIndex + u0, u1); // Payload + unsigned int hint = 0; + if (prd.rayWeight < launchParams.rayWeightThreshold || prd.energy < 0.f) { + hint |= (1 << 0); + } + if (optixHitObjectIsHit()) { + const HitSBTDataDisk *hitData = reinterpret_cast( + optixHitObjectGetSbtDataPointer()); + hint |= hitData->base.isBoundary << 1; + } + optixReorder(hint, hintBitLength); + optixInvoke(u0, u1); + prd.totalCount = 0; // Reset PerRayData prd.numReflections++; } } diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index ecc45ea..ddb97db 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -114,21 +114,33 @@ extern "C" __global__ void __raygen__() { // the values we store the PRD pointer in: uint32_t u0, u1; packPointer((void *)&prd, u0, u1); + unsigned int hintBitLength = 2; while (continueRay(launchParams, prd)) { - optixTrace(launchParams.traversable, // traversable GAS - make_float3(prd.pos[0], prd.pos[1], prd.pos[2]), // origin - make_float3(prd.dir[0], prd.dir[1], prd.dir[2]), // direction - 1e-4f, // tmin - 1e20f, // tmax - 0.0f, // rayTime - OptixVisibilityMask(255), - OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, - 0, // SBT offset - 1, // SBT stride - 0, // missSBTIndex - u0, u1); // Payload - prd.totalCount = 0; // Reset PerRayData + optixTraverse(launchParams.traversable, // traversable GAS + make_float3(prd.pos[0], prd.pos[1], prd.pos[2]), // origin + make_float3(prd.dir[0], prd.dir[1], prd.dir[2]), // direction + 1e-4f, // tmin + 1e20f, // tmax + 0.0f, // rayTime + OptixVisibilityMask(255), + OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, + 0, // SBT offset + 1, // SBT stride + 0, // missSBTIndex + u0, u1); // Payload + unsigned int hint = 0; + if (prd.rayWeight < launchParams.rayWeightThreshold || prd.energy < 0.f) { + hint |= (1 << 0); + } + if (optixHitObjectIsHit()) { + const HitSBTDataLine *hitData = reinterpret_cast( + optixHitObjectGetSbtDataPointer()); + hint |= hitData->base.isBoundary << 1; + } + optixReorder(hint, hintBitLength); + optixInvoke(u0, u1); + prd.totalCount = 0; // Reset PerRayData prd.numReflections++; } } diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 3fd166a..172c3e9 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -84,20 +84,32 @@ extern "C" __global__ void __raygen__() { // the values we store the PRD pointer in: uint32_t u0, u1; packPointer((void *)&prd, u0, u1); + unsigned int hintBitLength = 2; while (continueRay(launchParams, prd)) { - optixTrace(launchParams.traversable, // traversable GAS - make_float3(prd.pos[0], prd.pos[1], prd.pos[2]), // origin - make_float3(prd.dir[0], prd.dir[1], prd.dir[2]), // direction - 1e-4f, // tmin - 1e20f, // tmax - 0.0f, // rayTime - OptixVisibilityMask(255), - OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, - 0, // SBT offset - 1, // SBT stride - 0, // missSBTIndex - u0, u1); // Payload + optixTraverse(launchParams.traversable, // traversable GAS + make_float3(prd.pos[0], prd.pos[1], prd.pos[2]), // origin + make_float3(prd.dir[0], prd.dir[1], prd.dir[2]), // direction + 1e-4f, // tmin + 1e20f, // tmax + 0.0f, // rayTime + OptixVisibilityMask(255), + OPTIX_RAY_FLAG_DISABLE_ANYHIT, // OPTIX_RAY_FLAG_NONE, + 0, // SBT offset + 1, // SBT stride + 0, // missSBTIndex + u0, u1); // Payload + unsigned int hint = 0; + if (prd.rayWeight < launchParams.rayWeightThreshold || prd.energy < 0.f) { + hint |= (1 << 0); + } + if (optixHitObjectIsHit()) { + const HitSBTDataDisk *hitData = reinterpret_cast( + optixHitObjectGetSbtDataPointer()); + hint |= hitData->base.isBoundary << 1; + } + optixReorder(hint, hintBitLength); + optixInvoke(u0, u1); prd.numReflections++; #ifdef COUNT_RAYS int *counter = reinterpret_cast(launchParams.customData); From 2f466927ac8222936340a1958e28fb8c006b6078 Mon Sep 17 00:00:00 2001 From: Xaver Riedel Date: Sun, 30 Nov 2025 13:33:35 +0100 Subject: [PATCH 2/2] fix sbt pointer type --- gpu/pipelines/GeneralPipelineTriangle.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 172c3e9..7e293ff 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -104,7 +104,7 @@ extern "C" __global__ void __raygen__() { hint |= (1 << 0); } if (optixHitObjectIsHit()) { - const HitSBTDataDisk *hitData = reinterpret_cast( + const HitSBTDataTriangle *hitData = reinterpret_cast( optixHitObjectGetSbtDataPointer()); hint |= hitData->base.isBoundary << 1; }