From f0d8e4d1d41e48d8e5fc953b5642f19d4437728a Mon Sep 17 00:00:00 2001 From: reiter Date: Mon, 24 Nov 2025 16:54:33 +0100 Subject: [PATCH 1/2] Add check to free CudaBuffer and fix missing frees --- CMakeLists.txt | 2 +- cmake/generate_ptx.cmake | 1 + gpu/include/raygDiskGeometry.hpp | 4 ++- gpu/include/raygLineGeometry.hpp | 11 +++--- gpu/include/raygPerRayData.hpp | 5 ++- gpu/include/raygRNG.hpp | 43 ------------------------ gpu/include/raygReflection.hpp | 4 +-- gpu/include/raygTrace.hpp | 4 +++ gpu/include/raygTriangleGeometry.hpp | 3 +- gpu/pipelines/GeneralPipelineDisk.cu | 1 - gpu/pipelines/GeneralPipelineLine.cu | 1 - gpu/pipelines/GeneralPipelineTriangle.cu | 1 - include/viennaray/rayTraceKernel.hpp | 12 +++---- include/viennaray/rayUtil.hpp | 25 ++++++-------- 14 files changed, 39 insertions(+), 78 deletions(-) delete mode 100644 gpu/include/raygRNG.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 40db018..bf6347f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -92,7 +92,7 @@ include("cmake/cpm.cmake") CPMAddPackage( NAME ViennaCore - VERSION 1.6.3 + VERSION 1.7.0 GIT_REPOSITORY "https://github.com/ViennaTools/ViennaCore" OPTIONS "VIENNACORE_USE_GPU ${VIENNARAY_USE_GPU}") diff --git a/cmake/generate_ptx.cmake b/cmake/generate_ptx.cmake index b399551..e57af19 100644 --- a/cmake/generate_ptx.cmake +++ b/cmake/generate_ptx.cmake @@ -50,6 +50,7 @@ function(generate_kernel generated_files) cuda_include_directories(${ViennaCore_SOURCE_DIR}/include/viennacore) cuda_include_directories(${VIENNARAY_GPU_INCLUDE}) cuda_include_directories(${OptiX_INCLUDE_DIR}) + add_compile_definitions(VIENNACORE_COMPILE_GPU) cuda_compile_ptx(generated_ptx_files ${cu_source_files} ${cmake_options} ${options}) diff --git a/gpu/include/raygDiskGeometry.hpp b/gpu/include/raygDiskGeometry.hpp index 71d4e9b..363bb8b 100644 --- a/gpu/include/raygDiskGeometry.hpp +++ b/gpu/include/raygDiskGeometry.hpp @@ -164,7 +164,7 @@ template struct DiskGeometry { CudaBuffer compactedSizeBuffer; compactedSizeBuffer.alloc(sizeof(uint64_t)); - OptixAccelEmitDesc emitDesc; + OptixAccelEmitDesc emitDesc = {}; emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; emitDesc.result = compactedSizeBuffer.dPointer(); @@ -194,6 +194,8 @@ template struct DiskGeometry { outputBuffer.free(); // << the UNcompacted, temporary output buffer tempBuffer.free(); compactedSizeBuffer.free(); + d_aabbBuffer.free(); + d_aabbBoundaryBuffer.free(); launchParams.traversable = asHandle; } diff --git a/gpu/include/raygLineGeometry.hpp b/gpu/include/raygLineGeometry.hpp index 002bd7d..d9055e1 100644 --- a/gpu/include/raygLineGeometry.hpp +++ b/gpu/include/raygLineGeometry.hpp @@ -64,9 +64,9 @@ struct LineGeometry { } // Send AABB boxes to GPU - CudaBuffer d_aabbBuffer; - d_aabbBuffer.allocUpload(aabb); - CUdeviceptr d_aabb = d_aabbBuffer.dPointer(); + CudaBuffer aabbBuffer; + aabbBuffer.allocUpload(aabb); + CUdeviceptr d_aabb = aabbBuffer.dPointer(); // line inputs lineInput[0] = {}; @@ -141,7 +141,7 @@ struct LineGeometry { CudaBuffer compactedSizeBuffer; compactedSizeBuffer.alloc(sizeof(uint64_t)); - OptixAccelEmitDesc emitDesc; + OptixAccelEmitDesc emitDesc = {}; emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; emitDesc.result = compactedSizeBuffer.dPointer(); @@ -171,6 +171,8 @@ struct LineGeometry { outputBuffer.free(); // << the UNcompacted, temporary output buffer tempBuffer.free(); compactedSizeBuffer.free(); + aabbBuffer.free(); + d_aabbBoundaryBuffer.free(); launchParams.traversable = asHandle; } @@ -206,6 +208,7 @@ struct LineGeometry { void freeBuffers() { geometryNodesBuffer.free(); geometryLinesBuffer.free(); + geometryNormalsBuffer.free(); boundaryNodesBuffer.free(); boundaryLinesBuffer.free(); asBuffer.free(); diff --git a/gpu/include/raygPerRayData.hpp b/gpu/include/raygPerRayData.hpp index e27d9a7..9b25828 100644 --- a/gpu/include/raygPerRayData.hpp +++ b/gpu/include/raygPerRayData.hpp @@ -2,8 +2,7 @@ #include -#include "raygRNG.hpp" - +#include #include #include @@ -27,7 +26,7 @@ struct PerRayData { float load = 0.f; // RNG - RNGState RNGstate; + CudaRNG RNGstate; // Hit data unsigned int numBoundaryHits = 0; diff --git a/gpu/include/raygRNG.hpp b/gpu/include/raygRNG.hpp deleted file mode 100644 index 5c28637..0000000 --- a/gpu/include/raygRNG.hpp +++ /dev/null @@ -1,43 +0,0 @@ -#pragma once - -#include -#include - -namespace viennaray::gpu { - -typedef curandStatePhilox4_32_10_t RNGState; - -// Other possible RNGState types: -// typedef curandStateXORWOW_t curtRNGState; // bad -// typedef curandStateMRG32k3a_t curtRNGState // not tested -// typedef curandStateSobol32_t curtRNGState; // not tested -// typedef curandStateScrambledSobol32_t curtRNGState; // not tested - -#ifdef __CUDACC__ -template -static __device__ __inline__ unsigned int tea(unsigned int v0, - unsigned int v1) { - unsigned int s0 = 0; - - for (unsigned int n = 0; n < N; n++) { - s0 += 0x9e3779b9; - v0 += ((v1 << 4) + 0xa341316c) ^ (v1 + s0) ^ ((v1 >> 5) + 0xc8013ea4); - v1 += ((v0 << 4) + 0xad90777d) ^ (v0 + s0) ^ ((v0 >> 5) + 0x7e95761e); - } - - return v0; -} - -__device__ __inline__ float getNextRand(RNGState *state) { - return curand_uniform(state); -} - -__device__ __inline__ float getNormalDistRand(RNGState *state) { - float4 u0 = curand_uniform4(state); - float r = sqrtf(-2.f * logf(u0.x)); - float theta = 2.f * M_PIf * u0.y; - return r * sinf(theta); -} -#endif - -} // namespace viennaray::gpu diff --git a/gpu/include/raygReflection.hpp b/gpu/include/raygReflection.hpp index 277700e..43f30d8 100644 --- a/gpu/include/raygReflection.hpp +++ b/gpu/include/raygReflection.hpp @@ -6,9 +6,9 @@ #include #include "raygPerRayData.hpp" -#include "raygRNG.hpp" #include "raygSBTRecords.hpp" +#include #include namespace viennaray::gpu { @@ -64,7 +64,7 @@ specularReflection(PerRayData *prd, const Vec3Df &geoNormal) { prd->dir = prd->dir - (2 * DotProduct(prd->dir, geoNormal)) * geoNormal; } -static __device__ Vec3Df PickRandomPointOnUnitSphere(RNGState *state) { +static __device__ Vec3Df PickRandomPointOnUnitSphere(CudaRNG *state) { const float4 u = curand_uniform4(state); // (0,1] const float z = 1.0f - 2.0f * u.x; // uniform in [-1,1] const float r2 = fmaxf(0.0f, 1.0f - z * z); diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index b3866ad..59114fe 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -241,6 +241,10 @@ template class Trace { .print(); } cellDataBuffer_ = passedCellDataBuffer; +#ifndef NDEBUG + // In debug mode, we set the buffer as reference to avoid accidental frees + cellDataBuffer_.isRef = true; +#endif numCellData = numData; } diff --git a/gpu/include/raygTriangleGeometry.hpp b/gpu/include/raygTriangleGeometry.hpp index 8cc4a2a..3c0b3a4 100644 --- a/gpu/include/raygTriangleGeometry.hpp +++ b/gpu/include/raygTriangleGeometry.hpp @@ -145,7 +145,7 @@ struct TriangleGeometry { CudaBuffer compactedSizeBuffer; compactedSizeBuffer.alloc(sizeof(uint64_t)); - OptixAccelEmitDesc emitDesc; + OptixAccelEmitDesc emitDesc = {}; emitDesc.type = OPTIX_PROPERTY_TYPE_COMPACTED_SIZE; emitDesc.result = compactedSizeBuffer.dPointer(); @@ -269,6 +269,7 @@ struct TriangleGeometry { void freeBuffers() { geometryIndexBuffer.free(); geometryVertexBuffer.free(); + geometryNormalBuffer.free(); boundaryIndexBuffer.free(); boundaryVertexBuffer.free(); asBuffer.free(); diff --git a/gpu/pipelines/GeneralPipelineDisk.cu b/gpu/pipelines/GeneralPipelineDisk.cu index 027292f..8bc70c7 100644 --- a/gpu/pipelines/GeneralPipelineDisk.cu +++ b/gpu/pipelines/GeneralPipelineDisk.cu @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/gpu/pipelines/GeneralPipelineLine.cu b/gpu/pipelines/GeneralPipelineLine.cu index af56204..ecc45ea 100644 --- a/gpu/pipelines/GeneralPipelineLine.cu +++ b/gpu/pipelines/GeneralPipelineLine.cu @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/gpu/pipelines/GeneralPipelineTriangle.cu b/gpu/pipelines/GeneralPipelineTriangle.cu index 655b991..3fd166a 100644 --- a/gpu/pipelines/GeneralPipelineTriangle.cu +++ b/gpu/pipelines/GeneralPipelineTriangle.cu @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include diff --git a/include/viennaray/rayTraceKernel.hpp b/include/viennaray/rayTraceKernel.hpp index 73c8643..eee350b 100644 --- a/include/viennaray/rayTraceKernel.hpp +++ b/include/viennaray/rayTraceKernel.hpp @@ -222,10 +222,9 @@ template class TraceKernel { // Calculate point of impact const auto &ray = rayHit.ray; - const auto hitPoint = - Vec3D{ray.org_x + ray.dir_x * ray.tfar, - ray.org_y + ray.dir_y * ray.tfar, - ray.org_z + ray.dir_z * ray.tfar}; + const auto hitPoint = Vec3Df{ray.org_x + ray.dir_x * ray.tfar, + ray.org_y + ray.dir_y * ray.tfar, + ray.org_z + ray.dir_z * ray.tfar}; const auto rayDir = Vec3D{ray.dir_x, ray.dir_y, ray.dir_z}; @@ -239,6 +238,7 @@ template class TraceKernel { if (hitFromBack) { // if hitFromBack == true, then the ray hits the back of a disk // the second time. In this case we discard the ray. + ++raysTerminated; break; } hitFromBack = true; @@ -263,8 +263,7 @@ template class TraceKernel { // origins of hit disks { // distance on first disk hit const auto &disk = geometry_.getPrimRef(rayHit.hit.primID); - const auto &diskOrigin = - *reinterpret_cast const *>(&disk); + const auto &diskOrigin = *reinterpret_cast(&disk); impactDistances.push_back( Distance(hitPoint, diskOrigin) + 1e-6f); // add eps to avoid division by 0 @@ -323,6 +322,7 @@ template class TraceKernel { } if (++numReflections > config_.maxReflections) { // terminate ray if too many reflections + ++raysTerminated; break; } reflect = rejectionControl(rayWeight, initialRayWeight, rngState); diff --git a/include/viennaray/rayUtil.hpp b/include/viennaray/rayUtil.hpp index a4defcd..0184156 100644 --- a/include/viennaray/rayUtil.hpp +++ b/include/viennaray/rayUtil.hpp @@ -267,14 +267,15 @@ template [[nodiscard]] static Vec3D pickRandomPointOnUnitSphere(RNG &rngState) { static thread_local std::uniform_real_distribution uniDist( - NumericType(0), NumericType(1)); - NumericType x, y, z, x2py2; + NumericType(-1), NumericType(1)); + NumericType x, y, z; + double x2py2; do { - x = 2 * uniDist(rngState) - 1.; - y = 2 * uniDist(rngState) - 1.; + x = uniDist(rngState); + y = uniDist(rngState); x2py2 = x * x + y * y; } while (x2py2 >= 1.); - NumericType tmp = 2 * std::sqrt(1. - x2py2); + double tmp = 2. * std::sqrt(1. - x2py2); x *= tmp; y *= tmp; z = 1. - 2 * x2py2; @@ -297,9 +298,7 @@ template return B; } const T invLen = T(1) / std::sqrt(len2); - u[0] *= invLen; - u[1] *= invLen; - u[2] *= invLen; + u = u * invLen; B[0] = u; // 2) choose a helper vector not collinear with u @@ -313,14 +312,12 @@ template h = Vec3D{T(0), -u[2], u[1]}; } - // 3) v = normalized(h) - auto v = h; - Normalize(v); - B[1] = v; + // 3) normalize h + Normalize(h); + B[1] = h; // 4) w = u × v (already unit-length up to tiny FP error) - auto w = CrossProduct(u, v); - B[2] = w; + B[2] = CrossProduct(u, h); return B; } From 16c0b3b6227c8a3f9fcae2096cbde3ed9884593e Mon Sep 17 00:00:00 2001 From: reiter Date: Mon, 24 Nov 2025 16:56:03 +0100 Subject: [PATCH 2/2] Bump version --- CMakeLists.txt | 2 +- README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index bf6347f..43fecfe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) project( ViennaRay LANGUAGES CXX - VERSION 3.8.0) + VERSION 3.8.1) # -------------------------------------------------------------------------------------------------------- # Library switches diff --git a/README.md b/README.md index 74ebd5d..c1afc10 100644 --- a/README.md +++ b/README.md @@ -63,7 +63,7 @@ We recommend using [CPM.cmake](https://github.com/cpm-cmake/CPM.cmake) to consum * Installation with CPM ```cmake - CPMAddPackage("gh:viennatools/viennaray@3.8.0") # Use the latest release version + CPMAddPackage("gh:viennatools/viennaray@3.8.1") # Use the latest release version ``` * With a local installation