diff --git a/CMakeLists.txt b/CMakeLists.txt index 81c5904..d7c6ffb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR) project( ViennaRay LANGUAGES CXX - VERSION 3.8.5) + VERSION 3.9.1) # -------------------------------------------------------------------------------------------------------- # Library switches @@ -98,7 +98,7 @@ include("cmake/cpm.cmake") CPMAddPackage( NAME ViennaCore - VERSION 1.7.4 + VERSION 1.8.0 GIT_REPOSITORY "https://github.com/ViennaTools/ViennaCore" OPTIONS "VIENNACORE_USE_GPU ${VIENNARAY_USE_GPU}") diff --git a/cmake/cpm.cmake b/cmake/cpm.cmake index 1d1775d..5488a28 100644 --- a/cmake/cpm.cmake +++ b/cmake/cpm.cmake @@ -2,8 +2,8 @@ # # SPDX-FileCopyrightText: Copyright (c) 2019-2023 Lars Melchior and contributors -set(CPM_DOWNLOAD_VERSION 0.40.5) -set(CPM_HASH_SUM "c46b876ae3b9f994b4f05a4c15553e0485636862064f1fcc9d8b4f832086bc5d") +set(CPM_DOWNLOAD_VERSION 0.42.0) +set(CPM_HASH_SUM "2020b4fc42dba44817983e06342e682ecfc3d2f484a581f11cc5731fbe4dce8a") if(CPM_SOURCE_CACHE) set(CPM_DOWNLOAD_LOCATION "${CPM_SOURCE_CACHE}/cpm/CPM_${CPM_DOWNLOAD_VERSION}.cmake") diff --git a/gpu/include/raygDiskGeometry.hpp b/gpu/include/raygDiskGeometry.hpp index ee9e9ce..2512c97 100644 --- a/gpu/include/raygDiskGeometry.hpp +++ b/gpu/include/raygDiskGeometry.hpp @@ -11,7 +11,7 @@ namespace viennaray::gpu { using namespace viennacore; -template struct DiskGeometry { +struct DiskGeometry { // geometry CudaBuffer geometryPointBuffer; CudaBuffer geometryNormalBuffer; @@ -26,6 +26,7 @@ template struct DiskGeometry { CudaBuffer asBuffer; /// build acceleration structure from triangle mesh + template void buildAccel(DeviceContext &context, const DiskMesh &mesh, LaunchParams &launchParams, const bool ignoreBoundary, float sourceOffset) { @@ -99,7 +100,7 @@ template struct DiskGeometry { diskInput[0].customPrimitiveArray.sbtIndexOffsetStrideInBytes = 0; // ------------------------- boundary input ------------------------- - auto boundaryMesh = makeBoundary(mesh); + auto boundaryMesh = makeBoundary(mesh); // upload the model to the device: the builder boundaryPointBuffer.allocUpload(boundaryMesh.nodes); boundaryNormalBuffer.allocUpload(boundaryMesh.normals); @@ -194,7 +195,7 @@ template struct DiskGeometry { launchParams.traversable = asHandle; } - DiskMesh makeBoundary(const DiskMesh &passedMesh) { + template DiskMesh makeBoundary(const DiskMesh &passedMesh) { DiskMesh boundaryMesh; Vec3Df bbMin = passedMesh.minimumExtent; diff --git a/gpu/include/raygTrace.hpp b/gpu/include/raygTrace.hpp index 2425011..93b321c 100644 --- a/gpu/include/raygTrace.hpp +++ b/gpu/include/raygTrace.hpp @@ -52,7 +52,7 @@ template class Trace { } void setPipelineFileName(const std::string &fileName) { - pipelineFileName = fileName; + pipelineFileName_ = fileName; } void setCallables(std::string fileName, const std::filesystem::path &path) { @@ -85,49 +85,50 @@ template class Trace { } if (cellDataBuffer_.sizeInBytes / sizeof(float) != - numCellData * launchParams.numElements) { + numCellData_ * launchParams_.numElements) { VIENNACORE_LOG_ERROR( "Cell data buffer size does not match the expected size."); } // Resize our cuda result buffer - resultBuffer.allocInit(launchParams.numElements * numFluxes_, - ResultType(0)); - launchParams.resultBuffer = (ResultType *)resultBuffer.dPointer(); + resultBuffer_.allocInit(launchParams_.numElements * numFluxes_, + ResultType(0)); + launchParams_.resultBuffer = (ResultType *)resultBuffer_.dPointer(); if (materialIdsBuffer_.sizeInBytes != 0) { - launchParams.materialIds = (int *)materialIdsBuffer_.dPointer(); + launchParams_.materialIds = (int *)materialIdsBuffer_.dPointer(); } - launchParams.seed = config_.rngSeed + config_.runNumber++; + launchParams_.seed = config_.rngSeed + config_.runNumber++; if (config_.useRandomSeed) { std::random_device rd; std::uniform_int_distribution gen; - launchParams.seed = gen(rd); + launchParams_.seed = gen(rd); } // Threshold value for neighbor detection in disk-based geometries - launchParams.tThreshold = 1.1 * gridDelta_; // TODO: find the best value + assert(gridDelta_ > 0.0f); + launchParams_.tThreshold = 1.1 * gridDelta_; // TODO: find the best value - launchParams.maxReflections = config_.maxReflections; - launchParams.maxBoundaryHits = config_.maxBoundaryHits; + launchParams_.maxReflections = config_.maxReflections; + launchParams_.maxBoundaryHits = config_.maxBoundaryHits; int numPointsPerDim = static_cast( - std::sqrt(static_cast(launchParams.numElements))); + std::sqrt(static_cast(launchParams_.numElements))); if (config_.numRaysFixed > 0) { numPointsPerDim = 1; config_.numRaysPerPoint = config_.numRaysFixed; } - numRays = numPointsPerDim * numPointsPerDim * config_.numRaysPerPoint; - if (numRays > (1 << 29)) { + numRays_ = numPointsPerDim * numPointsPerDim * config_.numRaysPerPoint; + if (numRays_ > (1 << 29)) { VIENNACORE_LOG_WARNING("Too many rays for single launch: " + - util::prettyDouble(numRays)); + util::prettyDouble(numRays_)); config_.numRaysPerPoint = (1 << 29) / (numPointsPerDim * numPointsPerDim); - numRays = numPointsPerDim * numPointsPerDim * config_.numRaysPerPoint; + numRays_ = numPointsPerDim * numPointsPerDim * config_.numRaysPerPoint; } - VIENNACORE_LOG_DEBUG("Number of rays: " + util::prettyDouble(numRays)); + VIENNACORE_LOG_DEBUG("Number of rays: " + util::prettyDouble(numRays_)); // set up material specific sticking probabilities materialStickingBuffer_.resize(particles_.size()); @@ -156,7 +157,7 @@ template class Trace { // Every particle gets its own stream and launch parameters std::vector streams(particles_.size()); - launchParamsBuffers.resize(particles_.size()); + launchParamsBuffers_.resize(particles_.size()); if (particleMap_.empty()) { VIENNACORE_LOG_ERROR("No particle name->particleType mapping provided."); @@ -167,14 +168,14 @@ template class Trace { if (it == particleMap_.end()) { VIENNACORE_LOG_ERROR("Unknown particle name: " + particles_[i].name); } - launchParams.particleType = it->second; - launchParams.particleIdx = static_cast(i); - launchParams.cosineExponent = + launchParams_.particleType = it->second; + launchParams_.particleIdx = static_cast(i); + launchParams_.cosineExponent = static_cast(particles_[i].cosineExponent); - launchParams.sticking = static_cast(particles_[i].sticking); + launchParams_.sticking = static_cast(particles_[i].sticking); if (!particles_[i].materialSticking.empty()) { assert(materialStickingBuffer_[i].sizeInBytes != 0); - launchParams.materialSticking = + launchParams_.materialSticking = (float *)materialStickingBuffer_[i].dPointer(); } @@ -182,12 +183,12 @@ template class Trace { Vec3Df direction{static_cast(particles_[i].direction[0]), static_cast(particles_[i].direction[1]), static_cast(particles_[i].direction[2])}; - launchParams.source.directionBasis = + launchParams_.source.directionBasis = rayInternal::getOrthonormalBasis(direction); - launchParams.source.customDirectionBasis = true; + launchParams_.source.customDirectionBasis = true; } - launchParamsBuffers[i].allocUploadSingle(launchParams); + launchParamsBuffers_[i].allocUploadSingle(launchParams_); CUDA_CHECK(StreamCreate(&streams[i])); } @@ -196,23 +197,23 @@ template class Trace { #ifndef NDEBUG // Launch on single stream in debug mode for (size_t i = 0; i < particles_.size(); i++) { - OPTIX_CHECK(optixLaunch(pipeline_, streams[0], - /*! parameters and SBT */ - launchParamsBuffers[i].dPointer(), - launchParamsBuffers[i].sizeInBytes, &sbt, - /*! dimensions of the launch: */ - config_.numRaysPerPoint, numPointsPerDim, - numPointsPerDim)); + OPTIX_CHECK(optixLaunch( + pipeline_, streams[0], + /*! parameters and SBT */ + launchParamsBuffers_[i].dPointer(), + launchParamsBuffers_[i].sizeInBytes, &shaderBindingTable_, + /*! dimensions of the launch: */ + config_.numRaysPerPoint, numPointsPerDim, numPointsPerDim)); } #else // Launch on multiple streams in release mode for (size_t i = 0; i < particles_.size(); i++) { - OPTIX_CHECK(optixLaunch(pipeline_, streams[i], - /*! parameters and SBT */ - launchParamsBuffers[i].dPointer(), - launchParamsBuffers[i].sizeInBytes, &sbt, - /*! dimensions of the launch: */ - config_.numRaysPerPoint, numPointsPerDim, - numPointsPerDim)); + OPTIX_CHECK(optixLaunch( + pipeline_, streams[i], + /*! parameters and SBT */ + launchParamsBuffers_[i].dPointer(), + launchParamsBuffers_[i].sizeInBytes, &shaderBindingTable_, + /*! dimensions of the launch: */ + config_.numRaysPerPoint, numPointsPerDim, numPointsPerDim)); } #endif @@ -222,13 +223,13 @@ template class Trace { CUDA_CHECK(StreamDestroy(s)); } - resultsDownloaded = false; + resultsDownloaded_ = false; } void setElementData(const CudaBuffer &passedCellDataBuffer, const unsigned numData) { if (passedCellDataBuffer.sizeInBytes / sizeof(float) / numData != - launchParams.numElements) { + launchParams_.numElements) { VIENNACORE_LOG_WARNING( "Passed cell data does not match number of elements."); } @@ -237,13 +238,13 @@ template class Trace { // In debug mode, we set the buffer as reference to avoid accidental frees cellDataBuffer_.isRef = true; #endif - numCellData = numData; + numCellData_ = numData; } template void setMaterialIds(const std::vector &materialIds, const bool mapToConsecutive = true) { - assert(materialIds.size() == launchParams.numElements); + assert(materialIds.size() == launchParams_.numElements); if (mapToConsecutive) { uniqueMaterialIds_.clear(); @@ -257,15 +258,15 @@ template class Trace { } assert(currentId == materialIdMap.size()); - std::vector materialIdsMapped(launchParams.numElements); + std::vector materialIdsMapped(launchParams_.numElements); #pragma omp parallel for - for (size_t i = 0; i < launchParams.numElements; i++) { + for (size_t i = 0; i < launchParams_.numElements; i++) { materialIdsMapped[i] = materialIdMap[materialIds[i]]; } materialIdsBuffer_.allocUpload(materialIdsMapped); } else { - std::vector materialIdsMapped(launchParams.numElements); - for (size_t i = 0; i < launchParams.numElements; i++) { + std::vector materialIdsMapped(launchParams_.numElements); + for (size_t i = 0; i < launchParams_.numElements; i++) { materialIdsMapped[i] = static_cast(materialIds[i]); } materialIdsBuffer_.allocUpload(materialIdsMapped); @@ -302,47 +303,47 @@ template class Trace { callableMap_ = std::get<1>(maps); } - size_t getNumberOfRays() const { return numRays; } + size_t getNumberOfRays() const { return numRays_; } std::vector getFlux(int particleIdx, int dataIdx, int smoothingNeighbors = 0) { - if (!resultsDownloaded) { - results.resize(launchParams.numElements * numFluxes_); - resultBuffer.download(results.data(), - launchParams.numElements * numFluxes_); - resultsDownloaded = true; + if (!resultsDownloaded_) { + results_.resize(launchParams_.numElements * numFluxes_); + resultBuffer_.download(results_.data(), + launchParams_.numElements * numFluxes_); + resultsDownloaded_ = true; } - std::vector flux(launchParams.numElements); + std::vector flux(launchParams_.numElements); unsigned int offset = 0; for (size_t i = 0; i < particles_.size(); i++) { if (particleIdx > i) offset += particles_[i].dataLabels.size(); } - offset = (offset + dataIdx) * launchParams.numElements; - std::memcpy(flux.data(), results.data() + offset, - launchParams.numElements * sizeof(ResultType)); + offset = (offset + dataIdx) * launchParams_.numElements; + std::memcpy(flux.data(), results_.data() + offset, + launchParams_.numElements * sizeof(ResultType)); if (smoothingNeighbors > 0) smoothFlux(flux, smoothingNeighbors); return flux; } - void setUseCellData(unsigned numData) { numCellData = numData; } + void setUseCellData(unsigned numData) { numCellData_ = numData; } void setPeriodicBoundary(const bool periodic) { - launchParams.periodicBoundary = periodic; + launchParams_.periodicBoundary = periodic; } - void setIgnoreBoundary(const bool ignore) { ignoreBoundary = ignore; } + void setIgnoreBoundary(const bool ignore) { ignoreBoundary_ = ignore; } void freeBuffers() { - resultBuffer.free(); - hitgroupRecordBuffer.free(); - missRecordBuffer.free(); - raygenRecordBuffer.free(); - directCallableRecordBuffer.free(); + resultBuffer_.free(); + hitgroupRecordBuffer_.free(); + missRecordBuffer_.free(); + raygenRecordBuffer_.free(); + directCallableRecordBuffer_.free(); dataPerParticleBuffer_.free(); - for (auto &buffer : launchParamsBuffers) { + for (auto &buffer : launchParamsBuffers_) { buffer.free(); } materialIdsBuffer_.free(); @@ -364,24 +365,24 @@ template class Trace { optixModuleDestroy(moduleCallable_); moduleCallable_ = nullptr; } - if (raygenPG) { - optixProgramGroupDestroy(raygenPG); - raygenPG = nullptr; + if (raygenPG_) { + optixProgramGroupDestroy(raygenPG_); + raygenPG_ = nullptr; } - if (missPG) { - optixProgramGroupDestroy(missPG); - missPG = nullptr; + if (missPG_) { + optixProgramGroupDestroy(missPG_); + missPG_ = nullptr; } - if (hitgroupPG) { - optixProgramGroupDestroy(hitgroupPG); - hitgroupPG = nullptr; + if (hitgroupPG_) { + optixProgramGroupDestroy(hitgroupPG_); + hitgroupPG_ = nullptr; } - for (auto &pg : directCallablePGs) { + for (auto &pg : directCallablePGs_) { if (pg) { optixProgramGroupDestroy(pg); } } - directCallablePGs.clear(); + directCallablePGs_.clear(); } unsigned int prepareParticlePrograms() { @@ -404,7 +405,7 @@ template class Trace { numFluxes_ += p.dataLabels.size(); } dataPerParticleBuffer_.allocUpload(dataPerParticle); - launchParams.dataPerParticle = + launchParams_.dataPerParticle = (unsigned int *)dataPerParticleBuffer_.dPointer(); VIENNACORE_LOG_DEBUG("Number of flux arrays: " + std::to_string(numFluxes_)); @@ -412,35 +413,48 @@ template class Trace { return numFluxes_; } - CudaBuffer &getData() { return cellDataBuffer_; } + [[nodiscard]] CudaBuffer &getData() { return cellDataBuffer_; } - CudaBuffer &getResults() { return resultBuffer; } + [[nodiscard]] CudaBuffer &getResultBuffer() { return resultBuffer_; } - std::vector> &getParticles() { return particles_; } + [[nodiscard]] std::vector> getResults() { + downloadResults(); + std::vector> resultArrays; + resultArrays.resize(numFluxes_); + for (unsigned int i = 0; i < numFluxes_; ++i) { + resultArrays[i].resize(launchParams_.numElements); + std::memcpy(resultArrays[i].data(), + results_.data() + i * launchParams_.numElements, + launchParams_.numElements * sizeof(ResultType)); + } + return resultArrays; + } + + [[nodiscard]] std::vector> &getParticles() { return particles_; } [[nodiscard]] unsigned int getNumberOfRates() const { return numFluxes_; } [[nodiscard]] unsigned int getNumberOfElements() const { - return launchParams.numElements; + return launchParams_.numElements; } void setParameters(CUdeviceptr d_params) { - launchParams.customData = (void *)d_params; + launchParams_.customData = (void *)d_params; } void downloadResults() { - if (!resultsDownloaded) { - results.resize(launchParams.numElements * numFluxes_); - resultBuffer.download(results.data(), - launchParams.numElements * numFluxes_); - resultsDownloaded = true; + if (!resultsDownloaded_) { + results_.resize(launchParams_.numElements * numFluxes_); + resultBuffer_.download(results_.data(), + launchParams_.numElements * numFluxes_); + resultsDownloaded_ = true; } } + // To be implemented by derived classes virtual void smoothFlux(std::vector &flux, int smoothingNeighbors) {} - // To be implemented by derived classes virtual void normalizeResults() = 0; protected: @@ -448,11 +462,49 @@ template class Trace { private: void initRayTracer() { - launchParams.D = D; - context_->addModule(normModuleName); - normKernelName.append(geometryType_); - // launchParamsBuffer.alloc(sizeof(launchParams)); - // normKernelName.push_back(NumericType); + launchParams_.D = D; + context_->addModule(normModuleName_); + normKernelName_.append(geometryType_); + } + + void createProgramGroup(const OptixProgramGroupDesc *pgDesc, + const OptixProgramGroupOptions *pgOptions, + OptixProgramGroup *prog) { +#ifdef VIENNACORE_CUDA_LOG_DEBUG + char log[2048]; + size_t sizeof_log = sizeof(log); + auto result = optixProgramGroupCreate(context_->optix, pgDesc, 1, pgOptions, + log, &sizeof_log, prog); + if (sizeof_log > 1) { + size_t len = std::min(sizeof_log, sizeof(log) - 1); + log[len] = '\0'; + std::cerr << "Program group log:\n" << log << std::endl; + } + OPTIX_CHECK_RESULT(result); +#else + OPTIX_CHECK(optixProgramGroupCreate(context_->optix, pgDesc, 1, pgOptions, + NULL, NULL, prog)); +#endif + } + + void createModule(const char *input, size_t inputSize, OptixModule *module) { +#ifdef VIENNACORE_CUDA_LOG_DEBUG + char log[8192]; + size_t sizeof_log = sizeof(log); + auto res = optixModuleCreate(context_->optix, &moduleCompileOptions_, + &pipelineCompileOptions_, input, inputSize, + log, &sizeof_log, module); + if (sizeof_log > 1) { + size_t len = std::min(sizeof_log, sizeof(log) - 1); + log[len] = '\0'; + std::cerr << "Module log:\n" << log << std::endl; + } + OPTIX_CHECK_RESULT(res); +#else + OPTIX_CHECK(optixModuleCreate(context_->optix, &moduleCompileOptions_, + &pipelineCompileOptions_, input, inputSize, + NULL, NULL, module)); +#endif } /// Creates the modules that contain all the programs we are going to use. @@ -476,15 +528,10 @@ template class Trace { pipelineCompileOptions_.numAttributeValues = 0; pipelineCompileOptions_.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE; pipelineCompileOptions_.pipelineLaunchParamsVariableName = - globalParamsName.c_str(); - - pipelineLinkOptions_.maxTraceDepth = 1; + globalParamsName_.c_str(); size_t inputSize = 0; - - char log[2048]; - size_t sizeof_log = sizeof(log); - std::string pipelineFile = pipelineFileName + geometryType_ + ".optixir"; + std::string pipelineFile = pipelineFileName_ + geometryType_ + ".optixir"; std::filesystem::path pipelinePath = context_->modulePath / pipelineFile; if (!std::filesystem::exists(pipelinePath)) { VIENNACORE_LOG_ERROR("Pipeline file " + pipelinePath.string() + @@ -497,14 +544,7 @@ template class Trace { " not found."); } - OPTIX_CHECK(optixModuleCreate(context_->optix, &moduleCompileOptions_, - &pipelineCompileOptions_, pipelineInput, - inputSize, log, &sizeof_log, &module_)); - // if (sizeof_log > 1) - // PRINT(log); - - char logCallable[2048]; - size_t sizeof_log_callable = sizeof(logCallable); + createModule(pipelineInput, inputSize, &module_); if (callableFile_.empty()) { VIENNACORE_LOG_WARNING("No callable file set."); @@ -516,13 +556,7 @@ template class Trace { " not found."); } - OPTIX_CHECK(optixModuleCreate(context_->optix, &moduleCompileOptions_, - &pipelineCompileOptions_, callableInput, - inputSize, logCallable, &sizeof_log_callable, - &moduleCallable_)); - // if (sizeof_log_callable > 1) { - // std::cout << "Callable module log: " << logCallable << std::endl; - // } + createModule(callableInput, inputSize, &moduleCallable_); } /// does all setup for the raygen program @@ -533,14 +567,7 @@ template class Trace { pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN; pgDesc.raygen.module = module_; pgDesc.raygen.entryFunctionName = entryFunctionName.c_str(); - - // OptixProgramGroup raypg; - char log[2048]; - size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &pgDesc, 1, &pgOptions, - log, &sizeof_log, &raygenPG)); - // if (sizeof_log > 1) - // PRINT(log); + createProgramGroup(&pgDesc, &pgOptions, &raygenPG_); } /// does all setup for the miss program @@ -551,14 +578,7 @@ template class Trace { pgDesc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS; pgDesc.miss.module = module_; pgDesc.miss.entryFunctionName = entryFunctionName.c_str(); - - // OptixProgramGroup raypg; - char log[2048]; - size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &pgDesc, 1, &pgOptions, - log, &sizeof_log, &missPG)); - // if (sizeof_log > 1) - // PRINT(log); + createProgramGroup(&pgDesc, &pgOptions, &missPG_); } /// does all setup for the hitgroup program @@ -576,12 +596,7 @@ template class Trace { pgDesc.hitgroup.entryFunctionNameIS = entryFunctionNameIS.c_str(); } - char log[2048]; - size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &pgDesc, 1, &pgOptions, - log, &sizeof_log, &hitgroupPG)); - // if (sizeof_log > 1) - // PRINT(log); + createProgramGroup(&pgDesc, &pgOptions, &hitgroupPG_); } /// does all setup for the direct callables @@ -603,45 +618,49 @@ template class Trace { entryFunctionNames[callableIndex(cfg.particle, cfg.slot)] = cfg.callable; } - directCallablePGs.resize(numCallables); + directCallablePGs_.resize(numCallables); for (size_t i = 0; i < numCallables; i++) { OptixProgramGroupOptions dcOptions = {}; OptixProgramGroupDesc dcDesc = {}; dcDesc.kind = OPTIX_PROGRAM_GROUP_KIND_CALLABLES; dcDesc.callables.moduleDC = moduleCallable_; dcDesc.callables.entryFunctionNameDC = entryFunctionNames[i].c_str(); - - char log[2048]; - size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixProgramGroupCreate(context_->optix, &dcDesc, 1, - &dcOptions, log, &sizeof_log, - &directCallablePGs[i])); - // if (sizeof_log > 1) - // PRINT(log); + createProgramGroup(&dcDesc, &dcOptions, &directCallablePGs_[i]); } } /// assembles the full pipeline of all programs void createPipelines() { + OptixPipelineLinkOptions pipelineLinkOptions = {}; + pipelineLinkOptions.maxTraceDepth = 1; + std::vector programGroups; - programGroups.push_back(raygenPG); - programGroups.push_back(missPG); - programGroups.push_back(hitgroupPG); + programGroups.push_back(raygenPG_); + programGroups.push_back(missPG_); + programGroups.push_back(hitgroupPG_); - for (auto const &directCallablePG : directCallablePGs) { + for (auto const &directCallablePG : directCallablePGs_) { programGroups.push_back(directCallablePG); } +#ifdef VIENNACORE_CUDA_LOG_DEBUG char log[2048]; size_t sizeof_log = sizeof(log); - OPTIX_CHECK(optixPipelineCreate(context_->optix, &pipelineCompileOptions_, - &pipelineLinkOptions_, programGroups.data(), - static_cast(programGroups.size()), log, - &sizeof_log, &pipeline_)); - // #ifndef NDEBUG - // if (sizeof_log > 1) - // PRINT(log); - // #endif + auto resPipeline = + optixPipelineCreate(context_->optix, &pipelineCompileOptions_, + &pipelineLinkOptions, programGroups.data(), + programGroups.size(), log, &sizeof_log, &pipeline_); + if (sizeof_log > 1) { + size_t len = std::min(sizeof_log, sizeof(log) - 1); + log[len] = '\0'; + std::cerr << "Pipeline creation log:\n" << log << std::endl; + } + OPTIX_CHECK_RESULT(resPipeline); +#else + OPTIX_CHECK(optixPipelineCreate( + context_->optix, &pipelineCompileOptions_, &pipelineLinkOptions, + programGroups.data(), programGroups.size(), NULL, NULL, &pipeline_)); +#endif OptixStackSizes stackSizes = {}; for (auto &pg : programGroups) { @@ -656,9 +675,9 @@ template class Trace { // or recursive tracing OPTIX_CHECK(optixUtilComputeStackSizes( &stackSizes, - pipelineLinkOptions_.maxTraceDepth, // OptixTrace recursion depth - 0, // continuation callable depth - 1, // direct callable depth + pipelineLinkOptions.maxTraceDepth, // OptixTrace recursion depth + 0, // continuation callable depth + 1, // direct callable depth &dcStackFromTrav, &dcStackFromState, &continuationStack)); OPTIX_CHECK(optixPipelineSetStackSize( @@ -673,37 +692,40 @@ template class Trace { void generateSBT() { // build raygen record RaygenRecord raygenRecord = {}; - optixSbtRecordPackHeader(raygenPG, &raygenRecord); + optixSbtRecordPackHeader(raygenPG_, &raygenRecord); raygenRecord.data = nullptr; - raygenRecordBuffer.allocUploadSingle(raygenRecord); - sbt.raygenRecord = raygenRecordBuffer.dPointer(); + raygenRecordBuffer_.allocUploadSingle(raygenRecord); + shaderBindingTable_.raygenRecord = raygenRecordBuffer_.dPointer(); // build miss record MissRecord missRecord = {}; - optixSbtRecordPackHeader(missPG, &missRecord); + optixSbtRecordPackHeader(missPG_, &missRecord); missRecord.data = nullptr; - missRecordBuffer.allocUploadSingle(missRecord); - sbt.missRecordBase = missRecordBuffer.dPointer(); - sbt.missRecordStrideInBytes = sizeof(MissRecord); - sbt.missRecordCount = 1; + missRecordBuffer_.allocUploadSingle(missRecord); + shaderBindingTable_.missRecordBase = missRecordBuffer_.dPointer(); + shaderBindingTable_.missRecordStrideInBytes = sizeof(MissRecord); + shaderBindingTable_.missRecordCount = 1; // build geometry specific hitgroup records buildHitGroups(); - // callable programs - if (!directCallablePGs.empty()) { - std::vector callableRecords(directCallablePGs.size()); - for (size_t j = 0; j < directCallablePGs.size(); ++j) { + // build callable programs + if (!directCallablePGs_.empty()) { + std::vector callableRecords(directCallablePGs_.size()); + for (size_t j = 0; j < directCallablePGs_.size(); ++j) { CallableRecord callableRecord = {}; - optixSbtRecordPackHeader(directCallablePGs[j], &callableRecord); + optixSbtRecordPackHeader(directCallablePGs_[j], &callableRecord); callableRecords[j] = callableRecord; } - directCallableRecordBuffer.allocUpload(callableRecords); + directCallableRecordBuffer_.allocUpload(callableRecords); - sbt.callablesRecordBase = directCallableRecordBuffer.dPointer(); - sbt.callablesRecordStrideInBytes = sizeof(CallableRecord); - sbt.callablesRecordCount = - static_cast(directCallablePGs.size()); + shaderBindingTable_.callablesRecordBase = + directCallableRecordBuffer_.dPointer(); + shaderBindingTable_.callablesRecordStrideInBytes = sizeof(CallableRecord); + shaderBindingTable_.callablesRecordCount = + static_cast(directCallablePGs_.size()); + } else { + assert(false && "No direct callables found."); } } @@ -732,42 +754,41 @@ template class Trace { OptixPipeline pipeline_{}; OptixPipelineCompileOptions pipelineCompileOptions_ = {}; - OptixPipelineLinkOptions pipelineLinkOptions_ = {}; OptixModule module_{}; OptixModule moduleCallable_{}; OptixModuleCompileOptions moduleCompileOptions_ = {}; // program groups, and the SBT built around - OptixProgramGroup raygenPG{}; - CudaBuffer raygenRecordBuffer; - OptixProgramGroup missPG{}; - CudaBuffer missRecordBuffer; - OptixProgramGroup hitgroupPG{}; - CudaBuffer hitgroupRecordBuffer; - std::vector directCallablePGs; - CudaBuffer directCallableRecordBuffer; - OptixShaderBindingTable sbt{}; - - // launch parameters, on the host, constant for all particles - LaunchParams launchParams; - std::vector launchParamsBuffers; + OptixProgramGroup raygenPG_{}; + CudaBuffer raygenRecordBuffer_; + OptixProgramGroup missPG_{}; + CudaBuffer missRecordBuffer_; + OptixProgramGroup hitgroupPG_{}; + CudaBuffer hitgroupRecordBuffer_; + std::vector directCallablePGs_; + CudaBuffer directCallableRecordBuffer_; + OptixShaderBindingTable shaderBindingTable_{}; + + // launch parameters + LaunchParams launchParams_; + std::vector launchParamsBuffers_; // one per particle // results Buffer - CudaBuffer resultBuffer; - std::vector results; + CudaBuffer resultBuffer_; + std::vector results_; rayInternal::KernelConfig config_; - bool ignoreBoundary = false; - bool resultsDownloaded = false; + bool ignoreBoundary_ = false; + bool resultsDownloaded_ = false; - size_t numRays = 0; - unsigned numCellData = 0; - const std::string globalParamsName = "launchParams"; + size_t numRays_ = 0; + unsigned numCellData_ = 0; + const std::string globalParamsName_ = "launchParams"; - const std::string normModuleName = "normKernels.ptx"; - std::string normKernelName = "normalize_surface_"; - std::string pipelineFileName = "GeneralPipeline"; + const std::string normModuleName_ = "normKernels.ptx"; + std::string normKernelName_ = "normalize_surface_"; + std::string pipelineFileName_ = "GeneralPipeline"; }; } // namespace viennaray::gpu diff --git a/gpu/include/raygTraceDisk.hpp b/gpu/include/raygTraceDisk.hpp index e2c6183..5a2a572 100644 --- a/gpu/include/raygTraceDisk.hpp +++ b/gpu/include/raygTraceDisk.hpp @@ -16,63 +16,63 @@ template class TraceDisk final : public Trace { explicit TraceDisk(unsigned deviceID = 0) : Trace("Disk", deviceID) {} - ~TraceDisk() override { diskGeometry.freeBuffers(); } + ~TraceDisk() override { diskGeometry_.freeBuffers(); } void setGeometry(const DiskMesh &passedMesh, float sourceOffset = 0.f) { assert(context_ && "Context not initialized."); - diskMesh = passedMesh; - if (diskMesh.gridDelta <= 0.f) { + diskMesh_ = passedMesh; + if (diskMesh_.gridDelta <= 0.f) { VIENNACORE_LOG_ERROR("DiskMesh gridDelta must be positive and non-zero."); } - if (diskMesh.radius <= 0.f) { - diskMesh.radius = rayInternal::DiskFactor<3> * diskMesh.gridDelta; + if (diskMesh_.radius <= 0.f) { + diskMesh_.radius = rayInternal::DiskFactor<3> * diskMesh_.gridDelta; } - minBox = diskMesh.minimumExtent; - maxBox = diskMesh.maximumExtent; + minBox = diskMesh_.minimumExtent; + maxBox = diskMesh_.maximumExtent; if constexpr (D == 2) { - minBox[2] = -diskMesh.gridDelta; - maxBox[2] = diskMesh.gridDelta; + minBox[2] = -diskMesh_.gridDelta; + maxBox[2] = diskMesh_.gridDelta; } - this->gridDelta_ = static_cast(diskMesh.gridDelta); - pointNeighborhood_.template init<3>(diskMesh.nodes, 2 * diskMesh.radius, - diskMesh.minimumExtent, - diskMesh.maximumExtent); - diskGeometry.buildAccel(*context_, diskMesh, launchParams, - this->ignoreBoundary, sourceOffset); + this->gridDelta_ = static_cast(diskMesh_.gridDelta); + pointNeighborhood_.template init<3>(diskMesh_.nodes, 2 * diskMesh_.radius, + diskMesh_.minimumExtent, + diskMesh_.maximumExtent); + diskGeometry_.buildAccel(*context_, diskMesh_, launchParams_, + ignoreBoundary_, sourceOffset); } void smoothFlux(std::vector &flux, int smoothingNeighbors) override { auto oldFlux = flux; - const T requiredDistance = smoothingNeighbors * 2.0 * diskMesh.radius; + const T requiredDistance = smoothingNeighbors * 2.0 * diskMesh_.radius; PointNeighborhood *pointNeighborhood; // use pointer to avoid copies if (smoothingNeighbors == 1) { // re-use the neighborhood from setGeometry pointNeighborhood = &pointNeighborhood_; } else if (pointNeighborhoodCache_.getNumPoints() == - launchParams.numElements && + launchParams_.numElements && std::abs(pointNeighborhoodCache_.getDistance() - requiredDistance) < 1e-6) { // re-use cached neighborhood pointNeighborhood = &pointNeighborhoodCache_; } else { // create a new neighborhood with a larger radius and cache it - pointNeighborhoodCache_.template init<3>(diskMesh.nodes, requiredDistance, - diskMesh.minimumExtent, - diskMesh.maximumExtent); + pointNeighborhoodCache_.template init<3>( + diskMesh_.nodes, requiredDistance, diskMesh_.minimumExtent, + diskMesh_.maximumExtent); pointNeighborhood = &pointNeighborhoodCache_; } #pragma omp parallel for - for (int idx = 0; idx < launchParams.numElements; idx++) { + for (int idx = 0; idx < launchParams_.numElements; idx++) { ResultType vv = oldFlux[idx]; auto const &neighborhood = pointNeighborhood->getNeighborIndices(idx); ResultType sum = 1.0; - auto const normal = diskMesh.normals[idx]; + auto const normal = diskMesh_.normals[idx]; for (auto const &nbi : neighborhood) { - auto nnormal = diskMesh.normals[nbi]; + auto nnormal = diskMesh_.normals[nbi]; auto weight = DotProduct(normal, nnormal); if (weight > 0.) { vv += oldFlux[nbi] * weight; @@ -84,33 +84,35 @@ template class TraceDisk final : public Trace { } void normalizeResults() override { - assert(resultBuffer.sizeInBytes != 0 && + assert(this->resultBuffer_.sizeInBytes != 0 && "Normalization: Result buffer not initialized."); double sourceArea = 0.0; if constexpr (D == 2) { sourceArea = - (launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]); + (launchParams_.source.maxPoint[0] - launchParams_.source.minPoint[0]); } else { sourceArea = - (launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]) * - (launchParams.source.maxPoint[1] - launchParams.source.minPoint[1]); + (launchParams_.source.maxPoint[0] - + launchParams_.source.minPoint[0]) * + (launchParams_.source.maxPoint[1] - launchParams_.source.minPoint[1]); } // calculate areas on host and send to device for now const Vec2D bdBox = {minBox, maxBox}; - std::vector areas(launchParams.numElements); + std::vector areas(launchParams_.numElements); DiskBoundingBoxXYIntersector xy_intersector(bdBox); - const auto radius = diskMesh.radius; + const auto radius = diskMesh_.radius; + const bool ignoreBoundary = this->ignoreBoundary_; constexpr std::array boundaryDirs = {0, 1}; #pragma omp parallel for - for (long idx = 0; idx < launchParams.numElements; ++idx) { - const Vec3Df &coord = diskMesh.nodes[idx]; - const Vec3Df &normal = diskMesh.normals[idx]; + for (long idx = 0; idx < launchParams_.numElements; ++idx) { + const Vec3Df &coord = diskMesh_.nodes[idx]; + const Vec3Df &normal = diskMesh_.normals[idx]; if constexpr (D == 3) { areas[idx] = radius * radius * M_PIf; // full disk area - if (this->ignoreBoundary) { + if (ignoreBoundary) { // no boundaries continue; } @@ -123,10 +125,10 @@ template class TraceDisk final : public Trace { // direction areas[idx] = xy_intersector.areaInside(disk, normal); } else { - constexpr float eps = 1e-4f; // 2D + constexpr float eps = 1e-4f; areas[idx] = 2.f * radius; // full disk area - if (this->ignoreBoundary) { + if (ignoreBoundary) { // no boundaries continue; } @@ -166,12 +168,12 @@ template class TraceDisk final : public Trace { CudaBuffer areaBuffer; areaBuffer.allocUpload(areas); CUdeviceptr d_areas = areaBuffer.dPointer(); - CUdeviceptr d_data = resultBuffer.dPointer(); + CUdeviceptr d_data = this->resultBuffer_.dPointer(); void *kernel_args[] = { - &d_data, &d_areas, &launchParams.numElements, - &sourceArea, &this->numRays, &this->numFluxes_}; - LaunchKernel::launch(this->normModuleName, this->normKernelName, + &d_data, &d_areas, &launchParams_.numElements, + &sourceArea, &this->numRays_, &this->numFluxes_}; + LaunchKernel::launch(this->normModuleName_, this->normKernelName_, kernel_args, *context_); areaBuffer.free(); } @@ -181,40 +183,43 @@ template class TraceDisk final : public Trace { // geometry hitgroup std::vector hitgroupRecords; HitgroupRecordDisk geometryHitgroupRecord = {}; - optixSbtRecordPackHeader(hitgroupPG, &geometryHitgroupRecord); + optixSbtRecordPackHeader(this->hitgroupPG_, &geometryHitgroupRecord); geometryHitgroupRecord.data.point = - (Vec3Df *)diskGeometry.geometryPointBuffer.dPointer(); - geometryHitgroupRecord.data.radius = diskMesh.radius; + (Vec3Df *)diskGeometry_.geometryPointBuffer.dPointer(); + geometryHitgroupRecord.data.radius = diskMesh_.radius; geometryHitgroupRecord.data.base.geometryType = 1; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = (void *)this->cellDataBuffer_.dPointer(); geometryHitgroupRecord.data.base.normal = - (Vec3Df *)diskGeometry.geometryNormalBuffer.dPointer(); + (Vec3Df *)diskGeometry_.geometryNormalBuffer.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup - if (!this->ignoreBoundary) { + if (!ignoreBoundary_) { HitgroupRecordDisk boundaryHitgroupRecord = {}; - optixSbtRecordPackHeader(hitgroupPG, &boundaryHitgroupRecord); + optixSbtRecordPackHeader(this->hitgroupPG_, &boundaryHitgroupRecord); boundaryHitgroupRecord.data.point = - (Vec3Df *)diskGeometry.boundaryPointBuffer.dPointer(); + (Vec3Df *)diskGeometry_.boundaryPointBuffer.dPointer(); boundaryHitgroupRecord.data.base.normal = - (Vec3Df *)diskGeometry.boundaryNormalBuffer.dPointer(); - boundaryHitgroupRecord.data.radius = diskGeometry.boundaryRadius; + (Vec3Df *)diskGeometry_.boundaryNormalBuffer.dPointer(); + boundaryHitgroupRecord.data.radius = diskGeometry_.boundaryRadius; boundaryHitgroupRecord.data.base.geometryType = 1; boundaryHitgroupRecord.data.base.isBoundary = true; hitgroupRecords.push_back(boundaryHitgroupRecord); } - hitgroupRecordBuffer.allocUpload(hitgroupRecords); - sbt.hitgroupRecordBase = hitgroupRecordBuffer.dPointer(); - sbt.hitgroupRecordStrideInBytes = sizeof(HitgroupRecordDisk); - sbt.hitgroupRecordCount = this->ignoreBoundary ? 1 : 2; + this->hitgroupRecordBuffer_.allocUpload(hitgroupRecords); + this->shaderBindingTable_.hitgroupRecordBase = + this->hitgroupRecordBuffer_.dPointer(); + this->shaderBindingTable_.hitgroupRecordStrideInBytes = + sizeof(HitgroupRecordDisk); + this->shaderBindingTable_.hitgroupRecordCount = ignoreBoundary_ ? 1 : 2; } - DiskMesh diskMesh; - DiskGeometry diskGeometry; +private: + DiskMesh diskMesh_; + DiskGeometry diskGeometry_; PointNeighborhood pointNeighborhood_; PointNeighborhood pointNeighborhoodCache_; @@ -222,20 +227,8 @@ template class TraceDisk final : public Trace { Vec3Df maxBox{}; using Trace::context_; - using Trace::geometryType_; - - using Trace::launchParams; - using Trace::resultBuffer; - - using Trace::raygenPG; - using Trace::raygenRecordBuffer; - using Trace::missPG; - using Trace::missRecordBuffer; - using Trace::hitgroupPG; - using Trace::hitgroupRecordBuffer; - using Trace::directCallablePGs; - using Trace::directCallableRecordBuffer; - using Trace::sbt; + using Trace::launchParams_; + using Trace::ignoreBoundary_; }; } // namespace viennaray::gpu diff --git a/gpu/include/raygTraceLine.hpp b/gpu/include/raygTraceLine.hpp index acc9d3a..e738a08 100644 --- a/gpu/include/raygTraceLine.hpp +++ b/gpu/include/raygTraceLine.hpp @@ -14,13 +14,13 @@ template class TraceLine final : public Trace { explicit TraceLine(int deviceID = 0) : Trace("Line", deviceID) {} - ~TraceLine() override { lineGeometry.freeBuffers(); } + ~TraceLine() override { lineGeometry_.freeBuffers(); } void setGeometry(const LineMesh &passedMesh, const float sourceOffset = 0.f) { this->gridDelta_ = static_cast(passedMesh.gridDelta); - lineMesh = passedMesh; - lineGeometry.buildAccel(*context_, lineMesh, launchParams, - this->ignoreBoundary, sourceOffset); + lineMesh_ = passedMesh; + lineGeometry_.buildAccel(*context_, lineMesh_, launchParams_, + ignoreBoundary_, sourceOffset); } void smoothFlux(std::vector &flux, int numNeighbors) override { @@ -28,30 +28,30 @@ template class TraceLine final : public Trace { } void normalizeResults() override { - assert(resultBuffer.sizeInBytes != 0 && + assert(this->resultBuffer_.sizeInBytes != 0 && "Normalization: Result buffer not initialized."); double sourceArea = - launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]; + launchParams_.source.maxPoint[0] - launchParams_.source.minPoint[0]; // calculate areas on host and send to device for now - std::vector areas(launchParams.numElements, 0.f); + std::vector areas(launchParams_.numElements, 0.f); #pragma omp for - for (int idx = 0; idx < launchParams.numElements; ++idx) { - Vec3Df const &p0 = lineMesh.nodes[lineMesh.lines[idx][0]]; - Vec3Df const &p1 = lineMesh.nodes[lineMesh.lines[idx][1]]; + for (int idx = 0; idx < launchParams_.numElements; ++idx) { + Vec3Df const &p0 = lineMesh_.nodes[lineMesh_.lines[idx][0]]; + Vec3Df const &p1 = lineMesh_.nodes[lineMesh_.lines[idx][1]]; areas[idx] = Norm(p1 - p0); } CudaBuffer areaBuffer; areaBuffer.allocUpload(areas); CUdeviceptr d_areas = areaBuffer.dPointer(); - CUdeviceptr d_data = resultBuffer.dPointer(); + CUdeviceptr d_data = this->resultBuffer_.dPointer(); void *kernel_args[] = { - &d_data, &d_areas, &launchParams.numElements, - &sourceArea, &this->numRays, &this->numFluxes_}; - LaunchKernel::launch(this->normModuleName, this->normKernelName, + &d_data, &d_areas, &launchParams_.numElements, + &sourceArea, &this->numRays_, &this->numFluxes_}; + LaunchKernel::launch(this->normModuleName_, this->normKernelName_, kernel_args, *context_); areaBuffer.free(); } @@ -62,56 +62,47 @@ template class TraceLine final : public Trace { // geometry hitgroup HitgroupRecordLine geometryHitgroupRecord = {}; - optixSbtRecordPackHeader(hitgroupPG, &geometryHitgroupRecord); + optixSbtRecordPackHeader(this->hitgroupPG_, &geometryHitgroupRecord); geometryHitgroupRecord.data.nodes = - (Vec3Df *)lineGeometry.geometryNodesBuffer.dPointer(); + (Vec3Df *)lineGeometry_.geometryNodesBuffer.dPointer(); geometryHitgroupRecord.data.lines = - (Vec2D *)lineGeometry.geometryLinesBuffer.dPointer(); + (Vec2D *)lineGeometry_.geometryLinesBuffer.dPointer(); geometryHitgroupRecord.data.base.geometryType = 2; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = (void *)this->cellDataBuffer_.dPointer(); geometryHitgroupRecord.data.base.normal = - (Vec3Df *)lineGeometry.geometryNormalsBuffer.dPointer(); + (Vec3Df *)lineGeometry_.geometryNormalsBuffer.dPointer(); hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup - if (!this->ignoreBoundary) { + if (!ignoreBoundary_) { HitgroupRecordLine boundaryHitgroupRecord = {}; - optixSbtRecordPackHeader(hitgroupPG, &boundaryHitgroupRecord); + optixSbtRecordPackHeader(this->hitgroupPG_, &boundaryHitgroupRecord); boundaryHitgroupRecord.data.nodes = - (Vec3Df *)lineGeometry.boundaryNodesBuffer.dPointer(); + (Vec3Df *)lineGeometry_.boundaryNodesBuffer.dPointer(); boundaryHitgroupRecord.data.lines = - (Vec2D *)lineGeometry.boundaryLinesBuffer.dPointer(); + (Vec2D *)lineGeometry_.boundaryLinesBuffer.dPointer(); boundaryHitgroupRecord.data.base.geometryType = 2; boundaryHitgroupRecord.data.base.isBoundary = true; hitgroupRecords.push_back(boundaryHitgroupRecord); } - hitgroupRecordBuffer.allocUpload(hitgroupRecords); - sbt.hitgroupRecordBase = hitgroupRecordBuffer.dPointer(); - sbt.hitgroupRecordStrideInBytes = sizeof(HitgroupRecordLine); - sbt.hitgroupRecordCount = this->ignoreBoundary ? 1 : 2; + this->hitgroupRecordBuffer_.allocUpload(hitgroupRecords); + this->shaderBindingTable_.hitgroupRecordBase = + this->hitgroupRecordBuffer_.dPointer(); + this->shaderBindingTable_.hitgroupRecordStrideInBytes = + sizeof(HitgroupRecordLine); + this->shaderBindingTable_.hitgroupRecordCount = ignoreBoundary_ ? 1 : 2; } private: - LineMesh lineMesh; - LineGeometry lineGeometry; + LineMesh lineMesh_; + LineGeometry lineGeometry_; using Trace::context_; - - using Trace::launchParams; - using Trace::resultBuffer; - - using Trace::raygenPG; - using Trace::raygenRecordBuffer; - using Trace::missPG; - using Trace::missRecordBuffer; - using Trace::hitgroupPG; - using Trace::hitgroupRecordBuffer; - using Trace::directCallablePGs; - using Trace::directCallableRecordBuffer; - using Trace::sbt; + using Trace::launchParams_; + using Trace::ignoreBoundary_; }; } // namespace viennaray::gpu diff --git a/gpu/include/raygTraceTriangle.hpp b/gpu/include/raygTraceTriangle.hpp index 59441a5..2e9cf94 100644 --- a/gpu/include/raygTraceTriangle.hpp +++ b/gpu/include/raygTraceTriangle.hpp @@ -12,18 +12,18 @@ template class TraceTriangle final : public Trace { explicit TraceTriangle(std::shared_ptr &passedContext) : Trace(passedContext, "Triangle") { if constexpr (D == 2) { - this->normKernelName.append("_2D"); + this->normKernelName_.append("_2D"); } } explicit TraceTriangle(unsigned deviceID = 0) : Trace("Triangle", deviceID) { if constexpr (D == 2) { - this->normKernelName.append("_2D"); + this->normKernelName_.append("_2D"); } } - ~TraceTriangle() override { triangleGeometry.freeBuffers(); } + ~TraceTriangle() override { triangleGeometry_.freeBuffers(); } void setGeometry(const TriangleMesh &passedMesh, const float sourceOffset = 0.f) { @@ -32,37 +32,35 @@ template class TraceTriangle final : public Trace { assert(!passedMesh.nodes.empty() && "Triangle mesh has no vertices."); this->gridDelta_ = static_cast(passedMesh.gridDelta); - triangleGeometry.buildAccel(*context_, passedMesh, launchParams, - this->ignoreBoundary, sourceOffset); + triangleGeometry_.buildAccel(*context_, passedMesh, launchParams_, + ignoreBoundary_, sourceOffset); if constexpr (D == 2) { - triangleMesh = passedMesh; + triangleMesh_ = passedMesh; } } - void smoothFlux(std::vector &flux, - int smoothingNeighbors) override {} - void normalizeResults() override { double sourceArea = 0.0; if constexpr (D == 2) { sourceArea = - (launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]); + (launchParams_.source.maxPoint[0] - launchParams_.source.minPoint[0]); } else { sourceArea = - (launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]) * - (launchParams.source.maxPoint[1] - launchParams.source.minPoint[1]); + (launchParams_.source.maxPoint[0] - + launchParams_.source.minPoint[0]) * + (launchParams_.source.maxPoint[1] - launchParams_.source.minPoint[1]); } - assert(resultBuffer.sizeInBytes != 0 && + assert(this->resultBuffer_.sizeInBytes != 0 && "Normalization: Result buffer not initialized."); - CUdeviceptr d_data = resultBuffer.dPointer(); - CUdeviceptr d_vertex = triangleGeometry.geometryVertexBuffer.dPointer(); - CUdeviceptr d_index = triangleGeometry.geometryIndexBuffer.dPointer(); + CUdeviceptr d_data = this->resultBuffer_.dPointer(); + CUdeviceptr d_vertex = triangleGeometry_.geometryVertexBuffer.dPointer(); + CUdeviceptr d_index = triangleGeometry_.geometryIndexBuffer.dPointer(); void *kernel_args[] = {&d_data, &d_vertex, - &d_index, &launchParams.numElements, - &sourceArea, &this->numRays, + &d_index, &launchParams_.numElements, + &sourceArea, &this->numRays_, &this->numFluxes_}; - LaunchKernel::launch(this->normModuleName, this->normKernelName, + LaunchKernel::launch(this->normModuleName_, this->normKernelName_, kernel_args, *context_); } @@ -72,29 +70,29 @@ template class TraceTriangle final : public Trace { std::vector hitgroupRecords; HitgroupRecordTriangle geometryHitgroupRecord = {}; - optixSbtRecordPackHeader(hitgroupPG, &geometryHitgroupRecord); + optixSbtRecordPackHeader(this->hitgroupPG_, &geometryHitgroupRecord); geometryHitgroupRecord.data.vertex = - (Vec3Df *)triangleGeometry.geometryVertexBuffer.dPointer(); + (Vec3Df *)triangleGeometry_.geometryVertexBuffer.dPointer(); geometryHitgroupRecord.data.index = - (Vec3D *)triangleGeometry.geometryIndexBuffer.dPointer(); + (Vec3D *)triangleGeometry_.geometryIndexBuffer.dPointer(); geometryHitgroupRecord.data.base.geometryType = 0; geometryHitgroupRecord.data.base.isBoundary = false; geometryHitgroupRecord.data.base.cellData = (void *)this->cellDataBuffer_.dPointer(); geometryHitgroupRecord.data.base.normal = - (Vec3Df *)triangleGeometry.geometryNormalBuffer.dPointer(); + (Vec3Df *)triangleGeometry_.geometryNormalBuffer.dPointer(); // add geometry hitgroup record hitgroupRecords.push_back(geometryHitgroupRecord); // boundary hitgroup - if (!this->ignoreBoundary) { + if (!ignoreBoundary_) { HitgroupRecordTriangle boundaryHitgroupRecord = {}; - optixSbtRecordPackHeader(hitgroupPG, &boundaryHitgroupRecord); + optixSbtRecordPackHeader(this->hitgroupPG_, &boundaryHitgroupRecord); boundaryHitgroupRecord.data.vertex = - (Vec3Df *)triangleGeometry.boundaryVertexBuffer.dPointer(); + (Vec3Df *)triangleGeometry_.boundaryVertexBuffer.dPointer(); boundaryHitgroupRecord.data.index = - (Vec3D *)triangleGeometry.boundaryIndexBuffer.dPointer(); + (Vec3D *)triangleGeometry_.boundaryIndexBuffer.dPointer(); boundaryHitgroupRecord.data.base.geometryType = 0; boundaryHitgroupRecord.data.base.isBoundary = true; @@ -103,30 +101,21 @@ template class TraceTriangle final : public Trace { } // upload hitgroup records - hitgroupRecordBuffer.allocUpload(hitgroupRecords); - sbt.hitgroupRecordBase = hitgroupRecordBuffer.dPointer(); - sbt.hitgroupRecordStrideInBytes = sizeof(HitgroupRecordTriangle); - sbt.hitgroupRecordCount = this->ignoreBoundary ? 1 : 2; + this->hitgroupRecordBuffer_.allocUpload(hitgroupRecords); + this->shaderBindingTable_.hitgroupRecordBase = + this->hitgroupRecordBuffer_.dPointer(); + this->shaderBindingTable_.hitgroupRecordStrideInBytes = + sizeof(HitgroupRecordTriangle); + this->shaderBindingTable_.hitgroupRecordCount = ignoreBoundary_ ? 1 : 2; } private: - TriangleMesh triangleMesh; - TriangleGeometry triangleGeometry; + TriangleMesh triangleMesh_; + TriangleGeometry triangleGeometry_; using Trace::context_; - - using Trace::launchParams; - using Trace::resultBuffer; - - using Trace::raygenPG; - using Trace::raygenRecordBuffer; - using Trace::missPG; - using Trace::missRecordBuffer; - using Trace::hitgroupPG; - using Trace::hitgroupRecordBuffer; - using Trace::directCallablePGs; - using Trace::directCallableRecordBuffer; - using Trace::sbt; + using Trace::launchParams_; + using Trace::ignoreBoundary_; }; } // namespace viennaray::gpu