Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ option(EMBREE_RAY_MASK "Enable Embree Ray Masking" OFF)

option(VIENNARAY_USE_GPU "Enable GPU support" OFF)
option(VIENNARAY_USE_WDIST "Enable weighted distribution of ray weights" OFF)
option(VIENNARAY_GPU_DOUBLE_PRECISION "Use double precision on GPU" ON)

option(VIENNARAY_BUILD_EXAMPLES "Build examples" OFF)
option(VIENNARAY_BUILD_TESTS "Build tests" OFF)
Expand Down Expand Up @@ -67,6 +68,11 @@ if(VIENNARAY_USE_WDIST)
target_compile_definitions(${PROJECT_NAME} INTERFACE VIENNARAY_USE_WDIST)
endif()

if(VIENNARAY_GPU_DOUBLE_PRECISION)
message(STATUS "[ViennaRay] Using double precision on GPU")
target_compile_definitions(${PROJECT_NAME} INTERFACE VIENNARAY_GPU_DOUBLE_PRECISION)
endif()

if(VIENNARAY_PRINT_PROGRESS)
target_compile_definitions(${PROJECT_NAME} INTERFACE VIENNARAY_PRINT_PROGRESS)
endif()
Expand All @@ -92,7 +98,7 @@ include("cmake/cpm.cmake")

CPMAddPackage(
NAME ViennaCore
VERSION 1.7.3
VERSION 1.7.4
GIT_REPOSITORY "https://github.com/ViennaTools/ViennaCore"
OPTIONS "VIENNACORE_USE_GPU ${VIENNARAY_USE_GPU}")

Expand Down
9 changes: 9 additions & 0 deletions cmake/generate_ptx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ function(generate_pipeline target_name generated_files)
cuda_include_directories(${VIENNARAY_GPU_INCLUDE})
cuda_include_directories(${ViennaCore_SOURCE_DIR}/include/viennacore)
add_compile_definitions(VIENNACORE_COMPILE_GPU)
if(VIENNARAY_GPU_DOUBLE_PRECISION)
add_compile_definitions(VIENNARAY_GPU_DOUBLE_PRECISION)
endif()

# Generate OptiX IR files if enabled
if(VIENNARAY_GENERATE_OPTIXIR)
Expand Down Expand Up @@ -53,6 +56,9 @@ function(generate_kernel generated_files)
cuda_include_directories(${VIENNARAY_GPU_INCLUDE})
cuda_include_directories(${OptiX_INCLUDE_DIR})
add_compile_definitions(VIENNACORE_COMPILE_GPU)
if(VIENNARAY_GPU_DOUBLE_PRECISION)
add_compile_definitions(VIENNARAY_GPU_DOUBLE_PRECISION)
endif()

cuda_compile_ptx(generated_ptx_files ${cu_source_files} ${cmake_options} ${options})

Expand Down Expand Up @@ -98,6 +104,9 @@ function(add_gpu_executable target_name_base target_name_var)
cuda_include_directories(${VIENNARAY_GPU_INCLUDE})
cuda_include_directories(${ViennaCore_SOURCE_DIR}/include/viennacore)
add_compile_definitions(VIENNACORE_COMPILE_GPU)
if(VIENNARAY_GPU_DOUBLE_PRECISION)
add_compile_definitions(VIENNARAY_GPU_DOUBLE_PRECISION)
endif()

# Create CUDA kernels
cuda_wrap_srcs(
Expand Down
15 changes: 7 additions & 8 deletions gpu/examples/trenchTriangles.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include <raygTraceTriangle.hpp>

#include <cuda_runtime.h>
#include <omp.h>

// #define COUNT_RAYS
Expand Down Expand Up @@ -29,7 +30,7 @@ int main(int argc, char **argv) {

gpu::Particle<NumericType> particle;
particle.name = "Particle";
particle.sticking = 0.1f;
particle.sticking = 1.f;
particle.dataLabels = {"particleFlux"};
particle.materialSticking[7] = 0.1f;
particle.materialSticking[1] = 1.0f;
Expand All @@ -38,14 +39,14 @@ int main(int argc, char **argv) {
std::vector<gpu::CallableConfig> cMap = {
{0, gpu::CallableSlot::COLLISION, "__direct_callable__particleCollision"},
{0, gpu::CallableSlot::REFLECTION,
"__direct_callable__particleReflection"}};
"__direct_callable__particleReflectionConstSticking"}};

gpu::TraceTriangle<NumericType, D> tracer(context);
tracer.setGeometry(mesh);
tracer.setMaterialIds(materialIds);
tracer.setCallables("CallableWrapper", context->modulePath);
tracer.setParticleCallableMap({pMap, cMap});
tracer.setNumberOfRaysPerPoint(2000);
tracer.setNumberOfRaysPerPoint(5000);
tracer.insertNextParticle(particle);
tracer.prepareParticlePrograms();

Expand All @@ -58,16 +59,14 @@ int main(int argc, char **argv) {
#endif

tracer.apply();

tracer.normalizeResults();
auto flux = tracer.getFlux(0, 0);
rayInternal::writeVTP<float, D>("trenchTriangles_triMesh.vtp", mesh.nodes,
mesh.triangles, flux);

rayInternal::writeVTP<float, D, gpu::ResultType>(
"trenchTriangles_triMesh.vtp", mesh.nodes, mesh.triangles, flux);

#ifdef COUNT_RAYS
rayCountBuffer.download(&rayCount, 1);
std::cout << "Trace count: " << rayCount << std::endl;
#endif

context->destroy();
}
8 changes: 7 additions & 1 deletion gpu/include/raygLaunchParams.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,19 @@

namespace viennaray::gpu {

#ifdef VIENNARAY_GPU_DOUBLE_PRECISION
using ResultType = double;
#else
using ResultType = float;
#endif

__both__ __forceinline__ unsigned callableIndex(unsigned p, CallableSlot s) {
return p * static_cast<unsigned>(CallableSlot::COUNT) +
static_cast<unsigned>(s);
}

struct LaunchParams {
float *resultBuffer;
ResultType *resultBuffer;

float rayWeightThreshold = 0.1f;
float tThreshold = 0.5f;
Expand Down
29 changes: 15 additions & 14 deletions gpu/include/raygTrace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,9 @@ template <class T, int D> class Trace {
}

// Resize our cuda result buffer
resultBuffer.allocInit(launchParams.numElements * numFluxes_, float(0));
launchParams.resultBuffer = (float *)resultBuffer.dPointer();
resultBuffer.allocInit(launchParams.numElements * numFluxes_,
ResultType(0));
launchParams.resultBuffer = (ResultType *)resultBuffer.dPointer();

if (materialIdsBuffer_.sizeInBytes != 0) {
launchParams.materialIds = (int *)materialIdsBuffer_.dPointer();
Expand All @@ -111,8 +112,8 @@ template <class T, int D> class Trace {
launchParams.maxReflections = config_.maxReflections;
launchParams.maxBoundaryHits = config_.maxBoundaryHits;

int numPointsPerDim =
static_cast<int>(std::sqrt(static_cast<T>(launchParams.numElements)));
int numPointsPerDim = static_cast<int>(
std::sqrt(static_cast<double>(launchParams.numElements)));

if (config_.numRaysFixed > 0) {
numPointsPerDim = 1;
Expand Down Expand Up @@ -186,8 +187,7 @@ template <class T, int D> class Trace {
launchParams.source.customDirectionBasis = true;
}

launchParamsBuffers[i].alloc(sizeof(launchParams));
launchParamsBuffers[i].upload(&launchParams, 1);
launchParamsBuffers[i].allocUploadSingle(launchParams);

CUDA_CHECK(StreamCreate(&streams[i]));
}
Expand Down Expand Up @@ -304,24 +304,24 @@ template <class T, int D> class Trace {

size_t getNumberOfRays() const { return numRays; }

std::vector<float> getFlux(int particleIdx, int dataIdx,
int smoothingNeighbors = 0) {
std::vector<ResultType> getFlux(int particleIdx, int dataIdx,
int smoothingNeighbors = 0) {
if (!resultsDownloaded) {
results.resize(launchParams.numElements * numFluxes_);
resultBuffer.download(results.data(),
launchParams.numElements * numFluxes_);
resultsDownloaded = true;
}

std::vector<float> flux(launchParams.numElements);
std::vector<ResultType> flux(launchParams.numElements);
unsigned int offset = 0;
for (size_t i = 0; i < particles_.size(); i++) {
if (particleIdx > i)
offset += particles_[i].dataLabels.size();
}
offset = (offset + dataIdx) * launchParams.numElements;
std::memcpy(flux.data(), results.data() + offset,
launchParams.numElements * sizeof(float));
launchParams.numElements * sizeof(ResultType));
if (smoothingNeighbors > 0)
smoothFlux(flux, smoothingNeighbors);
return flux;
Expand Down Expand Up @@ -437,7 +437,8 @@ template <class T, int D> class Trace {
}
}

virtual void smoothFlux(std::vector<float> &flux, int smoothingNeighbors) {}
virtual void smoothFlux(std::vector<ResultType> &flux,
int smoothingNeighbors) {}

// To be implemented by derived classes
virtual void normalizeResults() = 0;
Expand All @@ -449,7 +450,7 @@ template <class T, int D> class Trace {
void initRayTracer() {
launchParams.D = D;
context_->addModule(normModuleName);
normKernelName.append(geometryType_ + "_f");
normKernelName.append(geometryType_);
// launchParamsBuffer.alloc(sizeof(launchParams));
// normKernelName.push_back(NumericType);
}
Expand Down Expand Up @@ -746,15 +747,15 @@ template <class T, int D> class Trace {
CudaBuffer hitgroupRecordBuffer;
std::vector<OptixProgramGroup> directCallablePGs;
CudaBuffer directCallableRecordBuffer;
OptixShaderBindingTable sbt = {};
OptixShaderBindingTable sbt{};

// launch parameters, on the host, constant for all particles
LaunchParams launchParams;
std::vector<CudaBuffer> launchParamsBuffers;

// results Buffer
CudaBuffer resultBuffer;
std::vector<float> results;
std::vector<ResultType> results;

rayInternal::KernelConfig config_;
bool ignoreBoundary = false;
Expand Down
9 changes: 5 additions & 4 deletions gpu/include/raygTraceDisk.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ template <class T, int D> class TraceDisk final : public Trace<T, D> {
this->ignoreBoundary, sourceOffset);
}

void smoothFlux(std::vector<float> &flux, int smoothingNeighbors) override {
void smoothFlux(std::vector<ResultType> &flux,
int smoothingNeighbors) override {
auto oldFlux = flux;
const T requiredDistance = smoothingNeighbors * 2.0 * diskMesh.radius;
PointNeighborhood<float, D>
Expand All @@ -66,9 +67,9 @@ template <class T, int D> class TraceDisk final : public Trace<T, D> {

#pragma omp parallel for
for (int idx = 0; idx < launchParams.numElements; idx++) {
float vv = oldFlux[idx];
ResultType vv = oldFlux[idx];
auto const &neighborhood = pointNeighborhood->getNeighborIndices(idx);
float sum = 1.f;
ResultType sum = 1.0;
auto const normal = diskMesh.normals[idx];
for (auto const &nbi : neighborhood) {
auto nnormal = diskMesh.normals[nbi];
Expand All @@ -85,7 +86,7 @@ template <class T, int D> class TraceDisk final : public Trace<T, D> {
void normalizeResults() override {
assert(resultBuffer.sizeInBytes != 0 &&
"Normalization: Result buffer not initialized.");
float sourceArea = 0.f;
double sourceArea = 0.0;
if constexpr (D == 2) {
sourceArea =
(launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]);
Expand Down
4 changes: 2 additions & 2 deletions gpu/include/raygTraceLine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ template <class T, int D> class TraceLine final : public Trace<T, D> {
this->ignoreBoundary, sourceOffset);
}

void smoothFlux(std::vector<float> &flux, int numNeighbors) override {
void smoothFlux(std::vector<ResultType> &flux, int numNeighbors) override {
// not implemented for line geometry
}

void normalizeResults() override {
assert(resultBuffer.sizeInBytes != 0 &&
"Normalization: Result buffer not initialized.");

float sourceArea =
double sourceArea =
launchParams.source.maxPoint[0] - launchParams.source.minPoint[0];

// calculate areas on host and send to device for now
Expand Down
5 changes: 3 additions & 2 deletions gpu/include/raygTraceTriangle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,11 @@ template <class T, int D> class TraceTriangle final : public Trace<T, D> {
}
}

void smoothFlux(std::vector<float> &flux, int smoothingNeighbors) override {}
void smoothFlux(std::vector<ResultType> &flux,
int smoothingNeighbors) override {}

void normalizeResults() override {
float sourceArea = 0.f;
double sourceArea = 0.0;
if constexpr (D == 2) {
sourceArea =
(launchParams.source.maxPoint[0] - launchParams.source.minPoint[0]);
Expand Down
2 changes: 1 addition & 1 deletion gpu/include/raygTriangleGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ struct TriangleGeometry {

#ifndef NDEBUG
rayInternal::writeVTP(boundaryMesh, "triangleMesh_boundary.vtp",
std::vector<float>());
std::vector<double>());
#endif

return boundaryMesh;
Expand Down
Loading