Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
project(
ViennaRay
LANGUAGES CXX
VERSION 3.8.2)
VERSION 3.8.3)

# --------------------------------------------------------------------------------------------------------
# Library switches
Expand Down
10 changes: 6 additions & 4 deletions cmake/generate_ptx.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ function(generate_pipeline target_name generated_files)
cuda_get_sources_and_options(cu_optix_source_files cmake_options options ${ARGN})

# Add the path to the OptiX headers to our include paths.
include_directories(${OptiX_INCLUDE_DIR})
cuda_include_directories(${OptiX_INCLUDE_DIR})

# Include ViennaRay headers which are used in pipelines
include_directories(${VIENNARAY_GPU_INCLUDE_DIR})
include_directories(${ViennaCore_SOURCE_DIR}/include/viennacore) # needed for Context
cuda_include_directories(${VIENNARAY_GPU_INCLUDE})
cuda_include_directories(${ViennaCore_SOURCE_DIR}/include/viennacore)
add_compile_definitions(VIENNACORE_COMPILE_GPU)

# Generate OptiX IR files if enabled
Expand Down Expand Up @@ -38,7 +38,9 @@ function(generate_pipeline target_name generated_files)
list(APPEND generated_files_local ${generated_ptx_files})
endif()

list(APPEND ${generated_files} ${generated_files_local})
set(${generated_files}
${generated_files_local}
PARENT_SCOPE)
endfunction()

function(generate_kernel generated_files)
Expand Down
16 changes: 8 additions & 8 deletions gpu/include/raygBoundary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,21 @@ reflectFromBoundary(PerRayData *prd, const SBTData *hsd, const int D) {
const unsigned int primID = optixGetPrimitiveIndex();
prd->numBoundaryHits++;

if constexpr (std::is_same<SBTData, HitSBTDataDisk>::value) {
if constexpr (std::is_same_v<SBTData, HitSBTDataDisk>) {
prd->pos =
prd->pos + prd->dir * (optixGetRayTmax() - launchParams.tThreshold);
if (primID == 0 || primID == 1) {
prd->dir[0] -= 2 * prd->dir[0]; // x boundary
} else if ((primID == 2 || primID == 3) && D == 3) {
prd->dir[1] -= 2 * prd->dir[1]; // y boundary
}
} else if constexpr (std::is_same<SBTData, HitSBTDataTriangle>::value) {
} else if constexpr (std::is_same_v<SBTData, HitSBTDataTriangle>) {
prd->pos = prd->pos + prd->dir * optixGetRayTmax();
unsigned dim = primID / 4;
prd->dir[dim] -= 2 * prd->dir[dim];
prd->pos[dim] = hsd->vertex[hsd->index[primID][0]][dim];
} else if constexpr (std::is_same<SBTData, HitSBTDataLine>::value) {
prd->pos = prd->pos + prd->dir * (optixGetRayTmax());
} else if constexpr (std::is_same_v<SBTData, HitSBTDataLine>) {
prd->pos = prd->pos + prd->dir * optixGetRayTmax();
if (primID == 0 || primID == 1) // x boundary
prd->dir[0] -= 2 * prd->dir[0];
}
Expand All @@ -48,7 +48,7 @@ applyPeriodicBoundary(PerRayData *prd, const SBTData *hsd, const int D) {
const unsigned int primID = optixGetPrimitiveIndex();
prd->numBoundaryHits++;

if constexpr (std::is_same<SBTData, HitSBTDataDisk>::value) {
if constexpr (std::is_same_v<SBTData, HitSBTDataDisk>) {
prd->pos =
prd->pos + prd->dir * (optixGetRayTmax() - launchParams.tThreshold);
if (primID == 0) { // xmin
Expand All @@ -60,12 +60,12 @@ applyPeriodicBoundary(PerRayData *prd, const SBTData *hsd, const int D) {
} else if (D == 3 && primID == 3) { // ymax
prd->pos[1] = hsd->point[2][1];
}
} else if constexpr (std::is_same<SBTData, HitSBTDataTriangle>::value) {
} else if constexpr (std::is_same_v<SBTData, HitSBTDataTriangle>) {
prd->pos = prd->pos + prd->dir * optixGetRayTmax();
unsigned dim = primID / 4;
prd->pos[dim] = hsd->vertex[hsd->index[primID ^ 2][0]][dim];
} else if constexpr (std::is_same<SBTData, HitSBTDataLine>::value) {
prd->pos = prd->pos + prd->dir * (optixGetRayTmax());
} else if constexpr (std::is_same_v<SBTData, HitSBTDataLine>) {
prd->pos = prd->pos + prd->dir * optixGetRayTmax();
if (primID == 0) { // xmin
prd->pos[0] = hsd->nodes[1][0];
} else if (primID == 1) { // xmax
Expand Down
3 changes: 1 addition & 2 deletions gpu/include/raygCallableConfig.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#pragma once

#include <optix_types.h>
#include <vcVectorType.hpp>
#include <string>

namespace viennaray::gpu {

Expand Down
10 changes: 2 additions & 8 deletions gpu/include/raygDiskGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ template <int D> struct DiskGeometry {

// 2 inputs: one for the geometry, one for the boundary
std::array<OptixBuildInput, 2> diskInput{};
std::array<uint32_t, 2> diskInputFlags{};

// ------------------- geometry input -------------------
// upload the model to the device: the builder
Expand Down Expand Up @@ -105,11 +104,6 @@ template <int D> struct DiskGeometry {
boundaryPointBuffer.allocUpload(boundaryMesh.nodes);
boundaryNormalBuffer.allocUpload(boundaryMesh.normals);

// create local variables, because we need a *pointer* to the
// device pointers
CUdeviceptr d_boundPoints = boundaryPointBuffer.dPointer();
CUdeviceptr d_boundNormals = boundaryNormalBuffer.dPointer();

// AABB build input for boundary disks
std::vector<OptixAabb> aabbBoundary(boundaryMesh.nodes.size());
for (size_t i = 0; i < boundaryMesh.nodes.size(); ++i) {
Expand Down Expand Up @@ -217,12 +211,12 @@ template <int D> struct DiskGeometry {
}

// Find maximum extent in each dimension
Vec3Df extent = bbMax - bbMin;
const Vec3Df extent = bbMax - bbMin;
float maxExtent = std::max(std::max(extent[0], extent[1]), extent[2]);

// has to be the same as in raygTrace.hpp (hitGroupRecords)
if constexpr (D == 2) {
boundaryMesh.radius = 0.5 * maxExtent;
boundaryMesh.radius = 0.5f * maxExtent;
} else {
boundaryMesh.radius = maxExtent * rayInternal::DiskFactor<D>;
}
Expand Down
8 changes: 1 addition & 7 deletions gpu/include/raygLineGeometry.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ struct LineGeometry {
CudaBuffer asBuffer;

/// build acceleration structure from triangle mesh
void buildAccel(DeviceContext &context, const LineMesh &mesh,
void buildAccel(const DeviceContext &context, const LineMesh &mesh,
LaunchParams &launchParams, const bool ignoreBoundary,
const float sourceOffset = 0.f) {
assert(context.deviceID != -1 && "Context not initialized.");
Expand All @@ -39,19 +39,13 @@ struct LineGeometry {

// 2 inputs: one for the geometry, one for the boundary
std::array<OptixBuildInput, 2> lineInput{};
std::array<uint32_t, 2> lineInputFlags{};

// ------------------- geometry input -------------------
// upload the model to the device: the builder
geometryNodesBuffer.allocUpload(mesh.nodes);
geometryLinesBuffer.allocUpload(mesh.lines);
geometryNormalsBuffer.allocUpload(mesh.normals);

// create local variables, because we need a *pointer* to the
// device pointers
CUdeviceptr d_geoNodes = geometryNodesBuffer.dPointer();
CUdeviceptr d_geoLines = geometryLinesBuffer.dPointer();

// AABB build input
std::vector<OptixAabb> aabb(mesh.lines.size());

Expand Down
18 changes: 9 additions & 9 deletions gpu/include/raygPerRayData.hpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#pragma once

#include <optix.h>

#include <vcRNG.hpp>
#include <vcVectorType.hpp>

#include <stdint.h>
#include <cstdint>

#define MAX_NEIGHBORS 8

Expand Down Expand Up @@ -44,29 +42,31 @@ struct PerRayData {

// this can only get compiled if included in a cuda kernel
#ifdef __CUDACC__
static __forceinline__ __device__ void *unpackPointer(uint32_t i0,
#include <optix.h>

static __device__ __forceinline__ void *unpackPointer(uint32_t i0,
uint32_t i1) {
const uint64_t uptr = static_cast<uint64_t>(i0) << 32 | i1;
void *ptr = reinterpret_cast<void *>(uptr);
return ptr;
}

static __forceinline__ __device__ void packPointer(void *ptr, uint32_t &i0,
static __device__ __forceinline__ void packPointer(void *ptr, uint32_t &i0,
uint32_t &i1) {
const uint64_t uptr = reinterpret_cast<uint64_t>(ptr);
i0 = uptr >> 32;
i1 = uptr & 0x00000000ffffffff;
}

static __forceinline__ __device__ PerRayData *getPRD() {
static __device__ __forceinline__ PerRayData *getPRD() {
const uint32_t u0 = optixGetPayload_0();
const uint32_t u1 = optixGetPayload_1();
return reinterpret_cast<PerRayData *>(unpackPointer(u0, u1));
}

static __device__ void initializeRNGState(PerRayData *prd,
unsigned int linearLaunchIndex,
unsigned int seed) {
static __device__ __forceinline__ void
initializeRNGState(PerRayData *prd, unsigned int linearLaunchIndex,
unsigned int seed) {
auto rngSeed = tea<3>(linearLaunchIndex, seed);
curand_init(rngSeed, 0, 0, &prd->RNGstate);
}
Expand Down
23 changes: 12 additions & 11 deletions gpu/include/raygSource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ getOrthonormalBasis(const Vec3Df &n) {
return {n, t, b2};
}

__device__ void initializeRayDirection(PerRayData *prd, const float power,
const uint16_t D) {
__device__ __forceinline__ void
initializeRayDirection(PerRayData *prd, const float power, const uint16_t D) {
// source direction
const float4 u = curand_uniform4(&prd->RNGstate); // (0,1]
const float tt = powf(u.w, 2.f / (power + 1.f));
Expand All @@ -44,9 +44,9 @@ __device__ void initializeRayDirection(PerRayData *prd, const float power,
Normalize(prd->dir);
}

__device__ void initializeRayDirection(PerRayData *prd, const float power,
const std::array<Vec3Df, 3> &basis,
const uint16_t D) {
__device__ __forceinline__ void
initializeRayDirection(PerRayData *prd, const float power,
const std::array<Vec3Df, 3> &basis, const uint16_t D) {
// source direction
do {
const float4 u = curand_uniform4(&prd->RNGstate); // (0,1]
Expand All @@ -72,9 +72,9 @@ __device__ void initializeRayDirection(PerRayData *prd, const float power,
Normalize(prd->dir);
}

__device__ void initializeRayPosition(PerRayData *prd,
const LaunchParams::SourcePlane &source,
const uint16_t D) {
__device__ __forceinline__ void
initializeRayPosition(PerRayData *prd, const LaunchParams::SourcePlane &source,
const uint16_t D) {
const float4 u = curand_uniform4(&prd->RNGstate); // (0,1]
prd->pos[0] =
source.minPoint[0] + u.x * (source.maxPoint[0] - source.minPoint[0]);
Expand All @@ -90,8 +90,9 @@ __device__ void initializeRayPosition(PerRayData *prd,
}

// This is slightly faster because there is only one call to curand_uniform4
__device__ void initializeRayPositionAndDirection(PerRayData *prd,
LaunchParams *launchParams) {
__device__ __forceinline__ void
initializeRayPositionAndDirection(PerRayData *prd,
const LaunchParams *launchParams) {
const float4 u = curand_uniform4(&prd->RNGstate); // (0,1]
prd->pos[0] = launchParams->source.minPoint[0] +
u.x * (launchParams->source.maxPoint[0] -
Expand All @@ -104,7 +105,7 @@ __device__ void initializeRayPositionAndDirection(PerRayData *prd,
const float tt = powf(u.w, 2.f / (launchParams->cosineExponent + 1.f));
float s, c;
__sincosf(2.f * M_PIf * u.z, &s, &c);
float sqrt1mtt = sqrtf(1 - tt);
const float sqrt1mtt = sqrtf(1 - tt);
prd->dir[0] = c * sqrt1mtt;
prd->dir[1] = s * sqrt1mtt;
prd->dir[2] = -1.f * sqrtf(tt);
Expand Down
Loading