Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
project(
ViennaRay
LANGUAGES CXX
VERSION 3.10.1)
VERSION 3.10.2)

# --------------------------------------------------------------------------------------------------------
# Library switches
Expand Down Expand Up @@ -99,7 +99,7 @@ include("cmake/cpm.cmake")

CPMAddPackage(
NAME ViennaCore
VERSION 1.9.0
VERSION 1.9.2
GIT_REPOSITORY "https://github.com/ViennaTools/ViennaCore"
OPTIONS "VIENNACORE_USE_GPU ${VIENNARAY_USE_GPU}")

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ We recommend using [CPM.cmake](https://github.com/cpm-cmake/CPM.cmake) to consum
* Installation with CPM

```cmake
CPMAddPackage("gh:viennatools/viennaray@3.10.1") # Use the latest release version
CPMAddPackage("gh:viennatools/viennaray@3.10.2") # Use the latest release version
```

* With a local installation
Expand Down
3 changes: 2 additions & 1 deletion gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ endif()
viennacore_add_optixir(GeneralPipelineDisk ${VIENNARAY_PIPELINE_DIR}/GeneralPipelineDisk.cu)
viennacore_add_optixir(GeneralPipelineTriangle ${VIENNARAY_PIPELINE_DIR}/GeneralPipelineTriangle.cu)
viennacore_add_optixir(GeneralPipelineLine ${VIENNARAY_PIPELINE_DIR}/GeneralPipelineLine.cu)
viennacore_add_optixir(ViennaRayCallableWrapper ${VIENNARAY_PIPELINE_DIR}/CallableWrapper.cu)
viennacore_add_optixir(ViennaRayCallableWrapper ${VIENNARAY_PIPELINE_DIR}/CallableWrapper.cu
DEPENDS "${VIENNARAY_PIPELINE_DIR}/Particle.cuh")

# Add the norm kernels
viennacore_add_ptx(normKernels ${VIENNARAY_CUDA_KERNELS})
Expand Down
2 changes: 1 addition & 1 deletion gpu/examples/trenchTriangles.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ int main(int argc, char **argv) {
std::vector<gpu::CallableConfig> cMap = {
{0, gpu::CallableSlot::COLLISION, "__direct_callable__particleCollision"},
{0, gpu::CallableSlot::REFLECTION,
"__direct_callable__particleReflectionConstSticking"}};
"__direct_callable__particleReflection"}};

gpu::TraceTriangle<NumericType, D> tracer(context);
tracer.setGeometry(mesh);
Expand Down
116 changes: 69 additions & 47 deletions include/viennaray/gpu/raygTrace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,31 +134,33 @@ template <class T, int D> class Trace {
// set up material specific sticking probabilities
materialStickingBuffer_.resize(particles_.size());
for (size_t i = 0; i < particles_.size(); i++) {
if (!particles_[i].materialSticking.empty()) {
auto &materialStickingMap = particles_[i].materialSticking;

if (!materialStickingMap.empty()) {
if (uniqueMaterialIds_.empty() || materialIdsBuffer_.sizeInBytes == 0) {
VIENNACORE_LOG_ERROR(
"Material IDs not set, when using material dependent "
"sticking.");
}
std::vector<float> materialSticking(uniqueMaterialIds_.size());
unsigned currentId = 0;
for (auto &matId : uniqueMaterialIds_) {
if (particles_[i].materialSticking.find(matId) ==
particles_[i].materialSticking.end()) {
materialSticking[currentId++] =
static_cast<float>(particles_[i].sticking);
std::vector<float> materialStickingArray(uniqueMaterialIds_.size());
for (size_t idx = 0; idx < uniqueMaterialIds_.size(); ++idx) {
if (auto it = materialStickingMap.find(uniqueMaterialIds_[idx]);
it != materialStickingMap.end()) {
materialStickingArray[idx] = static_cast<float>(it->second);
} else {
materialSticking[currentId++] =
static_cast<float>(particles_[i].materialSticking[matId]);
// not in map, use default sticking
materialStickingArray[idx] =
static_cast<float>(particles_[i].sticking);
}
}
materialStickingBuffer_[i].allocUpload(materialSticking);
materialStickingBuffer_[i].allocUpload(materialStickingArray);
}
}

// Every particle gets its own stream and launch parameters
std::vector<cudaStream_t> streams(particles_.size());
// Every particle gets its launch parameters
launchParamsBuffers_.resize(particles_.size());
assert(launchParamsBuffers_.size() == streams_.size() &&
"Number of streams not initialized correctly.");

if (particleMap_.empty()) {
VIENNACORE_LOG_ERROR("No particle name->particleType mapping provided.");
Expand Down Expand Up @@ -190,16 +192,14 @@ template <class T, int D> class Trace {
}

launchParamsBuffers_[i].allocUploadSingle(launchParams_);

CUDA_CHECK(StreamCreate(&streams[i]));
}

generateSBT();

#ifndef NDEBUG // Launch on single stream in debug mode
for (size_t i = 0; i < particles_.size(); i++) {
OPTIX_CHECK(optixLaunch(
pipeline_, streams[0],
pipeline_, streams_[0],
/*! parameters and SBT */
launchParamsBuffers_[i].dPointer(),
launchParamsBuffers_[i].sizeInBytes, &shaderBindingTable_,
Expand All @@ -209,7 +209,7 @@ template <class T, int D> class Trace {
#else // Launch on multiple streams in release mode
for (size_t i = 0; i < particles_.size(); i++) {
OPTIX_CHECK(optixLaunch(
pipeline_, streams[i],
pipeline_, streams_[i],
/*! parameters and SBT */
launchParamsBuffers_[i].dPointer(),
launchParamsBuffers_[i].sizeInBytes, &shaderBindingTable_,
Expand All @@ -218,12 +218,7 @@ template <class T, int D> class Trace {
}
#endif

// sync
for (auto &s : streams) {
CUDA_CHECK(StreamSynchronize(s));
CUDA_CHECK(StreamDestroy(s));
}

isSynced_ = false;
resultsDownloaded_ = false;
}

Expand All @@ -244,45 +239,50 @@ template <class T, int D> class Trace {

template <class NumericType>
void setMaterialIds(const std::vector<NumericType> &materialIds,
const bool mapToConsecutive = true,
const std::set<int> &pUniqueMaterialIds = {}) {
const bool mapToConsecutive = true) {
assert(materialIds.size() == launchParams_.numElements);

uniqueMaterialIds_.clear();
if (!pUniqueMaterialIds.empty()) {
uniqueMaterialIds_ = pUniqueMaterialIds;
// copy material IDs
if constexpr (std::is_same_v<NumericType, int>) {
uniqueMaterialIds_ = materialIds;
} else {
for (auto &matId : materialIds) {
uniqueMaterialIds_.insert(static_cast<int>(matId));
}
// cast to int
uniqueMaterialIds_.resize(materialIds.size());
std::transform(materialIds.begin(), materialIds.end(),
uniqueMaterialIds_.begin(),
[](auto x) { return static_cast<int>(x); });
}

std::vector<int> materialMap(uniqueMaterialIds_.begin(),
uniqueMaterialIds_.end());
materialMapBuffer_.allocUpload(materialMap);
// reduce to sorted unique IDs
std::sort(uniqueMaterialIds_.begin(), uniqueMaterialIds_.end());
uniqueMaterialIds_.erase(
std::unique(uniqueMaterialIds_.begin(), uniqueMaterialIds_.end()),
uniqueMaterialIds_.end());

if (mapToConsecutive) {
std::unordered_map<NumericType, unsigned> materialIdMap;
int currentId = 0;
for (auto &uniqueMaterialId : uniqueMaterialIds_) {
materialIdMap[uniqueMaterialId] = currentId++;
}
assert(currentId == materialIdMap.size());
std::vector<int> materialIdsMapped(launchParams_.numElements);

std::vector<int> materialIdsMapped(launchParams_.numElements);
if (mapToConsecutive) {
// mapping to consecutive IDs. Use binary search
#pragma omp parallel for
for (int i = 0; i < launchParams_.numElements; i++) {
materialIdsMapped[i] = materialIdMap[materialIds[i]];
int idx = 0, matId = static_cast<int>(materialIds[i]);
for (; idx < uniqueMaterialIds_.size(); ++idx) {
if (uniqueMaterialIds_[idx] == matId)
break;
}
materialIdsMapped[i] = idx;
}
materialIdsBuffer_.allocUpload(materialIdsMapped);
} else {
std::vector<int> materialIdsMapped(launchParams_.numElements);
// no mapping, just copy
#pragma omp parallel for
for (int i = 0; i < launchParams_.numElements; i++) {
materialIdsMapped[i] = static_cast<int>(materialIds[i]);
}
materialIdsBuffer_.allocUpload(materialIdsMapped);
}

// upload to device
materialMapBuffer_.allocUpload(uniqueMaterialIds_);
materialIdsBuffer_.allocUpload(materialIdsMapped);
}

void setNumberOfRaysPerPoint(const size_t pNumRays) {
Expand Down Expand Up @@ -396,6 +396,9 @@ template <class T, int D> class Trace {
}
}
directCallablePGs_.clear();
for (auto &s : streams_) {
CUDA_CHECK(StreamDestroy(s));
}
}

unsigned int prepareParticlePrograms() {
Expand Down Expand Up @@ -423,6 +426,12 @@ template <class T, int D> class Trace {
VIENNACORE_LOG_DEBUG("Number of flux arrays: " +
std::to_string(numFluxes_));

// each particle gets its own stream
streams_.resize(particles_.size());
for (size_t i = 0; i < particles_.size(); i++) {
CUDA_CHECK(StreamCreate(&streams_[i]));
}

return numFluxes_;
}

Expand Down Expand Up @@ -455,8 +464,19 @@ template <class T, int D> class Trace {
launchParams_.customData = (void *)d_params;
}

void syncStreams() {
if (isSynced_)
return;

for (auto &s : streams_) {
CUDA_CHECK(StreamSynchronize(s));
}
isSynced_ = true;
}

void downloadResults() {
if (!resultsDownloaded_) {
syncStreams();
results_.resize(launchParams_.numElements * numFluxes_);
resultBuffer_.download(results_.data(),
launchParams_.numElements * numFluxes_);
Expand Down Expand Up @@ -752,7 +772,7 @@ template <class T, int D> class Trace {
std::unordered_map<std::string, unsigned> particleMap_;
std::vector<CallableConfig> callableMap_;

std::set<int> uniqueMaterialIds_;
std::vector<int> uniqueMaterialIds_;
CudaBuffer materialIdsBuffer_;

float gridDelta_ = 0.0f;
Expand Down Expand Up @@ -788,6 +808,7 @@ template <class T, int D> class Trace {
// launch parameters
LaunchParams launchParams_;
std::vector<CudaBuffer> launchParamsBuffers_; // one per particle
std::vector<cudaStream_t> streams_;

// results Buffer
CudaBuffer resultBuffer_;
Expand All @@ -796,6 +817,7 @@ template <class T, int D> class Trace {
rayInternal::KernelConfig config_;
bool ignoreBoundary_ = false;
bool resultsDownloaded_ = false;
bool isSynced_ = false;

size_t numRays_ = 0;
unsigned numCellData_ = 0;
Expand Down
1 change: 1 addition & 0 deletions include/viennaray/gpu/raygTraceDisk.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ template <class T, int D> class TraceDisk final : public Trace<T, D> {
areaBuffer.allocUpload(areas);
CUdeviceptr d_areas = areaBuffer.dPointer();
CUdeviceptr d_data = this->resultBuffer_.dPointer();
this->syncStreams();

void *kernel_args[] = {
&d_data, &d_areas, &launchParams_.numElements,
Expand Down
1 change: 1 addition & 0 deletions include/viennaray/gpu/raygTraceLine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ template <class T, int D> class TraceLine final : public Trace<T, D> {
areaBuffer.allocUpload(areas);
CUdeviceptr d_areas = areaBuffer.dPointer();
CUdeviceptr d_data = this->resultBuffer_.dPointer();
this->syncStreams();

void *kernel_args[] = {
&d_data, &d_areas, &launchParams_.numElements,
Expand Down
1 change: 1 addition & 0 deletions include/viennaray/gpu/raygTraceTriangle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ template <class T, int D> class TraceTriangle final : public Trace<T, D> {
launchParams_.source.minPoint[0]) *
(launchParams_.source.maxPoint[1] - launchParams_.source.minPoint[1]);
}
this->syncStreams();
assert(this->resultBuffer_.sizeInBytes != 0 &&
"Normalization: Result buffer not initialized.");
CUdeviceptr d_data = this->resultBuffer_.dPointer();
Expand Down