From bccb9e76f21842da580a30b8514993d542e0059c Mon Sep 17 00:00:00 2001 From: skambapugithub Date: Fri, 6 Oct 2023 14:02:11 -0700 Subject: [PATCH 1/4] Added Quicksilver SYCL version --- CMakeLists.txt | 175 +++++++ LICENSE.md | 102 +--- README.md | 79 +-- src/AtomicMacro.hh | 187 +++++++ src/BulkStorage.hh | 13 +- src/CollisionEvent.hh | 208 +++++++- src/CoralBenchmark.cc | 19 +- src/CycleTracking.cc | 125 +---- src/CycleTracking.hh | 226 +++++++- src/DeclareMacro.hh | 123 +++-- src/DecompositionObject.cc | 3 +- src/DirectionCosine.cc.dp.cpp | 13 + src/DirectionCosine.hh | 111 ++-- src/EnergySpectrum.cc | 20 +- src/EnergySpectrum.hh | 16 +- src/FacetPair.hh | 12 +- src/GlobalFccGrid.cc.dp.cpp | 189 +++++++ src/GridAssignmentObject.cc.dp.cpp | 241 +++++++++ src/MCT.hh | 795 ++++++++++++++++++++++++++++- src/MC_Base_Particle.hh | 222 ++++---- src/MC_Cell_State.hh | 3 +- src/MC_Distance_To_Facet.hh | 54 +- src/MC_Domain.cc.dp.cpp | 480 +++++++++++++++++ src/MC_Domain.hh | 259 ++++++++-- src/MC_Facet_Adjacency.hh | 16 + src/MC_Facet_Crossing_Event.hh | 122 ++++- src/MC_Facet_Geometry.hh | 4 +- src/MC_Fast_Timer.cc.dp.cpp | 202 ++++++++ src/MC_Location.hh | 74 ++- src/MC_Nearest_Facet.hh | 47 +- src/MC_Particle.hh | 133 +++-- src/MC_Particle_Buffer.cc | 12 +- src/MC_Processor_Info.hh | 22 +- src/MC_RNG_State.hh | 141 +++++ src/MC_Segment_Outcome.hh | 318 +++++++++++- src/MC_SourceNow.hh | 206 +++++++- src/MC_Vector.hh | 114 +++-- src/MacroscopicCrossSection.hh | 160 +++++- src/Makefile.dpct | 216 ++++++++ src/MaterialDatabase.hh | 148 +++++- src/MemoryControl.hh | 88 +++- src/MonteCarlo.cc.dp.cpp | 206 ++++++++ src/MonteCarlo.hh | 24 + src/NuclearData.hh | 667 ++++++++++++++++++++++-- src/Parameters.cc | 317 ++++++------ src/Parameters.hh | 222 ++++---- src/ParticleVault.cc | 24 +- src/ParticleVault.hh | 128 ++++- src/ParticleVaultContainer.cc | 59 ++- src/ParticleVaultContainer.hh | 258 ++++++---- src/PopulationControl.cc.dp.cpp | 172 +++++++ src/QS_Vector.hh | 158 
++++-- src/Random.cc | 7 + src/Random.h | 33 ++ src/SendQueue.hh | 132 ++++- src/Tallies.cc | 177 ++++--- src/Tallies.hh | 465 +++++++++-------- src/cudaFunctions.cc.dp.cpp | 124 +++++ src/cudaFunctions.hh | 87 +++- src/cudaUtils.hh | 111 +++- src/initMC.cc.dp.cpp | 556 ++++++++++++++++++++ src/main.cc.dp.cpp | 521 +++++++++++++++++++ src/mpi_stubs_internal.hh | 146 ++++-- src/qs_assert.hh | 28 +- src/utilsMpi.cc.dp.cpp | 386 ++++++++++++++ src/utilsMpi.hh | 200 ++++---- 66 files changed, 9305 insertions(+), 1601 deletions(-) create mode 100644 CMakeLists.txt create mode 100644 src/AtomicMacro.hh create mode 100644 src/DirectionCosine.cc.dp.cpp create mode 100644 src/GlobalFccGrid.cc.dp.cpp create mode 100644 src/GridAssignmentObject.cc.dp.cpp create mode 100644 src/MC_Domain.cc.dp.cpp create mode 100644 src/MC_Fast_Timer.cc.dp.cpp create mode 100644 src/Makefile.dpct create mode 100644 src/MonteCarlo.cc.dp.cpp create mode 100644 src/PopulationControl.cc.dp.cpp create mode 100644 src/Random.cc create mode 100644 src/Random.h create mode 100644 src/cudaFunctions.cc.dp.cpp create mode 100644 src/initMC.cc.dp.cpp create mode 100644 src/main.cc.dp.cpp create mode 100644 src/utilsMpi.cc.dp.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 00000000..f6d39b14 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,175 @@ +# ModificationsCopyright(C) 2023 Intel Corporation + +# Redistributionand use in source and binary forms, with or without modification, + +# arepermitted provided that the following conditions are met: + +# 1. Redistributions of source code must retain the above copyright notice, + +# thislist of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright notice, + +# thislist of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# 3. 
Neither the name of the copyright holder nor the names of its contributors + +# may be used to endorse or promote products derived from this software + +# without specific prior written permission. + +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + +# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS + +# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, + +# OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT + +# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + +# OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + +# OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + +# EVEN + +# IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +# SPDX-License-Identifier: BSD-3-Clause + +cmake_minimum_required(VERSION 3.10) # CMAKE_CXX_STANDARD 17 needs >= 3.8, string(APPEND) needs >= 3.4 +project(qs LANGUAGES CXX) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +option(ENABLE_KERNEL_PROFILING "Build using kernel profiling" OFF) +option(GPU_AOT "Build AOT for Intel GPU" OFF) # OFF, PVC/pvc, or a custom AOT flag string appended verbatim (see the GPU_AOT branch below) +option(USE_NVIDIA_BACKEND "Build for NVIDIA backend" OFF) +option(USE_AMDHIP_BACKEND "Build for AMD HIP backend" OFF) # OFF, or the value passed to --offload-arch (e.g. gfx90a) +option(USE_SM "Specifies which streaming multiprocessor architecture to use" OFF) # numeric suffix of --cuda-gpu-arch=sm_<N>, e.g. 80 + +include_directories( + ${CMAKE_SOURCE_DIR}/src/ +) + +set(SRC_LIST + ${CMAKE_SOURCE_DIR}/src/CoralBenchmark.cc + ${CMAKE_SOURCE_DIR}/src/CycleTracking.cc + ${CMAKE_SOURCE_DIR}/src/DecompositionObject.cc + ${CMAKE_SOURCE_DIR}/src/DirectionCosine.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/EnergySpectrum.cc + ${CMAKE_SOURCE_DIR}/src/GlobalFccGrid.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/GridAssignmentObject.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/InputBlock.cc + ${CMAKE_SOURCE_DIR}/src/MC_Base_Particle.cc + ${CMAKE_SOURCE_DIR}/src/MC_Domain.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/MC_Fast_Timer.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/MC_Particle_Buffer.cc + ${CMAKE_SOURCE_DIR}/src/MeshPartition.cc + ${CMAKE_SOURCE_DIR}/src/MonteCarlo.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/MpiCommObject.cc + ${CMAKE_SOURCE_DIR}/src/Parameters.cc + ${CMAKE_SOURCE_DIR}/src/ParticleVault.cc + ${CMAKE_SOURCE_DIR}/src/ParticleVaultContainer.cc + ${CMAKE_SOURCE_DIR}/src/PopulationControl.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/SharedMemoryCommObject.cc + ${CMAKE_SOURCE_DIR}/src/Tallies.cc + ${CMAKE_SOURCE_DIR}/src/cmdLineParser.cc + ${CMAKE_SOURCE_DIR}/src/cudaFunctions.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/initMC.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/main.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/parseUtils.cc + ${CMAKE_SOURCE_DIR}/src/utils.cc + ${CMAKE_SOURCE_DIR}/src/utilsMpi.cc.dp.cpp + ${CMAKE_SOURCE_DIR}/src/Random.cc +) + +# Declare the executable target built from your sources +# add_executable(gpu_example test.cc) + +# enable kernel 
profiling on demand +if(GEN9) + message("-- NOTE: Building for GEN9 architecture") + add_definitions(-DGEN9) +endif() + +if(ENABLE_KERNEL_PROFILING) + message("-- NOTE: Enabling Kernel time measurement") + add_definitions(-DENABLE_KERNEL_PROFILING) +endif() + +if(DEBUG) + message("-- NOTE: Enabling debug mode") + add_definitions(-DDEBUG) +endif() + +set(DEF_INTEL_WL_CXX_FLAGS " -DRUN_ON_GPU=1 -DHAVE_SYCL=1 ") +set(DEF_NVIDIA_WL_CXX_FLAGS " -DHAVE_SYCL=1 ") +set(DEF_AMD_WL_CXX_FLAGS " -DUNIFIED_HOST=1 -DHAVE_SYCL=1 ") + +set(DEF_INTEL_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ") +set(DEF_NVIDIA_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ") +set(DEF_AMD_GENERAL_CXX_FLAGS " -O3 -fsycl -ffast-math ") + + +# -DCMAKE_CXX_FLAGS=" -blah -blah " overrides the default flags (BOTH general and WL specific) +# -DOVERRIDE_GENERAL_CXX_FLAGS=" -blah -blah " overrides the general flags only (and not the workload specific flags) +# passing in both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS is not allowed, in order to prevent ambiguity + +#set(USE_DEFAULT_FLAGS OFF) +if(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "" AND NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(FATAL_ERROR "Both CMAKE_CXX_FLAGS and OVERRIDE_GENERAL_CXX_FLAGS cannot be passed in together") +elseif("${CMAKE_CXX_FLAGS}" STREQUAL "" AND "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "Using DEFAULT compilation flags") + set(INTEL_GPU_CXX_FLAGS "${DEF_INTEL_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}") + set(NVIDIA_GPU_CXX_FLAGS "${DEF_NVIDIA_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}") + set(AMD_GPU_CXX_FLAGS "${DEF_AMD_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}") +elseif(NOT "${OVERRIDE_GENERAL_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL compilation flags") + set(INTEL_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_INTEL_WL_CXX_FLAGS}") + set(NVIDIA_GPU_CXX_FLAGS "${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_NVIDIA_WL_CXX_FLAGS}") + set(AMD_GPU_CXX_FLAGS 
"${OVERRIDE_GENERAL_CXX_FLAGS} ${DEF_AMD_WL_CXX_FLAGS}") +elseif(NOT "${CMAKE_CXX_FLAGS}" STREQUAL "") + message(STATUS "OVERRIDING GENERAL and WORKLOAD SPECIFIC compilation flags") + set(INTEL_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(NVIDIA_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + set(AMD_GPU_CXX_FLAGS "${CMAKE_CXX_FLAGS}") +endif() + +if(GPU_AOT) + message(STATUS "Enabling INTEL backend") + set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") + if(("${GPU_AOT}" STREQUAL "pvc") OR ("${GPU_AOT}" STREQUAL "PVC")) # quoted so the value is compared literally (CMP0054) + message(STATUS "Enabling Intel GPU AOT compilation for ${GPU_AOT}") + string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=spir64_gen -Xs \"-device 0x0bd5 -revision_id 0x2f\" -Xs \"-options -ze-opt-large-register-file\" ") + else() + message(STATUS "Using custom AOT compilation flag ${GPU_AOT}") + string(APPEND CMAKE_CXX_FLAGS " ${GPU_AOT} ") # User should be aware of advanced AOT compilation flags + endif() +elseif(USE_NVIDIA_BACKEND) + message(STATUS "Enabling NVIDIA backend") + set(CMAKE_CXX_FLAGS "${NVIDIA_GPU_CXX_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_${USE_SM} ") +elseif(USE_AMDHIP_BACKEND) + message(STATUS "Enabling AMD HIP backend for ${USE_AMDHIP_BACKEND} AMD architecture") + set(CMAKE_CXX_FLAGS "${AMD_GPU_CXX_FLAGS}") + string(APPEND CMAKE_CXX_FLAGS " -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=${USE_AMDHIP_BACKEND} ") +else() + # JIT case + message(STATUS "Enabling INTEL backend") + set(CMAKE_CXX_FLAGS "${INTEL_GPU_CXX_FLAGS}") +endif() + +message(STATUS "CXX Compilation flags set to: ${CMAKE_CXX_FLAGS}") + +add_executable(qs ${SRC_LIST}) +target_link_libraries(qs sycl stdc++ stdc++fs) diff --git a/LICENSE.md b/LICENSE.md index 6120dfe5..a52e8ee8 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,74 +1,28 @@ -LICENSE -======= - -Copyright (c) 2017, Lawrence Livermore National Security, LLC. 
- -Produced at the Lawrence Livermore National Laboratory - -Written by David Richards [richards12@llnl.gov], Ryan Bleile, -Patrick Brantley, Shawn Dawson, Scott McKinley, Matt O'Brien - -LLNL-CODE-684037. - -All rights reserved. - -This file is part of Quicksilver. For details, see - http://www.github.com/LLNL/Quicksilver. Please also read -the Additional BSD Notice below. - -Redistribution and use in source and binary forms, with or -without modification, are permitted provided that the following -conditions are met: - -* Redistributions of source code must retain the above copyright - notice, this list of conditions and the disclaimer below. - -* Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the disclaimer (as noted below) - in the documentation and/or other materials provided with the - distribution. - -* Neither the name of the LLNS/LLNL nor the names of its contributors - may be used to endorse or promote products derived from this - software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND -CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL LAWRENCE LIVERMORE NATIONAL -SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR CONTRIBUTORS BE -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING -IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF -THE POSSIBILITY OF SUCH DAMAGE. - - -Additional BSD Notice ---------------------- - -1. 
This notice is required to be provided under our contract with the - U.S. Department of Energy (DOE). This work was produced at Lawrence - Livermore National Laboratory under Contract No. DE-AC52-07NA27344 - with the DOE. - -2. Neither the United States Government nor Lawrence Livermore - National Security, LLC nor any of their employees, makes any - warranty, express or implied, or assumes any liability or - responsibility for the accuracy, completeness, or usefulness of any - information, apparatus, product, or process disclosed, or - represents that its use would not infringe privately-owned rights. - -3. Also, reference herein to any specific commercial products, - process, or services by trade name, trademark, manufacturer or - otherwise does not necessarily constitute or imply its endorsement, - recommendation, or favoring by the United States Government or - Lawrence Livermore National Security, LLC. The views and opinions - of authors expressed herein do not necessarily state or reflect - those of the United States Government or Lawrence Livermore - National Security, LLC, and shall not be used for advertising or - product endorsement purposes. +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause diff --git a/README.md b/README.md index 5169cf75..a96b328e 100644 --- a/README.md +++ b/README.md @@ -19,35 +19,47 @@ vectorization potential. For more information, visit the [LLNL co-design pages.](https://codesign.llnl.gov/quicksilver.php) +**To build sycl version** -Building Quicksilver --------------------- +source /path/to/oneAPI package -Instructions to build Quicksilver can be found in the -Makefile. Quicksilver is a relatively easy to build code with no -external dependencies (except MPI and OpenMP). You should be able to -build Quicksilver on nearly any system by customizing the values of -only four variables in the Makefile: +mkdir build && cd build -* CXX The name of the C++ compiler (with path if necessary) - Quicksilver uses C++11 features, so a C++11 compliant compiler - should be used. +CXX=icpx cmake ../ -DGPU_AOT=PVC -* CXXFLAGS Command line switches to pass to the C++ compiler when - compiling objects *and* when linking the executable. 
+make -sj -* CPPFLAGS Command line switches to pass to the compiler *only* when - compiling objects +**To build sycl version on nvidia backend** -* LDFLAGS Command line switches to pass to the compiler *only* - when linking the executable +source /path/to/clang/ -Sample definitions for a number of common systems are provided. +mkdir build && cd build -Quicksilver recognizes a number of pre-processor macros that enable or -disable various code features such as MPI, OpenMP, etc. These are -described in the Makefile. +//For A100 Machine +CC=clang CXX=clang++ cmake ../ -DUSE_NVIDIA_BACKEND=YES -DUSE_SM=80 + +//For H100 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_NVIDIA_BACKEND=YES -DUSE_SM=90 + +make -sj + +**To build sycl version on amd backend** + +source /path/to/clang/ + +mkdir build && cd build + +//For MI-100 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_AMDHIP_BACKEND=gfx908 + +//For MI-250 Machine + +CC=clang CXX=clang++ cmake ../ -DUSE_AMDHIP_BACKEND=gfx90a + +make -sj Running Quicksilver ------------------- @@ -63,18 +75,27 @@ Quicksilver also has the property that the output of every run is a valid input file. Hence you can repeat any run for which you have the output file by using that output as an input file. +For benchmarking run the example "Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp" -License and Distribution Information ------------------------------------- +**To run sycl version** + +export QS_DEVICE=GPU + +./qs -i ../Examples/AllScattering/scatteringOnly.inp -Quicksilver is available [on github](https://github.com/LLNL/Quicksilver) +**To run sycl version on nvidia backend** +export QS_DEVICE=GPU -Quicksilver is open source software with a BSD license. See -[LICENSE.md](https://github.com/LLNL/Quicksilver/blob/master/LICENSE.md) +./qs -i ../Examples/AllScattering/scatteringOnly.inp -This work was performed under the auspices of the U.S. Department of -Energy by Lawrence Livermore National Laboratory under Contract -DE-AC52-07NA27344. 
+**To run sycl version on amd backend** + +export QS_DEVICE=GPU + +ONEAPI_DEVICE_SELECTOR=hip:* ./qs -i ../Examples/AllScattering/scatteringOnly.inp + +License and Distribution Information +------------------------------------ -LLNL-CODE-684037 +Quicksilver is available [on github](https://github.com/LLNL/Quicksilver) \ No newline at end of file diff --git a/src/AtomicMacro.hh b/src/AtomicMacro.hh new file mode 100644 index 00000000..80e5876b --- /dev/null +++ b/src/AtomicMacro.hh @@ -0,0 +1,187 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +// Determine which atomics to use based on platform being compiled for +// + +#ifndef ATOMICS_HD +#define ATOMICS_HD + +#include +#include +#include +#include + +inline double ull2d(const unsigned long long &val) +{ + return *((double *)&val); +} + +#if defined(HAVE_SYCL) + +// If in a CUDA GPU section use the CUDA atomics +#ifdef __SYCL_DEVICE_ONLY__ + +/// Atomically add the value operand to the value at the addr and assign the +/// result to the value at addr. +/// \param [in, out] addr The pointer to the data. +/// \param operand The value to add to the value at \p addr. +/// \param memoryOrder The memory ordering used. +/// \returns The value at the \p addr before the call. +template +inline T atomic_fetch_add(T *addr, T operand) +{ + auto atm = + sycl::atomic_ref(addr[0]); + return atm.fetch_add(operand); +} + +// Currently not atomic here. But its only used when it does not necissarially need to be atomic. 
+#define ATOMIC_WRITE(x, v) \ + x = v; + +#define ATOMIC_ADD(x, v) \ + { \ + using Ty = std::remove_reference::type; \ + atomic_fetch_add(&x, v); \ + } + +#define ATOMIC_UPDATE(x) \ + { \ + using Ty = std::remove_reference::type; \ + Ty inc = 1; \ + atomic_fetch_add(&x, inc); \ + } + +#define ATOMIC_CAPTURE(x, v, p) \ + { \ + using Ty = typename std::remove_reference::type; \ + \ + p = atomic_fetch_add(&x, v); \ + } +// If in a CPU OpenMP section use the OpenMP atomics +#elif defined(USE_OPENMP_ATOMICS) +#define ATOMIC_WRITE(x, v) \ + _Pragma("omp atomic write") \ + x = v; + +#define ATOMIC_ADD(x, v) \ + _Pragma("omp atomic") \ + x += v; + +#define ATOMIC_UPDATE(x) \ + _Pragma("omp atomic update") \ + x++; + +#define ATOMIC_CAPTURE(x, v, p) \ + _Pragma("omp atomic capture") \ + { \ + p = x; \ + x = x + v; \ + } + +// If in a serial section, no need to use atomics +#else +#define ATOMIC_WRITE(x, v) \ + x = v; + +#define ATOMIC_UPDATE(x) \ + x++; + +#define ATOMIC_ADD(x, v) \ + x += v; + +#define ATOMIC_CAPTURE(x, v, p) \ + { \ + p = x; \ + x = x + v; \ + } + +#endif + +// If in a OpenMP section use the OpenMP atomics +#elif defined(USE_OPENMP_ATOMICS) +#define ATOMIC_WRITE(x, v) \ + _Pragma("omp atomic write") \ + x = v; + +#define ATOMIC_ADD(x, v) \ + _Pragma("omp atomic") \ + x += v; + +#define ATOMIC_UPDATE(x) \ + _Pragma("omp atomic update") \ + x++; + +#define ATOMIC_CAPTURE(x, v, p) \ + _Pragma("omp atomic capture") \ + { \ + p = x; \ + x = x + v; \ + } + +// If in a serial section, no need to use atomics +#else +#define ATOMIC_WRITE(x, v) \ + x = v; + +#define ATOMIC_UPDATE(x) \ + x++; + +#define ATOMIC_ADD(x, v) \ + x += v; + +#define ATOMIC_CAPTURE(x, v, p) \ + { \ + p = x; \ + x = x + v; \ + } +#endif + +#endif diff --git a/src/BulkStorage.hh b/src/BulkStorage.hh index d0afe9e5..b0977c1e 100644 --- a/src/BulkStorage.hh +++ b/src/BulkStorage.hh @@ -26,16 +26,17 @@ class BulkStorage ~BulkStorage() { + // Check for instances that never allocated memory. 
+ // I'm not exactly sure how this can happen, but it does. + if (_bulkStorage == 0) + return; + --(*_refCount); if (*_refCount > 0) return; - + + MemoryControl::deallocate(_bulkStorage, _capacity, _memPolicy); delete _refCount; - - // Catch the case that the storage was never allocated. This - // happens when setCapacity is never called on this instance. - if (_bulkStorage != 0) - MemoryControl::deallocate(_bulkStorage, _capacity, _memPolicy); } /// Needed for copy-swap idiom diff --git a/src/CollisionEvent.hh b/src/CollisionEvent.hh index 58c9a5cd..431850a6 100644 --- a/src/CollisionEvent.hh +++ b/src/CollisionEvent.hh @@ -1,15 +1,219 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef COLLISION_EVENT_HH #define COLLISION_EVENT_HH +#include +#include "CollisionEvent.hh" +#include "MC_Particle.hh" +#include "NuclearData.hh" +#include "DirectionCosine.hh" +#include "MonteCarlo.hh" +#include "MC_Cell_State.hh" +#include "MaterialDatabase.hh" +#include "MacroscopicCrossSection.hh" +#include "MC_Base_Particle.hh" +#include "ParticleVaultContainer.hh" +#include "PhysicalConstants.hh" #include "DeclareMacro.hh" +#include "AtomicMacro.hh" + +#define MAX_PRODUCTION_SIZE 4 class MonteCarlo; class MC_Particle; -HOST_DEVICE -bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index ); +inline HOST_DEVICE void updateTrajectory(double energy, double angle, MC_Particle &particle) +{ + particle.kinetic_energy = energy; + double cosTheta = angle; + double randomNumber = rngSample(&particle.random_number_seed); + double phi = 2 * 3.14159265 * randomNumber; + double sinPhi = sycl::sin(phi); + double cosPhi = sycl::cos(phi); + double sinTheta = sycl::sqrt((1.0 - (cosTheta * cosTheta))); + particle.direction_cosine.Rotate3DVector(sinTheta, cosTheta, sinPhi, cosPhi); + double speed = + (PhysicalConstants::_speedOfLight * + sycl::sqrt( + (1.0 - ((PhysicalConstants::_neutronRestMassEnergy * + PhysicalConstants::_neutronRestMassEnergy) / + ((energy + PhysicalConstants::_neutronRestMassEnergy) * + (energy + PhysicalConstants::_neutronRestMassEnergy)))))); + particle.velocity.x = speed * 
particle.direction_cosine.alpha; + particle.velocity.y = speed * particle.direction_cosine.beta; + particle.velocity.z = speed * particle.direction_cosine.gamma; + randomNumber = rngSample(&particle.random_number_seed); + particle.num_mean_free_paths = -1.0 * sycl::log(randomNumber); +} HOST_DEVICE_END +inline HOST_DEVICE bool CollisionEvent(MonteCarlo *monteCarlo, MC_Particle &mc_particle, unsigned int tally_index, int particle_index, int *tallyArray) +{ + + const MC_Cell_State &cell = monteCarlo->domain_d[mc_particle.domain].cell_state[mc_particle.cell]; + + int globalMatIndex = cell._material; + + //------------------------------------------------------------------------------------------------------------------ + // Pick the isotope and reaction. + //------------------------------------------------------------------------------------------------------------------ + double randomNumber = rngSample(&mc_particle.random_number_seed); + double totalCrossSection = mc_particle.totalCrossSection; + double currentCrossSection = totalCrossSection * randomNumber; + int selectedIso = -1; + int selectedUniqueNumber = -1; + int selectedReact = -1; + + int numIsos = (int)monteCarlo->_material_d[globalMatIndex]._isosize; + + for (int isoIndex = 0; isoIndex < numIsos && currentCrossSection >= 0; isoIndex++) + { + + int uniqueNumber = monteCarlo->_material_d[globalMatIndex]._iso[isoIndex]._gid; + int numReacts = monteCarlo->_nuclearData_d->getNumberReactions(uniqueNumber); + + for (int reactIndex = 0; reactIndex < numReacts; reactIndex++) + { + currentCrossSection -= macroscopicCrossSection(monteCarlo, reactIndex, mc_particle.domain, mc_particle.cell, + isoIndex, mc_particle.energy_group); + if (currentCrossSection < 0) + { + selectedIso = isoIndex; + selectedUniqueNumber = uniqueNumber; + selectedReact = reactIndex; + break; + } + } + } + qs_assert(selectedIso != -1); + + 
//------------------------------------------------------------------------------------------------------------------ + // Do the collision. + //------------------------------------------------------------------------------------------------------------------ + double energyOut[MAX_PRODUCTION_SIZE]; + double angleOut[MAX_PRODUCTION_SIZE]; + int nOut = 0; + + double mat_mass = monteCarlo->_material_d[globalMatIndex]._mass; + monteCarlo->_nuclearData_d->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact].sampleCollision( + mc_particle.kinetic_energy, mat_mass, &energyOut[0], &angleOut[0], nOut, &(mc_particle.random_number_seed), MAX_PRODUCTION_SIZE); + //-------------------------------------------------------------------------------------------------------------- + // Post-Collision Phase 1: + // Tally the collision + //-------------------------------------------------------------------------------------------------------------- + + // Set the reaction for this particle. + + ATOMIC_UPDATE(tallyArray[tally_index * NUM_TALLIES + 3]); + + NuclearDataReaction::Enum reactionType = (NuclearDataReaction::Enum)monteCarlo->_nuclearData_d->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact]._reactionType; + + switch (reactionType) + { + case NuclearDataReaction::Scatter: + + ATOMIC_UPDATE(tallyArray[tally_index * NUM_TALLIES + 4]); + + break; + case NuclearDataReaction::Absorption: + + ATOMIC_UPDATE(tallyArray[tally_index * NUM_TALLIES + 5]); + + break; + case NuclearDataReaction::Fission: + + ATOMIC_UPDATE(tallyArray[tally_index * NUM_TALLIES + 6]); + ATOMIC_ADD(tallyArray[tally_index * NUM_TALLIES + 7], nOut); + + break; + case NuclearDataReaction::Undefined: +#ifdef DEBUG + printf("reactionType invalid\n"); #endif + qs_assert(false); + } + + if (nOut == 0) + { + return false; + } + for (int secondaryIndex = 1; secondaryIndex < nOut; secondaryIndex++) + { + // Newly created particles start as copies of their parent + MC_Particle 
secondaryParticle = mc_particle; + secondaryParticle.random_number_seed = rngSpawn_Random_Number_Seed(&mc_particle.random_number_seed); + secondaryParticle.identifier = secondaryParticle.random_number_seed; + updateTrajectory(energyOut[secondaryIndex], angleOut[secondaryIndex], secondaryParticle); + + // Atomic capture will be called here + monteCarlo->_particleVaultContainer->addExtraParticle(secondaryParticle); + } + + updateTrajectory(energyOut[0], angleOut[0], mc_particle); + + // If a fission reaction produces secondary particles we also add the original + // particle to the "extras" that we will handle later. This avoids the + // possibility of a particle doing multiple fission reactions in a single + // kernel invocation and overflowing the extra storage with secondary particles. + if (nOut > 1) + { + // Atomic capture will be called here + monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle); + } + + // If we are still tracking this particle the update its energy group + + mc_particle.energy_group = monteCarlo->_nuclearData_d->getEnergyGroup(mc_particle.kinetic_energy); + + return nOut == 1; +} + +HOST_DEVICE_END + +#endif diff --git a/src/CoralBenchmark.cc b/src/CoralBenchmark.cc index d34c360f..bf01f3e0 100644 --- a/src/CoralBenchmark.cc +++ b/src/CoralBenchmark.cc @@ -1,3 +1,18 @@ +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + #include "CoralBenchmark.hh" #include "MonteCarlo.hh" #include "Parameters.hh" @@ -53,7 +68,7 @@ void BalanceRatioTest( MonteCarlo *monteCarlo, Parameters &params ) uint64_t absorb = balTally._absorb; uint64_t fission = balTally._fission; uint64_t scatter = balTally._scatter; - double absorbRatio, fissionRatio, scatterRatio; + double absorbRatio = 1.0, fissionRatio = 1.0, scatterRatio = 1.0; double percent_tolerance = 1.0; @@ -143,7 +158,7 @@ void BalanceEventTest( MonteCarlo *monteCarlo ) else { fprintf(stdout, " FAIL:: Collision to Facet Crossing Ratio balanced NOT maintained within %g%% tolerance\n", tolerance ); - fprintf(stdout, "\tFacet Crossing: %" PRIu64 "\tCollision: %" PRIu64 "\tRatio: %g\n", facetCrossing, collisions, ratio ); + fprintf(stdout, "\tFacet Crossing: %lu\tCollision: %lu\tRatio: %g\n", facetCrossing, collisions, ratio ); } diff --git a/src/CycleTracking.cc b/src/CycleTracking.cc index c401cd73..efddd7b1 100644 --- a/src/CycleTracking.cc +++ b/src/CycleTracking.cc @@ -1,3 +1,18 @@ +/* +Copyright 2019 Advanced
Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + #include "CycleTracking.hh" #include "MonteCarlo.hh" #include "ParticleVaultContainer.hh" @@ -7,115 +22,7 @@ #include "MC_Facet_Crossing_Event.hh" #include "MCT.hh" #include "DeclareMacro.hh" -#include "QS_atomics.hh" +#include "AtomicMacro.hh" #include "macros.hh" #include "qs_assert.hh" -HOST_DEVICE -void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault ) -{ - MC_Particle mc_particle; - - // Copy a single particle from the particle vault into mc_particle - MC_Load_Particle(monteCarlo, mc_particle, processingVault, particle_index); - - // set the particle.task to the index of the processed vault the particle will census into. - mc_particle.task = 0;//processed_vault; - - // loop over this particle until we cannot do anything more with it on this processor - CycleTrackingFunction( monteCarlo, mc_particle, particle_index, processingVault, processedVault ); - - //Make sure this particle is marked as completed - processingVault->invalidateParticle( particle_index ); -} -HOST_DEVICE_END - -HOST_DEVICE -void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault) -{ - bool keepTrackingThisParticle = false; - unsigned int tally_index = (particle_index) % monteCarlo->_tallies->GetNumBalanceReplications(); - unsigned int flux_tally_index = (particle_index) % monteCarlo->_tallies->GetNumFluxReplications(); - unsigned int cell_tally_index = (particle_index) % monteCarlo->_tallies->GetNumCellTallyReplications(); - do - { - // Determine the outcome of a particle at the end of this segment such as: - // - // (0) Undergo a collision within the current cell, - // (1) Cross a facet of the current cell, - // (2) Reach the end of the time step and enter census, - // -#ifdef EXPONENTIAL_TALLY - monteCarlo->_tallies->TallyCellValue( exp(rngSample(&mc_particle.random_number_seed)) , mc_particle.domain, 
cell_tally_index, mc_particle.cell); -#endif - MC_Segment_Outcome_type::Enum segment_outcome = MC_Segment_Outcome(monteCarlo, mc_particle, flux_tally_index); - - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._numSegments); - - mc_particle.num_segments += 1.; /* Track the number of segments this particle has - undergone this cycle on all processes. */ - switch (segment_outcome) { - case MC_Segment_Outcome_type::Collision: - { - // The particle undergoes a collision event producing: - // (0) Other-than-one same-species secondary particle, or - // (1) Exactly one same-species secondary particle. - if (CollisionEvent(monteCarlo, mc_particle, tally_index ) == MC_Collision_Event_Return::Continue_Tracking) - { - keepTrackingThisParticle = true; - } - else - { - keepTrackingThisParticle = false; - } - } - break; - - case MC_Segment_Outcome_type::Facet_Crossing: - { - // The particle has reached a cell facet. - MC_Tally_Event::Enum facet_crossing_type = MC_Facet_Crossing_Event(mc_particle, monteCarlo, particle_index, processingVault); - - if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Transit_Exit) - { - keepTrackingThisParticle = true; // Transit Event - } - else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Escape) - { - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._escape); - mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; - mc_particle.species = -1; - keepTrackingThisParticle = false; - } - else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Reflection) - { - MCT_Reflect_Particle(monteCarlo, mc_particle); - keepTrackingThisParticle = true; - } - else - { - // Enters an adjacent cell in an off-processor domain. - //mc_particle.species = -1; - keepTrackingThisParticle = false; - } - } - break; - - case MC_Segment_Outcome_type::Census: - { - // The particle has reached the end of the time step. 
- processedVault->pushParticle(mc_particle); - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._census); - keepTrackingThisParticle = false; - break; - } - - default: - qs_assert(false); - break; // should this be an error - } - - } while ( keepTrackingThisParticle ); -} -HOST_DEVICE_END - diff --git a/src/CycleTracking.hh b/src/CycleTracking.hh index 4dea37a0..643c2b55 100644 --- a/src/CycleTracking.hh +++ b/src/CycleTracking.hh @@ -1,3 +1,51 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef CYCLETRACKING_H_ +#define CYCLETRACKING_H_ + #include "DeclareMacro.hh" // Forward Declaration @@ -5,10 +53,176 @@ class ParticleVault; class MonteCarlo; class MC_Particle; -HOST_DEVICE -void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault ); -HOST_DEVICE_END +#include "CycleTracking.hh" +#include "MonteCarlo.hh" +#include "ParticleVaultContainer.hh" +#include "ParticleVault.hh" +#include "MC_Segment_Outcome.hh" +#include "CollisionEvent.hh" +#include "MC_Facet_Crossing_Event.hh" +#include "MCT.hh" +#include "DeclareMacro.hh" +#include "AtomicMacro.hh" +#include "macros.hh" +#include "qs_assert.hh" + +#ifdef MaxIt + +#else +#define MaxIt INT_MAX +#endif +const int NIt = MaxIt; + + +inline HOST_DEVICE_SYCL +void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault, int * tallyArray) +{ + bool keepTrackingThisParticle = true; + unsigned int tally_index = (particle_index) % monteCarlo->_tallies->GetNumBalanceReplications(); + unsigned int flux_tally_index = (particle_index) % monteCarlo->_tallies->GetNumFluxReplications(); + unsigned int cell_tally_index = (particle_index) % monteCarlo->_tallies->GetNumCellTallyReplications(); + + int i1 = 0; + // The while loop will exit after a particle reaches census or goes through MaxIters iterations, whichever comes first. If a particle reaches MaxIters it will be added to the ExtraVaults and processed in a later kernel. MaxIt can be defined in the makefile, otherwise it defaults to a large number that should ensure that it is never reached. 
+ int MaxIters = MaxIt; + + do + { + // Determine the outcome of a particle at the end of this segment such as: + // + // (0) Undergo a collision within the current cell, + // (1) Cross a facet of the current cell, + // (2) Reach the end of the time step and enter census, + // + MC_Segment_Outcome_type::Enum segment_outcome = MC_Segment_Outcome_type::Max_Number; + i1 += 1; + if (keepTrackingThisParticle) + { + +#ifdef EXPONENTIAL_TALLY + monteCarlo->_tallies->TallyCellValue(exp(rngSample(&mc_particle.random_number_seed)), mc_particle.domain, cell_tally_index, mc_particle.cell); +#endif + segment_outcome = MC_Segment_Outcome(monteCarlo, mc_particle, flux_tally_index); + +#ifdef __SYCL_DEVICE_ONLY__ + ATOMIC_UPDATE(tallyArray[tally_index*NUM_TALLIES+0]); +#else + ATOMIC_UPDATE( monteCarlo->_tallies->_balanceTask[tally_index]._numSegments); +#endif + + + mc_particle.num_segments += 1.; /* Track the number of segments this particle has + undergone this cycle on all processes. */ + } + switch (segment_outcome) + { + + case MC_Segment_Outcome_type::Collision: + { + // The particle undergoes a collision event producing: + // (0) Other-than-one same-species secondary particle, or + // (1) Exactly one same-species secondary particle. + if (CollisionEvent(monteCarlo, mc_particle, tally_index, particle_index, tallyArray) == MC_Collision_Event_Return::Continue_Tracking) + { + keepTrackingThisParticle = true; + } + else + { + keepTrackingThisParticle = false; + } + } + break; + + case MC_Segment_Outcome_type::Facet_Crossing: + { + // The particle has reached a cell facet. 
+ MC_Tally_Event::Enum facet_crossing_type = MC_Facet_Crossing_Event(mc_particle, monteCarlo, particle_index, processingVault); + + if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Transit_Exit) + { + keepTrackingThisParticle = true; // Transit Event + } + else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Escape) + { + #ifdef __SYCL_DEVICE_ONLY__ + ATOMIC_UPDATE( tallyArray[tally_index*NUM_TALLIES+1]); + #else + ATOMIC_UPDATE( monteCarlo->_tallies->_balanceTask[tally_index]._escape); + #endif + + mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; + mc_particle.species = -1; + keepTrackingThisParticle = false; + } + else if (facet_crossing_type == MC_Tally_Event::Facet_Crossing_Reflection) + { + MCT_Reflect_Particle(monteCarlo, mc_particle); + keepTrackingThisParticle = true; + } + else + { + // Enters an adjacent cell in an off-processor domain. + keepTrackingThisParticle = false; + } + } + break; + + case MC_Segment_Outcome_type::Census: + { + // The particle has reached the end of the time step. 
+ processedVault->pushParticle(mc_particle); + #ifdef __SYCL_DEVICE_ONLY__ + ATOMIC_UPDATE( tallyArray[tally_index*NUM_TALLIES+2]); + #else + ATOMIC_UPDATE( monteCarlo->_tallies->_balanceTask[tally_index]._census); + #endif + + keepTrackingThisParticle = false; + } + break; + + case MC_Segment_Outcome_type::Max_Number: + { + + keepTrackingThisParticle = false; + } + break; + + default: + qs_assert(false); + keepTrackingThisParticle = false; + break; // should this be an error + } + } while (keepTrackingThisParticle && i1 < MaxIt); + + if(keepTrackingThisParticle == false) + { + processingVault->invalidateParticle(particle_index); + } + else + { + monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle); + } + +} + +inline HOST_DEVICE_SYCL +void CycleTrackingGuts( MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault, ParticleVault *processedVault, int * tallyArray) +{ + MC_Particle mc_particle; + + // Copy a single particle from the particle vault into mc_particle + MC_Load_Particle(monteCarlo, mc_particle, processingVault, particle_index); + + // set the particle.task to the index of the processed vault the particle will census into. 
+ mc_particle.task = 0; + + // loop over this particle until we cannot do anything more with it on this processor + CycleTrackingFunction(monteCarlo, mc_particle, particle_index, processingVault, processedVault, tallyArray); + + //Make sure this particle is marked as completed + processingVault->invalidateParticle( particle_index ); +} + -HOST_DEVICE -void CycleTrackingFunction( MonteCarlo *monteCarlo, MC_Particle &mc_particle, int particle_index, ParticleVault* processingVault, ParticleVault* processedVault); -HOST_DEVICE_END +#endif diff --git a/src/DeclareMacro.hh b/src/DeclareMacro.hh index b1e6c7a5..6457ec54 100644 --- a/src/DeclareMacro.hh +++ b/src/DeclareMacro.hh @@ -1,38 +1,101 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include + #ifndef DECLAREMACRO_HH #define DECLAREMACRO_HH -#if defined HAVE_CUDA || defined HAVE_HIP - #define HOST_DEVICE __host__ __device__ - #define HOST_DEVICE_CUDA __host__ __device__ - #define HOST_DEVICE_CLASS - #define HOST_DEVICE_END - #define DEVICE __device__ - #define DEVICE_END - //#define HOST __host__ - #define HOST_END - #define GLOBAL __global__ +#ifdef HAVE_SYCL +#define HOST_DEVICE +#define HOST_DEVICE_SYCL +#define HOST_DEVICE_CLASS +#define HOST_DEVICE_END +#define DEVICE +#define DEVICE_END +// #define HOST __host__ +#define HOST_END +#define GLOBAL #elif HAVE_OPENMP_TARGET - #define HOST_DEVICE _Pragma( "omp declare target" ) - #define HOST_DEVICE_CUDA - #define HOST_DEVICE_CLASS _Pragma( "omp declare target" ) - #define HOST_DEVICE_END _Pragma("omp end declare target") - //#define HOST_DEVICE #pragma omp declare target - //#define HOST_DEVICE_END #pragma omp end declare target - //#define DEVICE #pragma omp declare target - //#define DEVICE_END #pragma omp end declare target - //#define HOST - #define HOST_END - #define GLOBAL +#define HOST_DEVICE _Pragma("omp declare target") +#define HOST_DEVICE_CUDA +#define HOST_DEVICE_CLASS _Pragma("omp declare target") +#define HOST_DEVICE_END _Pragma("omp end declare target") +// #define HOST_DEVICE #pragma omp declare target +// #define HOST_DEVICE_END #pragma omp end declare target +// #define DEVICE #pragma omp declare target +// #define DEVICE_END #pragma omp end declare target 
+// #define HOST +#define HOST_END +#define GLOBAL #else - #define HOST_DEVICE - #define HOST_DEVICE_CUDA - #define HOST_DEVICE_CLASS - #define HOST_DEVICE_END - #define DEVICE - #define DEVICE_END - //#define HOST - #define HOST_END - #define GLOBAL +#define HOST_DEVICE +#define HOST_DEVICE_CUDA +#define HOST_DEVICE_CLASS +#define HOST_DEVICE_END +#define DEVICE +#define DEVICE_END +// #define HOST +#define HOST_END +#define GLOBAL #endif +#define NUM_TALLIES 8 #endif diff --git a/src/DecompositionObject.cc b/src/DecompositionObject.cc index c0fd799b..106f76dd 100644 --- a/src/DecompositionObject.cc +++ b/src/DecompositionObject.cc @@ -4,6 +4,7 @@ #include #include #include "qs_assert.hh" +#include "Random.h" using std::vector; using std::find; @@ -18,7 +19,7 @@ namespace int nItems = vv.size(); for (unsigned ii=0; iigamma = 1.0 - 2.0*rngSample(seed); + double sine_gamma = sqrt((1.0 - (gamma*gamma))); + double phi = PhysicalConstants::_pi*(2.0*rngSample(seed) - 1.0); + + this->alpha = sine_gamma * cos(phi); + this->beta = sine_gamma * sin(phi); +} diff --git a/src/DirectionCosine.hh b/src/DirectionCosine.hh index e7dd7363..42545f74 100644 --- a/src/DirectionCosine.hh +++ b/src/DirectionCosine.hh @@ -1,6 +1,52 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef DIRECTION_COSINE_INCLUDE #define DIRECTION_COSINE_INCLUDE +#include #include #include "portability.hh" #include "DeclareMacro.hh" @@ -9,49 +55,48 @@ HOST_DEVICE_CLASS class DirectionCosine { public: - double alpha; - double beta; - double gamma; - - HOST_DEVICE_CUDA - DirectionCosine(); - - HOST_DEVICE_CUDA - DirectionCosine(double alpha, double beta, double gamma); - - HOST_DEVICE_CUDA - DirectionCosine &operator=(const DirectionCosine &dc) - { - alpha = dc.alpha; - beta = dc.beta; - gamma = dc.gamma; - return *this; + double alpha; + double beta; + double gamma; + + HOST_DEVICE_SYCL + DirectionCosine(); + + HOST_DEVICE_SYCL + DirectionCosine(double alpha, double beta, double gamma); + + HOST_DEVICE_SYCL + DirectionCosine &operator=(const DirectionCosine &dc) + { + alpha = dc.alpha; + beta = dc.beta; + gamma = dc.gamma; + return *this; } - void Sample_Isotropic(uint64_t *seed); + void Sample_Isotropic(uint64_t *seed); - // rotate a direction cosine given the sine/cosine of theta and phi - HOST_DEVICE_CUDA - inline void Rotate3DVector( double sine_Theta, + // rotate a direction cosine given the sine/cosine of theta and phi + HOST_DEVICE_SYCL + inline void Rotate3DVector(double sine_Theta, double cosine_Theta, double sine_Phi, - double cosine_Phi ); - + double cosine_Phi); }; HOST_DEVICE_END HOST_DEVICE inline DirectionCosine::DirectionCosine() - : alpha(0.0), beta(0.0), gamma(0.0) + : alpha(0.0), beta(0.0), gamma(0.0) { } HOST_DEVICE_END 
HOST_DEVICE inline DirectionCosine::DirectionCosine(double a_alpha, double a_beta, double a_gamma) - : alpha(a_alpha), - beta(a_beta), - gamma(a_gamma) + : alpha(a_alpha), + beta(a_beta), + gamma(a_gamma) { } HOST_DEVICE_END @@ -124,7 +169,7 @@ inline void DirectionCosine::Rotate3DVector(double sin_Theta, double cos_Theta, { // Calculate additional variables in the rotation matrix. double cos_theta = this->gamma; - double sin_theta = sqrt((1.0 - (cos_theta*cos_theta))); + double sin_theta = sycl::sqrt((1.0 - (cos_theta * cos_theta))); double cos_phi; double sin_phi; @@ -135,14 +180,14 @@ inline void DirectionCosine::Rotate3DVector(double sin_Theta, double cos_Theta, } else { - cos_phi = this->alpha/sin_theta; - sin_phi = this->beta/sin_theta; + cos_phi = this->alpha / sin_theta; + sin_phi = this->beta / sin_theta; } // Calculate the rotated direction cosine - this->alpha = cos_theta*cos_phi*(sin_Theta*cos_Phi) - sin_phi*(sin_Theta*sin_Phi) + sin_theta*cos_phi*cos_Theta; - this->beta = cos_theta*sin_phi*(sin_Theta*cos_Phi) + cos_phi*(sin_Theta*sin_Phi) + sin_theta*sin_phi*cos_Theta; - this->gamma = -sin_theta *(sin_Theta*cos_Phi) + cos_theta *cos_Theta; + this->alpha = cos_theta * cos_phi * (sin_Theta * cos_Phi) - sin_phi * (sin_Theta * sin_Phi) + sin_theta * cos_phi * cos_Theta; + this->beta = cos_theta * sin_phi * (sin_Theta * cos_Phi) + cos_phi * (sin_Theta * sin_Phi) + sin_theta * sin_phi * cos_Theta; + this->gamma = -sin_theta * (sin_Theta * cos_Phi) + cos_theta * cos_Theta; } HOST_DEVICE_END diff --git a/src/EnergySpectrum.cc b/src/EnergySpectrum.cc index 553b3fe5..4cf345a0 100644 --- a/src/EnergySpectrum.cc +++ b/src/EnergySpectrum.cc @@ -1,3 +1,18 @@ +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + #include "EnergySpectrum.hh" #include "MonteCarlo.hh" #include "ParticleVault.hh" @@ -50,11 +65,12 @@ void EnergySpectrum::PrintSpectrum(MonteCarlo* monteCarlo) FILE* spectrumFile; spectrumFile = fopen( _fileName.c_str(), "w" ); - for( int ii = 0; ii < count; ii++ ) + for( int ii = 0; ii < 230; ii++ ) { - fprintf( spectrumFile, "%d\t%g\t%" PRIu64 "\n", ii, monteCarlo->_nuclearData->_energies[ii], sumHist[ii] ); + fprintf( spectrumFile, "%d\t%g\t%lu\n", ii, monteCarlo->_nuclearData->_energies[ii], sumHist[ii] ); } fclose( spectrumFile ); } + delete []sumHist; } diff --git a/src/EnergySpectrum.hh b/src/EnergySpectrum.hh index 0f7d1790..4c565fef 100644 --- a/src/EnergySpectrum.hh +++ b/src/EnergySpectrum.hh @@ -2,20 +2,20 @@ #define ENERGYSPECTRUM_HH #include #include +#include class MonteCarlo; class EnergySpectrum { - public: - EnergySpectrum(std::string name, uint64_t size) : _fileName(name), _censusEnergySpectrum(size,0) {}; - void UpdateSpectrum(MonteCarlo* monteCarlo); - void PrintSpectrum(MonteCarlo* monteCarlo); +public: + EnergySpectrum(std::string name, uint64_t size) : _fileName(std::move(name)), _censusEnergySpectrum(size, 0){}; + void UpdateSpectrum(MonteCarlo *monteCarlo); + void PrintSpectrum(MonteCarlo *monteCarlo); - private: - std::string _fileName; - std::vector _censusEnergySpectrum; +private: + std::string _fileName; + std::vector _censusEnergySpectrum; }; #endif - diff --git a/src/FacetPair.hh b/src/FacetPair.hh index 051202ee..98702e98 100644 --- a/src/FacetPair.hh +++ b/src/FacetPair.hh @@ -6,7 +6,17 @@ class FacetPair { public: - FacetPair(){}; + FacetPair() + : _domainGid1(0), + _domainIndex1(0), + _facetIndex1(0), + _cellIndex1(0), + _domainGid2(0), + _domainIndex2(0), + _facetIndex2(0), + _cellIndex2(0) + {}; + FacetPair(int domainGid1, const MC_Location& location1, int domainGid2, const MC_Location& location2) : _domainGid1(domainGid1), diff --git a/src/GlobalFccGrid.cc.dp.cpp b/src/GlobalFccGrid.cc.dp.cpp new file mode 100644 
index 00000000..2afc398f --- /dev/null +++ b/src/GlobalFccGrid.cc.dp.cpp @@ -0,0 +1,189 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +SPDX-License-Identifier: BSD-3-Clause +*/ + +#include +#include "GlobalFccGrid.hh" +#include +#include +#include "MC_Vector.hh" +#include "Tuple.hh" +#include + +using std::vector; +using sycl::max; +using sycl::min; + +namespace +{ + const vector &getFaceTupleOffset(); +} + +GlobalFccGrid::GlobalFccGrid(int nx, int ny, int nz, + double lx, double ly, double lz) + : _nx(nx), _ny(ny), _nz(nz), + _lx(lx), _ly(ly), _lz(lz), + _cellTupleToIndex(nx, ny, nz), + _cellIndexToTuple(nx, ny, nz), + _nodeTupleToIndex(nx + 1, ny + 1, nz + 1, 4), + _nodeIndexToTuple(nx + 1, ny + 1, nz + 1, 4) +{ + _dx = _lx / _nx; + _dy = _ly / _ny; + _dz = _lz / _nz; +} + +Long64 GlobalFccGrid::whichCell(const MC_Vector &r) const +{ + int ix = r.x / _dx; + int iy = r.y / _dy; + int iz = r.z / _dz; + return _cellTupleToIndex(ix, iy, iz); +} + +MC_Vector GlobalFccGrid::cellCenter(Long64 iCell) const +{ + Tuple tt = _cellIndexToTuple(iCell); + MC_Vector r = nodeCoord(Tuple4(tt.x(), tt.y(), tt.z(), 0)); + r += MC_Vector(_dx / 2., _dy / 2., _dz / 2.); + return r; +} + +const vector &GlobalFccGrid::cornerTupleOffsets() const +{ + static vector offset; + if (offset.size() == 0) + { + offset.reserve(14); + offset.push_back(Tuple4(0, 0, 0, 0)); // 0 + offset.push_back(Tuple4(1, 0, 0, 0)); // 1 + offset.push_back(Tuple4(0, 1, 0, 0)); // 2 + offset.push_back(Tuple4(1, 1, 0, 0)); // 3 + offset.push_back(Tuple4(0, 0, 1, 0)); // 4 + offset.push_back(Tuple4(1, 0, 1, 0)); // 5 + offset.push_back(Tuple4(0, 1, 1, 0)); // 6 + offset.push_back(Tuple4(1, 1, 1, 0)); // 7 + offset.push_back(Tuple4(1, 0, 0, 1)); // 8 + offset.push_back(Tuple4(0, 0, 0, 1)); // 9 + offset.push_back(Tuple4(0, 1, 0, 2)); // 10 + offset.push_back(Tuple4(0, 0, 0, 2)); // 11 + offset.push_back(Tuple4(0, 0, 1, 3)); // 12 + offset.push_back(Tuple4(0, 0, 0, 3)); // 13 + } + return offset; +} + +void GlobalFccGrid::getNodeGids(Long64 cellGid, vector &nodeGid) const +{ + if (nodeGid.size() == 0) + { + nodeGid.resize(14); + } + + Tuple tt = 
_cellIndexToTuple(cellGid); + Tuple4 baseNodeTuple = Tuple4(tt.x(), tt.y(), tt.z(), 0); + const vector &cornerTupleOffset = cornerTupleOffsets(); + for (unsigned ii = 0; ii < 14; ++ii) + nodeGid[ii] = _nodeTupleToIndex(baseNodeTuple + cornerTupleOffset[ii]); +} + +// for faces on the outer surface of the global grid, the returned cell +// gid will be the same as the input cellGid +void GlobalFccGrid::getFaceNbrGids(Long64 cellGid, vector &nbrCellGid) const +{ + if (nbrCellGid.size() == 0) + { + nbrCellGid.resize(6); + } + + Tuple cellTuple = _cellIndexToTuple(cellGid); + const vector &faceTupleOffset = getFaceTupleOffset(); + + for (unsigned ii = 0; ii < 6; ++ii) + { + Tuple faceNbr = cellTuple + faceTupleOffset[ii]; + snapTuple(faceNbr); + nbrCellGid[ii] = _cellTupleToIndex(faceNbr); + } +} + +MC_Vector GlobalFccGrid::nodeCoord(Long64 index) const +{ + return nodeCoord(_nodeIndexToTuple(index)); +} + +MC_Vector GlobalFccGrid::nodeCoord(const Tuple4 &tt) const +{ + vector basisOffset; + basisOffset.reserve(4); + if (basisOffset.size() == 0) + { + basisOffset.push_back(MC_Vector(0., 0., 0.)); + basisOffset.push_back(MC_Vector(0., _dy / 2.0, _dz / 2.0)); + basisOffset.push_back(MC_Vector(_dx / 2.0, 0., _dz / 2.0)); + basisOffset.push_back(MC_Vector(_dx / 2.0, _dy / 2.0, 0.)); + } + + double rx = tt.x() * _dx; + double ry = tt.y() * _dy; + double rz = tt.z() * _dz; + + MC_Vector rr = MC_Vector(rx, ry, rz) + basisOffset[tt.b()]; + + return rr; +} + +void GlobalFccGrid::snapTuple(Tuple &tt) const +{ + tt.x() = min(max(0, tt.x()), _nx - 1); + tt.y() = min(max(0, tt.y()), _ny - 1); + tt.z() = min(max(0, tt.z()), _nz - 1); +} + +namespace +{ + const vector &getFaceTupleOffset() + { + static vector faceTupleOffset; + + if (faceTupleOffset.size() == 0) + { + faceTupleOffset.reserve(6); + faceTupleOffset.push_back(Tuple(1, 0, 0)); + faceTupleOffset.push_back(Tuple(-1, 0, 0)); + faceTupleOffset.push_back(Tuple(0, 1, 0)); + faceTupleOffset.push_back(Tuple(0, -1, 0)); + 
faceTupleOffset.push_back(Tuple(0, 0, 1)); + faceTupleOffset.push_back(Tuple(0, 0, -1)); + } + + return faceTupleOffset; + } +} diff --git a/src/GridAssignmentObject.cc.dp.cpp b/src/GridAssignmentObject.cc.dp.cpp new file mode 100644 index 00000000..b7ba7388 --- /dev/null +++ b/src/GridAssignmentObject.cc.dp.cpp @@ -0,0 +1,241 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +SPDX-License-Identifier: BSD-3-Clause +*/ + +#include +#include "GridAssignmentObject.hh" +#include +#include +#include +#include "qs_assert.hh" +#include + +#define DIFFSQ(a, b) (MC_Vector(a - b).Dot(MC_Vector(a - b))) + +using std::queue; +using std::vector; +using sycl::max; +using sycl::min; + +/** The present implementation of GridAssignmentObject is judged to be + * sufficiently fast to meet the needs of initial assignment of + * particles to domains. The best way to speed up the code would be to + * more strictly limit the number of cells that are flooded by + * implementing an improved distance calculation in minDist2. + * + * The next best optimization possibility probably involves reducing + * the number of indexToTuple and tupleToIndex calculations (probably + * at the expense of a higher memory footprint). + */ + +GridAssignmentObject::GridAssignmentObject(const vector &centers) + : _centers(centers) +{ + // This sets the length scale of the grid cells. The value 5 is + // pretty arbitrary. It could just as easily be 1 or 10. If + // necessary it could be made a parameter that is wired out to the + // input deck. + int centersPerCell = 5; + + MC_Vector minCoord = _centers[0]; + MC_Vector maxCoord = _centers[0]; + for (int ii = 1; ii < _centers.size(); ++ii) + { + const MC_Vector &iCenter = _centers[ii]; + minCoord.x = fmin(minCoord.x, iCenter.x); + minCoord.y = fmin(minCoord.y, iCenter.y); + minCoord.z = fmin(minCoord.z, iCenter.z); + maxCoord.x = fmax(maxCoord.x, iCenter.x); + maxCoord.y = fmax(maxCoord.y, iCenter.y); + maxCoord.z = fmax(maxCoord.z, iCenter.z); + } + _corner = minCoord; + + // It is possible that all of the centers lie on the x-, y-, or + // z-plane. If so, arbitrarily set the length in that direction to + // 1. 
+ double lx = max(1., (maxCoord.x - minCoord.x)); + double ly = max(1., (maxCoord.y - minCoord.y)); + double lz = max(1., (maxCoord.z - minCoord.z)); + + double x = _centers.size() / centersPerCell / (lx * ly * lz); + x = pow(x, 1.0 / 3.0); + _nx = max(1., sycl::floor(x * lx)); + _ny = max(1., sycl::floor(x * ly)); + _nz = max(1., sycl::floor(x * lz)); + _dx = lx / _nx; + _dy = ly / _ny; + _dz = lz / _nz; + + int nCells = _nx * _ny * _nz; + + _grid.resize(nCells); + + for (int ii = 0; ii < _centers.size(); ++ii) + { + int iCell = whichCell(_centers[ii]); + _grid[iCell]._myCenters.push_back(ii); + } +} + +int GridAssignmentObject::nearestCenter(const MC_Vector r) +{ + double r2Min = 1e300; + int minCenter = -1; + + addTupleToQueue(whichCellTuple(r)); + + while (_floodQueue.size() > 0) + { + // pop the next cell to check + int iCell = _floodQueue.front(); + _floodQueue.pop(); + // if cell is too far away to bother continue. + if (minDist2(r, iCell) > r2Min) + continue; + // check all centers in this cell + for (int ii = 0; ii < _grid[iCell]._myCenters.size(); ++ii) + { + int iCenter = _grid[iCell]._myCenters[ii]; + + const MC_Vector &rCenter = _centers[iCenter]; + double r2 = DIFFSQ(r, rCenter); + if (r2 == r2Min) + minCenter = std::min(minCenter, iCenter); + if (r2 < r2Min) + { + r2Min = r2; + minCenter = iCenter; + } + } + // push any unused nbrs to queue. Mark as used. 
+ addNbrsToQueue(iCell); + } + + while (_wetList.size() > 0) + { + _grid[_wetList.front()]._burned = false; + _wetList.pop(); + } + + qs_assert(minCenter >= 0); + return minCenter; +} + +Tuple GridAssignmentObject::whichCellTuple(const MC_Vector r) const +{ + int ix = (r.x - _corner.x) / _dx; + int iy = (r.y - _corner.y) / _dy; + int iz = (r.z - _corner.z) / _dz; + ix = max(0, ix); + iy = max(0, iy); + iz = max(0, iz); + ix = std::min(_nx - 1, ix); + iy = std::min(_ny - 1, iy); + iz = std::min(_nz - 1, iz); + + return Tuple(ix, iy, iz); +} + +int GridAssignmentObject::whichCell(const MC_Vector r) const +{ + return tupleToIndex(whichCellTuple(r)); +} + +int GridAssignmentObject::tupleToIndex(Tuple tuple) const +{ + return tuple.x() + _nx * (tuple.y() + _ny * tuple.z()); +} + +Tuple GridAssignmentObject::indexToTuple(int index) const +{ + int ix = index % _nx; + index /= _nx; + int iy = index % _ny; + int iz = index / _ny; + return Tuple(ix, iy, iz); +} + +/** Finds a lower bound of the squared distance from the point r to the + * cell with index iCell. As presently implemented this calculation is + * very conservative. We could set a larger lower bound by considering + * the location of the particle within the cell in which it lies. 
*/ +double GridAssignmentObject::minDist2(const MC_Vector r, int iCell) const +{ + Tuple ir = whichCellTuple(r); + Tuple iTuple = indexToTuple(iCell); + + double rx = _dx * (abs(iTuple.x() - ir.x()) - 1); + rx = max(0., rx); + double ry = _dy * (abs(iTuple.y() - ir.y()) - 1); + ry = max(0., ry); + double rz = _dz * (abs(iTuple.z() - ir.z()) - 1); + rz = max(0., rz); + + return rx * rx + ry * ry + rz * rz; +} + +void GridAssignmentObject::addTupleToQueue(Tuple iTuple) +{ + int index = tupleToIndex(iTuple); + if (_grid[index]._burned) + return; + _floodQueue.push(index); + _wetList.push(index); + _grid[index]._burned = true; +} + +void GridAssignmentObject::addNbrsToQueue(int iCell) +{ + Tuple iTuple = indexToTuple(iCell); + iTuple.x() += 1; + if (iTuple.x() < _nx) + addTupleToQueue(iTuple); + iTuple.x() -= 2; + if (iTuple.x() >= 0) + addTupleToQueue(iTuple); + iTuple.x() += 1; + + iTuple.y() += 1; + if (iTuple.y() < _ny) + addTupleToQueue(iTuple); + iTuple.y() -= 2; + if (iTuple.y() >= 0) + addTupleToQueue(iTuple); + iTuple.y() += 1; + + iTuple.z() += 1; + if (iTuple.z() < _nz) + addTupleToQueue(iTuple); + iTuple.z() -= 2; + if (iTuple.z() >= 0) + addTupleToQueue(iTuple); + iTuple.z() += 1; +} diff --git a/src/MCT.hh b/src/MCT.hh index e6852acd..380db87e 100644 --- a/src/MCT.hh +++ b/src/MCT.hh @@ -1,11 +1,69 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. 
Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MCT_HH #define MCT_HH #include "portability.hh" #include "DeclareMacro.hh" +#include "Globals.hh" +#include "MonteCarlo.hh" +#include "MC_Nearest_Facet.hh" +#include "MC_Particle.hh" +#include "MC_Domain.hh" +#include "MC_Location.hh" +#include "DirectionCosine.hh" +#include "MC_Distance_To_Facet.hh" +#include "MC_RNG_State.hh" +#include "PhysicalConstants.hh" +#include "DeclareMacro.hh" + class MC_Particle; class MC_Domain; +class MC_Domain_d; class MC_Location; class MC_Vector; class DirectionCosine; @@ -13,41 +71,738 @@ class MC_Nearest_Facet; class Subfacet_Adjacency; class MonteCarlo; - HOST_DEVICE MC_Nearest_Facet MCT_Nearest_Facet( - MC_Particle *mc_particle, - MC_Location &location, - MC_Vector &coordinate, - const DirectionCosine *direction_cosine, - double distance_threshold, - double current_best_distance, - bool new_segment, - MonteCarlo* monteCarlo); + MC_Particle *mc_particle, + MC_Location &location, + MC_Vector &coordinate, + const DirectionCosine *direction_cosine, + double distance_threshold, + double current_best_distance, + bool new_segment, + MonteCarlo *monteCarlo); HOST_DEVICE_END - HOST_DEVICE void MCT_Generate_Coordinate_3D_G( - uint64_t *random_number_seed, - int domain_num, - int cell, - MC_Vector &coordinate, - MonteCarlo* monteCarlo); + uint64_t *random_number_seed, + int domain_num, + int cell, + MC_Vector &coordinate, + MonteCarlo *monteCarlo); HOST_DEVICE_END +template HOST_DEVICE -MC_Vector MCT_Cell_Position_3D_G( 
- const MC_Domain &domain, - int cell_index); + MC_Vector + MCT_Cell_Position_3D_G( + const T &domain, + int cell_index); HOST_DEVICE_END -HOST_DEVICE -Subfacet_Adjacency &MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo* monteCarlo); +inline HOST_DEVICE + + Subfacet_Adjacency & + MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo *monteCarlo) + +{ +#ifdef __SYCL_DEVICE_ONLY__ + MC_Domain_d &domain = monteCarlo->domain_d[location.domain]; +#else + MC_Domain &domain = monteCarlo->domain[location.domain]; +#endif + Subfacet_Adjacency &adjacency = domain.mesh._cellConnectivity[location.cell]._facet[location.facet].subfacet; + + return adjacency; +} + HOST_DEVICE_END HOST_DEVICE void MCT_Reflect_Particle(MonteCarlo *mcco, MC_Particle &particle); HOST_DEVICE_END +namespace +{ + template + HOST_DEVICE + MC_Nearest_Facet MCT_Nearest_Facet_3D_G( + MC_Particle *mc_particle, + T &domain, + MC_Location &location, + MC_Vector &coordinate, + const DirectionCosine *direction_cosine); + HOST_DEVICE_END + + HOST_DEVICE_SYCL + double MCT_Cell_Volume_3D_G_vector_tetDet(const MC_Vector &v0_, + const MC_Vector &v1_, + const MC_Vector &v2_, + const MC_Vector &v3); + + template + HOST_DEVICE_SYCL + void MCT_Nearest_Facet_3D_G_Move_Particle( + T &domain, // input: domain + const MC_Location &location, + MC_Vector &coordinate, // input/output: move this coordinate + double move_factor); // input: multiplication factor for move + + HOST_DEVICE_SYCL + MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest( + int num_facets_per_cell, + MC_Distance_To_Facet *distance_to_facet); + + template + HOST_DEVICE_SYCL + MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest( + MC_Particle *mc_particle, + T *domain, + MC_Location *location, + MC_Vector &coordinate, + int &iteration, // input/output + double &move_factor, // input/output + int num_facets_per_cell, + MC_Distance_To_Facet *distance_to_facet, + int &retry /* output */ ); + + template 
+ HOST_DEVICE_SYCL + void MCT_Facet_Points_3D_G( + const T &domain, // input + int cell, // input + int facet, // input + int num_points_per_facet, // input + int *facet_points /* output */); + + HOST_DEVICE_SYCL + double MCT_Nearest_Facet_3D_G_Distance_To_Segment( + double plane_tolerance, + double facet_normal_dot_direction_cosine, + double A, double B, double C, double D, + const MC_Vector &facet_coords0, + const MC_Vector &facet_coords1, + const MC_Vector &facet_coords2, + const MC_Vector &coordinate, + const DirectionCosine *direction_cosine, + bool allow_enter); + +} + +/// Calculates the nearest facet of the specified cell to the +/// specified coordinates. +/// +/// \return The minimum distance and facet number. + +inline HOST_DEVICE +MC_Nearest_Facet MCT_Nearest_Facet(MC_Particle *mc_particle, + MC_Location &location, + MC_Vector &coordinate, + const DirectionCosine *direction_cosine, + double distance_threshold, + double current_best_distance, + bool new_segment, + MonteCarlo* monteCarlo ) +{ +// #ifndef BCMN_HAVE_OPENMP +// MC_FASTTIMER_START(MC_Fast_Timer::Nearest_Facet); +// #endif +// + + if (location.domain < 0 || location.cell < 0) + { + qs_assert(false); +// std::string output_string; +// mc_particle->Copy_Particle_To_String(output_string); +// MC_Fatal_Jump( "Bad location value. 
region: %d domain: %d, cell: %d.\nParticle record\n%s\n", +// location.region, location.domain, location.cell, output_string.c_str()); + } + #ifdef __SYCL_DEVICE_ONLY__ + MC_Domain_d &domain = monteCarlo->domain_d[location.domain]; + #else + MC_Domain &domain = monteCarlo->domain[location.domain]; + #endif + + MC_Nearest_Facet nearest_facet = + MCT_Nearest_Facet_3D_G(mc_particle, domain, location, coordinate, direction_cosine); + + if (nearest_facet.distance_to_facet < 0) { nearest_facet.distance_to_facet = 0; } + + if (nearest_facet.distance_to_facet >= PhysicalConstants::_hugeDouble) + { + qs_assert(false); +// MC_Warning( "Infinite distance (cell not bound) for location [Reg:%d Local Dom:%d " +// "Global Dom: %d Cell:%d Fac:%d], coordinate (%g %g %g) and direction (%g %g %g).\n", +// location.region, location.domain, +// mcco->region->Global_Domain_Number(location.region, location.domain), +// location.cell, location.facet, +// coordinate.x, coordinate.y, coordinate.z, +// direction_cosine->alpha, direction_cosine->beta, direction_cosine->gamma); +// if ( mc_particle ) +// { +// MC_Warning( "mc_particle.identifier %" PRIu64 "\n", mc_particle->identifier ); +// } + } + +// #ifndef BCMN_HAVE_OPENMP +// MC_FASTTIMER_STOP(MC_Fast_Timer::Nearest_Facet); +// #endif + + return nearest_facet; +} // End MCT_Nearest_Facet + +HOST_DEVICE_END + +/// Generates a random coordinate inside a polyhedral cell. + inline HOST_DEVICE_SYCL +void MCT_Generate_Coordinate_3D_G(uint64_t *random_number_seed, + int domain_num, + int cell, + MC_Vector &coordinate, + MonteCarlo* monteCarlo ) +{ + #ifdef __SYCL_DEVICE_ONLY__ + const MC_Domain_d &domain = monteCarlo->domain_d[domain_num]; + #else + const MC_Domain &domain = monteCarlo->domain[domain_num]; + #endif + + // Determine the cell-center nodal point coordinates. 
+ MC_Vector center = MCT_Cell_Position_3D_G(domain, cell); + + int num_facets = domain.mesh._cellConnectivity[cell].num_facets; + if (num_facets == 0) + { + coordinate.x = coordinate.y = coordinate.z = 0; + return; + } + + double random_number = rngSample(random_number_seed); + double which_volume = random_number * 6.0 * domain.cell_state[cell]._volume; + + // Find the tet to sample from. + double current_volume = 0.0; + int facet_index = -1; + const MC_Vector *point0 = NULL; + const MC_Vector *point1 = NULL; + const MC_Vector *point2 = NULL; + while (current_volume < which_volume) + { + facet_index++; + + if (facet_index == num_facets) { break; } + + int facet_points[3]; + MCT_Facet_Points_3D_G(domain, cell, facet_index, 3, facet_points); + point0 = &domain.mesh._node[facet_points[0]]; + point1 = &domain.mesh._node[facet_points[1]]; + point2 = &domain.mesh._node[facet_points[2]]; + + double subvolume = MCT_Cell_Volume_3D_G_vector_tetDet(*point0, *point1, *point2, center); + current_volume += subvolume; + + } + + // Sample from the tet. + double r1 = rngSample(random_number_seed); + double r2 = rngSample(random_number_seed); + double r3 = rngSample(random_number_seed); + + // Cut and fold cube into prism. + if (r1 + r2 > 1.0) + { + r1 = 1.0 - r1; + r2 = 1.0 - r2; + } + // Cut and fold prism into tetrahedron. + if (r2 + r3 > 1.0) + { + double tmp = r3; + r3 = 1.0 - r1 - r2; + r2 = 1.0 - tmp; + } + else if (r1 + r2 + r3 > 1.0) + { + double tmp = r3; + r3 = r1 + r2 + r3 - 1.0; + r1 = 1.0 - r2 - tmp; + } + + // numbers 1-4 are the barycentric coordinates of the random point. 
+ double r4 = 1.0 - r1 - r2 - r3; + + // error check + if ((point0 == NULL) || (point1 == NULL) || (point2 == NULL)) + { + MC_Fatal_Jump( "Programmer Error: points must not be NULL: point0=%p point1=%p point2=%p", + point0, point1, point2); + return; + } + + coordinate.x = ( r4 * center.x + r1 * point0->x + r2 * point1->x + r3 * point2->x ); + coordinate.y = ( r4 * center.y + r1 * point0->y + r2 * point1->y + r3 * point2->y ); + coordinate.z = ( r4 * center.z + r1 * point0->z + r2 * point1->z + r3 * point2->z ); +} + + +/// Returns a coordinate that represents the "center" of the cell. +template + inline HOST_DEVICE_SYCL +MC_Vector MCT_Cell_Position_3D_G(const T &domain, + int cell_index) +{ + MC_Vector coordinate; + + int num_points = domain.mesh._cellConnectivity[cell_index].num_points; + + for (int point_index = 0; point_index < num_points; point_index++) + { + int point = domain.mesh._cellConnectivity[cell_index]._point[point_index]; + + coordinate.x += domain.mesh._node[point].x; + coordinate.y += domain.mesh._node[point].y; + coordinate.z += domain.mesh._node[point].z; + } + + double one_over_num_points = 1.0 / ((double)num_points); + coordinate.x *= one_over_num_points; + coordinate.y *= one_over_num_points; + coordinate.z *= one_over_num_points; + + return coordinate; +} + + +namespace +{ + /// Fills in the facet_points array with the domain local point + /// numbers specified by the cell number and cell-local facet number + /// for a 3DG mesh. + template + inline HOST_DEVICE_SYCL + void MCT_Facet_Points_3D_G(const T &domain, // input + int cell, // input + int facet, // input + int num_points_per_facet, // input + int *facet_points /* output */) + { + // Determine the domain local points of the facet in the cell for the 2DG or 3DG mesh. 
+        for (int point_index = 0; point_index < num_points_per_facet; point_index++ )
+            facet_points[point_index] = domain.mesh._cellConnectivity[cell]._facet[facet].point[point_index];
+    }
+}
+
+namespace
+{
+    /// Calculates the distance from the specified coordinates to the
+    /// input segment. This is used to track to the faces of a 3D_G
+    /// mesh.
+    ///
+    /// The ray that starts at \p coordinate and travels along \p direction_cosine
+    /// is intersected with the plane A*x + B*y + C*z + D = 0. The hit is accepted
+    /// only when the intersection point lies inside the triangle
+    /// (facet_coords0, facet_coords1, facet_coords2).
+    ///
+    /// \param plane_tolerance  Squared tolerance: a negative numerator is rejected
+    ///                         when numerator^2 exceeds this value.
+    /// \param facet_normal_dot_direction_cosine
+    ///                         Divisor used to turn the plane residual into a
+    ///                         distance. It is NOT checked against zero here.
+    /// \param A,B,C,D          Plane coefficients of the facet.
+    /// \param facet_coords0,facet_coords1,facet_coords2
+    ///                         Corner coordinates of the triangular facet.
+    /// \param coordinate       Start point of the ray.
+    /// \param direction_cosine Direction of the ray (alpha, beta, gamma).
+    /// \param allow_enter      When true the "too negative" filter is skipped.
+    /// \return The distance along the ray to the facet, or
+    ///         PhysicalConstants::_hugeDouble when the facet is rejected.
+    inline HOST_DEVICE_SYCL
+    double MCT_Nearest_Facet_3D_G_Distance_To_Segment(double plane_tolerance,
+                                                      double facet_normal_dot_direction_cosine,
+                                                      double A, double B, double C, double D,
+                                                      const MC_Vector &facet_coords0,
+                                                      const MC_Vector &facet_coords1,
+                                                      const MC_Vector &facet_coords2,
+                                                      const MC_Vector &coordinate,
+                                                      const DirectionCosine *direction_cosine,
+                                                      bool allow_enter)
+    {
+        const double boundingBox_tolerance = 1e-9;
+        const double numerator = -1.0*(A * coordinate.x +
+                                       B * coordinate.y +
+                                       C * coordinate.z +
+                                       D);
+
+        /* Plane equation: numerator = -P(x,y,z) = -(Ax + By + Cz + D)
+           if: numerator < -1e-8*length(x,y,z)   too negative!
+           if: numerator < 0 && numerator^2 > ( 1e-8*length(x,y,z) )^2   too negative!
+           reverse inequality since squaring function is decreasing for negative inputs.
+           If numerator is just SLIGHTLY negative, then the particle is just outside of the face */
+
+        // Filter out too negative distances
+        if (!allow_enter && numerator < 0.0 && numerator * numerator > plane_tolerance)
+        {
+            return PhysicalConstants::_hugeDouble;
+        }
+
+        // we have to restrict the solution to within the triangular face
+        const double distance = numerator / facet_normal_dot_direction_cosine;
+
+        // see if the intersection point of the ray and the plane is within the triangular facet
+        MC_Vector intersection_pt;
+        intersection_pt.x = coordinate.x + distance * direction_cosine->alpha;
+        intersection_pt.y = coordinate.y + distance * direction_cosine->beta;
+        intersection_pt.z = coordinate.z + distance * direction_cosine->gamma;
+
+        // The three helper macros below are private to this function and are
+        // #undef'd again before it ends, so that they do not leak into every
+        // translation unit that includes this header.
+
+        // if the point is completely below the triangle, it is not in the triangle
+#define IF_POINT_BELOW_CONTINUE(axis)                                              \
+        if ( facet_coords0.axis > intersection_pt.axis + boundingBox_tolerance && \
+             facet_coords1.axis > intersection_pt.axis + boundingBox_tolerance && \
+             facet_coords2.axis > intersection_pt.axis + boundingBox_tolerance ) { return PhysicalConstants::_hugeDouble; }
+
+        // if the point is completely above the triangle, it is not in the triangle
+#define IF_POINT_ABOVE_CONTINUE(axis)                                              \
+        if ( facet_coords0.axis < intersection_pt.axis - boundingBox_tolerance && \
+             facet_coords1.axis < intersection_pt.axis - boundingBox_tolerance && \
+             facet_coords2.axis < intersection_pt.axis - boundingBox_tolerance ) { return PhysicalConstants::_hugeDouble; }
+
+        // 2D cross product (b - a) x (c - a); every argument is parenthesized so
+        // that an expression argument cannot change the grouping.
+#define AB_CROSS_AC(ax,ay,bx,by,cx,cy) ( ((bx)-(ax))*((cy)-(ay)) - ((by)-(ay))*((cx)-(ax)) )
+
+        // Is the intersection point inside the triangular facet?  Project to 2D and see.
+
+        // A^2 + B^2 + C^2 = 1, so max(|A|,|B|,|C|) >= 1/sqrt(3) = 0.577
+        // (all coefficients can't be small)
+        double cross0 = 0.0, cross1 = 0.0, cross2 = 0.0;
+        if ( C < -0.5 || C > 0.5 )
+        {
+            // Normal is mostly along z: project onto the x-y plane.
+            IF_POINT_BELOW_CONTINUE(x);
+            IF_POINT_ABOVE_CONTINUE(x);
+            IF_POINT_BELOW_CONTINUE(y);
+            IF_POINT_ABOVE_CONTINUE(y);
+
+            cross1 = AB_CROSS_AC(facet_coords0.x, facet_coords0.y,
+                                 facet_coords1.x, facet_coords1.y,
+                                 intersection_pt.x, intersection_pt.y);
+            cross2 = AB_CROSS_AC(facet_coords1.x, facet_coords1.y,
+                                 facet_coords2.x, facet_coords2.y,
+                                 intersection_pt.x, intersection_pt.y);
+            cross0 = AB_CROSS_AC(facet_coords2.x, facet_coords2.y,
+                                 facet_coords0.x, facet_coords0.y,
+                                 intersection_pt.x, intersection_pt.y);
+
+        }
+        else if ( B < -0.5 || B > 0.5 )
+        {
+            // Normal is mostly along y: project onto the z-x plane.
+            IF_POINT_BELOW_CONTINUE(x);
+            IF_POINT_ABOVE_CONTINUE(x);
+            IF_POINT_BELOW_CONTINUE(z);
+            IF_POINT_ABOVE_CONTINUE(z);
+
+            cross1 = AB_CROSS_AC(facet_coords0.z, facet_coords0.x,
+                                 facet_coords1.z, facet_coords1.x,
+                                 intersection_pt.z, intersection_pt.x);
+            cross2 = AB_CROSS_AC(facet_coords1.z, facet_coords1.x,
+                                 facet_coords2.z, facet_coords2.x,
+                                 intersection_pt.z, intersection_pt.x);
+            cross0 = AB_CROSS_AC(facet_coords2.z, facet_coords2.x,
+                                 facet_coords0.z, facet_coords0.x,
+                                 intersection_pt.z, intersection_pt.x);
+
+        }
+        else if ( A < -0.5 || A > 0.5 )
+        {
+            // Normal is mostly along x: project onto the y-z plane.
+            IF_POINT_BELOW_CONTINUE(z);
+            IF_POINT_ABOVE_CONTINUE(z);
+            IF_POINT_BELOW_CONTINUE(y);
+            IF_POINT_ABOVE_CONTINUE(y);
+
+            cross1 = AB_CROSS_AC(facet_coords0.y, facet_coords0.z,
+                                 facet_coords1.y, facet_coords1.z,
+                                 intersection_pt.y, intersection_pt.z);
+            cross2 = AB_CROSS_AC(facet_coords1.y, facet_coords1.z,
+                                 facet_coords2.y, facet_coords2.z,
+                                 intersection_pt.y, intersection_pt.z);
+            cross0 = AB_CROSS_AC(facet_coords2.y, facet_coords2.z,
+                                 facet_coords0.y, facet_coords0.z,
+                                 intersection_pt.y, intersection_pt.z);
+        }
+
+#undef AB_CROSS_AC
+#undef IF_POINT_ABOVE_CONTINUE
+#undef IF_POINT_BELOW_CONTINUE
+
+        const double cross_tol = 1e-9 * MC_FABS(cross0 + cross1 + cross2);  // cross product tolerance
+
+        // The point is inside the triangle when all three cross products have
+        // the same sign (within tolerance), whichever sign that is.
+        if ( (cross0 > -cross_tol && cross1 > -cross_tol && cross2 > -cross_tol) ||
+             (cross0 <  cross_tol && cross1 <  cross_tol && cross2 <  cross_tol) )
+        {
+            return distance;
+        }
+        return PhysicalConstants::_hugeDouble;
+    }
+}
+
+
+/// Reflects the particle off of a reflection boundary.
+///
+/// The facet normal is read from the cell geometry at the particle's current
+/// location (cell and facet). When the direction has a positive component
+/// along that normal, the direction is mirrored about the facet plane. The
+/// velocity is then rebuilt from the unchanged speed and the particle's
+/// direction cosine.
+///
+/// \param monteCarlo  Top-level state used to look up the particle's domain.
+/// \param particle    Particle whose direction cosine and velocity are updated.
+inline HOST_DEVICE
+void MCT_Reflect_Particle(MonteCarlo *monteCarlo, MC_Particle &particle)
+{
+    DirectionCosine *direction_cosine = particle.Get_Direction_Cosine();
+    MC_Location location = particle.Get_Location();
+
+    // Device compilation uses the device-side domain type, host compilation
+    // uses the host-side one.
+    #ifdef __SYCL_DEVICE_ONLY__
+    const MC_Domain_d &domain = location.get_domain_d(monteCarlo);
+    #else
+    const MC_Domain &domain = location.get_domain(monteCarlo);
+    #endif
+    const MC_General_Plane &plane = domain.mesh._cellGeometry[location.cell]._facet[location.facet];
+
+    MC_Vector facet_normal(plane.A, plane.B, plane.C);
+
+
+    // Twice the component of the direction along the facet normal.
+    double dot = 2.0*( direction_cosine->alpha * facet_normal.x +
+                       direction_cosine->beta  * facet_normal.y +
+                       direction_cosine->gamma * facet_normal.z );
+
+    if ( dot > 0 ) // do not reflect a particle that is ALREADY pointing inward
+    {
+        // reflect the particle
+        direction_cosine->alpha -= dot * facet_normal.x;
+        direction_cosine->beta  -= dot * facet_normal.y;
+        direction_cosine->gamma -= dot * facet_normal.z;
+    }
+
+    // Calculate the reflected velocity components.
+    double particle_speed = particle.velocity.Length();
+    particle.velocity.x = particle_speed * particle.direction_cosine.alpha;
+    particle.velocity.y = particle_speed * particle.direction_cosine.beta;
+    particle.velocity.z = particle_speed * particle.direction_cosine.gamma;
+}
+HOST_DEVICE_END
+
+namespace
+{
+    /// Loop over all the facets, return the minimum distance.
+    ///
+    /// Strictly positive distances compete for the smallest value (a tie goes
+    /// to the later facet index). Zero or negative distances compete
+    /// separately for the value closest to zero, and that candidate is used
+    /// only when no positive candidate replaced the initial distance.
+    ///
+    /// \param num_facets_per_cell  Number of entries of \p distance_to_facet to scan.
+    /// \param distance_to_facet    Per-facet distances, indexed by facet number.
+    /// \return The chosen facet index together with its distance.
+    inline HOST_DEVICE_SYCL
+    MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest(int num_facets_per_cell,
+                                                    MC_Distance_To_Facet *distance_to_facet)
+    {
+        // Best strictly-positive candidate. The starting distance comes from the
+        // default constructor of MC_Nearest_Facet (presumably _hugeDouble, given
+        // the comparison at the end -- confirm in MC_Nearest_Facet.hh).
+        MC_Nearest_Facet nearest_facet;
+
+        // Best non-positive candidate: the one with the smallest magnitude.
+        MC_Nearest_Facet closest_nonpositive;
+        closest_nonpositive.distance_to_facet = -PhysicalConstants::_hugeDouble;
+
+        for (int facet_index = 0; facet_index < num_facets_per_cell; facet_index++)
+        {
+            const double candidate = distance_to_facet[facet_index].distance;
+
+            if (candidate > 0.0)
+            {
+                // Positive distance: keep the smallest one seen so far.
+                if (candidate <= nearest_facet.distance_to_facet)
+                {
+                    nearest_facet.distance_to_facet = candidate;
+                    nearest_facet.facet             = facet_index;
+                }
+            }
+            else if (candidate > closest_nonpositive.distance_to_facet)
+            {
+                // Zero or negative distance: keep the one nearest to zero.
+                closest_nonpositive.distance_to_facet = candidate;
+                closest_nonpositive.facet             = facet_index;
+            }
+        }
+
+        // No positive solution, so allow a negative solution that had really
+        // small magnitude (if there was one at all).
+        const bool no_positive_hit = (nearest_facet.distance_to_facet == PhysicalConstants::_hugeDouble);
+        const bool have_nonpositive = (closest_nonpositive.distance_to_facet != -PhysicalConstants::_hugeDouble);
+        if (no_positive_hit && have_nonpositive)
+        {
+            nearest_facet.distance_to_facet = closest_nonpositive.distance_to_facet;
+            nearest_facet.facet             = closest_nonpositive.facet;
+        }
+
+        return nearest_facet;
+    }
+}
+
+
+namespace
+{
+    /// Loop over all the facets, return the minimum distance.
+ template + inline HOST_DEVICE_SYCL + MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest(MC_Particle *mc_particle, + T *domain, + MC_Location *location, + MC_Vector &coordinate, + int &iteration, // input/output + double &move_factor, // input/output + int num_facets_per_cell, + MC_Distance_To_Facet *distance_to_facet, + int &retry /* output */ ) + { + MC_Nearest_Facet nearest_facet = MCT_Nearest_Facet_Find_Nearest(num_facets_per_cell, distance_to_facet); + + const int max_allowed_segments = 10000000; + + retry = 0; + + if (mc_particle ) + { + if ((nearest_facet.distance_to_facet == PhysicalConstants::_hugeDouble && move_factor > 0) || + (mc_particle->num_segments > max_allowed_segments && nearest_facet.distance_to_facet <= 0.0 )) + { + // Could not find a solution, so move the particle towards the center of the cell + // and try again. + MCT_Nearest_Facet_3D_G_Move_Particle(*domain, *location, coordinate, move_factor); + + iteration++; + move_factor *= 2.0; + + if (move_factor > 1.0e-2) + move_factor = 1.0e-2; + + int max_iterations = 10000; + + if (iteration == max_iterations) + { + qs_assert(false); // If we start hitting this assertion we can + // come up with a better mitigation plan. - dfr + retry = 0; + } + else + retry = 1; + + // Allow the distance to the current facet + location->facet = -1; + } + } + return nearest_facet; + } +} + +namespace +{ + /// Calculates the distance from the specified coordinates to each + /// of the facets of the specified cell in a three-dimensional, + /// unstructured, hexahedral (Type 3D_G) domain, storing the minimum + /// distance and associated facet number. + + template + inline HOST_DEVICE + MC_Nearest_Facet MCT_Nearest_Facet_3D_G( + MC_Particle *mc_particle, + T &domain, + MC_Location &location, + MC_Vector &coordinate, + const DirectionCosine *direction_cosine) + { + // int my_task_num = mc_particle == NULL ? 
0 : mc_particle->task; + MC_Vector *facet_coords[3]; + int iteration = 0; + double move_factor = 0.5 * PhysicalConstants::_smallDouble; + + // Initialize some data for the unstructured, hexahedral mesh. + int num_facets_per_cell = domain.mesh._cellConnectivity[location.cell].num_facets; + + while (true) // will break out when distance is found + { + // Determine the distance to each facet of the cell. + // (1e-8 * Radius)^2 + double plane_tolerance = 1e-16*(coordinate.x*coordinate.x + + coordinate.y*coordinate.y + + coordinate.z*coordinate.z); + + MC_Distance_To_Facet distance_to_facet[24]; + + for (int facet_index = 0; facet_index < num_facets_per_cell; facet_index++) + { +//to-do mcco->distance_to_facet->task[my_task_num].facet[facet_index].distance = PhysicalConstants::_hugeDouble; + distance_to_facet[facet_index].distance = PhysicalConstants::_hugeDouble; + + MC_General_Plane &plane = domain.mesh._cellGeometry[location.cell]._facet[facet_index]; + + double facet_normal_dot_direction_cosine = + (plane.A * direction_cosine->alpha + + plane.B * direction_cosine->beta + + plane.C * direction_cosine->gamma); + + // Consider only those facets whose outer normals have + // a positive dot product with the direction cosine. + // I.e. the particle is LEAVING the cell. + if (facet_normal_dot_direction_cosine <= 0.0) { continue; } + + /* profiling with gprof showed that putting a call to MC_Facet_Coordinates_3D_G + slowed down the code by about 10%, so we get the facet coords "by hand." 
*/ + int *point = domain.mesh._cellConnectivity[location.cell]._facet[facet_index].point; + facet_coords[0] = &domain.mesh._node[point[0]]; + facet_coords[1] = &domain.mesh._node[point[1]]; + facet_coords[2] = &domain.mesh._node[point[2]]; + + double t = MCT_Nearest_Facet_3D_G_Distance_To_Segment( + plane_tolerance, + facet_normal_dot_direction_cosine, plane.A, plane.B, plane.C, plane.D, + *facet_coords[0], *facet_coords[1], *facet_coords[2], + coordinate, direction_cosine, false); + + //to-do mcco->distance_to_facet->task[my_task_num].facet[facet_index].distance = t; + distance_to_facet[facet_index].distance = t; + } // for facet_index + + int retry = 0; + + MC_Nearest_Facet nearest_facet = MCT_Nearest_Facet_Find_Nearest( + mc_particle, &domain, &location, coordinate, + iteration, move_factor, num_facets_per_cell, + //to-do mcco->distance_to_facet->task[my_task_num].facet, + distance_to_facet, + retry); + + + if (! retry) return nearest_facet; + } // while (true) + } // End MCT_Nearest_Facet_3D_G + + HOST_DEVICE_END + +} // anonymous namespace + +namespace +{ + /// \return 6 times the volume of the tet. + /// + /// subtract v3 from v0, v1 and v2. Then take the triple product of v0, v1 and v2. + inline HOST_DEVICE_SYCL + double MCT_Cell_Volume_3D_G_vector_tetDet(const MC_Vector &v0_, + const MC_Vector &v1_, + const MC_Vector &v2_, + const MC_Vector &v3) + { + MC_Vector v0(v0_), v1(v1_), v2(v2_); + + v0.x -= v3.x; v0.y -= v3.y; v0.z -= v3.z; + v1.x -= v3.x; v1.y -= v3.y; v1.z -= v3.z; + v2.x -= v3.x; v2.y -= v3.y; v2.z -= v3.z; + + return + v0.z*(v1.x*v2.y - v1.y*v2.x) + + v0.y*(v1.z*v2.x - v1.x*v2.z) + + v0.x*(v1.y*v2.z - v1.z*v2.y); + } +} + + +namespace +{ + /// Move the input particle by a small amount toward the center of the cell. 
+ template + inline HOST_DEVICE_SYCL + void MCT_Nearest_Facet_3D_G_Move_Particle(T &domain, // input: domain + const MC_Location &location, + MC_Vector &coordinate, // input/output: move this coordinate + double move_factor) // input: multiplication factor for move + { + MC_Vector move_to = MCT_Cell_Position_3D_G(domain, location.cell); + + coordinate.x += move_factor * ( move_to.x - coordinate.x ); + coordinate.y += move_factor * ( move_to.y - coordinate.y ); + coordinate.z += move_factor * ( move_to.z - coordinate.z ); + } +} + +inline HOST_DEVICE +const MC_Domain &MC_Location::get_domain(MonteCarlo *mcco) const +{ + return mcco->domain[domain]; +} + +HOST_DEVICE_END + +inline HOST_DEVICE +const MC_Domain_d &MC_Location::get_domain_d(MonteCarlo *mcco) const +{ + return mcco->domain_d[domain]; +} + +HOST_DEVICE_END + #endif diff --git a/src/MC_Base_Particle.hh b/src/MC_Base_Particle.hh index feb5c507..f049277f 100644 --- a/src/MC_Base_Particle.hh +++ b/src/MC_Base_Particle.hh @@ -1,3 +1,48 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MC_BASE_PARTICLE #define MC_BASE_PARTICLE @@ -9,41 +54,37 @@ #include "MC_Location.hh" #include "DirectionCosine.hh" - struct MC_Data_Member_Operation { - public: +public: enum Enum { - Count = 0, - Pack = 1, - Unpack = 2, - Reset = 3 + Count = 0, + Pack = 1, + Unpack = 2, + Reset = 3 }; }; -HOST_DEVICE_CLASS - class MC_Base_Particle { - public: - +public: static void Cycle_Setup(); static void Update_Counts(); - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Base_Particle(); - HOST_DEVICE_CUDA - explicit MC_Base_Particle( const MC_Particle &particle); - HOST_DEVICE_CUDA - MC_Base_Particle( const MC_Base_Particle &particle); + HOST_DEVICE_SYCL + explicit MC_Base_Particle(const MC_Particle &particle); + HOST_DEVICE_SYCL + MC_Base_Particle(const MC_Base_Particle &particle); - HOST_DEVICE_CUDA - MC_Base_Particle& operator= ( const MC_Particle& ); + HOST_DEVICE_SYCL + MC_Base_Particle& operator= (const MC_Particle&); - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int particle_id_number() const; - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int invalidate(); // serialize the vault @@ -52,23 +93,23 @@ class MC_Base_Particle MC_Data_Member_Operation::Enum mode); // return a location - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Location Get_Location() const; // copy contents to a string void Copy_Particle_Base_To_String(std::string &output_string) const; // aliases for the type of particle that we have - HOST_DEVICE_CUDA - inline int type() const { return species; } - 
HOST_DEVICE_CUDA - inline int index() const { return species; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL + inline int type() const { return species; } + HOST_DEVICE_SYCL + inline int index() const { return species; } + HOST_DEVICE_SYCL inline int is_valid() const { return (0 <= species); } - HOST_DEVICE_CUDA - inline double Get_Energy() const { return kinetic_energy; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL + inline double Get_Energy() const { return kinetic_energy; } + HOST_DEVICE_SYCL inline MC_Vector *Get_Velocity() { return &velocity; } @@ -99,20 +140,16 @@ class MC_Base_Particle }; -HOST_DEVICE_END - //---------------------------------------------------------------------------------------------------------------------- // Return a MC_Location given domain, cell, facet. //---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline MC_Location MC_Base_Particle::Get_Location() const { return MC_Location(domain, cell, 0); } // End Get_Location -HOST_DEVICE_END - //---------------------------------------------------------------------------------------------------------------------- // Invalidate a particle. @@ -122,7 +159,7 @@ HOST_DEVICE_END // // return: A value of 1 (true) is returned on success, 0 (false) on failure. //---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline int MC_Base_Particle::invalidate() { if (is_valid()) @@ -132,7 +169,6 @@ inline int MC_Base_Particle::invalidate() } else return 0; } -HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- // Base information for a particle. @@ -141,7 +177,7 @@ HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- // Default constructor. 
//---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline MC_Base_Particle::MC_Base_Particle( ) : coordinate(), velocity(), @@ -162,65 +198,61 @@ inline MC_Base_Particle::MC_Base_Particle( ) : cell(0) { } -HOST_DEVICE_END - //---------------------------------------------------------------------------------------------------------------------- // Constructor from a base particle type. //---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline MC_Base_Particle::MC_Base_Particle(const MC_Base_Particle &particle) { - coordinate = particle.coordinate; - velocity = particle.velocity; - kinetic_energy = particle.kinetic_energy; - weight = particle.weight; - time_to_census = particle.time_to_census; - age = particle.age; + coordinate = particle.coordinate; + velocity = particle.velocity; + kinetic_energy = particle.kinetic_energy; + weight = particle.weight; + time_to_census = particle.time_to_census; + age = particle.age; num_mean_free_paths = particle.num_mean_free_paths; - num_segments = particle.num_segments; - random_number_seed = particle.random_number_seed; - identifier = particle.identifier; - last_event = particle.last_event; - num_collisions = particle.num_collisions; - breed = particle.breed; - species = particle.species; - domain = particle.domain; - cell = particle.cell; + num_segments = particle.num_segments; + random_number_seed = particle.random_number_seed; + identifier = particle.identifier; + last_event = particle.last_event; + num_collisions = particle.num_collisions; + breed = particle.breed; + species = particle.species; + domain = particle.domain; + cell = particle.cell; } -HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- // Constructor from a particle type. 
//---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline MC_Base_Particle::MC_Base_Particle(const MC_Particle &particle) { - coordinate = particle.coordinate; - velocity = particle.velocity; - kinetic_energy = particle.kinetic_energy; - weight = particle.weight; - time_to_census = particle.time_to_census; - age = particle.age; + coordinate = particle.coordinate; + velocity = particle.velocity; + kinetic_energy = particle.kinetic_energy; + weight = particle.weight; + time_to_census = particle.time_to_census; + age = particle.age; num_mean_free_paths = particle.num_mean_free_paths; - num_segments = particle.num_segments; - random_number_seed = particle.random_number_seed; - identifier = particle.identifier; - last_event = particle.last_event; - num_collisions = particle.num_collisions; - breed = particle.breed; - species = particle.species; - domain = particle.domain; - cell = particle.cell; + num_segments = particle.num_segments; + random_number_seed = particle.random_number_seed; + identifier = particle.identifier; + last_event = particle.last_event; + num_collisions = particle.num_collisions; + breed = particle.breed; + species = particle.species; + domain = particle.domain; + cell = particle.cell; } -HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- // The assignment operator. // Copies a given (rhs) particle replacing this (lhs) particle.. 
//---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline MC_Base_Particle& MC_Base_Particle::operator= (const MC_Particle &particle) { coordinate = particle.coordinate; @@ -242,7 +274,6 @@ inline MC_Base_Particle& MC_Base_Particle::operator= (const MC_Particle &particl return *this; } -HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- @@ -292,6 +323,7 @@ inline MC_Particle::MC_Particle( const MC_Base_Particle &from_particle ) weight(from_particle.weight), time_to_census(from_particle.time_to_census), + totalCrossSection(0.0), age(from_particle.age), num_mean_free_paths(from_particle.num_mean_free_paths), @@ -312,6 +344,7 @@ inline MC_Particle::MC_Particle( const MC_Base_Particle &from_particle ) energy_group(0), domain(from_particle.domain), cell(from_particle.cell), + facet(0), normal_dot(0.0) { double speed = from_particle.velocity.Length(); @@ -334,42 +367,39 @@ HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- // Copy_From_Base copies a particle from a base into this version //---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE +HOST_DEVICE_SYCL inline void MC_Particle::Copy_From_Base( const MC_Base_Particle &from_particle) { - this->coordinate = from_particle.coordinate; - this->velocity.x = from_particle.velocity.x; - this->velocity.y = from_particle.velocity.y; - this->velocity.z = from_particle.velocity.z; - this->kinetic_energy = from_particle.kinetic_energy; - this->weight = from_particle.weight; - this->time_to_census = from_particle.time_to_census; - this->age = from_particle.age; + this->coordinate = from_particle.coordinate; + this->velocity.x = from_particle.velocity.x; + this->velocity.y = 
from_particle.velocity.y; + this->velocity.z = from_particle.velocity.z; + this->kinetic_energy = from_particle.kinetic_energy; + this->weight = from_particle.weight; + this->time_to_census = from_particle.time_to_census; + this->age = from_particle.age; this->num_mean_free_paths = from_particle.num_mean_free_paths; - this->random_number_seed = from_particle.random_number_seed; - this->identifier = from_particle.identifier; - this->last_event = from_particle.last_event; + this->random_number_seed = from_particle.random_number_seed; + this->identifier = from_particle.identifier; + this->last_event = from_particle.last_event; - this->num_collisions = from_particle.num_collisions; - this->num_segments = from_particle.num_segments; + this->num_collisions = from_particle.num_collisions; + this->num_segments = from_particle.num_segments; - this->species = from_particle.species; - this->breed = from_particle.breed; - this->domain = from_particle.domain; - this->cell = from_particle.cell; + this->species = from_particle.species; + this->breed = from_particle.breed; + this->domain = from_particle.domain; + this->cell = from_particle.cell; } -HOST_DEVICE_END //---------------------------------------------------------------------------------------------------------------------- // Print the input particle to a string. 
//---------------------------------------------------------------------------------------------------------------------- inline void MC_Base_Particle::Copy_Particle_Base_To_String(std::string &output_string) const { - MC_Particle to_particle( *this ); + MC_Particle to_particle(*this); to_particle.Copy_Particle_To_String(output_string); } - #endif - diff --git a/src/MC_Cell_State.hh b/src/MC_Cell_State.hh index 0d1fd820..00a74a54 100644 --- a/src/MC_Cell_State.hh +++ b/src/MC_Cell_State.hh @@ -20,7 +20,7 @@ class MC_Cell_State double _cellNumberDensity; // number density of ions in cel uint64_t _id; - uint64_t _sourceTally; + unsigned _sourceTally; MC_Cell_State(); @@ -32,6 +32,7 @@ inline MC_Cell_State::MC_Cell_State() _total(), _volume(0.0), _cellNumberDensity(0.0), + _id(0), _sourceTally(0) { } diff --git a/src/MC_Distance_To_Facet.hh b/src/MC_Distance_To_Facet.hh index 9240c2f7..5fd009b8 100644 --- a/src/MC_Distance_To_Facet.hh +++ b/src/MC_Distance_To_Facet.hh @@ -1,3 +1,48 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MCT_DISTANCE_INCLUDE #define MCT_DISTANCE_INCLUDE @@ -10,12 +55,11 @@ public: double distance; int facet; int subfacet; - HOST_DEVICE_CUDA - MC_Distance_To_Facet(): distance(0.0), facet(0), subfacet(0) {} + HOST_DEVICE_SYCL + MC_Distance_To_Facet() : distance(0.0), facet(0), subfacet(0) {} private: - MC_Distance_To_Facet( const MC_Distance_To_Facet& ); // disable copy constructor - MC_Distance_To_Facet& operator=( const MC_Distance_To_Facet& tmp ); // disable assignment operator - + MC_Distance_To_Facet(const MC_Distance_To_Facet&); // disable copy constructor + MC_Distance_To_Facet& operator=(const MC_Distance_To_Facet& tmp); // disable assignment operator }; HOST_DEVICE_END diff --git a/src/MC_Domain.cc.dp.cpp b/src/MC_Domain.cc.dp.cpp new file mode 100644 index 00000000..53ca8c13 --- /dev/null +++ b/src/MC_Domain.cc.dp.cpp @@ -0,0 +1,480 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. 
Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +#include +#include "MC_Domain.hh" +#include +#include +#include +#include + +#include +using std::cout; +using std::endl; + +#include "Globals.hh" +#include "MonteCarlo.hh" +#include "MC_Cell_State.hh" +#include "macros.hh" +#include "MC_RNG_State.hh" +#include "PhysicalConstants.hh" +#include "MeshPartition.hh" +#include "GlobalFccGrid.hh" +#include "DecompositionObject.hh" +#include "MC_Facet_Adjacency.hh" +#include "Parameters.hh" +#include "MaterialDatabase.hh" +#include "MCT.hh" +#include + +using std::make_pair; +using std::map; +using std::string; +using std::vector; +using sycl::abs; + +namespace +{ + struct FaceInfo + { + MC_Subfacet_Adjacency_Event::Enum _event; + CellInfo _cellInfo; + int _nbrIndex; + }; + + int nodeIndirect[24][3] = {{1, 3, 8}, {3, 7, 8}, {7, 5, 8}, {5, 1, 8}, {0, 4, 9}, {4, 6, 9}, {6, 2, 9}, {2, 0, 9}, {3, 2, 10}, {2, 6, 10}, {6, 7, 10}, {7, 3, 10}, {0, 1, 11}, {1, 5, 11}, {5, 4, 11}, {4, 0, 11}, {4, 5, 12}, {5, 7, 12}, {7, 6, 12}, {6, 4, 12}, 
{0, 2, 13}, {2, 3, 13}, {3, 1, 13}, {1, 0, 13}}; + + int opposingFacet[24] = {7, 6, 5, 4, 3, 2, 1, 0, 12, 15, + 14, 13, 8, 11, 10, 9, 20, 23, 22, 21, + 16, 19, 18, 17}; + + void bootstrapNodeMap(map &nodeIndexMap, + const MeshPartition &partition, + const GlobalFccGrid &grid); + + void buildCells(qs_vector &cell, + BulkStorage &facetStore, + BulkStorage &pointStore, + const map &nodeIndexMap, + const qs_vector &nbrDomain, + const MeshPartition &partition, + const GlobalFccGrid &grid, + const qs_vector &boundaryCondition); + + void makeFacet(MC_Facet_Adjacency &facet, + const MC_Location &location, + int *nodeIndex, + const vector &faceInfo); + + string findMaterial(const Parameters ¶ms, const MC_Vector &rr); + + qs_vector getBoundaryCondition(const Parameters ¶ms); +} + +MC_Mesh_Domain::MC_Mesh_Domain(const MeshPartition &meshPartition, const GlobalFccGrid &grid, + const DecompositionObject &ddc, + const qs_vector &boundaryCondition) + : _domainGid(meshPartition.domainGid()) +{ + _nbrDomainGid.resize(meshPartition.nbrDomains().size()); + for (unsigned ii = 0; ii < _nbrDomainGid.size(); ++ii) + _nbrDomainGid[ii] = meshPartition.nbrDomains()[ii]; + + _nbrRank.reserve(_nbrDomainGid.size(), VAR_MEM); + _nbrRank.Open(); + for (unsigned ii = 0; ii < _nbrDomainGid.size(); ++ii) + _nbrRank.push_back(ddc.getRank(_nbrDomainGid[ii])); + _nbrRank.Close(); + map nodeIndexMap; + + bootstrapNodeMap(nodeIndexMap, meshPartition, grid); + + int totalCells = 0; + for (auto iter = meshPartition.begin(); iter != meshPartition.end(); ++iter) + { + if (iter->second._domainGid != meshPartition.domainGid()) + continue; + ++totalCells; + } + + _connectivityFacetStorage.setCapacity(totalCells * 24, VAR_MEM); + _connectivityPointStorage.setCapacity(totalCells * 14, VAR_MEM); + + buildCells(_cellConnectivity, _connectivityFacetStorage, _connectivityPointStorage, + nodeIndexMap, _nbrDomainGid, meshPartition, grid, boundaryCondition); + + _node.resize(nodeIndexMap.size(), VAR_MEM); + + for (auto 
iter = nodeIndexMap.begin(); iter != nodeIndexMap.end(); ++iter) + { + const Long64 &iNodeGid = iter->first; + const int &iNodeIndex = iter->second; + _node[iNodeIndex] = grid.nodeCoord(iNodeGid); + } + + { // limit scope + // initialize _cellGeometry + _cellGeometry.resize(_cellConnectivity.size(), VAR_MEM); + + // First, we need to count up the total number of facets of all + // cells in this domain and initialize the BulkStorage + // of facets (i.e., MC_General_Plane). This code is somewhat + // pedantic since we know all of the cells have 24 facets. + int totalFacets = 0; + for (unsigned iCell = 0; iCell < _cellConnectivity.size(); ++iCell) + totalFacets += _cellConnectivity[iCell].num_facets; + _geomFacetStorage.setCapacity(totalFacets, VAR_MEM); + + // Now initialize all of the facets. + for (unsigned iCell = 0; iCell < _cellConnectivity.size(); ++iCell) + { + int nFacets = _cellConnectivity[iCell].num_facets; + qs_assert(nFacets == 24); + _cellGeometry[iCell]._facet = _geomFacetStorage.getBlock(nFacets); + _cellGeometry[iCell]._size = nFacets; + for (unsigned jFacet = 0; jFacet < nFacets; ++jFacet) + { + qs_assert(_cellConnectivity[iCell]._facet[jFacet].num_points == 3); + const int nodeIndex0 = _cellConnectivity[iCell]._facet[jFacet].point[0]; + const int nodeIndex1 = _cellConnectivity[iCell]._facet[jFacet].point[1]; + const int nodeIndex2 = _cellConnectivity[iCell]._facet[jFacet].point[2]; + const MC_Vector &r0 = _node[nodeIndex0]; + const MC_Vector &r1 = _node[nodeIndex1]; + const MC_Vector &r2 = _node[nodeIndex2]; + _cellGeometry[iCell]._facet[jFacet] = MC_General_Plane(r0, r1, r2); + } + } + } // limit scope +} + +// To emulate data access patterns we're going to put nodes on the +// corners of the hex elements into the node list first. nodes on +// the the face centers are added after all of the corners. 
+namespace
+{
+    // Builds the local node numbering for this domain:
+    //   nodeIndexMap[nodeGid] = local node index.
+    //
+    // Only cells whose _domainGid equals partition.domainGid() are visited.
+    // For each such cell, entries 0..7 of the gid list filled in by
+    // grid.getNodeGids() (the corners) are numbered first, in order of first
+    // appearance. Entries 8..13 (the face centers) are numbered separately
+    // and then shifted so that they follow every corner index.
+    //
+    // nodeIndexMap  in/out: receives the gid -> local index entries.
+    // partition     cells known to this rank and the domain owning each one.
+    // grid          supplies the node gids of a cell.
+    //
+    // NOTE(review): the template arguments on map/vector were missing from
+    // this copy of the patch (text between angle brackets was dropped). They
+    // are restored from usage: the constructor reads the map as
+    // (Long64 gid -> int index). Confirm the vector element type against
+    // GlobalFccGrid::getNodeGids and keep the forward declaration in sync.
+    void bootstrapNodeMap(map<Long64, int> &nodeIndexMap,
+                          const MeshPartition &partition,
+                          const GlobalFccGrid &grid)
+    {
+        map<Long64, int> faceCenters;
+        vector<Long64> nodeGid;
+        for (auto iter = partition.begin(); iter != partition.end(); ++iter)
+        {
+            if (iter->second._domainGid != partition.domainGid())
+                continue; // skip remote cells
+            const Long64 &iCellGid = iter->first;
+            grid.getNodeGids(iCellGid, nodeGid);
+            // insert() leaves an existing entry alone, so a node shared by
+            // several cells keeps the index it got the first time.
+            for (unsigned ii = 0; ii < 8; ++ii) // yes, 8. Only corners.
+                nodeIndexMap.insert(make_pair(nodeGid[ii], nodeIndexMap.size()));
+            for (unsigned ii = 8; ii < 14; ++ii) // save face centers for later.
+                faceCenters.insert(make_pair(nodeGid[ii], faceCenters.size()));
+        }
+
+        // Shift the face-center numbering so it starts right after the corners.
+        // nodeIndexMap is not modified inside this loop, so the offset is fixed.
+        const int numCorners = static_cast<int>(nodeIndexMap.size());
+        for (auto iter = faceCenters.begin(); iter != faceCenters.end(); ++iter)
+            iter->second += numCorners;
+
+        nodeIndexMap.insert(faceCenters.begin(), faceCenters.end());
+    }
+}
+
+namespace
+{
+    // Setting up the subfacet info is tricky because some data members
+    // of Subfacet_Adjacency don't always apply.
+    // * neighbor_index is meaningless for boundary facets and facets that
+    //   are adjacent to cells on the same domain. We choose to set
+    //   neighbor_index to -1 in these cases.
+    // * adjacent is meaningless for boundary facets. In these cases we
+    //   set adjacent = current.
+ void buildCells(qs_vector &cell, + BulkStorage &facetStore, + BulkStorage &pointStore, + const map &nodeIndexMap, + const qs_vector &nbrDomain, + const MeshPartition &partition, + const GlobalFccGrid &grid, + const qs_vector &boundaryCondition) + + { + map nbrDomainIndex; // nbrDomainIndex[domainGid] = localNbrIndex; + + for (unsigned ii = 0; ii < nbrDomain.size(); ++ii) + nbrDomainIndex[nbrDomain[ii]] = ii; + // for boundary and non-transit facets + nbrDomainIndex[partition.domainGid()] = -1; + + vector nodeGid; + vector faceNbr; + if (cell.size() == 0) + { + cell.reserve(partition.size(), VAR_MEM); + } + cell.Open(); + for (auto iter = partition.begin(); iter != partition.end(); ++iter) + { + if (iter->second._domainGid != partition.domainGid()) + continue; + + const Long64 &iCellGid = iter->first; + const int &domainIndex = iter->second._domainIndex; + const int &cellIndex = iter->second._cellIndex; + const int &foreman = iter->second._foreman; + qs_assert(domainIndex == partition.domainIndex()); + qs_assert(cellIndex == cell.size()); + + cell.push_back(MC_Facet_Adjacency_Cell()); + MC_Facet_Adjacency_Cell &newCell = cell.back(); + + newCell._facet = facetStore.getBlock(newCell.num_facets); + newCell._point = pointStore.getBlock(newCell.num_points); + + grid.getNodeGids(iCellGid, nodeGid); + for (unsigned ii = 0; ii < newCell.num_points; ++ii) + { + auto here = nodeIndexMap.find(nodeGid[ii]); + qs_assert(here != nodeIndexMap.end()); + newCell._point[ii] = here->second; + } + + vector faceInfo(6); + grid.getFaceNbrGids(iCellGid, faceNbr); + for (unsigned ii = 0; ii < 6; ++ii) + { + auto here = partition.findCell(faceNbr[ii]); + qs_assert(here != partition.end()); + const CellInfo &jCellInfo = here->second; + faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Adjacency_Undefined; + faceInfo[ii]._cellInfo = jCellInfo; + faceInfo[ii]._nbrIndex = nbrDomainIndex[jCellInfo._domainGid]; + if (faceNbr[ii] == iCellGid) + faceInfo[ii]._event = boundaryCondition[ii]; + 
else + { + if (jCellInfo._foreman == foreman) + faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_On_Processor; + else + faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_Off_Processor; + // if (jCellInfo._domainIndex != domainIndex && jCellInfo._foreman == foreman) + // faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_On_Processor; + // if (jCellInfo._foreman != foreman) + // faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_Off_Processor; + } + } + + MC_Location location(domainIndex, cellIndex, -1); + for (unsigned ii = 0; ii < newCell.num_facets; ++ii) + { + location.facet = ii; + makeFacet(newCell._facet[ii], location, newCell._point, faceInfo); + } + } + cell.Close(); + } +} + +namespace +{ + void makeFacet(MC_Facet_Adjacency &facet, + const MC_Location &location, + int *nodeIndex, + const vector &faceInfo) + { + const int &facetId = location.facet; + int faceId = facetId / 4; + + facet.num_points = 3; + facet.point[0] = nodeIndex[nodeIndirect[facetId][0]]; + facet.point[1] = nodeIndex[nodeIndirect[facetId][1]]; + facet.point[2] = nodeIndex[nodeIndirect[facetId][2]]; + facet.subfacet.event = faceInfo[faceId]._event; + facet.subfacet.current = location; + facet.subfacet.adjacent.domain = faceInfo[faceId]._cellInfo._domainIndex; + facet.subfacet.adjacent.cell = faceInfo[faceId]._cellInfo._cellIndex; + facet.subfacet.adjacent.facet = opposingFacet[facetId]; + facet.subfacet.neighbor_index = faceInfo[faceId]._nbrIndex; + facet.subfacet.neighbor_global_domain = faceInfo[faceId]._cellInfo._domainGid; + facet.subfacet.neighbor_foreman = faceInfo[faceId]._cellInfo._foreman; + + // handle special case + if (facet.subfacet.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection || + facet.subfacet.event == MC_Subfacet_Adjacency_Event::Boundary_Escape) + facet.subfacet.adjacent.facet = facet.subfacet.current.facet; + } +} + +MC_Vector findCellCenter(const MC_Facet_Adjacency_Cell &cell, + const qs_vector &node) +{ + // find center of 
cell + MC_Vector cellCenter(0., 0., 0.); + for (int iter = 0; iter < cell.num_points; iter++) + cellCenter += node[cell._point[iter]]; + cellCenter /= cell.num_points; + return cellCenter; +} + +// This is messed up. Why doesn't either the cell or the mesh have a +// member function to compute the volume? +double cellVolume(const MC_Facet_Adjacency_Cell &cell, + const qs_vector &node) +{ + // find center of cell + MC_Vector cellCenter(0., 0., 0.); + for (int iter = 0; iter < cell.num_points; iter++) + cellCenter += node[cell._point[iter]]; + cellCenter /= cell.num_points; + + double volume = 0; + for (unsigned iFacet = 0; iFacet < cell.num_facets; ++iFacet) + { + const int *facetCorner = cell._facet[iFacet].point; + MC_Vector aa = node[facetCorner[0]] - cellCenter; + MC_Vector bb = node[facetCorner[1]] - cellCenter; + MC_Vector cc = node[facetCorner[2]] - cellCenter; + + volume += abs(aa.Dot(bb.Cross(cc))); + } + volume /= 6.0; + return volume; +} + +MC_Domain::MC_Domain(const MeshPartition &meshPartition, const GlobalFccGrid &grid, + const DecompositionObject &ddc, const Parameters ¶ms, + const MaterialDatabase &materialDatabase, int numEnergyGroups) + : domainIndex(meshPartition.domainIndex()), + global_domain(meshPartition.domainGid()), + mesh(meshPartition, grid, ddc, getBoundaryCondition(params)) +{ + cell_state.resize(mesh._cellGeometry.size(), VAR_MEM); + _cachedCrossSectionStorage.setCapacity(cell_state.size() * numEnergyGroups, VAR_MEM); + + for (unsigned ii = 0; ii < cell_state.size(); ++ii) + { + cell_state[ii]._volume = cellVolume(mesh._cellConnectivity[ii], + mesh._node); + + MC_Vector point = MCT_Cell_Position_3D_G(*this, ii); + + std::string matName = findMaterial(params, point); + cell_state[ii]._material = materialDatabase.findMaterial(matName); + + cell_state[ii]._total = _cachedCrossSectionStorage.getBlock(numEnergyGroups); + for (unsigned jj = 0; jj < numEnergyGroups; ++jj) + cell_state[ii]._total[jj] = 0.; + + int numIsos = 
static_cast(materialDatabase._mat[cell_state[ii]._material]._iso.size()); + // The cellNumberDensity scales the crossSections so we choose to + // set this density to 1.0 so that the totalCrossSection will be + // as requested by the user. + cell_state[ii]._cellNumberDensity = 1.0; + + MC_Vector cellCenter = findCellCenter(mesh._cellConnectivity[ii], mesh._node); + cell_state[ii]._id = grid.whichCell(cellCenter) * UINT64_C(0x0100000000); + cell_state[ii]._sourceTally = 0; + } +} + +void MC_Domain::clearCrossSectionCache(int numEnergyGroups) +{ + for (unsigned ii = 0; ii < cell_state.size(); ++ii) + for (unsigned jj = 0; jj < numEnergyGroups; ++jj) + cell_state[ii]._total[jj] = 0.; +} + +namespace +{ + // Returns true if the specified coordinate in inside the specified + // geometry. False otherwise + bool isInside(const GeometryParameters &geom, const MC_Vector &rr) + { + bool inside = false; + switch (geom.shape) + { + case GeometryParameters::BRICK: + { + if ((rr.x >= geom.xMin && rr.x <= geom.xMax) && + (rr.y >= geom.yMin && rr.y <= geom.yMax) && + (rr.z >= geom.zMin && rr.z <= geom.zMax)) + inside = true; + } + break; + case GeometryParameters::SPHERE: + { + MC_Vector center(geom.xCenter, geom.yCenter, geom.zCenter); + if ((rr - center).Length() <= geom.radius) + inside = true; + } + + break; + default: + qs_assert(false); + } + return inside; + } +} + +// Returns the name of the material present at coordinate rr. If +// multiple materials overlap return the last material found. 
+namespace
+{
+    // Looks up the material at coordinate rr.
+    //
+    // Every entry of params.geometryParams is tested with isInside(); when
+    // several geometries contain rr, the one listed last wins because each
+    // match overwrites the previous name. qs_assert fires if no geometry
+    // contains rr (the name is still empty).
+    //
+    // params  run parameters holding the list of geometries.
+    // rr      coordinate to classify.
+    // returns the materialName of the last geometry that contains rr.
+    string findMaterial(const Parameters &params, const MC_Vector &rr)
+    {
+        string materialName;
+        for (unsigned ii = 0; ii < params.geometryParams.size(); ++ii)
+        {
+            if (isInside(params.geometryParams[ii], rr))
+                materialName = params.geometryParams[ii].materialName;
+        }
+
+        qs_assert(materialName.size() > 0);
+        return materialName;
+    }
+}
+
+namespace
+{
+    // Translates params.simulationParams.boundaryCondition into one boundary
+    // event per cell face (6 entries).
+    //
+    //   "reflect" -> every face is Boundary_Reflection
+    //   "escape"  -> every face is Boundary_Escape
+    //   "octant"  -> even indices are Boundary_Escape, odd indices are
+    //                Boundary_Reflection
+    //   otherwise -> qs_assert(false)
+    //
+    // NOTE(review): the element type of qs_vector was missing from this copy
+    // of the patch (text between angle brackets was dropped). It is restored
+    // from usage: buildCells assigns boundaryCondition[ii] to a
+    // MC_Subfacet_Adjacency_Event::Enum field. Keep the forward declaration
+    // in sync.
+    qs_vector<MC_Subfacet_Adjacency_Event::Enum> getBoundaryCondition(const Parameters &params)
+    {
+        qs_vector<MC_Subfacet_Adjacency_Event::Enum> bc(6);
+        if (params.simulationParams.boundaryCondition == "reflect")
+        {
+            bc = qs_vector<MC_Subfacet_Adjacency_Event::Enum>(6, MC_Subfacet_Adjacency_Event::Boundary_Reflection);
+        }
+        else if (params.simulationParams.boundaryCondition == "escape")
+        {
+            bc = qs_vector<MC_Subfacet_Adjacency_Event::Enum>(6, MC_Subfacet_Adjacency_Event::Boundary_Escape);
+        }
+        else if (params.simulationParams.boundaryCondition == "octant")
+        {
+            for (unsigned ii = 0; ii < 6; ++ii)
+            {
+                bc[ii] = (ii % 2 == 0) ? MC_Subfacet_Adjacency_Event::Boundary_Escape
+                                       : MC_Subfacet_Adjacency_Event::Boundary_Reflection;
+            }
+        }
+        else
+        {
+            qs_assert(false);
+        }
+        return bc;
+    }
+}
diff --git a/src/MC_Domain.hh b/src/MC_Domain.hh
index de71e32f..615bf106 100644
--- a/src/MC_Domain.hh
+++ b/src/MC_Domain.hh
@@ -1,7 +1,52 @@
+/*
+Modifications Copyright (C) 2023 Intel Corporation
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software
+   without specific prior written permission.
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MC_DOMAIN_INCLUDE #define MC_DOMAIN_INCLUDE - +#include #include "QS_Vector.hh" #include "MC_Facet_Adjacency.hh" #include "MC_Vector.hh" @@ -15,66 +60,216 @@ class GlobalFccGrid; class DecompositionObject; class MaterialDatabase; - //---------------------------------------------------------------------------------------------------------------------- // class that manages data set on a mesh like geometry //---------------------------------------------------------------------------------------------------------------------- class MC_Mesh_Domain { - public: +public: + int _domainGid; // dfr: Might be able to delete this later. + + qs_vector _nbrDomainGid; + qs_vector _nbrRank; - int _domainGid; //dfr: Might be able to delete this later. 
+ qs_vector _node; + qs_vector _cellConnectivity; - qs_vector _nbrDomainGid; - qs_vector _nbrRank; + qs_vector _cellGeometry; - qs_vector _node; - qs_vector _cellConnectivity; + BulkStorage _connectivityFacetStorage; + BulkStorage _connectivityPointStorage; + BulkStorage _geomFacetStorage; + + // -------------------------- public interface + MC_Mesh_Domain() : _domainGid(0){}; + MC_Mesh_Domain(const MeshPartition &meshPartition, + const GlobalFccGrid &grid, + const DecompositionObject &ddc, + const qs_vector &boundaryCondition); +}; - qs_vector _cellGeometry; +//---------------------------------------------------------------------------------------------------------------------- +// class that manages a region on a domain. +//---------------------------------------------------------------------------------------------------------------------- +class MC_Domain +{ +public: + int domainIndex; // This appears to be unused. + int global_domain; + qs_vector cell_state; + + BulkStorage _cachedCrossSectionStorage; + + // hold mesh information + MC_Mesh_Domain mesh; - BulkStorage _connectivityFacetStorage; - BulkStorage _connectivityPointStorage; - BulkStorage _geomFacetStorage; - // -------------------------- public interface - MC_Mesh_Domain(){}; - MC_Mesh_Domain(const MeshPartition& meshPartition, - const GlobalFccGrid& grid, - const DecompositionObject& ddc, - const qs_vector& boundaryCondition); + MC_Domain() : domainIndex(-1), global_domain(0){}; + MC_Domain(const MeshPartition &meshPartition, const GlobalFccGrid &grid, + const DecompositionObject &ddc, const Parameters ¶ms, + const MaterialDatabase &materialDatabase, int numEnergyGroups); + void clearCrossSectionCache(int numEnergyGroups); }; +class MC_Mesh_Domain_d +{ +public: + int _domainGid; // dfr: Might be able to delete this later. 
+ + int *_nbrRank; + int _nbrRankSize; + + MC_Vector *_node; + int _nodeSize; + + MC_Facet_Adjacency_Cell *_cellConnectivity; + int _cellConnectivitySize; + + MC_Facet_Geometry_Cell *_cellGeometry; + int _cellGeometrySize; +}; //---------------------------------------------------------------------------------------------------------------------- // class that manages a region on a domain. //---------------------------------------------------------------------------------------------------------------------- -class MC_Domain +class MC_Domain_d { public: - int domainIndex; // This appears to be unused. - int global_domain; + int domainIndex; // This appears to be unused. + int global_domain; - qs_vector cell_state; + MC_Cell_State *cell_state; + int cell_stateSize; - BulkStorage _cachedCrossSectionStorage; - // hold mesh information - MC_Mesh_Domain mesh; + MC_Mesh_Domain_d mesh; +}; + +inline void copyDomainDevice(const int numEnergyGroups, qs_vector domain, MC_Domain_d *domain_d, int &domainSize) +{ + // Create vector of domains that will live on the host, but have pointers to device memory. 
+ domainSize = domain.size(); + MC_Domain_d *domain_h = (MC_Domain_d *)malloc(domainSize * sizeof(MC_Domain_d)); - // -------------------------- public interface - MC_Domain(){}; - MC_Domain(const MeshPartition& meshPartition, const GlobalFccGrid& grid, - const DecompositionObject& ddc, const Parameters& params, - const MaterialDatabase& materialDatabase, int numEnergyGroups); + // loop over the number of domains creating them one at a time + for (int i = 0; i < domainSize; i++) + { + // set the domain index and "global_domain" + domain_h[i].domainIndex = domain[i].domainIndex; + domain_h[i].global_domain = domain[i].global_domain; + // Create an array of cell states + domain_h[i].cell_stateSize = domain[i].cell_state.size(); - void clearCrossSectionCache(int numEnergyGroups); -}; + MC_Cell_State *cell_state_h = (MC_Cell_State *)malloc(domain[i].cell_state.size() * sizeof(MC_Cell_State)); + sycl_device_queue + .memcpy(cell_state_h, domain[i].cell_state.outputPointer(), + domain[i].cell_state.size() * sizeof(MC_Cell_State)) + .wait(); + + for (int j = 0; j < domain[i].cell_state.size(); j++) + { + cell_state_h[j]._total = sycl::malloc_device( + numEnergyGroups, sycl_device_queue); + sycl_device_queue + .memcpy(cell_state_h[j]._total, domain[i].cell_state[j]._total, + numEnergyGroups * sizeof(double)) + .wait(); + } + + domain_h[i].cell_state = sycl::malloc_device( + domain[i].cell_state.size(), sycl_device_queue); + // cudaMemcpy(domain_h[i].cell_state,domain[i].cell_state.outputPointer(),domain[i].cell_state.size()*sizeof(MC_Cell_State),cudaMemcpyHostToDevice); + sycl_device_queue + .memcpy(domain_h[i].cell_state, cell_state_h, + domain[i].cell_state.size() * sizeof(MC_Cell_State)) + .wait(); + domain_h[i].mesh._domainGid = domain[i].mesh._domainGid; + + free(cell_state_h); + + domain_h[i].mesh._nbrRankSize = domain[i].mesh._nbrRank.size(); + domain_h[i].mesh._nbrRank = sycl::malloc_device( + domain[i].mesh._nbrRank.size(), sycl_device_queue); + sycl_device_queue + 
.memcpy(domain_h[i].mesh._nbrRank, + domain[i].mesh._nbrRank.outputPointer(), + domain[i].mesh._nbrRank.size() * sizeof(int)) + .wait(); + + domain_h[i].mesh._nodeSize = domain[i].mesh._node.size(); + domain_h[i].mesh._node = sycl::malloc_device( + domain_h[i].mesh._nodeSize, sycl_device_queue); + sycl_device_queue + .memcpy(domain_h[i].mesh._node, + domain[i].mesh._node.outputPointer(), + domain_h[i].mesh._nodeSize * sizeof(MC_Vector)) + .wait(); + + int _cellConnectivitySize = domain[i].mesh._cellConnectivity.size(); + domain_h[i].mesh._cellConnectivitySize = _cellConnectivitySize; + MC_Facet_Adjacency_Cell *cellConnectivity = (MC_Facet_Adjacency_Cell *)malloc(_cellConnectivitySize * sizeof(MC_Facet_Adjacency_Cell)); + for (int j = 0; j < _cellConnectivitySize; j++) + { + cellConnectivity[j].num_points = domain[i].mesh._cellConnectivity[j].num_points; + cellConnectivity[j].num_facets = domain[i].mesh._cellConnectivity[j].num_facets; + cellConnectivity[j]._point = sycl::malloc_device( + cellConnectivity[j].num_points, sycl_device_queue); + sycl_device_queue + .memcpy(cellConnectivity[j]._point, + domain[i].mesh._cellConnectivity[j]._point, + cellConnectivity[j].num_points * sizeof(int)) + .wait(); + cellConnectivity[j]._facet = sycl::malloc_device( + cellConnectivity[j].num_facets, sycl_device_queue); + sycl_device_queue + .memcpy(cellConnectivity[j]._facet, + domain[i].mesh._cellConnectivity[j]._facet, + cellConnectivity[j].num_facets * + sizeof(MC_Facet_Adjacency)) + .wait(); + } + domain_h[i].mesh._cellConnectivity = + sycl::malloc_device( + _cellConnectivitySize, sycl_device_queue); + sycl_device_queue + .memcpy(domain_h[i].mesh._cellConnectivity, cellConnectivity, + _cellConnectivitySize * sizeof(MC_Facet_Adjacency_Cell)) + .wait(); + free(cellConnectivity); + + int _cellGeometrySize = domain[i].mesh._cellGeometry.size(); + domain_h[i].mesh._cellGeometrySize = _cellGeometrySize; + MC_Facet_Geometry_Cell *cellGeometry = (MC_Facet_Geometry_Cell 
*)malloc(_cellGeometrySize * sizeof(MC_Facet_Geometry_Cell)); + for (int j = 0; j < _cellGeometrySize; j++) + { + cellGeometry[j]._size = domain[i].mesh._cellGeometry[j]._size; + cellGeometry[j]._facet = sycl::malloc_device( + cellGeometry[j]._size, sycl_device_queue); + sycl_device_queue + .memcpy(cellGeometry[j]._facet, + domain[i].mesh._cellGeometry[j]._facet, + cellGeometry[j]._size * sizeof(MC_General_Plane)) + .wait(); + } + domain_h[i].mesh._cellGeometry = + sycl::malloc_device( + _cellGeometrySize, sycl_device_queue); + sycl_device_queue + .memcpy(domain_h[i].mesh._cellGeometry, cellGeometry, + _cellGeometrySize * sizeof(MC_Facet_Geometry_Cell)) + .wait(); + free(cellGeometry); + } + sycl_device_queue + .memcpy(domain_d, domain_h, domainSize * sizeof(MC_Domain_d)) + .wait(); + free(domain_h); +} #endif diff --git a/src/MC_Facet_Adjacency.hh b/src/MC_Facet_Adjacency.hh index d10db66e..b9e3c056 100644 --- a/src/MC_Facet_Adjacency.hh +++ b/src/MC_Facet_Adjacency.hh @@ -1,3 +1,18 @@ +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + #ifndef MCT_FACET_ADJACENCY_INCLUDE #define MCT_FACET_ADJACENCY_INCLUDE @@ -6,6 +21,7 @@ #include "MC_Location.hh" #include "macros.hh" + struct MC_Subfacet_Adjacency_Event { public: diff --git a/src/MC_Facet_Crossing_Event.hh b/src/MC_Facet_Crossing_Event.hh index 98170036..fe31f069 100644 --- a/src/MC_Facet_Crossing_Event.hh +++ b/src/MC_Facet_Crossing_Event.hh @@ -1,15 +1,133 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MC_FACET_CROSSING_EVENT_HH #define MC_FACET_CROSSING_EVENT_HH #include "Tallies.hh" #include "DeclareMacro.hh" +#include "ParticleVaultContainer.hh" +#include "ParticleVault.hh" +#include "MC_Domain.hh" +#include "MC_Particle.hh" +#include "MC_Facet_Adjacency.hh" +#include "Globals.hh" +#include "MCT.hh" +#include "MC_Particle_Buffer.hh" +#include "DeclareMacro.hh" +#include "macros.hh" +#include "SendQueue.hh" class ParticleVault; class MC_Particle; +//---------------------------------------------------------------------------------------------------------------------- +// Determines whether the particle has been tracked to a facet such that it: +// (i) enters into an adjacent cell +// (ii) escapes across the system boundary (Vacuum BC), or +// (iii) reflects off of the system boundary (Reflection BC). +// +//---------------------------------------------------------------------------------------------------------------------- + +inline HOST_DEVICE + + MC_Tally_Event::Enum + MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault) +{ + MC_Location location = mc_particle.Get_Location(); + + Subfacet_Adjacency &facet_adjacency = MCT_Adjacent_Facet(location, mc_particle, monteCarlo); + + if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_On_Processor) + { + // The particle will enter into an adjacent cell. 
+ mc_particle.domain = facet_adjacency.adjacent.domain; + mc_particle.cell = facet_adjacency.adjacent.cell; + mc_particle.facet = facet_adjacency.adjacent.facet; + mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Transit_Exit; + } + else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Escape) + { + // The particle will escape across the system boundary. + mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; + } + else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection) + { + // The particle will reflect off of the system boundary. + mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Reflection; + } + else if (facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_Off_Processor) + { + // The particle will enter into an adjacent cell on a spatial neighbor. + // The neighboring domain is on another processor. Set domain local domain on neighbor proc + + mc_particle.domain = facet_adjacency.adjacent.domain; + mc_particle.cell = facet_adjacency.adjacent.cell; + mc_particle.facet = facet_adjacency.adjacent.facet; + mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Communication; + +#ifdef __SYCL_DEVICE_ONLY__ + int neighbor_rank = monteCarlo->domain_d[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index]; +#else + // Select particle buffer + int neighbor_rank = monteCarlo->domain[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index]; +#endif + + processingVault->putParticle(mc_particle, particle_index); + + // Push neighbor rank and mc_particle onto the send queue + monteCarlo->_particleVaultContainer->getSendQueue()->push(neighbor_rank, particle_index); + } + + return mc_particle.last_event; +} + +HOST_DEVICE_END + HOST_DEVICE -MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo* monteCarlo, int particle_index, ParticleVault* processingVault); +MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle 
&mc_particle, MonteCarlo *monteCarlo, int particle_index, ParticleVault *processingVault); HOST_DEVICE_END #endif - diff --git a/src/MC_Facet_Geometry.hh b/src/MC_Facet_Geometry.hh index 943e86da..4bceb583 100644 --- a/src/MC_Facet_Geometry.hh +++ b/src/MC_Facet_Geometry.hh @@ -14,7 +14,9 @@ public: double D; // Code to compute coefficients stolen from MCT_Facet_Adjacency_3D_G - MC_General_Plane(){}; + MC_General_Plane(){ + A = B = C = D = 0; + }; MC_General_Plane(const MC_Vector& r0, const MC_Vector& r1, const MC_Vector& r2) { A = ((r1.y - r0.y)*(r2.z - r0.z)) - ((r1.z - r0.z)*(r2.y - r0.y)); diff --git a/src/MC_Fast_Timer.cc.dp.cpp b/src/MC_Fast_Timer.cc.dp.cpp new file mode 100644 index 00000000..239afa7d --- /dev/null +++ b/src/MC_Fast_Timer.cc.dp.cpp @@ -0,0 +1,202 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +#include "MC_Fast_Timer.hh" +#include +#include "MonteCarlo.hh" +#include "MC_Processor_Info.hh" +#include "Globals.hh" +#include "portability.hh" + +const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers] = + { + "main", + "cycleInit", + "cycleTracking", + "cycleTracking_Kernel", + "cycleTracking_MPI", + "cycleTracking_Test_Done", + "cycleFinalize"}; + +static double mc_std_dev(uint64_t const data[], int const nelm); + +static double mc_std_dev(uint64_t const data[], int const nelm) +{ + uint64_t mean = 0.0, sum_deviation = 0.0; + + for (int ndx = 0; ndx < nelm; ++ndx) + { + mean += data[ndx]; + } + mean = mean / nelm; + for (int ndx = 0; ndx < nelm; ++ndx) + { + sum_deviation += (data[ndx] - mean) * (data[ndx] - mean); + } + return sqrt((double)sum_deviation / nelm); +} + +void MC_Fast_Timer_Container::Print_Last_Cycle_Heading(int mpi_rank) +{ +#ifdef DISABLE_TIMERS + return; +#endif + + if (mpi_rank == 0) + { + fprintf(stdout, "\n%-25s %12s %12s %12s %12s %12s %12s\n", "Timer", "Last Cycle", "Last Cycle", "Last Cycle", "Last Cycle", "Last Cycle", "Last Cycle"); + fprintf(stdout, "%-25s %12s %12s %12s %12s %12s %12s\n", "Name", "number", "microSecs", "microSecs", "microSecs", "microSecs", "Efficiency"); + fprintf(stdout, "%-25s %12s %12s %12s %12s %12s %12s\n", "", "of calls", "min", "avg", "max", "stddev", "Rating"); + } +} + +void MC_Fast_Timer_Container::Print_Cumulative_Heading(int mpi_rank) +{ 
+#ifdef DISABLE_TIMERS + return; +#endif + if (mpi_rank == 0) + { + fprintf(stdout, "\n%-25s %12s %12s %12s %12s %12s %12s\n", "Timer", "Cumulative", "Cumulative", "Cumulative", "Cumulative", "Cumulative", "Cumulative"); + fprintf(stdout, "%-25s %12s %12s %12s %12s %12s %12s\n", "Name", "number", "microSecs", "microSecs", "microSecs", "microSecs", "Efficiency"); + fprintf(stdout, "%-25s %12s %12s %12s %12s %12s %12s\n", "", "of calls", "min", "avg", "max", "stddev", "Rating"); + } +} + +void MC_Fast_Timer_Container::Cumulative_Report(int mpi_rank, int num_ranks, MPI_Comm comm_world, uint64_t numSegments) +{ +#ifdef DISABLE_TIMERS + return; +#endif + + fflush(stdout); + mpiBarrier(comm_world); + + std::vector cumulativeClock(MC_Fast_Timer::Num_Timers); + std::vector max_clock(MC_Fast_Timer::Num_Timers); + std::vector min_clock(MC_Fast_Timer::Num_Timers); + std::vector sum_clock(MC_Fast_Timer::Num_Timers); + std::vector std_dev_use(num_ranks); // used to calculate standard deviation + + for (int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++) + { + cumulativeClock[timer_index] = this->timers[timer_index].cumulativeClock; + } + + mpiReduce(&cumulativeClock[0], &max_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MAX, 0, comm_world); + mpiReduce(&cumulativeClock[0], &min_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MIN, 0, comm_world); + mpiReduce(&cumulativeClock[0], &sum_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_SUM, 0, comm_world); + + this->Print_Cumulative_Heading(mpi_rank); + + for (int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++) + { + mpiGather(&cumulativeClock[timer_index], 1, MPI_UINT64_T, &std_dev_use[0], 1, MPI_UINT64_T, 0, comm_world); + + uint64_t ave_clock = sum_clock[timer_index] / num_ranks; + if (mpi_rank == 0) + { + fprintf(stdout, "%-25s %12lu %12.3e %12.3e %12.3e %12.3e %12.2f\n", + mc_fast_timer_names[timer_index], + (unsigned 
long)this->timers[timer_index].numCalls, + (double)min_clock[timer_index], + (double)ave_clock, + (double)max_clock[timer_index], + (double)mc_std_dev(&std_dev_use[0], num_ranks), + (100.0 * ave_clock) / (max_clock[timer_index] + 1.0e-80)); + } + } + if (mpi_rank == 0) + { + int cycleTracking_Index = 2; + fprintf(stdout, "%-25s %12.2f %-25s\n", + "Figure Of Merit", + (numSegments / (max_clock[cycleTracking_Index] * 1e-0)), // updated from segments to mega segments + "[Num Mega Segments / Cycle Tracking Time]"); + } +} + +void MC_Fast_Timer_Container::Last_Cycle_Report(int report_time, int mpi_rank, int num_ranks, MPI_Comm comm_world) +{ +#ifdef DISABLE_TIMERS + return; +#endif + + if (report_time == 1) + { + fflush(stdout); + mpiBarrier(comm_world); + + std::vector lastCycleClock(MC_Fast_Timer::Num_Timers); + std::vector max_clock(MC_Fast_Timer::Num_Timers); + std::vector min_clock(MC_Fast_Timer::Num_Timers); + std::vector sum_clock(MC_Fast_Timer::Num_Timers); + std::vector std_dev_use(num_ranks); // used to calculate standard deviation + + for (int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++) + { + lastCycleClock[timer_index] = this->timers[timer_index].lastCycleClock; + } + + mpiReduce(&lastCycleClock[0], &max_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MAX, 0, comm_world); + mpiReduce(&lastCycleClock[0], &min_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MIN, 0, comm_world); + mpiReduce(&lastCycleClock[0], &sum_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_SUM, 0, comm_world); + + this->Print_Last_Cycle_Heading(mpi_rank); + + for (int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++) + { + mpiGather(&lastCycleClock[timer_index], 1, MPI_UINT64_T, &std_dev_use[0], 1, MPI_UINT64_T, 0, comm_world); + + uint64_t ave_clock = sum_clock[timer_index] / num_ranks; + if (mpi_rank == 0) + { + fprintf(stdout, "%-25s %12lu %12.3e %12.3e %12.3e %12.3e %12.2f\n", + mc_fast_timer_names[timer_index], 
+ (unsigned long)this->timers[timer_index].numCalls, + (double)min_clock[timer_index], + (double)ave_clock, + (double)max_clock[timer_index], + (double)mc_std_dev(&std_dev_use[0], num_ranks), + (100.0 * ave_clock) / (max_clock[timer_index] + 1.0e-80)); + } + } + } + Clear_Last_Cycle_Timers(); +} + +void MC_Fast_Timer_Container::Clear_Last_Cycle_Timers() +{ + for (int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++) + { + this->timers[timer_index].lastCycleClock = 0; + } +} diff --git a/src/MC_Location.hh b/src/MC_Location.hh index d6d082b3..3887db32 100644 --- a/src/MC_Location.hh +++ b/src/MC_Location.hh @@ -1,45 +1,94 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MC_LOCATION_INCLUDE #define MC_LOCATION_INCLUDE - // ToDo: How much chaos would be caused by removing the default constructor? #include #include "DeclareMacro.hh" -class MC_Domain; -class MC_Cell_State; -class MonteCarlo; +class MC_Domain; +class MC_Domain_d; +class MC_Cell_State; +class MonteCarlo; HOST_DEVICE_CLASS class MC_Location { - public: +public: int domain; int cell; int facet; -HOST_DEVICE_CUDA +HOST_DEVICE_SYCL MC_Location(int adomain, int acell, int afacet) : domain(adomain), cell(acell), facet(afacet) - {} + { + } -HOST_DEVICE_CUDA +HOST_DEVICE_SYCL MC_Location() : domain(-1), cell(-1), facet(-1) - {} + { + } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL const MC_Domain& get_domain(MonteCarlo *mcco) const; + HOST_DEVICE_SYCL + const MC_Domain_d& get_domain_d(MonteCarlo *mcco) const; }; HOST_DEVICE_END -HOST_DEVICE_CUDA -inline bool operator==(const MC_Location& a, const MC_Location b) +HOST_DEVICE_SYCL +inline bool operator==(const MC_Location &a, const MC_Location b) { return a.domain == b.domain && @@ -47,5 +96,4 @@ inline bool operator==(const MC_Location& a, const MC_Location b) a.facet == b.facet; } - #endif diff --git a/src/MC_Nearest_Facet.hh b/src/MC_Nearest_Facet.hh index 74dd882c..913e7f86 100644 --- a/src/MC_Nearest_Facet.hh +++ b/src/MC_Nearest_Facet.hh @@ -1,3 +1,48 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted 
provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MCT_NEAREST_FACET_INCLUDE #define MCT_NEAREST_FACET_INCLUDE @@ -18,7 +63,7 @@ class MC_Nearest_Facet dot_product(0.0) {} - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Nearest_Facet& operator=( const MC_Nearest_Facet& nf ) { this->facet = nf.facet; diff --git a/src/MC_Particle.hh b/src/MC_Particle.hh index ed89e65e..d21abd8e 100644 --- a/src/MC_Particle.hh +++ b/src/MC_Particle.hh @@ -1,8 +1,51 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. 
Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MC_PARTICLE_INCLUDE #define MC_PARTICLE_INCLUDE -#include - #include "DirectionCosine.hh" #include "Tallies.hh" @@ -14,11 +57,9 @@ class MC_Base_Particle; - class MC_Particle { - public: - +public: // the current position of the particle MC_Vector coordinate; @@ -58,7 +99,7 @@ class MC_Particle // unique identifier used to identify and track individual particles in the simulation uint64_t identifier; - // the last event this particle underwent + // the last event this particle underwent MC_Tally_Event::Enum last_event; int num_collisions; @@ -89,35 +130,35 @@ class MC_Particle double normal_dot; public: - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Particle(); - HOST_DEVICE_CUDA - MC_Particle( const MC_Base_Particle &from_particle ); + HOST_DEVICE_SYCL + MC_Particle(const MC_Base_Particle &from_particle); - HOST_DEVICE_CUDA - void Copy_From_Base( const MC_Base_Particle &from_particle); + HOST_DEVICE_SYCL + void Copy_From_Base(const MC_Base_Particle &from_particle); - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Location Get_Location() const; // format a string with the contents of the particle void Copy_Particle_To_String(std::string &output_string) const; // move a particle a distance in the direction_cosine direction - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL void Move_Particle(const DirectionCosine & direction_cosine, const double distance); - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL void PrintParticle(); - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL DirectionCosine 
*Get_Direction_Cosine() { return &this->direction_cosine; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Vector *Get_Velocity() { return &this->velocity; @@ -127,7 +168,7 @@ public: //---------------------------------------------------------------------------------------------------------------------- // Return a MC_Location given domain, cell, facet. //---------------------------------------------------------------------------------------------------------------------- - HOST_DEVICE_CUDA +HOST_DEVICE_SYCL inline MC_Location MC_Particle::Get_Location() const { return MC_Location(domain, cell, facet); @@ -136,8 +177,8 @@ inline MC_Location MC_Particle::Get_Location() const //---------------------------------------------------------------------------------------------------------------------- // Move the particle a straight-line distance along a specified cosine. //---------------------------------------------------------------------------------------------------------------------- - HOST_DEVICE_CUDA -inline void MC_Particle::Move_Particle( const DirectionCosine &my_direction_cosine, +HOST_DEVICE_SYCL +inline void MC_Particle::Move_Particle(const DirectionCosine &my_direction_cosine, const double distance) { coordinate.x += (my_direction_cosine.alpha * distance); @@ -148,38 +189,36 @@ inline void MC_Particle::Move_Particle( const DirectionCosine &my_direction_cosi //---------------------------------------------------------------------------------------------------------------------- // Print all of the particles components //---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE_CUDA +HOST_DEVICE_SYCL inline void MC_Particle::PrintParticle() { - printf( "coordiante: %g\t%g\t%g\n", coordinate.x, coordinate.y, coordinate.z ); - printf( "velocity: %g\t%g\t%g\n", velocity.x, velocity.y, velocity.z ); - printf( "direction_cosine: %g\t%g\t%g\n", direction_cosine.alpha, direction_cosine.beta, 
direction_cosine.gamma ); - printf( "kinetic_energy: %g\n", kinetic_energy ); - printf( "Weight: %g\n", weight); - printf( "time_to_census: %g\n", time_to_census); - printf( "totalCrossSection: %g\n", totalCrossSection); - printf( "age: %g\n", age); - printf( "num_mean_free_paths: %g\n", num_mean_free_paths); - printf( "mean_free_path: %g\n", mean_free_path); - printf( "segment_path_length: %g\n", segment_path_length); - printf( "random_number_seed: %" PRIu64 "\n", random_number_seed); - printf( "identifier: %" PRIu64 "\n", identifier); - printf( "last_event: %d\n", last_event); - printf( "num_collision: %d\n", num_collisions); - printf( "num_segments: %g\n", num_segments); - printf( "task: %d\n", task); - printf( "species: %d\n", species); - printf( "breed: %d\n", breed); - printf( "energy_group: %d\n", energy_group); - printf( "domain: %d\n", domain); - printf( "cell: %d\n", cell); - printf( "facet: %d\n", facet); - printf( "normal_dot: %g\n", normal_dot); + printf("coordiante: %g\t%g\t%g\n", coordinate.x, coordinate.y, coordinate.z); + printf("velocity: %g\t%g\t%g\n", velocity.x, velocity.y, velocity.z); + printf("direction_cosine: %g\t%g\t%g\n", direction_cosine.alpha, direction_cosine.beta, direction_cosine.gamma); + printf("kinetic_energy: %g\n", kinetic_energy); + printf("Weight: %g\n", weight); + printf("time_to_census: %g\n", time_to_census); + printf("totalCrossSection: %g\n", totalCrossSection); + printf("age: %g\n", age); + printf("num_mean_free_paths: %g\n", num_mean_free_paths); + printf("mean_free_path: %g\n", mean_free_path); + printf("segment_path_length: %g\n", segment_path_length); + printf("random_number_seed: %zu\n", random_number_seed); + printf("identifier: %zu\n", identifier); + printf("last_event: %d\n", last_event); + printf("num_collision: %d\n", num_collisions); + printf("num_segments: %g\n", num_segments); + printf("task: %d\n", task); + printf("species: %d\n", species); + printf("breed: %d\n", breed); + printf("energy_group: %d\n", 
energy_group); + printf("domain: %d\n", domain); + printf("cell: %d\n", cell); + printf("facet: %d\n", facet); + printf("normal_dot: %g\n", normal_dot); printf("\n"); } - -int MC_Copy_Particle_Get_Num_Fields(); - +int MC_Copy_Particle_Get_Num_Fields(); #endif // MC_PARTICLE_INCLUDE diff --git a/src/MC_Particle_Buffer.cc b/src/MC_Particle_Buffer.cc index e0181bbb..84f12388 100644 --- a/src/MC_Particle_Buffer.cc +++ b/src/MC_Particle_Buffer.cc @@ -27,7 +27,7 @@ void MCP_Cancel_Request(MPI_Request *request) if (request[0] != MPI_REQUEST_NULL) { MPI_Status status; - int flag; + int flag = 0; mpiCancel(request); mpiWait(request, &status); mpiTest_cancelled(&status, &flag); @@ -61,9 +61,9 @@ void particle_buffer_base_type::Allocate(int buffer_size) // we add 2 ints: 1 for the number of particles and the second int is so the float_data // buffer will be 8 byte aligned - uint64_t length_int_data = (MC_Base_Particle::num_base_ints * buffer_size + 2) * (int)sizeof(int); - uint64_t length_float_data = (MC_Base_Particle::num_base_floats * buffer_size ) * (int)sizeof(double); - uint64_t length_char_data = (MC_Base_Particle::num_base_chars * buffer_size ) * (int)sizeof(char); + uint64_t length_int_data = (uint64_t)(MC_Base_Particle::num_base_ints * buffer_size + 2) * (int)sizeof(int); + uint64_t length_float_data = (uint64_t)(MC_Base_Particle::num_base_floats * buffer_size ) * (int)sizeof(double); + uint64_t length_char_data = (uint64_t)(MC_Base_Particle::num_base_chars * buffer_size ) * (int)sizeof(char); this->length = length_int_data + length_float_data + length_char_data; @@ -114,8 +114,8 @@ void particle_buffer_base_type::Initialize_Buffer() void particle_buffer_base_type::Reset_Offsets() { - uint64_t length_int_data = (MC_Base_Particle::num_base_ints * num_particles + 2) * (int)sizeof(int); - uint64_t length_float_data = (MC_Base_Particle::num_base_floats * num_particles ) * (int)sizeof(double); + uint64_t length_int_data = (uint64_t)(MC_Base_Particle::num_base_ints * 
num_particles + 2) * (int)sizeof(int); + uint64_t length_float_data = (uint64_t)(MC_Base_Particle::num_base_floats * num_particles ) * (int)sizeof(double); char* p = (char *)int_data; diff --git a/src/MC_Processor_Info.hh b/src/MC_Processor_Info.hh index f8b3b2a4..65ba2439 100644 --- a/src/MC_Processor_Info.hh +++ b/src/MC_Processor_Info.hh @@ -2,6 +2,7 @@ #define MC_PROCESSOR_INFO_HH #include "utilsMpi.hh" +#include "macros.hh" class MC_Processor_Info { @@ -9,20 +10,33 @@ public: int rank; int num_processors; + int num_tasks; int use_gpu; int gpu_id; MPI_Comm comm_mc_world; + MPI_Comm *comm_mc_world_threads; // Communicator to allow threads to make MPI calls. - MC_Processor_Info() - : comm_mc_world(MPI_COMM_WORLD), - use_gpu(0), - gpu_id(0) + MC_Processor_Info() : comm_mc_world(MPI_COMM_WORLD) { mpiComm_rank(comm_mc_world, &rank); mpiComm_size(comm_mc_world, &num_processors); +// num_tasks = omp_get_max_threads(); + num_tasks = 1; + comm_mc_world_threads = new MPI_Comm[num_tasks]; + + for (int thread_ndx=0; thread_ndx( uint64_in >> 32 ); + back_bits = static_cast( uint64_in & 0xffffffff ); + } +HOST_DEVICE_END +} + +//---------------------------------------------------------------------------// + +namespace +{ + // Function sed to hash a 64 bit int into another, unrelated one. It + // does this in two 32 bit chuncks. This function uses the algorithm + // from Numerical Recipies in C, 2nd edition: psdes, p. 302. This is + // used to make 64 bit numbers for use as initial states for the 64 + // bit lcg random number generator. +inline HOST_DEVICE + void pseudo_des( uint32_t& lword, uint32_t& irword ) + { + // This random number generator assumes that type uint32_t is a 32 bit int + // = 1/2 of a 64 bit int. The sizeof operator returns the size in bytes = 8 bits. 
+ + const int NITER = 2; + //const uint32_t c1[] = { 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L }; + //const uint32_t c2[] = { 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L}; + uint32_t c1[4]; + c1[0]=0xbaa96887L; + c1[1]=0x1e17d32cL; + c1[2]=0x03bcdc3cL; + c1[3]=0x0f33d1b2L; + uint32_t c2[4]; + c2[0]=0x4b0f3b58L; + c2[1]=0xe874f0c3L; + c2[2]=0x6955c5a6L; + c2[3]=0x55a7ca46L; + + + uint32_t ia,ib,iswap,itmph=0,itmpl=0; + + for( int i = 0; i < NITER; i++) + { + ia = ( iswap = irword ) ^ c1[i]; + itmpl = ia & 0xffff; + itmph = ia >> 16; + ib = itmpl*itmpl+ ~(itmph*itmph); + + irword = lword ^ (((ia = (ib >> 16) | + ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph); + + lword=iswap; + } + } +HOST_DEVICE_END +} + +//---------------------------------------------------------------------------// + +namespace +{ + + inline HOST_DEVICE + // Function used to reconstruct a 64 bit from 2 32 bit ints. + uint64_t reconstruct_uint64( uint32_t front_bits, uint32_t back_bits ) + { + uint64_t reconstructed, temp; + reconstructed = static_cast( front_bits ); + temp = static_cast( back_bits ); + + // shift first bits 32 bits to left + reconstructed = reconstructed << 32; + + // temp must be masked to kill leading 1's. Then 'or' with reconstructed + // to get the last bits in + reconstructed |= (temp & 0x00000000ffffffff); + + return reconstructed; + } + HOST_DEVICE_END +} + +//---------------------------------------------------------------------------// + +namespace +{ +inline HOST_DEVICE + // Function used to hash a 64 bit int to get an initial state. 
+ uint64_t hash_state( uint64_t initial_number ) + { + // break initial number apart into 2 32 bit ints + uint32_t front_bits, back_bits; + breakup_uint64( initial_number, front_bits, back_bits ); + + // hash the bits + pseudo_des( front_bits, back_bits ); + + // put the hashed parts together into 1 64 bit int + return reconstruct_uint64( front_bits, back_bits ); + } +HOST_DEVICE_END +} + +//---------------------------------------------------------------------------------------------------------------------- +// This routine spawns a "child" random number seed from a "parent" random number seed. +//---------------------------------------------------------------------------------------------------------------------- + +inline HOST_DEVICE +uint64_t rngSpawn_Random_Number_Seed(uint64_t *parent_seed) +{ + uint64_t spawned_seed = hash_state(*parent_seed); + // Bump the parent seed as that is what is expected from the interface. + rngSample(parent_seed); + return spawned_seed; +} + +HOST_DEVICE_END + #endif diff --git a/src/MC_Segment_Outcome.hh b/src/MC_Segment_Outcome.hh index f8d6a25c..7fdf1820 100644 --- a/src/MC_Segment_Outcome.hh +++ b/src/MC_Segment_Outcome.hh @@ -1,39 +1,333 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MC_SEGMENT_OUTCOME_INCLUDE #define MC_SEGMENT_OUTCOME_INCLUDE +#include +#include "MC_Nearest_Facet.hh" +#include "MC_Location.hh" +#include "MonteCarlo.hh" +#include "Globals.hh" +#include "MC_Particle.hh" +#include "MC_RNG_State.hh" +#include "MC_Cell_State.hh" +#include "Tallies.hh" +#include "utils.hh" +#include "macros.hh" +#include "MacroscopicCrossSection.hh" +#include "MCT.hh" +#include "PhysicalConstants.hh" +#include "DeclareMacro.hh" + class MC_Particle; class MC_Vector; class MonteCarlo; - struct MC_Segment_Outcome_type { - public: +public: enum Enum { - Initialize = -1, - Collision = 0, - Facet_Crossing = 1, - Census = 2, - Max_Number = 3 + Initialize = -1, + Collision = 0, + Facet_Crossing = 1, + Census = 2, + Max_Number = 3 }; }; - struct MC_Collision_Event_Return { - public: +public: enum Enum { - Stop_Tracking = 0, + Stop_Tracking = 0, Continue_Tracking = 1, Continue_Collision = 2 }; }; -#include "DeclareMacro.hh" HOST_DEVICE -MC_Segment_Outcome_type::Enum MC_Segment_Outcome(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int &flux_tally_index); +static inline unsigned int MC_Find_Min(const double *array, + int num_elements); +HOST_DEVICE_END + +//-------------------------------------------------------------------------------------------------- +// Routine MC_Segment_Outcome determines whether the next segment of the particle's trajectory will result in: +// (i) collision within the current cell, +// (ii) exiting from 
the current cell, or +// (iii) census at the end of the time step. +//-------------------------------------------------------------------------------------------------- + +inline HOST_DEVICE + MC_Segment_Outcome_type::Enum + MC_Segment_Outcome(MonteCarlo *monteCarlo, MC_Particle &mc_particle, unsigned int &flux_tally_index) +{ + // initialize distances to large number + int number_of_events = 3; + double distance[3]; + distance[0] = distance[1] = distance[2] = 1e80; + + // Calculate the particle speed + double particle_speed = mc_particle.Get_Velocity()->Length(); + + // Force collision if a census event narrowly preempts a collision + int force_collision = 0; + if (mc_particle.num_mean_free_paths < 0.0) + { + force_collision = 1; + + if (mc_particle.num_mean_free_paths > -900.0) + { +#if 1 +#ifdef DEBUG + printf(" MC_Segment_Outcome: mc_particle.num_mean_free_paths > -900.0 \n"); +#endif +#else + std::string output_string; + MC_Warning("Forced Collision: num_mean_free_paths < 0 \n" + "Particle record:\n%s", + output_string.c_str()); +#endif + } + + mc_particle.num_mean_free_paths = PhysicalConstants::_smallDouble; + } + + // Randomly determine the distance to the next collision + // based upon the composition of the current cell. + double macroscopic_total_cross_section = weightedMacroscopicCrossSection(monteCarlo, 0, + mc_particle.domain, mc_particle.cell, mc_particle.energy_group); + + // Cache the cross section + mc_particle.totalCrossSection = macroscopic_total_cross_section; + if (macroscopic_total_cross_section == 0.0) + { + mc_particle.mean_free_path = PhysicalConstants::_hugeDouble; + } + else + { + mc_particle.mean_free_path = 1.0 / macroscopic_total_cross_section; + } + + if (mc_particle.num_mean_free_paths == 0.0) + { + // Sample the number of mean-free-paths remaining before + // the next collision from an exponential distribution. 
+ double random_number = rngSample(&mc_particle.random_number_seed); + + mc_particle.num_mean_free_paths = -1.0 * sycl::log(random_number); + } + + // Calculate the distances to collision, nearest facet, and census. + + // Forced collisions do not need to move far. + if (force_collision) + { + distance[MC_Segment_Outcome_type::Collision] = PhysicalConstants::_smallDouble; + } + else + { + distance[MC_Segment_Outcome_type::Collision] = mc_particle.num_mean_free_paths * mc_particle.mean_free_path; + } + + // process census + distance[MC_Segment_Outcome_type::Census] = particle_speed * mc_particle.time_to_census; + + // DEBUG Turn off threshold for now + double distance_threshold = 10.0 * PhysicalConstants::_hugeDouble; + // Get the current winning distance. + double current_best_distance = PhysicalConstants::_hugeDouble; + + DirectionCosine *direction_cosine = mc_particle.Get_Direction_Cosine(); + + bool new_segment = (mc_particle.num_segments == 0 || + mc_particle.last_event == MC_Tally_Event::Collision); + + MC_Location location(mc_particle.Get_Location()); + + // Calculate the minimum distance to each facet of the cell. + MC_Nearest_Facet nearest_facet; + nearest_facet = MCT_Nearest_Facet(&mc_particle, location, mc_particle.coordinate, + direction_cosine, distance_threshold, current_best_distance, new_segment, monteCarlo); + + mc_particle.normal_dot = nearest_facet.dot_product; + + distance[MC_Segment_Outcome_type::Facet_Crossing] = nearest_facet.distance_to_facet; + + // Get out of here if the tracker failed to bound this particle's volume. + if (mc_particle.last_event == MC_Tally_Event::Facet_Crossing_Tracking_Error) + { + return MC_Segment_Outcome_type::Facet_Crossing; + } + + // Calculate the minimum distance to the selected events. + + // Force a collision (if required). 
+ if (force_collision == 1) + { + distance[MC_Segment_Outcome_type::Facet_Crossing] = PhysicalConstants::_hugeDouble; + distance[MC_Segment_Outcome_type::Census] = PhysicalConstants::_hugeDouble; + distance[MC_Segment_Outcome_type::Collision] = PhysicalConstants::_tinyDouble; + } + + // we choose our segment outcome here + MC_Segment_Outcome_type::Enum segment_outcome = + (MC_Segment_Outcome_type::Enum)MC_Find_Min(distance, number_of_events); + + // Before using segment_outcome as an index, verify it is valid + if (segment_outcome < 0 || segment_outcome >= MC_Segment_Outcome_type::Max_Number) + { + MC_Fatal_Jump("segment_outcome '%d' is invalid\n", (int)segment_outcome); + } + + if (distance[segment_outcome] < 0) + { + MC_Fatal_Jump("Negative distances to events are NOT permitted!\n" + "identifier = %" PRIu64 "\n" + "(Collision = %g,\n" + " Facet Crossing = %g,\n" + " Census = %g,\n", + mc_particle.identifier, + distance[MC_Segment_Outcome_type::Collision], + distance[MC_Segment_Outcome_type::Facet_Crossing], + distance[MC_Segment_Outcome_type::Census]); + } + mc_particle.segment_path_length = distance[segment_outcome]; + + mc_particle.num_mean_free_paths -= mc_particle.segment_path_length / mc_particle.mean_free_path; + + MC_Tally_Event::Enum SegmentOutcome_to_LastEvent[MC_Segment_Outcome_type::Max_Number] = + { + MC_Tally_Event::Collision, + MC_Tally_Event::Facet_Crossing_Transit_Exit, + MC_Tally_Event::Census, + }; + + mc_particle.last_event = SegmentOutcome_to_LastEvent[segment_outcome]; + + // Set the segment path length to be the minimum of + // (i) the distance to collision in the cell, or + // (ii) the minimum distance to a facet of the cell, or + // (iii) the distance to census at the end of the time step + if (segment_outcome == MC_Segment_Outcome_type::Collision) + { + mc_particle.num_mean_free_paths = 0.0; + } + else if (segment_outcome == MC_Segment_Outcome_type::Facet_Crossing) + { + mc_particle.facet = nearest_facet.facet; + } + else if 
(segment_outcome == MC_Segment_Outcome_type::Census) + { + mc_particle.time_to_census = MC_MIN(mc_particle.time_to_census, 0.0); + } + + // If collision was forced, set mc_particle.num_mean_free_paths = 0 + // so that a new value is randomly selected on next pass. + if (force_collision == 1) + { + mc_particle.num_mean_free_paths = 0.0; + } + + // Do not perform any tallies if the segment path length is zero. + // This only introduces roundoff errors. + if (mc_particle.segment_path_length == 0.0) + { + return segment_outcome; + } + + // Move particle to end of segment, accounting for some physics processes along the segment. + + // Project the particle trajectory along the segment path length. + mc_particle.Move_Particle(mc_particle.direction_cosine, mc_particle.segment_path_length); + + double segment_path_time = (mc_particle.segment_path_length / particle_speed); + + // Decrement the time to census and increment age. + mc_particle.time_to_census -= segment_path_time; + mc_particle.age += segment_path_time; + + // Ensure mc_particle.time_to_census is non-negative. + if (mc_particle.time_to_census < 0.0) + { + mc_particle.time_to_census = 0.0; + } + + // Accumulate the particle's contribution to the scalar flux. 
+ monteCarlo->_tallies->TallyScalarFlux(mc_particle.segment_path_length * mc_particle.weight, mc_particle.domain, + flux_tally_index, mc_particle.cell, mc_particle.energy_group); + + return segment_outcome; +} +HOST_DEVICE_END + +HOST_DEVICE +static inline unsigned int MC_Find_Min(const double *array, + int num_elements) +{ + double min = array[0]; + int min_index = 0; + + for (int element_index = 1; element_index < num_elements; ++element_index) + { + if (array[element_index] < min) + { + min = array[element_index]; + min_index = element_index; + } + } + + return min_index; +} HOST_DEVICE_END +// #include "DeclareMacro.hh" +// HOST_DEVICE +// MC_Segment_Outcome_type::Enum MC_Segment_Outcome(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int &flux_tally_index); +// HOST_DEVICE_END + #endif diff --git a/src/MC_SourceNow.hh b/src/MC_SourceNow.hh index 80191c7c..ef95071e 100644 --- a/src/MC_SourceNow.hh +++ b/src/MC_SourceNow.hh @@ -1,9 +1,213 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + #ifndef MC_SOURCE_NOW_HH #define MC_SOURCE_NOW_HH +#include "QS_Vector.hh" +#include +#include "utils.hh" +#include "utilsMpi.hh" +#include "MonteCarlo.hh" +#include "MaterialDatabase.hh" +#include "initMC.hh" +#include "Tallies.hh" +#include "ParticleVaultContainer.hh" +#include "ParticleVault.hh" +#include "MC_Processor_Info.hh" +#include "MC_Cell_State.hh" +#include "MC_Time_Info.hh" +#include "MCT.hh" +#include "PhysicalConstants.hh" +#include "macros.hh" +#include "AtomicMacro.hh" +#include "NVTX_Range.hh" +#include +#include + class MonteCarlo; -void MC_SourceNow(MonteCarlo *mcco); +namespace +{ + double Get_Speed_From_Energy(double energy); +} + +namespace +{ + double Get_Speed_From_Energy(double energy) + { + static const double rest_mass_energy = PhysicalConstants::_neutronRestMassEnergy; + static const double speed_of_light = PhysicalConstants::_speedOfLight; + + return speed_of_light * sqrt(energy * (energy + 2.0 * (rest_mass_energy)) / + ((energy + rest_mass_energy) * (energy + rest_mass_energy))); + } +} + +inline void MC_SourceNow(MonteCarlo *monteCarlo) +{ + NVTX_Range range("MC_Source_Now"); + std::vector<double> source_rate(monteCarlo->_materialDatabase->_mat.size()); // Get this from user input + + for (int material_index = 0; material_index < monteCarlo->_materialDatabase->_mat.size(); material_index++) + { + std::string name = monteCarlo->_materialDatabase->_mat[material_index]._name; + double
sourceRate = monteCarlo->_params.materialParams[name].sourceRate; + source_rate[material_index] = sourceRate; + } + + double local_weight_particles = 0; + + for (int domain_index = 0; domain_index < monteCarlo->domain.size(); domain_index++) + { + MC_Domain &domain = monteCarlo->domain[domain_index]; + + for (int cell_index = 0; cell_index < domain.cell_state.size(); cell_index++) + { + MC_Cell_State &cell = domain.cell_state[cell_index]; + double cell_weight_particles = cell._volume * source_rate[cell._material] * monteCarlo->time_info->time_step; + local_weight_particles += cell_weight_particles; + } + } + + double total_weight_particles = 0; + + mpiAllreduce(&local_weight_particles, &total_weight_particles, 1, MPI_DOUBLE, MPI_SUM, monteCarlo->processor_info->comm_mc_world); + + uint64_t num_particles = monteCarlo->_params.simulationParams.nParticles; + double source_fraction = 0.1; + double source_particle_weight = total_weight_particles/(source_fraction * num_particles); + // Store the source particle weight for later use. + monteCarlo->source_particle_weight = source_particle_weight; + + uint64_t vault_size = monteCarlo->_particleVaultContainer->getVaultSize(); + uint64_t processing_index = monteCarlo->_particleVaultContainer->sizeProcessing() / vault_size; + + uint64_t task_index = 0; + uint64_t particle_count = 0; + + // Compute the partial sums on each mpi process. 
+ // uint64_t local_num_particles = (int)(local_weight_particles / source_particle_weight); + + for (int domain_index = 0; domain_index < monteCarlo->domain.size(); domain_index++) + { + MC_Domain &domain = monteCarlo->domain[domain_index]; + + for (int cell_index = 0; cell_index < domain.cell_state.size(); cell_index++) + { + MC_Cell_State &cell = domain.cell_state[cell_index]; + double cell_weight_particles = cell._volume * source_rate[cell._material] * monteCarlo->time_info->time_step; + double cell_num_particles_float = cell_weight_particles / source_particle_weight; + int cell_num_particles = (int)cell_num_particles_float; + + //Can Make this parallel - have an optimization from Leopold to add still + for (int particle_index = 0; particle_index < cell_num_particles; particle_index++) + { + MC_Particle particle; + + uint64_t random_number_seed; + + ATOMIC_CAPTURE( cell._sourceTally, 1, random_number_seed ); + + random_number_seed += cell._id; + + particle.random_number_seed = rngSpawn_Random_Number_Seed(&random_number_seed); + particle.identifier = random_number_seed; + + MCT_Generate_Coordinate_3D_G(&particle.random_number_seed, domain_index, cell_index, particle.coordinate, monteCarlo); + + particle.direction_cosine.Sample_Isotropic(&particle.random_number_seed); + + // sample energy uniformly from [eMin, eMax] MeV + particle.kinetic_energy = (monteCarlo->_params.simulationParams.eMax - monteCarlo->_params.simulationParams.eMin)* + rngSample(&particle.random_number_seed) + monteCarlo->_params.simulationParams.eMin; + + double speed = Get_Speed_From_Energy(particle.kinetic_energy); + + particle.velocity.x = speed * particle.direction_cosine.alpha; + particle.velocity.y = speed * particle.direction_cosine.beta; + particle.velocity.z = speed * particle.direction_cosine.gamma; + + particle.domain = domain_index; + particle.cell = cell_index; + particle.task = task_index; + particle.weight = source_particle_weight; + + double randomNumber = 
rngSample(&particle.random_number_seed); + particle.num_mean_free_paths = -1.0 * log(randomNumber); + + randomNumber = rngSample(&particle.random_number_seed); + particle.time_to_census = monteCarlo->time_info->time_step * randomNumber; + + MC_Base_Particle base_particle(particle); + + monteCarlo->_particleVaultContainer->addProcessingParticle(base_particle, processing_index); + + particle_count++; + + ATOMIC_UPDATE(monteCarlo->_tallies->_balanceTask[particle.task]._source); + } + } + } + +#if 0 + // Check for duplicate particle random number seeds. + std::vector<uint64_t> particle_seeds; + int task_index = 0; + //for ( int task_index = 0; task_index < num_threads; task_index++ ) + { + ParticleVault& particleVault = monteCarlo->_particleVaultContainer->getTaskProcessingVault(task_index); + + uint64_t currentNumParticles = particleVault.size(); + for (int particleIndex = 0; particleIndex < currentNumParticles; particleIndex++) + { + MC_Base_Particle &currentParticle = particleVault[particleIndex]; + particle_seeds.push_back(currentParticle.random_number_seed); + } + } + + std::sort(particle_seeds.begin(), particle_seeds.end()); + uint64_t num_dupl = 0; + for (size_t pi_index = 0; pi_index #include #include "DeclareMacro.hh" HOST_DEVICE_CLASS class MC_Vector { - public: +public: double x; double y; double z; - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Vector() : x(0), y(0), z(0) {} - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL MC_Vector(double a, double b, double c) : x(a), y(b), z(c) {} - HOST_DEVICE_CUDA - MC_Vector& operator=( const MC_Vector&tmp ) + HOST_DEVICE_SYCL + MC_Vector &operator=(const MC_Vector &tmp) { - if ( this == &tmp ) { return *this; } + if (this == &tmp) + { + return *this; + } x = tmp.x; y = tmp.y; @@ -29,14 +78,14 @@ class MC_Vector return *this; } - HOST_DEVICE_CUDA - bool operator==( const MC_Vector& tmp ) + HOST_DEVICE_SYCL + bool operator==(const MC_Vector &tmp) { return tmp.x == x && tmp.y == y && tmp.z == z; } - HOST_DEVICE_CUDA - MC_Vector& operator+=( const
MC_Vector &tmp ) + HOST_DEVICE_SYCL + MC_Vector &operator+=(const MC_Vector &tmp) { x += tmp.x; y += tmp.y; @@ -44,8 +93,8 @@ class MC_Vector return *this; } - HOST_DEVICE_CUDA - MC_Vector& operator-=( const MC_Vector &tmp ) + HOST_DEVICE_SYCL + MC_Vector &operator-=(const MC_Vector &tmp) { x -= tmp.x; y -= tmp.y; @@ -53,8 +102,8 @@ class MC_Vector return *this; } - HOST_DEVICE_CUDA - MC_Vector& operator*=(const double scalar) + HOST_DEVICE_SYCL + MC_Vector &operator*=(const double scalar) { x *= scalar; y *= scalar; @@ -62,8 +111,8 @@ class MC_Vector return *this; } - HOST_DEVICE_CUDA - MC_Vector& operator/=(const double scalar) + HOST_DEVICE_SYCL + MC_Vector &operator/=(const double scalar) { x /= scalar; y /= scalar; @@ -71,48 +120,49 @@ class MC_Vector return *this; } - HOST_DEVICE_CUDA - const MC_Vector operator+( const MC_Vector &tmp ) const + HOST_DEVICE_SYCL + const MC_Vector operator+(const MC_Vector &tmp) const { return MC_Vector(x + tmp.x, y + tmp.y, z + tmp.z); } - HOST_DEVICE_CUDA - const MC_Vector operator-( const MC_Vector &tmp ) const + HOST_DEVICE_SYCL + const MC_Vector operator-(const MC_Vector &tmp) const { return MC_Vector(x - tmp.x, y - tmp.y, z - tmp.z); } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL const MC_Vector operator*(const double scalar) const { - return MC_Vector(scalar*x, scalar*y, scalar*z); + return MC_Vector(scalar * x, scalar * y, scalar * z); } - HOST_DEVICE_CUDA - inline double Length() const { return std::sqrt(x*x + y*y + z*z); } + HOST_DEVICE_SYCL + inline double Length() const { return sycl::sqrt(x * x + y * y + z * z); } // Distance from this vector to another point. 
- HOST_DEVICE_CUDA - inline double Distance(const MC_Vector& vv) const - { return std::sqrt((x - vv.x)*(x - vv.x) + (y - vv.y)*(y - vv.y)+ (z - vv.z)*(z - vv.z)); } + HOST_DEVICE_SYCL + inline double Distance(const MC_Vector &vv) const + { + return sycl::sqrt((x - vv.x) * (x - vv.x) + (y - vv.y) * (y - vv.y) + + (z - vv.z) * (z - vv.z)); + } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL inline double Dot(const MC_Vector &tmp) const { - return this->x*tmp.x + this->y*tmp.y + this->z*tmp.z; + return this->x * tmp.x + this->y * tmp.y + this->z * tmp.z; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL inline MC_Vector Cross(const MC_Vector &v) const { return MC_Vector(y * v.z - z * v.y, z * v.x - x * v.z, x * v.y - y * v.x); } - }; HOST_DEVICE_END - #endif diff --git a/src/MacroscopicCrossSection.hh b/src/MacroscopicCrossSection.hh index 68ab7e1b..6fbcb2e2 100644 --- a/src/MacroscopicCrossSection.hh +++ b/src/MacroscopicCrossSection.hh @@ -1,18 +1,174 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MACROSCOPIC_CROSS_SECTION_HH #define MACROSCOPIC_CROSS_SECTION_HH #include "DeclareMacro.hh" +#include "MacroscopicCrossSection.hh" +#include "MonteCarlo.hh" +#include "MaterialDatabase.hh" +#include "NuclearData.hh" +#include "MC_Cell_State.hh" + class MonteCarlo; HOST_DEVICE -double macroscopicCrossSection(MonteCarlo* monteCarlo, int reactionIndex, int domainIndex, int cellIndex, +double macroscopicCrossSection(MonteCarlo *monteCarlo, int reactionIndex, int domainIndex, int cellIndex, int isoIndex, int energyGroup); HOST_DEVICE_END HOST_DEVICE -double weightedMacroscopicCrossSection(MonteCarlo* monteCarlo, int taskIndex, int domainIndex, +double weightedMacroscopicCrossSection(MonteCarlo *monteCarlo, int taskIndex, int domainIndex, int cellIndex, int energyGroup); HOST_DEVICE_END +//---------------------------------------------------------------------------------------------------------------------- +// Routine MacroscopicCrossSection calculates the number-density-weighted macroscopic cross +// section of a cell. +// +// A reactionIndex of -1 means total cross section. +//---------------------------------------------------------------------------------------------------------------------- + +inline HOST_DEVICE double macroscopicCrossSection(MonteCarlo *monteCarlo, int reactionIndex, int domainIndex, int cellIndex, + int isoIndex, int energyGroup) +{ +// Initialize various data items. 
+#ifdef __SYCL_DEVICE_ONLY__ + int globalMatIndex = monteCarlo->domain_d[domainIndex].cell_state[cellIndex]._material; + double atomFraction = monteCarlo->_material_d[globalMatIndex]._iso[isoIndex]._atomFraction; +#else + int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; + double atomFraction = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._atomFraction; +#endif + + double microscopicCrossSection = 0.0; + // The cell number density is the fraction of the atoms in cell + // volume of this isotope. We set this (elsewhere) to 1/nIsotopes. + // This is a statement that we treat materials as if all of their + // isotopes are present in equal amounts + +#ifdef __SYCL_DEVICE_ONLY__ + double cellNumberDensity = monteCarlo->domain_d[domainIndex].cell_state[cellIndex]._cellNumberDensity; + int isotopeGid = monteCarlo->_material_d[globalMatIndex]._iso[isoIndex]._gid; +#else + double cellNumberDensity = monteCarlo->domain[domainIndex].cell_state[cellIndex]._cellNumberDensity; + int isotopeGid = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid; +#endif + if (atomFraction == 0.0 || cellNumberDensity == 0.0) + { + return 1e-20; + } + +#ifdef __SYCL_DEVICE_ONLY__ + if (reactionIndex < 0) + { + // Return total cross section + microscopicCrossSection = monteCarlo->_nuclearData_d->getTotalCrossSection(isotopeGid, energyGroup); + } + else + { + // Return the reaction cross section + microscopicCrossSection = monteCarlo->_nuclearData_d->getReactionCrossSection((unsigned int)reactionIndex, isotopeGid, energyGroup); + } +#else + if (reactionIndex < 0) + { + // Return total cross section + microscopicCrossSection = monteCarlo->_nuclearData->getTotalCrossSection(isotopeGid, energyGroup); + } + else + { + // Return the reaction cross section + microscopicCrossSection = monteCarlo->_nuclearData->getReactionCrossSection((unsigned int)reactionIndex, isotopeGid, energyGroup); + } +#endif + + return atomFraction * 
cellNumberDensity * microscopicCrossSection; +} +HOST_DEVICE_END + +//---------------------------------------------------------------------------------------------------------------------- +// Routine weightedMacroscopicCrossSection calculates the number-density-weighted +// macroscopic cross section of the collection of isotopes in a cell. +// dfr Weighted is a bit of a misnomer here, since there is no weighting +// applied by this routine. In Mercury we would weight for multiple +// materials in a cell. +//---------------------------------------------------------------------------------------------------------------------- +inline HOST_DEVICE double weightedMacroscopicCrossSection(MonteCarlo *monteCarlo, int taskIndex, int domainIndex, + int cellIndex, int energyGroup) +{ +#ifdef __SYCL_DEVICE_ONLY__ + double *precomputedCrossSection = + &monteCarlo->domain_d[domainIndex].cell_state[cellIndex]._total[energyGroup]; +#else + double *precomputedCrossSection = + &monteCarlo->domain[domainIndex].cell_state[cellIndex]._total[energyGroup]; +#endif + qs_assert(precomputedCrossSection != NULL); + if (*precomputedCrossSection > 0.0) + return *precomputedCrossSection; + +#ifdef __SYCL_DEVICE_ONLY__ + int globalMatIndex = monteCarlo->domain_d[domainIndex].cell_state[cellIndex]._material; + int nIsotopes = (int)monteCarlo->_material_d[globalMatIndex]._isosize; +#else + int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; + int nIsotopes = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size(); +#endif + double sum = 0.0; + for (int isoIndex = 0; isoIndex < nIsotopes; isoIndex++) + { + sum += macroscopicCrossSection(monteCarlo, -1, domainIndex, cellIndex, + isoIndex, energyGroup); + } + + ATOMIC_WRITE(*precomputedCrossSection, sum); + + return sum; +} +HOST_DEVICE_END #endif diff --git a/src/Makefile.dpct b/src/Makefile.dpct new file mode 100644 index 00000000..878e02ed --- /dev/null +++ b/src/Makefile.dpct @@ -0,0 +1,216 @@ +CC 
:= icpx -fsycl + +LD := $(CC) + +LIB := + +FLAGS := -DHAVE_SYCL -O2 + +TARGET_0_SRC_0 = ./CoralBenchmark.cc +TARGET_0_OBJ_0 = ./CoralBenchmark.o +TARGET_0_FLAG_0 = ${FLAGS} + +TARGET_0_SRC_1 = ./CycleTracking.cc +TARGET_0_OBJ_1 = ./CycleTracking.o +TARGET_0_FLAG_1 = ${FLAGS} + +TARGET_0_SRC_2 = ./DecompositionObject.cc +TARGET_0_OBJ_2 = ./DecompositionObject.o +TARGET_0_FLAG_2 = ${FLAGS} + +TARGET_0_SRC_3 = ./DirectionCosine.cc.dp.cpp +TARGET_0_OBJ_3 = ./DirectionCosine.cc.dp.o +TARGET_0_FLAG_3 = ${FLAGS} + +TARGET_0_SRC_4 = ./EnergySpectrum.cc +TARGET_0_OBJ_4 = ./EnergySpectrum.o +TARGET_0_FLAG_4 = ${FLAGS} + +TARGET_0_SRC_5 = ./GlobalFccGrid.cc.dp.cpp +TARGET_0_OBJ_5 = ./GlobalFccGrid.cc.dp.o +TARGET_0_FLAG_5 = ${FLAGS} + +TARGET_0_SRC_6 = ./GridAssignmentObject.cc.dp.cpp +TARGET_0_OBJ_6 = ./GridAssignmentObject.cc.dp.o +TARGET_0_FLAG_6 = ${FLAGS} + +TARGET_0_SRC_7 = ./InputBlock.cc +TARGET_0_OBJ_7 = ./InputBlock.o +TARGET_0_FLAG_7 = ${FLAGS} + +TARGET_0_SRC_8 = ./MC_Base_Particle.cc +TARGET_0_OBJ_8 = ./MC_Base_Particle.o +TARGET_0_FLAG_8 = ${FLAGS} + +TARGET_0_SRC_9 = ./MC_Domain.cc.dp.cpp +TARGET_0_OBJ_9 = ./MC_Domain.cc.dp.o +TARGET_0_FLAG_9 = ${FLAGS} + +TARGET_0_SRC_10 = ./MC_Fast_Timer.cc.dp.cpp +TARGET_0_OBJ_10 = ./MC_Fast_Timer.cc.dp.o +TARGET_0_FLAG_10 = ${FLAGS} + +TARGET_0_SRC_11 = ./MC_Particle_Buffer.cc +TARGET_0_OBJ_11 = ./MC_Particle_Buffer.o +TARGET_0_FLAG_11 = ${FLAGS} + +TARGET_0_SRC_12 = ./MeshPartition.cc +TARGET_0_OBJ_12 = ./MeshPartition.o +TARGET_0_FLAG_12 = ${FLAGS} + +TARGET_0_SRC_13 = ./MonteCarlo.cc.dp.cpp +TARGET_0_OBJ_13 = ./MonteCarlo.cc.dp.o +TARGET_0_FLAG_13 = ${FLAGS} + +TARGET_0_SRC_14 = ./MpiCommObject.cc +TARGET_0_OBJ_14 = ./MpiCommObject.o +TARGET_0_FLAG_14 = ${FLAGS} + +TARGET_0_SRC_15 = ./Parameters.cc +TARGET_0_OBJ_15 = ./Parameters.o +TARGET_0_FLAG_15 = ${FLAGS} + +TARGET_0_SRC_16 = ./ParticleVault.cc +TARGET_0_OBJ_16 = ./ParticleVault.o +TARGET_0_FLAG_16 = ${FLAGS} + +TARGET_0_SRC_17 = ./ParticleVaultContainer.cc 
+TARGET_0_OBJ_17 = ./ParticleVaultContainer.o +TARGET_0_FLAG_17 = ${FLAGS} + +TARGET_0_SRC_18 = ./PopulationControl.cc.dp.cpp +TARGET_0_OBJ_18 = ./PopulationControl.cc.dp.o +TARGET_0_FLAG_18 = ${FLAGS} + +TARGET_0_SRC_19 = ./SharedMemoryCommObject.cc +TARGET_0_OBJ_19 = ./SharedMemoryCommObject.o +TARGET_0_FLAG_19 = ${FLAGS} + +TARGET_0_SRC_20 = ./Tallies.cc +TARGET_0_OBJ_20 = ./Tallies.o +TARGET_0_FLAG_20 = ${FLAGS} + +TARGET_0_SRC_21 = ./cmdLineParser.cc +TARGET_0_OBJ_21 = ./cmdLineParser.o +TARGET_0_FLAG_21 = ${FLAGS} + +TARGET_0_SRC_22 = ./cudaFunctions.cc.dp.cpp +TARGET_0_OBJ_22 = ./cudaFunctions.cc.dp.o +TARGET_0_FLAG_22 = ${FLAGS} + +TARGET_0_SRC_23 = ./initMC.cc.dp.cpp +TARGET_0_OBJ_23 = ./initMC.cc.dp.o +TARGET_0_FLAG_23 = ${FLAGS} + +TARGET_0_SRC_24 = ./main.cc.dp.cpp +TARGET_0_OBJ_24 = ./main.cc.dp.o +TARGET_0_FLAG_24 = ${FLAGS} + +TARGET_0_SRC_25 = ./parseUtils.cc +TARGET_0_OBJ_25 = ./parseUtils.o +TARGET_0_FLAG_25 = ${FLAGS} + +TARGET_0_SRC_26 = ./utils.cc +TARGET_0_OBJ_26 = ./utils.o +TARGET_0_FLAG_26 = ${FLAGS} + +TARGET_0_SRC_27 = ./utilsMpi.cc.dp.cpp +TARGET_0_OBJ_27 = ./utilsMpi.cc.dp.o +TARGET_0_FLAG_27 = ${FLAGS} + +TARGET_0 := ./qs + +TARGET := ${TARGET_0} + +.PHONY:all clean +OBJS_0 := ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11} ${TARGET_0_OBJ_12} ${TARGET_0_OBJ_13} ${TARGET_0_OBJ_14} ${TARGET_0_OBJ_15} ${TARGET_0_OBJ_16} ${TARGET_0_OBJ_17} ${TARGET_0_OBJ_18} ${TARGET_0_OBJ_19} ${TARGET_0_OBJ_20} ${TARGET_0_OBJ_21} ${TARGET_0_OBJ_22} ${TARGET_0_OBJ_23} ${TARGET_0_OBJ_24} ${TARGET_0_OBJ_25} ${TARGET_0_OBJ_26} ${TARGET_0_OBJ_27} +all: $(TARGET) +$(TARGET_0): $(OBJS_0) + $(CC) -o $@ $^ $(LIB) + +$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0) + $(CC) -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0) + +$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1) + $(CC) -c ${TARGET_0_SRC_1} -o 
${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1) + +$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2) + $(CC) -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2) + +$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3) + $(CC) -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3) + +$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4) + $(CC) -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4) + +$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5) + $(CC) -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5) + +$(TARGET_0_OBJ_6):$(TARGET_0_SRC_6) + $(CC) -c ${TARGET_0_SRC_6} -o ${TARGET_0_OBJ_6} $(TARGET_0_FLAG_6) + +$(TARGET_0_OBJ_7):$(TARGET_0_SRC_7) + $(CC) -c ${TARGET_0_SRC_7} -o ${TARGET_0_OBJ_7} $(TARGET_0_FLAG_7) + +$(TARGET_0_OBJ_8):$(TARGET_0_SRC_8) + $(CC) -c ${TARGET_0_SRC_8} -o ${TARGET_0_OBJ_8} $(TARGET_0_FLAG_8) + +$(TARGET_0_OBJ_9):$(TARGET_0_SRC_9) + $(CC) -c ${TARGET_0_SRC_9} -o ${TARGET_0_OBJ_9} $(TARGET_0_FLAG_9) + +$(TARGET_0_OBJ_10):$(TARGET_0_SRC_10) + $(CC) -c ${TARGET_0_SRC_10} -o ${TARGET_0_OBJ_10} $(TARGET_0_FLAG_10) + +$(TARGET_0_OBJ_11):$(TARGET_0_SRC_11) + $(CC) -c ${TARGET_0_SRC_11} -o ${TARGET_0_OBJ_11} $(TARGET_0_FLAG_11) + +$(TARGET_0_OBJ_12):$(TARGET_0_SRC_12) + $(CC) -c ${TARGET_0_SRC_12} -o ${TARGET_0_OBJ_12} $(TARGET_0_FLAG_12) + +$(TARGET_0_OBJ_13):$(TARGET_0_SRC_13) + $(CC) -c ${TARGET_0_SRC_13} -o ${TARGET_0_OBJ_13} $(TARGET_0_FLAG_13) + +$(TARGET_0_OBJ_14):$(TARGET_0_SRC_14) + $(CC) -c ${TARGET_0_SRC_14} -o ${TARGET_0_OBJ_14} $(TARGET_0_FLAG_14) + +$(TARGET_0_OBJ_15):$(TARGET_0_SRC_15) + $(CC) -c ${TARGET_0_SRC_15} -o ${TARGET_0_OBJ_15} $(TARGET_0_FLAG_15) + +$(TARGET_0_OBJ_16):$(TARGET_0_SRC_16) + $(CC) -c ${TARGET_0_SRC_16} -o ${TARGET_0_OBJ_16} $(TARGET_0_FLAG_16) + +$(TARGET_0_OBJ_17):$(TARGET_0_SRC_17) + $(CC) -c ${TARGET_0_SRC_17} -o ${TARGET_0_OBJ_17} $(TARGET_0_FLAG_17) + +$(TARGET_0_OBJ_18):$(TARGET_0_SRC_18) + $(CC) -c ${TARGET_0_SRC_18} -o ${TARGET_0_OBJ_18} $(TARGET_0_FLAG_18) + +$(TARGET_0_OBJ_19):$(TARGET_0_SRC_19) + $(CC) -c ${TARGET_0_SRC_19} -o 
${TARGET_0_OBJ_19} $(TARGET_0_FLAG_19) + +$(TARGET_0_OBJ_20):$(TARGET_0_SRC_20) + $(CC) -c ${TARGET_0_SRC_20} -o ${TARGET_0_OBJ_20} $(TARGET_0_FLAG_20) + +$(TARGET_0_OBJ_21):$(TARGET_0_SRC_21) + $(CC) -c ${TARGET_0_SRC_21} -o ${TARGET_0_OBJ_21} $(TARGET_0_FLAG_21) + +$(TARGET_0_OBJ_22):$(TARGET_0_SRC_22) + $(CC) -c ${TARGET_0_SRC_22} -o ${TARGET_0_OBJ_22} $(TARGET_0_FLAG_22) + +$(TARGET_0_OBJ_23):$(TARGET_0_SRC_23) + $(CC) -c ${TARGET_0_SRC_23} -o ${TARGET_0_OBJ_23} $(TARGET_0_FLAG_23) + +$(TARGET_0_OBJ_24):$(TARGET_0_SRC_24) + $(CC) -c ${TARGET_0_SRC_24} -o ${TARGET_0_OBJ_24} $(TARGET_0_FLAG_24) + +$(TARGET_0_OBJ_25):$(TARGET_0_SRC_25) + $(CC) -c ${TARGET_0_SRC_25} -o ${TARGET_0_OBJ_25} $(TARGET_0_FLAG_25) + +$(TARGET_0_OBJ_26):$(TARGET_0_SRC_26) + $(CC) -c ${TARGET_0_SRC_26} -o ${TARGET_0_OBJ_26} $(TARGET_0_FLAG_26) + +$(TARGET_0_OBJ_27):$(TARGET_0_SRC_27) + $(CC) -c ${TARGET_0_SRC_27} -o ${TARGET_0_OBJ_27} $(TARGET_0_FLAG_27) + +clean: + rm -f ${OBJS_0} $(TARGET) diff --git a/src/MaterialDatabase.hh b/src/MaterialDatabase.hh index fd944a99..da450a33 100644 --- a/src/MaterialDatabase.hh +++ b/src/MaterialDatabase.hh @@ -1,83 +1,179 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MATERIALDATABASE_HH #define MATERIALDATABASE_HH +#include #include #include #include #include #include "qs_assert.hh" +#if defined(HAVE_SYCL) +extern sycl::queue sycl_device_queue; // global variable for device queue +#endif + // For this material, store the global id in NuclearData of the isotope class Isotope { - public: +public: Isotope() - : _gid(0), _atomFraction(0) { } - - Isotope(int isotopeGid, double atomFraction) - : _gid(isotopeGid), _atomFraction(atomFraction) { } - + : _gid(0), _atomFraction(0) {} + + Isotope(int isotopeGid, double atomFraction) + : _gid(isotopeGid), _atomFraction(atomFraction) {} + ~Isotope() {} - + int _gid; //!< index into NuclearData double _atomFraction; - }; // Material information class Material { - public: +public: std::string _name; double _mass; qs_vector _iso; Material() - : _name("0"), _mass(1000.0) {} + : _name("0"), _mass(1000.0) {} Material(const std::string &name) - : _name(name), _mass(1000.0){} + : _name(name), _mass(1000.0) {} Material(const std::string &name, double mass) - : _name(name), _mass(mass){} - + : _name(name), _mass(mass) {} + ~Material() {} - void addIsotope(const Isotope& isotope) + void addIsotope(const Isotope &isotope) { - _iso.Open(); - _iso.push_back(isotope); - _iso.Close(); + _iso.Open(); + _iso.push_back(isotope); + _iso.Close(); } - }; - // Top level class to store material information class MaterialDatabase { - public: - - void addMaterial(const Material& material) +public: 
+ void addMaterial(const Material &material) { _mat.Open(); _mat.push_back(material); _mat.Close(); } - - int findMaterial(const std::string& name) const + + int findMaterial(const std::string &name) const { for (int matIndex = 0; matIndex < _mat.size(); matIndex++) { - if (_mat[matIndex]._name == name) { return matIndex; } + if (_mat[matIndex]._name == name) + { + return matIndex; + } } qs_assert(false); return -1; } - + // Store the cross sections and reactions by isotope, which stores it by species qs_vector<Material> _mat; +}; + +// Material information +class Material_d +{ +public: + double _mass; + int _isosize; + Isotope *_iso; + + Material_d() + : _mass(1000.0) {} + Material_d(const std::string &name) + : _mass(1000.0) {} + + Material_d(const std::string &name, double mass) + : _mass(mass) {} + + ~Material_d() {} +}; + +inline void copyMaterialDatabase_device(MonteCarlo *mcco) +{ + int numMaterials = mcco->_materialDatabase->_mat.size(); + Material_d *materials_h = (Material_d *)malloc(numMaterials * sizeof(Material_d)); + + for (int j = 0; j < numMaterials; j++) + { + int isosize = mcco->_materialDatabase->_mat[j]._iso.size(); + Isotope *local_I_d; + local_I_d = + sycl::malloc_device<Isotope>(isosize, sycl_device_queue); + sycl_device_queue + .memcpy(local_I_d, + mcco->_materialDatabase->_mat[j]._iso.outputPointer(), + isosize * sizeof(Isotope)) + .wait(); + + materials_h[j]._isosize = isosize; + materials_h[j]._iso = local_I_d; + materials_h[j]._mass = mcco->_materialDatabase->_mat[j]._mass; + } + sycl_device_queue + .memcpy(mcco->_material_d, materials_h, + numMaterials * sizeof(Material_d)) + .wait(); + free(materials_h); }; #endif diff --git a/src/MemoryControl.hh b/src/MemoryControl.hh index f4ce3bf4..71757cf7 100644 --- a/src/MemoryControl.hh +++ b/src/MemoryControl.hh @@ -1,29 +1,84 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following 
conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef MEMORY_CONTROL_HH #define MEMORY_CONTROL_HH -#include "gpuPortability.hh" +#include "cudaUtils.hh" + #include "qs_assert.hh" namespace MemoryControl { - enum AllocationPolicy {HOST_MEM, UVM_MEM, UNDEFINED_POLICY}; + enum AllocationPolicy + { + HOST_MEM, + UVM_MEM, + UNDEFINED_POLICY + }; template - T* allocate(const int size, const AllocationPolicy policy) + T *allocate(const int size, const AllocationPolicy policy) { - if (size == 0) { return NULL;} - T* tmp = NULL; - + if (size == 0) + { + return NULL; + } + T *tmp = NULL; + switch (policy) { - case AllocationPolicy::HOST_MEM: - tmp = new T [size]; + case AllocationPolicy::HOST_MEM: + tmp = new T[size]; break; #ifdef HAVE_UVM - case AllocationPolicy::UVM_MEM: + case AllocationPolicy::UVM_MEM: void *ptr; - gpuMallocManaged(&ptr, size*sizeof(T)); - tmp = new(ptr) T[size]; + // in my experiments you need the cudaMemAttachGlobal flag set to make pcie atomics work + gpuMallocManaged(&ptr, size * sizeof(T), 1 /*cudaMemAttachGlobal*/); + tmp = new (ptr) T[size]; break; #endif 
default: @@ -34,16 +89,16 @@ namespace MemoryControl } template - void deallocate(T* data, const int size, const AllocationPolicy policy) + void deallocate(T *data, const int size, const AllocationPolicy policy) { switch (policy) { - case AllocationPolicy::HOST_MEM: - delete[] data; + case MemoryControl::AllocationPolicy::HOST_MEM: + delete[] data; break; #ifdef HAVE_UVM - case AllocationPolicy::UVM_MEM: - for (int i=0; i < size; ++i) + case UVM_MEM: + for (int i = 0; i < size; ++i) data[i].~T(); gpuFree(data); break; @@ -55,5 +110,4 @@ namespace MemoryControl } } - #endif diff --git a/src/MonteCarlo.cc.dp.cpp b/src/MonteCarlo.cc.dp.cpp new file mode 100644 index 00000000..8962252e --- /dev/null +++ b/src/MonteCarlo.cc.dp.cpp @@ -0,0 +1,206 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "MonteCarlo.hh" +#include "NuclearData.hh" +#include "MaterialDatabase.hh" +#include "ParticleVaultContainer.hh" +#include "MC_RNG_State.hh" +#include "Tallies.hh" +#include "MC_Processor_Info.hh" +#include "MC_Time_Info.hh" +#include "MC_Particle_Buffer.hh" +#include "MC_Fast_Timer.hh" +#include + +#include "macros.hh" // current location of openMP wrappers. +#include "cudaUtils.hh" + +//---------------------------------------------------------------------------------------------------------------------- +// Construct a MonteCarlo object. 
+//---------------------------------------------------------------------------------------------------------------------- +MonteCarlo::MonteCarlo(const Parameters &params) + : domain_d(NULL), + domainSize(0), + _params(params), + _nuclearData(NULL), + _material_d(NULL), + _nuclearData_d(NULL) +{ + _nuclearData = 0; + _materialDatabase = 0; + +#if defined(HAVE_UVM) + void *ptr1, *ptr2, *ptr3, *ptr4; + + gpuMallocManaged(&ptr1, sizeof(Tallies), 1 /*cudaMemAttachGlobal*/); + gpuMallocManaged(&ptr2, sizeof(MC_Processor_Info), 1 /*cudaMemAttachGlobal*/); + gpuMallocManaged(&ptr3, sizeof(MC_Time_Info), 1 /*cudaMemAttachGlobal*/); + gpuMallocManaged(&ptr4, sizeof(MC_Fast_Timer_Container), 1 /*cudaMemAttachGlobal*/); + + _tallies = new (ptr1) Tallies(params.simulationParams.balanceTallyReplications, + params.simulationParams.fluxTallyReplications, + params.simulationParams.cellTallyReplications, + params.simulationParams.energySpectrum, + params.simulationParams.nGroups); + processor_info = new (ptr2) MC_Processor_Info(); + time_info = new (ptr3) MC_Time_Info(); + fast_timer = new (ptr4) MC_Fast_Timer_Container(); + +#else + _tallies = new Tallies(params.simulationParams.balanceTallyReplications, + params.simulationParams.fluxTallyReplications, + params.simulationParams.cellTallyReplications, + params.simulationParams.energySpectrum, + params.simulationParams.nGroups); + processor_info = new MC_Processor_Info(); + time_info = new MC_Time_Info(); + fast_timer = new MC_Fast_Timer_Container(); +#endif + + source_particle_weight = 0.0; + + size_t num_processors = processor_info->num_processors; + size_t num_particles = params.simulationParams.nParticles; + size_t batch_size = params.simulationParams.batchSize; + size_t num_batches = params.simulationParams.nBatches; + + size_t num_particles_on_process = num_particles / num_processors; + + if (num_particles_on_process <= 0) + { + MC_Fatal_Jump("Not enough particles for each process ( Ranks: %d Num Particles: %d ) \n", 
num_processors, num_particles); + num_particles_on_process = 1; + } + + if (batch_size == 0) // batch size unset - use num_batches to get batch_size + { + batch_size = (num_particles_on_process / num_batches) + ((num_particles_on_process % num_batches == 0) ? 0 : 1); + } + else // batch size explicitly set - use to find num_batches + { + num_batches = num_particles_on_process / batch_size + ((num_particles_on_process % batch_size == 0) ? 0 : 1); + } + + size_t vector_size = 0; + + for (auto matIter = params.materialParams.begin(); + matIter != params.materialParams.end(); + matIter++) + { + const MaterialParameters &mp = matIter->second; + double nuBar = params.crossSectionParams.at(mp.fissionCrossSection).nuBar; + size_t nb = sycl::ceil(nuBar); + size_t test_size = nb * (batch_size); + + if (test_size > vector_size) + vector_size = test_size; + } + if (vector_size == 0) + vector_size = 2 * batch_size; + + int num_extra_vaults = (vector_size / batch_size) + 1; + // Previous definition was not enough extra space for some reason? need to determine why still + +#if defined(HAVE_UVM) + void *ptr5, *ptr6; + gpuMallocManaged(&ptr5, sizeof(MC_Particle_Buffer), 1 /*cudaMemAttachGlobal*/); + gpuMallocManaged(&ptr6, sizeof(ParticleVaultContainer), 1 /*cudaMemAttachGlobal*/); + particle_buffer = new (ptr5) MC_Particle_Buffer(this, batch_size); + _particleVaultContainer = new (ptr6) ParticleVaultContainer(batch_size, num_batches, num_extra_vaults); +#else + particle_buffer = new MC_Particle_Buffer(this, batch_size); + _particleVaultContainer = new ParticleVaultContainer(batch_size, num_batches, num_extra_vaults); +#endif +} + +//---------------------------------------------------------------------------------------------------------------------- +// Destruct a MonteCarlo object. 
+//---------------------------------------------------------------------------------------------------------------------- +MonteCarlo::~MonteCarlo() +{ +#if defined(HAVE_UVM) + + _nuclearData->~NuclearData(); + _particleVaultContainer->~ParticleVaultContainer(); + _materialDatabase->~MaterialDatabase(); + _tallies->~Tallies(); + processor_info->~MC_Processor_Info(); + time_info->~MC_Time_Info(); + fast_timer->~MC_Fast_Timer_Container(); + particle_buffer->~MC_Particle_Buffer(); + + // sycl::free(_nuclearData, sycl_device_queue); + gpuFree(_particleVaultContainer); + // sycl::free(_materialDatabase, sycl_device_queue); + gpuFree(_tallies); + gpuFree(processor_info); + gpuFree(time_info); + gpuFree(fast_timer); + gpuFree(particle_buffer); + + sycl::free(domain_d, sycl_device_queue); + sycl::free(_material_d, sycl_device_queue); + sycl::free(_nuclearData_d, sycl_device_queue); + +#else + delete _nuclearData; + delete _particleVaultContainer; + delete _materialDatabase; + delete _tallies; + delete processor_info; + delete time_info; + delete fast_timer; + delete particle_buffer; +#endif +} + +void MonteCarlo::clearCrossSectionCache() +{ + int numEnergyGroups = _nuclearData->_numEnergyGroups; + for (unsigned ii = 0; ii < domain.size(); ++ii) + domain[ii].clearCrossSectionCache(numEnergyGroups); +} diff --git a/src/MonteCarlo.hh b/src/MonteCarlo.hh index 193c68ee..6dc355db 100644 --- a/src/MonteCarlo.hh +++ b/src/MonteCarlo.hh @@ -1,12 +1,29 @@ +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + #ifndef MONTECARLO_HH #define MONTECARLO_HH #include "QS_Vector.hh" #include "MC_Domain.hh" +#include "MC_Location.hh" #include "Parameters.hh" class MC_RNG_State; class NuclearData; +class NuclearData_d; class MaterialDatabase; class ParticleVaultContainer; class Tallies; @@ -14,6 +31,8 @@ class MC_Processor_Info; class MC_Time_Info; class MC_Particle_Buffer; class MC_Fast_Timer_Container; +class MC_Domain; +class Material_d; class MonteCarlo { @@ -27,6 +46,8 @@ public: void clearCrossSectionCache(); qs_vector domain; + MC_Domain_d * domain_d; + int domainSize; Parameters _params; NuclearData* _nuclearData; @@ -37,6 +58,8 @@ public: MC_Fast_Timer_Container *fast_timer; MC_Processor_Info *processor_info; MC_Particle_Buffer *particle_buffer; + Material_d * _material_d; + NuclearData_d* _nuclearData_d; double source_particle_weight; @@ -46,4 +69,5 @@ private: MonteCarlo& operator=(const MonteCarlo&); }; + #endif diff --git a/src/NuclearData.hh b/src/NuclearData.hh index 6760568c..39b337d6 100644 --- a/src/NuclearData.hh +++ 
b/src/NuclearData.hh @@ -1,6 +1,52 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + #ifndef NUCLEAR_DATA_HH #define NUCLEAR_DATA_HH +#include #include #include #include "QS_Vector.hh" @@ -9,27 +55,34 @@ #include #include "qs_assert.hh" #include "DeclareMacro.hh" +#include "MC_RNG_State.hh" +#include "EnergySpectrum.hh" + +using std::pow; + +#if defined(HAVE_SYCL) +extern sycl::queue sycl_device_queue; // global variable for device queue +#endif class Polynomial { - public: +public: Polynomial(double aa, double bb, double cc, double dd, double ee) - : - _aa(aa), _bb(bb), _cc(cc), _dd(dd), _ee(ee){} + : _aa(aa), _bb(bb), _cc(cc), _dd(dd), _ee(ee) {} double operator()(double xx) const { return _ee + xx * (_dd + xx * (_cc + xx * (_bb + xx * (_aa)))); } - private: +private: double _aa, _bb, _cc, _dd, _ee; }; // Lowest level class at the reaction level class NuclearDataReaction { - public: +public: // The types of reactions enum Enum { @@ -38,72 +91,269 @@ class NuclearDataReaction Absorption, Fission }; - - NuclearDataReaction(){}; - NuclearDataReaction(Enum reactionType, double nuBar, const qs_vector& energies, - const Polynomial& polynomial, double reationCrossSection); - + NuclearDataReaction() + : _reactionType(Enum::Undefined), + _nuBar(0.0){}; - HOST_DEVICE_CUDA - double getCrossSection(unsigned int group); - HOST_DEVICE_CUDA - void sampleCollision(double incidentEnergy, double material_mass, double* energyOut, - double* angleOut, int &nOut, uint64_t* seed, int max_production_size); - - - qs_vector _crossSection; //!< tabular data for microscopic cross section - Enum _reactionType; //!< What type of reaction is this - double _nuBar; //!< If this is a fission, specify the nu bar + // Set the cross section values and reaction type + // Cross sections are scaled to produce the supplied reactionCrossSection at 1MeV. 
+ inline NuclearDataReaction( + Enum reactionType, double nuBar, const qs_vector &energies, + const Polynomial &polynomial, double reactionCrossSection) + : _crossSection(energies.size() - 1, 0., VAR_MEM), + _reactionType(reactionType), + _nuBar(nuBar) + { + int nGroups = _crossSection.size(); + + for (int ii = 0; ii < nGroups; ++ii) + { + double energy = (energies[ii] + energies[ii + 1]) / 2.0; + _crossSection[ii] = sycl::pow(10.0, (double)polynomial(sycl::log10(energy))); + } + // Find the normalization value for the polynomial. This is the + // value of the energy group that contains 1 MeV + double normalization = 0.0; + for (unsigned ii = 0; ii < nGroups; ++ii) + if (energies[ii + 1] > 1.) // 1 MeV + { + normalization = _crossSection[ii]; + break; + } + qs_assert(normalization > 0.); + + // scale to specified reaction cross section + double scale = reactionCrossSection / normalization; + for (int ii = 0; ii < nGroups; ++ii) + _crossSection[ii] *= scale; + }; + + inline HOST_DEVICE + // Return the cross section for this energy group + double + getCrossSection(unsigned int group) + { + qs_assert(group < _crossSection.size()); + return _crossSection[group]; + }; + HOST_DEVICE_END + + inline HOST_DEVICE void sampleCollision( + double incidentEnergy, double material_mass, double *energyOut, + double *angleOut, int &nOut, uint64_t *seed, int max_production_size) + { + double randomNumber; + switch (_reactionType) + { + case Scatter: + nOut = 1; + randomNumber = rngSample(seed); + energyOut[0] = incidentEnergy * (1.0 - (randomNumber * (1.0 / material_mass))); + randomNumber = rngSample(seed) * 2.0 - 1.0; + angleOut[0] = randomNumber; + break; + case Absorption: + break; + case Fission: + { + int numParticleOut = (int)(_nuBar + rngSample(seed)); + qs_assert(numParticleOut <= max_production_size); + nOut = numParticleOut; + for (int outIndex = 0; outIndex < numParticleOut; outIndex++) + { + randomNumber = rngSample(seed) / 2.0 + 0.5; + energyOut[outIndex] = (20 * 
randomNumber * randomNumber); + randomNumber = rngSample(seed) * 2.0 - 1.0; + angleOut[outIndex] = randomNumber; + } + } + break; + case Undefined: +#ifdef DEBUG + printf("_reactionType invalid\n"); +#endif + qs_assert(false); + } + }; + HOST_DEVICE_END + + qs_vector _crossSection; //!< tabular data for microscopic cross section + Enum _reactionType; //!< What type of reaction is this + double _nuBar; //!< If this is a fission, specify the nu bar }; // This class holds an array of reactions for neutrons class NuclearDataSpecies { - public: - - void addReaction(NuclearDataReaction::Enum type, double nuBar, qs_vector& energies, - const Polynomial& polynomial, double reactionCrossSection); - +public: + // Then call this for each reaction to set cross section values + inline void addReaction( + NuclearDataReaction::Enum type, double nuBar, + qs_vector &energies, const Polynomial &polynomial, double reactionCrossSection) + { + _reactions.Open(); + _reactions.push_back(NuclearDataReaction(type, nuBar, energies, polynomial, reactionCrossSection)); + _reactions.Close(); + }; + qs_vector _reactions; }; // For this isotope, store the cross sections. In this case the species is just neutron. 
class NuclearDataIsotope { - public: - +public: NuclearDataIsotope() - : _species(1,VAR_MEM){} - - qs_vector _species; + : _species(1, VAR_MEM) {} + qs_vector _species; }; // Top level class to handle all things related to nuclear data class NuclearData { - public: - - NuclearData(int numGroups, double energyLow, double energyHigh); - - int addIsotope(int nReactions, - const Polynomial& fissionFunction, - const Polynomial& scatterFunction, - const Polynomial& absorptionFunction, - double nuBar, - double totalCrossSection, - double fissionWeight, double scatterWeight, double absorptionWeight); - - HOST_DEVICE_CUDA - int getEnergyGroup(double energy); - HOST_DEVICE_CUDA - int getNumberReactions(unsigned int isotopeIndex); - HOST_DEVICE_CUDA - double getTotalCrossSection(unsigned int isotopeIndex, unsigned int group); - HOST_DEVICE_CUDA - double getReactionCrossSection(unsigned int reactIndex, unsigned int isotopeIndex, unsigned int group); +public: + // Set up the energies boundaries of the neutron + inline NuclearData(int numGroups, double energyLow, double energyHigh) : _energies(numGroups + 1, VAR_MEM) + { + qs_assert(energyLow < energyHigh); + _numEnergyGroups = numGroups; + _energies[0] = energyLow; + _energies[numGroups] = energyHigh; + double logLow = log(energyLow); + double logHigh = log(energyHigh); + double delta = (logHigh - logLow) / (numGroups + 1.0); + for (int energyIndex = 1; energyIndex < numGroups; energyIndex++) + { + double logValue = logLow + delta * energyIndex; + _energies[energyIndex] = exp(logValue); + } + }; + + inline int addIsotope( + int nReactions, + const Polynomial &fissionFunction, + const Polynomial &scatterFunction, + const Polynomial &absorptionFunction, + double nuBar, + double totalCrossSection, + double fissionWeight, double scatterWeight, double absorptionWeight) + { + _isotopes.Open(); + _isotopes.push_back(NuclearDataIsotope()); + _isotopes.Close(); + + double totalWeight = fissionWeight + scatterWeight + absorptionWeight; + 
+ int nFission = nReactions / 3; + int nScatter = nReactions / 3; + int nAbsorption = nReactions / 3; + switch (nReactions % 3) + { + case 0: + break; + case 1: + ++nScatter; + break; + case 2: + ++nScatter; + ++nFission; + break; + } + + double fissionCrossSection = (totalCrossSection * fissionWeight) / (nFission * totalWeight); + double scatterCrossSection = (totalCrossSection * scatterWeight) / (nScatter * totalWeight); + double absorptionCrossSection = (totalCrossSection * absorptionWeight) / (nAbsorption * totalWeight); + + _isotopes.back()._species[0]._reactions.reserve(nReactions, VAR_MEM); + + for (int ii = 0; ii < nReactions; ++ii) + { + NuclearDataReaction::Enum type; + Polynomial polynomial(0.0, 0.0, 0.0, 0.0, 0.0); + double reactionCrossSection = 0.; + // reaction index % 3 is one of the 3 reaction types + switch (ii % 3) + { + case 0: + type = NuclearDataReaction::Scatter; + polynomial = scatterFunction; + reactionCrossSection = scatterCrossSection; + break; + case 1: + type = NuclearDataReaction::Fission; + polynomial = fissionFunction; + reactionCrossSection = fissionCrossSection; + break; + case 2: + type = NuclearDataReaction::Absorption; + polynomial = absorptionFunction; + reactionCrossSection = absorptionCrossSection; + break; + } + _isotopes.back()._species[0].addReaction(type, nuBar, _energies, polynomial, reactionCrossSection); + } + + return _isotopes.size() - 1; + }; + + // For this energy, return the group index + inline HOST_DEVICE int getEnergyGroup(double energy) + { + int numEnergies = (int)_energies.size(); + if (energy <= _energies[0]) + return 0; + if (energy > _energies[numEnergies - 1]) + return numEnergies - 1; + + int high = numEnergies - 1; + int low = 0; + + while (high != low + 1) + { + int mid = (high + low) / 2; + if (energy < _energies[mid]) + high = mid; + else + low = mid; + } + + return low; + }; + HOST_DEVICE_END + + inline HOST_DEVICE int getNumberReactions(unsigned int isotopeIndex) + { + qs_assert(isotopeIndex < 
_isotopes.size()); + return (int)_isotopes[isotopeIndex]._species[0]._reactions.size(); + }; + HOST_DEVICE_END + + // General routines to help access data lower down + // Return the total cross section for this energy group + inline HOST_DEVICE double getTotalCrossSection(unsigned int isotopeIndex, unsigned int group) + { + qs_assert(isotopeIndex < _isotopes.size()); + int numReacts = (int)_isotopes[isotopeIndex]._species[0]._reactions.size(); + double totalCrossSection = 0.0; + for (int reactIndex = 0; reactIndex < numReacts; reactIndex++) + { + totalCrossSection += _isotopes[isotopeIndex]._species[0]._reactions[reactIndex].getCrossSection(group); + } + return totalCrossSection; + }; + + // Return the total cross section for this energy group + inline HOST_DEVICE double getReactionCrossSection( + unsigned int reactIndex, unsigned int isotopeIndex, unsigned int group) + { + qs_assert(isotopeIndex < _isotopes.size()); + qs_assert(reactIndex < _isotopes[isotopeIndex]._species[0]._reactions.size()); + return _isotopes[isotopeIndex]._species[0]._reactions[reactIndex].getCrossSection(group); + }; + HOST_DEVICE_END int _numEnergyGroups; // Store the cross sections and reactions by isotope, which stores @@ -112,7 +362,324 @@ class NuclearData // This is the overall energy layout. If we had more than just // neutrons, this array would be a vector of vectors. qs_vector _energies; +}; + +// Lowest level class at the reaction level +class NuclearDataReaction_d +{ +public: + // The types of reactions + enum Enum + { + Undefined = 0, + Scatter, + Absorption, + Fission + }; + + NuclearDataReaction_d(){}; + + // Set the cross section values and reaction type + // Cross sections are scaled to produce the supplied reactionCrossSection at 1MeV. 
+ inline NuclearDataReaction_d( + Enum reactionType, double nuBar, const double *energies, int energiessize, + const Polynomial &polynomial, double reactionCrossSection) + : _reactionType(reactionType), + _nuBar(nuBar) + { + + _crossSectionSize = energiessize - 1; + + _crossSection = new double[_crossSectionSize]; + + int nGroups = _crossSectionSize; + + for (int ii = 0; ii < nGroups; ++ii) + { + double energy = (energies[ii] + energies[ii + 1]) / 2.0; + _crossSection[ii] = sycl::pow(10.0, (double)polynomial(sycl::log10(energy))); + } + + // Find the normalization value for the polynomial. This is the + // value of the energy group that contains 1 MeV + double normalization = 0.0; + for (unsigned ii = 0; ii < nGroups; ++ii) + if (energies[ii + 1] > 1.) // 1 MeV + { + normalization = _crossSection[ii]; + break; + } + qs_assert(normalization > 0.); + + // scale to specified reaction cross section + double scale = reactionCrossSection / normalization; + for (int ii = 0; ii < nGroups; ++ii) + _crossSection[ii] *= scale; + }; + + inline HOST_DEVICE + // Return the cross section for this energy group + double + getCrossSection(unsigned int group) + { + qs_assert(group < _crossSectionSize); + return _crossSection[group]; + }; + HOST_DEVICE_END + + inline HOST_DEVICE void sampleCollision( + double incidentEnergy, double material_mass, double *energyOut, + double *angleOut, int &nOut, uint64_t *seed, int max_production_size) + { + double randomNumber; + switch (_reactionType) + { + case Scatter: + nOut = 1; + randomNumber = rngSample(seed); + energyOut[0] = incidentEnergy * (1.0 - (randomNumber * (1.0 / material_mass))); + randomNumber = rngSample(seed) * 2.0 - 1.0; + angleOut[0] = randomNumber; + break; + case Absorption: + break; + case Fission: + { + int numParticleOut = (int)(_nuBar + rngSample(seed)); + qs_assert(numParticleOut <= max_production_size); + nOut = numParticleOut; + for (int outIndex = 0; outIndex < numParticleOut; outIndex++) + { + randomNumber = 
rngSample(seed) / 2.0 + 0.5; + energyOut[outIndex] = (20 * randomNumber * randomNumber); + randomNumber = rngSample(seed) * 2.0 - 1.0; + angleOut[outIndex] = randomNumber; + } + } + break; + case Undefined: +#ifdef DEBUG + printf("_reactionType invalid\n"); +#endif + qs_assert(false); + } + }; + HOST_DEVICE_END + + double *_crossSection; //!< tabular data for microscopic cross section + int _crossSectionSize; + Enum _reactionType; //!< What type of reaction is this + double _nuBar; //!< If this is a fission, specify the nu bar +}; + +// This class holds an array of reactions for neutrons +class NuclearDataSpecies_d +{ +public: + NuclearDataReaction_d *_reactions; + int _reactionsSize; +}; + +// For this isotope, store the cross sections. In this case the species is just neutron. +class NuclearDataIsotope_d +{ +public: + NuclearDataSpecies_d *_species; + int _speciesSize; +}; + +// Top level class to handle all things related to nuclear data +class NuclearData_d +{ +public: + // Set up the energies boundaries of the neutron + inline NuclearData_d(int numGroups, double energyLow, double energyHigh) + { + + _energies = new double[numGroups + 1]; + qs_assert(energyLow < energyHigh); + _numEnergyGroups = numGroups; + _energies[0] = energyLow; + _energies[numGroups] = energyHigh; + double logLow = log(energyLow); + double logHigh = log(energyHigh); + double delta = (logHigh - logLow) / (numGroups + 1.0); + for (int energyIndex = 1; energyIndex < numGroups; energyIndex++) + { + double logValue = logLow + delta * energyIndex; + _energies[energyIndex] = exp(logValue); + } + }; + + // For this energy, return the group index + inline HOST_DEVICE int getEnergyGroup(double energy) + { + int numEnergies = (int)_energiesSize; + if (energy <= _energies[0]) + return 0; + if (energy > _energies[numEnergies - 1]) + return numEnergies - 1; + + int high = numEnergies - 1; + int low = 0; + + while (high != low + 1) + { + int mid = (high + low) / 2; + if (energy < _energies[mid]) + high 
= mid; + else + low = mid; + } + + return low; + }; + HOST_DEVICE_END + + inline HOST_DEVICE int getNumberReactions(unsigned int isotopeIndex) + { + qs_assert(isotopeIndex < _isotopesSize); + return (int)_isotopes[isotopeIndex]._species[0]._reactionsSize; + }; + HOST_DEVICE_END + + // General routines to help access data lower down + // Return the total cross section for this energy group + inline HOST_DEVICE double getTotalCrossSection(unsigned int isotopeIndex, unsigned int group) + { + qs_assert(isotopeIndex < _isotopesSize); + int numReacts = (int)_isotopes[isotopeIndex]._species[0]._reactionsSize; + double totalCrossSection = 0.0; + for (int reactIndex = 0; reactIndex < numReacts; reactIndex++) + { + totalCrossSection += _isotopes[isotopeIndex]._species[0]._reactions[reactIndex].getCrossSection(group); + } + return totalCrossSection; + }; + + // Return the total cross section for this energy group + inline HOST_DEVICE double getReactionCrossSection( + unsigned int reactIndex, unsigned int isotopeIndex, unsigned int group) + { + qs_assert(isotopeIndex < _isotopesSize); + qs_assert(reactIndex < _isotopes[isotopeIndex]._species[0]._reactionsSize); + return _isotopes[isotopeIndex]._species[0]._reactions[reactIndex].getCrossSection(group); + }; + HOST_DEVICE_END + + int _numEnergyGroups; + // Store the cross sections and reactions by isotope, which stores + // it by species + NuclearDataIsotope_d *_isotopes; + int _isotopesSize; + // This is the overall energy layout. If we had more than just + // neutrons, this array would be a vector of vectors. 
+ double *_energies; + int _energiesSize; +}; +// This has problems as written for GPU code so replaced vectors with arrays +#if 0 +// Sample the collision +void NuclearDataReaction::sampleCollision( + double incidentEnergy, qs_vector &energyOut, + qs_vector &angleOut, uint64_t* seed) +#endif + +inline void copyNuclearData_device(NuclearData *nuclearData, NuclearData_d *NuclearData_h_o) +{ + NuclearData_d *NuclearData_h = (NuclearData_d *)malloc(sizeof(NuclearData_d)); + + int isotopesSize = nuclearData->_isotopes.size(); + NuclearDataIsotope_d *nuclearIsotope_I_d; + nuclearIsotope_I_d = sycl::malloc_device( + isotopesSize, sycl_device_queue); + NuclearDataIsotope_d *nuclearIsotope_h = (NuclearDataIsotope_d *)malloc(isotopesSize * sizeof(NuclearDataIsotope_d)); + + int energiesSize = nuclearData->_energies.size(); + double *nuclearEnergy_I_d; + nuclearEnergy_I_d = + sycl::malloc_device(energiesSize, sycl_device_queue); + double *nuclearEnergy_h = (double *)malloc(energiesSize * sizeof(double)); + + for (int j = 0; j < isotopesSize; j++) + { + int speciesSize = nuclearData->_isotopes[j]._species.size(); + + NuclearDataSpecies_d *nuclearSpecies_I_d; + nuclearSpecies_I_d = sycl::malloc_device( + speciesSize, sycl_device_queue); + NuclearDataSpecies_d *nuclearSpecies_h = (NuclearDataSpecies_d *)malloc(speciesSize * sizeof(NuclearDataSpecies_d)); + for (int k = 0; k < speciesSize; k++) + { + + int reactionsSize = nuclearData->_isotopes[j]._species[k]._reactions.size(); + + NuclearDataReaction_d *nuclear_I_d; + nuclear_I_d = sycl::malloc_device( + reactionsSize, sycl_device_queue); + + NuclearDataReaction_d *nuclear_h = (NuclearDataReaction_d *)malloc(reactionsSize * sizeof(NuclearDataReaction_d)); + for (int l = 0; l < reactionsSize; l++) + { + double *crossSections_I_d; + int NumcrossSectionSize = nuclearData->_isotopes[j]._species[k]._reactions[l]._crossSection.size(); + crossSections_I_d = sycl::malloc_device( + NumcrossSectionSize, sycl_device_queue); + + 
sycl_device_queue + .memcpy(crossSections_I_d, + nuclearData->_isotopes[j] + ._species[k] + ._reactions[l] + ._crossSection.outputPointer(), + NumcrossSectionSize * sizeof(double)) + .wait(); + nuclear_h[l]._crossSectionSize = NumcrossSectionSize; + nuclear_h[l]._crossSection = crossSections_I_d; + nuclear_h[l]._reactionType = (NuclearDataReaction_d::Enum)nuclearData->_isotopes[j]._species[k]._reactions[l]._reactionType; + nuclear_h[l]._nuBar = nuclearData->_isotopes[j]._species[k]._reactions[l]._nuBar; + } + sycl_device_queue + .memcpy(nuclear_I_d, nuclear_h, + reactionsSize * sizeof(NuclearDataReaction_d)) + .wait(); + free(nuclear_h); + nuclearSpecies_h[k]._reactionsSize = reactionsSize; + nuclearSpecies_h[k]._reactions = nuclear_I_d; + } + + sycl_device_queue + .memcpy(nuclearSpecies_I_d, nuclearSpecies_h, + speciesSize * sizeof(NuclearDataSpecies_d)) + .wait(); + free(nuclearSpecies_h); + nuclearIsotope_h[j]._speciesSize = speciesSize; + nuclearIsotope_h[j]._species = nuclearSpecies_I_d; + } + + sycl_device_queue + .memcpy(nuclearIsotope_I_d, nuclearIsotope_h, + isotopesSize * sizeof(NuclearDataIsotope_d)) + .wait(); + free(nuclearIsotope_h); + NuclearData_h->_isotopesSize = isotopesSize; + NuclearData_h->_isotopes = nuclearIsotope_I_d; + + sycl_device_queue + .memcpy(nuclearEnergy_I_d, nuclearData->_energies.outputPointer(), + energiesSize * sizeof(double)) + .wait(); + // cudaMemcpy(nuclearEnergy_I_d,nuclearEnergy_h,energiesSize*sizeof(double),cudaMemcpyHostToDevice); + free(nuclearEnergy_h); + NuclearData_h->_energiesSize = energiesSize; + NuclearData_h->_energies = nuclearEnergy_I_d; + + NuclearData_h->_numEnergyGroups = nuclearData->_numEnergyGroups; + sycl_device_queue + .memcpy(NuclearData_h_o, NuclearData_h, sizeof(NuclearData_d)) + .wait(); + free(NuclearData_h); }; #endif diff --git a/src/Parameters.cc b/src/Parameters.cc index a7205da6..90458e4c 100644 --- a/src/Parameters.cc +++ b/src/Parameters.cc @@ -1,7 +1,32 @@ +/* +Modifications Copyright (C) 
2023 Intel Corporation +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +SPDX-License-Identifier: BSD-3-Clause +*/ + /// \file /// Read parameters from command line arguments and input file. - // ToDo: // 1. 
set the default number of mpi ranks in each direction // @@ -33,32 +58,31 @@ #include "InputBlock.hh" #include "utilsMpi.hh" -using std::string; +using std::endl; using std::ifstream; using std::make_pair; -using std::vector; -using std::ostream; using std::map; -using std::endl; +using std::ostream; +using std::string; +using std::vector; namespace { - void parseCommandLine(int argc, char** argv, Parameters& pp); - void parseInputFile(const string& filename, Parameters& pp); - void supplyDefaults(Parameters& params); - - void scanSimulationBlock (const InputBlock& input, Parameters& pp); - void scanGeometryBlock (const InputBlock& input, Parameters& pp); - void scanMaterialBlock (const InputBlock& input, Parameters& pp); - void scanCrossSectionBlock(const InputBlock& input, Parameters& pp); - - void badInputFile(const string& filename); - void badGeometryBlock(const InputBlock& input); - void badMaterialBlock(const InputBlock& input); - void badCrossSectionBlock(const InputBlock& input); + void parseCommandLine(int argc, char **argv, Parameters &pp); + void parseInputFile(const string &filename, Parameters &pp); + void supplyDefaults(Parameters &params); + + void scanSimulationBlock(const InputBlock &input, Parameters &pp); + void scanGeometryBlock(const InputBlock &input, Parameters &pp); + void scanMaterialBlock(const InputBlock &input, Parameters &pp); + void scanCrossSectionBlock(const InputBlock &input, Parameters &pp); + + void badInputFile(const string &filename); + void badGeometryBlock(const InputBlock &input); + void badMaterialBlock(const InputBlock &input); + void badCrossSectionBlock(const InputBlock &input); } - /// A one stop shop to get all parameters from the command line and the /// input file. /// @@ -77,45 +101,49 @@ namespace /// with the same name. 
/// -Parameters getParameters(int argc, char** argv) +Parameters getParameters(int argc, char **argv) { Parameters params; parseCommandLine(argc, argv, params); - const string& filename = params.simulationParams.inputFile; + const string &filename = params.simulationParams.inputFile; const string energyName = params.simulationParams.energySpectrum; - const string xsecOut = params.simulationParams.crossSectionsOut; - - if (!filename.empty()) parseInputFile(filename, params); - if( energyName != "" ) params.simulationParams.energySpectrum = energyName; - if( xsecOut != "" ) params.simulationParams.crossSectionsOut = xsecOut; - + const string xsecOut = params.simulationParams.crossSectionsOut; + + if (!filename.empty()) + parseInputFile(filename, params); + if (energyName != "") + params.simulationParams.energySpectrum = energyName; + if (xsecOut != "") + params.simulationParams.crossSectionsOut = xsecOut; supplyDefaults(params); - return params; } -void printParameters(const Parameters& pp, ostream& out) +void printParameters(const Parameters &pp, ostream &out) { int rank = -1; mpiComm_rank(MPI_COMM_WORLD, &rank); - if ( rank != 0 ) { return; } + if (rank != 0) + { + return; + } out << pp.simulationParams; - for (unsigned ii=0; ii::const_iterator iter=pp.materialParams.begin(); - iter!=pp.materialParams.end(); ++iter) + for (map::const_iterator iter = pp.materialParams.begin(); + iter != pp.materialParams.end(); ++iter) out << iter->second; - for (map::const_iterator iter=pp.crossSectionParams.begin(); - iter!=pp.crossSectionParams.end(); ++iter) + for (map::const_iterator iter = pp.crossSectionParams.begin(); + iter != pp.crossSectionParams.end(); ++iter) out << iter->second; } -ostream& operator<<(ostream& out, const SimulationParameters& pp) +ostream &operator<<(ostream &out, const SimulationParameters &pp) { out << "Simulation:\n"; out << " dt: " << pp.dt << "\n"; @@ -153,13 +181,13 @@ ostream& operator<<(ostream& out, const SimulationParameters& pp) return out; 
} -ostream& operator<<(ostream& out, const GeometryParameters& pp) +ostream &operator<<(ostream &out, const GeometryParameters &pp) { out << "Geometry:\n"; out << " material: " << pp.materialName << "\n"; switch (pp.shape) { - case GeometryParameters::BRICK: + case GeometryParameters::BRICK: out << " shape: brick\n"; out << " xMax: " << pp.xMax << "\n"; out << " xMin: " << pp.xMin << "\n"; @@ -168,20 +196,20 @@ ostream& operator<<(ostream& out, const GeometryParameters& pp) out << " zMax: " << pp.zMax << "\n"; out << " zMin: " << pp.zMin << "\n"; break; - case GeometryParameters::SPHERE: + case GeometryParameters::SPHERE: out << " shape: sphere\n"; out << " xCenter: " << pp.xCenter << "\n"; out << " yCenter: " << pp.yCenter << "\n"; out << " zCenter: " << pp.zCenter << "\n"; break; - default: - qs_assert(false); + default: + qs_assert(false); } out << endl; return out; } -ostream& operator<<(ostream& out, const MaterialParameters& pp) +ostream &operator<<(ostream &out, const MaterialParameters &pp) { out << "Material:\n"; out << " name: " << pp.name << "\n"; @@ -200,7 +228,7 @@ ostream& operator<<(ostream& out, const MaterialParameters& pp) return out; } -ostream& operator<<(ostream& out, const CrossSectionParameters& pp) +ostream &operator<<(ostream &out, const CrossSectionParameters &pp) { out << "CrossSection:\n"; out << " name: " << pp.name << "\n"; @@ -216,43 +244,43 @@ ostream& operator<<(ostream& out, const CrossSectionParameters& pp) namespace { - void parseCommandLine(int argc, char** argv, Parameters& pp) + void parseCommandLine(int argc, char **argv, Parameters &pp) { - SimulationParameters& sp = pp.simulationParams; - int help=0; + SimulationParameters &sp = pp.simulationParams; + int help = 0; char name[1024]; name[0] = '\0'; char esName[1024]; esName[0] = '\0'; char xsec[1024]; xsec[0] = '\0'; - - addArg("help", 'h', 0, 'i', &(help), 0, "print this message"); - addArg("dt", 'D', 1, 'd', &(sp.dt), 0, "time step (seconds)"); - addArg("fMax", 'f', 1, 
'd', &(sp.fMax), 0, "max random mesh node displacement"); - addArg("inputFile", 'i', 1, 's', &(name), sizeof(name), "name of input file"); - addArg("energySpectrum", 'e', 1, 's', &(esName), sizeof(esName), "name of energy spectrum output file"); - addArg("crossSectionsOut", 'S', 1, 's', &(xsec), sizeof(xsec), "name of cross section output file"); - addArg("loadBalance", 'l', 0, 'i', &(sp.loadBalance), 0, "enable/disable load balancing" ); - addArg("cycleTimers", 'c', 1, 'i', &(sp.cycleTimers), 0, "enable/disable cycle timers" ); - addArg("debugThreads", 't', 1, 'i', &(sp.debugThreads),0, "set thread debug level to 1, 2, 3" ); - addArg("lx", 'X', 1, 'd', &(sp.lx), 0, "x-size of simulation (cm)"); - addArg("ly", 'Y', 1, 'd', &(sp.ly), 0, "y-size of simulation (cm)"); - addArg("lz", 'Z', 1, 'd', &(sp.lz), 0, "z-size of simulation (cm)"); - addArg("nParticles", 'n', 1, 'u', &(sp.nParticles), 0, "number of particles"); - addArg("batchSize", 'g', 1, 'u', &(sp.batchSize), 0, "number of particles in a vault/batch"); - addArg("nBatches", 'b', 1, 'u', &(sp.nBatches), 0, "number of vault/batch to start (sets batchSize automaticaly)"); - addArg("nSteps", 'N', 1, 'i', &(sp.nSteps), 0, "number of time steps"); - addArg("nx", 'x', 1, 'i', &(sp.nx), 0, "number of mesh elements in x"); - addArg("ny", 'y', 1, 'i', &(sp.ny), 0, "number of mesh elements in y"); - addArg("nz", 'z', 1, 'i', &(sp.nz), 0, "number of mesh elements in z"); - addArg("seed", 's', 1, 'i', &(sp.seed), 0, "random number seed"); - addArg("xDom", 'I', 1, 'i', &(sp.xDom), 0, "number of MPI ranks in x"); - addArg("yDom", 'J', 1, 'i', &(sp.yDom), 0, "number of MPI ranks in y"); - addArg("zDom", 'K', 1, 'i', &(sp.zDom), 0, "number of MPI ranks in z"); - addArg("bTally", 'B', 1, 'i', &(sp.balanceTallyReplications), 0, "number of balance tally replications"); - addArg("fTally", 'F', 1, 'i', &(sp.fluxTallyReplications), 0, "number of scalar flux tally replications"); - addArg("cTally", 'C', 1, 'i', 
&(sp.cellTallyReplications), 0, "number of scalar cell tally replications"); + + addArg("help", 'h', 0, 'i', &(help), 0, "print this message"); + addArg("dt", 'D', 1, 'd', &(sp.dt), 0, "time step (seconds)"); + addArg("fMax", 'f', 1, 'd', &(sp.fMax), 0, "max random mesh node displacement"); + addArg("inputFile", 'i', 1, 's', &(name), sizeof(name), "name of input file"); + addArg("energySpectrum", 'e', 1, 's', &(esName), sizeof(esName), "name of energy spectrum output file"); + addArg("crossSectionsOut", 'S', 1, 's', &(xsec), sizeof(xsec), "name of cross section output file"); + addArg("loadBalance", 'l', 0, 'i', &(sp.loadBalance), 0, "enable/disable load balancing"); + addArg("cycleTimers", 'c', 1, 'i', &(sp.cycleTimers), 0, "enable/disable cycle timers"); + addArg("debugThreads", 't', 1, 'i', &(sp.debugThreads), 0, "set thread debug level to 1, 2, 3"); + addArg("lx", 'X', 1, 'd', &(sp.lx), 0, "x-size of simulation (cm)"); + addArg("ly", 'Y', 1, 'd', &(sp.ly), 0, "y-size of simulation (cm)"); + addArg("lz", 'Z', 1, 'd', &(sp.lz), 0, "z-size of simulation (cm)"); + addArg("nParticles", 'n', 1, 'u', &(sp.nParticles), 0, "number of particles"); + addArg("batchSize", 'g', 1, 'u', &(sp.batchSize), 0, "number of particles in a vault/batch"); + addArg("nBatches", 'b', 1, 'u', &(sp.nBatches), 0, "number of vault/batch to start (sets batchSize automaticaly)"); + addArg("nSteps", 'N', 1, 'i', &(sp.nSteps), 0, "number of time steps"); + addArg("nx", 'x', 1, 'i', &(sp.nx), 0, "number of mesh elements in x"); + addArg("ny", 'y', 1, 'i', &(sp.ny), 0, "number of mesh elements in y"); + addArg("nz", 'z', 1, 'i', &(sp.nz), 0, "number of mesh elements in z"); + addArg("seed", 's', 1, 'i', &(sp.seed), 0, "random number seed"); + addArg("xDom", 'I', 1, 'i', &(sp.xDom), 0, "number of MPI ranks in x"); + addArg("yDom", 'J', 1, 'i', &(sp.yDom), 0, "number of MPI ranks in y"); + addArg("zDom", 'K', 1, 'i', &(sp.zDom), 0, "number of MPI ranks in z"); + addArg("bTally", 'B', 1, 'i', 
&(sp.balanceTallyReplications), 0, "number of balance tally replications"); + addArg("fTally", 'F', 1, 'i', &(sp.fluxTallyReplications), 0, "number of scalar flux tally replications"); + addArg("cTally", 'C', 1, 'i', &(sp.cellTallyReplications), 0, "number of scalar cell tally replications"); processArgs(argc, argv); @@ -264,7 +292,7 @@ namespace { int rank = -1; mpiComm_rank(MPI_COMM_WORLD, &rank); - if ( rank == 0 ) + if (rank == 0) { printArgs(); } @@ -277,7 +305,7 @@ namespace namespace { - void parseInputFile(const string& filename, Parameters& pp) + void parseInputFile(const string &filename, Parameters &pp) { vector parseTree; int myRank; @@ -285,8 +313,13 @@ namespace if (myRank == 0) { // fill parse tree ifstream in(filename.c_str()); - if (! in) + if (!in) badInputFile(filename); + if (!in.good()) + { + std::cerr << "ERROR : Input file '" << filename << "' does not exist " << std::endl; + return; + } string line; getline(in, line); while (!in.eof()) @@ -305,18 +338,19 @@ namespace { // broadcast parse tree int nBlocks = parseTree.size(); mpiBcast(&nBlocks, 1, MPI_INT, 0, MPI_COMM_WORLD); - for (unsigned ii=0; ii buffer;; + vector buffer; + ; if (myRank == 0) parseTree[ii].serialize(buffer); - + int size = buffer.size(); mpiBcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD); if (myRank != 0) buffer.resize(size); mpiBcast(&buffer[0], size, MPI_BYTE, 0, MPI_COMM_WORLD); - + if (myRank != 0) { parseTree.push_back(InputBlock("")); @@ -325,8 +359,7 @@ namespace } } // broadcast - - for (unsigned ii=0; ii("energySpectrum", sp.energySpectrum); - input.getValue("crossSectionsOut",sp.crossSectionsOut); + input.getValue("crossSectionsOut", sp.crossSectionsOut); input.getValue("boundaryCondition", sp.boundaryCondition); - input.getValue("dt", sp.dt); - input.getValue("fMax", sp.fMax); - input.getValue ("loadBalance", sp.loadBalance); - input.getValue ("cycleTimers", sp.cycleTimers); - input.getValue ("debugThreads",sp.debugThreads); - input.getValue("lx", sp.lx); - 
input.getValue("ly", sp.ly); - input.getValue("lz", sp.lz); - input.getValue("nParticles", sp.nParticles); - input.getValue("batchSize", sp.batchSize); - input.getValue("nBatches", sp.nBatches); - input.getValue ("nSteps", sp.nSteps); - input.getValue ("nx", sp.nx); - input.getValue ("ny", sp.ny); - input.getValue ("nz", sp.nz); - input.getValue ("seed", sp.seed); - input.getValue ("xDom", sp.xDom); - input.getValue ("yDom", sp.yDom); - input.getValue ("zDom", sp.zDom); - input.getValue("eMax", sp.eMax); - input.getValue("eMin", sp.eMin); - input.getValue ("nGroups", sp.nGroups); - input.getValue("lowWeightCutoff",sp.lowWeightCutoff); - input.getValue("bTally",sp.balanceTallyReplications); - input.getValue("fTally",sp.fluxTallyReplications); - input.getValue("cTally",sp.cellTallyReplications); - input.getValue("coralBenchmark",sp.coralBenchmark); - + input.getValue("dt", sp.dt); + input.getValue("fMax", sp.fMax); + input.getValue("loadBalance", sp.loadBalance); + input.getValue("cycleTimers", sp.cycleTimers); + input.getValue("debugThreads", sp.debugThreads); + input.getValue("lx", sp.lx); + input.getValue("ly", sp.ly); + input.getValue("lz", sp.lz); + input.getValue("nParticles", sp.nParticles); + input.getValue("batchSize", sp.batchSize); + input.getValue("nBatches", sp.nBatches); + input.getValue("nSteps", sp.nSteps); + input.getValue("nx", sp.nx); + input.getValue("ny", sp.ny); + input.getValue("nz", sp.nz); + input.getValue("seed", sp.seed); + input.getValue("xDom", sp.xDom); + input.getValue("yDom", sp.yDom); + input.getValue("zDom", sp.zDom); + input.getValue("eMax", sp.eMax); + input.getValue("eMin", sp.eMin); + input.getValue("nGroups", sp.nGroups); + input.getValue("lowWeightCutoff", sp.lowWeightCutoff); + input.getValue("bTally", sp.balanceTallyReplications); + input.getValue("fTally", sp.fluxTallyReplications); + input.getValue("cTally", sp.cellTallyReplications); + input.getValue("coralBenchmark", sp.coralBenchmark); } } namespace { - void 
scanGeometryBlock(const InputBlock& input, Parameters& pp) + void scanGeometryBlock(const InputBlock &input, Parameters &pp) { pp.geometryParams.push_back(GeometryParameters()); - GeometryParameters& gg = pp.geometryParams.back(); + GeometryParameters &gg = pp.geometryParams.back(); input.getValue("material", gg.materialName); string shape; input.getValue("shape", shape); @@ -440,7 +472,7 @@ namespace else if (shape == "sphere") { gg.shape = GeometryParameters::SPHERE; - input.getValue("radius", gg.radius); + input.getValue("radius", gg.radius); input.getValue("xCenter", gg.xCenter); input.getValue("yCenter", gg.yCenter); input.getValue("zCenter", gg.zCenter); @@ -451,62 +483,59 @@ namespace } namespace { - void scanMaterialBlock(const InputBlock& input, Parameters& pp) + void scanMaterialBlock(const InputBlock &input, Parameters &pp) { string materialName; input.getValue("name", materialName); if (materialName.empty()) badMaterialBlock(input); - MaterialParameters& mp = pp.materialParams[materialName]; + MaterialParameters &mp = pp.materialParams[materialName]; mp.name = materialName; input.getValue("mass", mp.mass); - input.getValue("absorptionCrossSection", mp.absorptionCrossSection); + input.getValue("absorptionCrossSection", mp.absorptionCrossSection); input.getValue("absorptionCrossSectionRatio", mp.absorptionCrossSectionRatio); - input.getValue("fissionCrossSection", mp.fissionCrossSection); - input.getValue("fissionCrossSectionRatio", mp.fissionCrossSectionRatio); - input.getValue ("nIsotopes", mp.nIsotopes); - input.getValue ("nReactions", mp.nReactions); - input.getValue("totalCrossSection", mp.totalCrossSection); - input.getValue("scatteringCrossSection", mp.scatteringCrossSection); + input.getValue("fissionCrossSection", mp.fissionCrossSection); + input.getValue("fissionCrossSectionRatio", mp.fissionCrossSectionRatio); + input.getValue("nIsotopes", mp.nIsotopes); + input.getValue("nReactions", mp.nReactions); + input.getValue("totalCrossSection", 
mp.totalCrossSection); + input.getValue("scatteringCrossSection", mp.scatteringCrossSection); input.getValue("scatteringCrossSectionRatio", mp.scatteringCrossSectionRatio); - input.getValue("sourceRate", mp.sourceRate); + input.getValue("sourceRate", mp.sourceRate); } } namespace { - void scanCrossSectionBlock(const InputBlock& input, Parameters& pp) + void scanCrossSectionBlock(const InputBlock &input, Parameters &pp) { string crossSectionName; input.getValue("name", crossSectionName); if (crossSectionName.empty()) badCrossSectionBlock(input); - CrossSectionParameters& cp = pp.crossSectionParams[crossSectionName]; + CrossSectionParameters &cp = pp.crossSectionParams[crossSectionName]; cp.name = crossSectionName; - input.getValue("A", cp.aa); - input.getValue("B", cp.bb); - input.getValue("C", cp.cc); - input.getValue("D", cp.dd); - input.getValue("E", cp.ee); - input.getValue("nuBar", cp.nuBar); - + input.getValue("A", cp.aa); + input.getValue("B", cp.bb); + input.getValue("C", cp.cc); + input.getValue("D", cp.dd); + input.getValue("E", cp.ee); + input.getValue("nuBar", cp.nuBar); } } namespace { - void badInputFile(const string& filename){qs_assert(false);} - void badGeometryBlock(const InputBlock& input) + void badInputFile(const string &filename) { qs_assert(false); } + void badGeometryBlock(const InputBlock &input) { // didn't specify shape. 
// Must be brick or sphere qs_assert(false); } - void badMaterialBlock(const InputBlock& input) + void badMaterialBlock(const InputBlock &input) { // didn't specify a name qs_assert(false); } - void badCrossSectionBlock(const InputBlock& input){qs_assert(false);} + void badCrossSectionBlock(const InputBlock &input) { qs_assert(false); } } - - diff --git a/src/Parameters.hh b/src/Parameters.hh index 79dfe3cf..92d8b84e 100644 --- a/src/Parameters.hh +++ b/src/Parameters.hh @@ -8,25 +8,30 @@ #include #include #include +#include struct GeometryParameters { - enum Shape{UNDEFINED, BRICK, SPHERE}; + enum Shape + { + UNDEFINED, + BRICK, + SPHERE + }; GeometryParameters() - : materialName(), - shape(UNDEFINED), - radius(0.0), - xCenter(0.0), - yCenter(0.0), - zCenter(0.0), - xMin(0.0), - yMin(0.0), - zMin(0.0), - xMax(0.0), - yMax(0.0), - zMax(0.0) - {}; + : materialName(), + shape(UNDEFINED), + radius(0.0), + xCenter(0.0), + yCenter(0.0), + zCenter(0.0), + xMin(0.0), + yMin(0.0), + zMin(0.0), + xMax(0.0), + yMax(0.0), + zMax(0.0){}; std::string materialName; Shape shape; @@ -45,19 +50,18 @@ struct GeometryParameters struct MaterialParameters { MaterialParameters() - : name(), - mass(1000.0), - totalCrossSection(1.0), - nIsotopes(10), - nReactions(9), - sourceRate(0.0), - scatteringCrossSection(), - absorptionCrossSection(), - fissionCrossSection(), - scatteringCrossSectionRatio(1.0), - absorptionCrossSectionRatio(1.0), - fissionCrossSectionRatio(1.0) - {}; + : name(), + mass(1000.0), + totalCrossSection(1.0), + nIsotopes(10), + nReactions(9), + sourceRate(0.0), + scatteringCrossSection(), + absorptionCrossSection(), + fissionCrossSection(), + scatteringCrossSectionRatio(1.0), + absorptionCrossSectionRatio(1.0), + fissionCrossSectionRatio(1.0){}; std::string name; double mass; @@ -76,14 +80,13 @@ struct MaterialParameters struct CrossSectionParameters { CrossSectionParameters() - : name(), - aa(0.0), - bb(0.0), - cc(0.0), - dd(0.0), - ee(1.0), - nuBar(2.4) - {}; + : 
name(), + aa(0.0), + bb(0.0), + cc(0.0), + dd(0.0), + ee(1.0), + nuBar(2.4){}; std::string name; double aa; @@ -97,89 +100,88 @@ struct CrossSectionParameters struct SimulationParameters { SimulationParameters() - : inputFile(), - crossSectionsOut(""), - boundaryCondition("reflect"), - energySpectrum(""), - loadBalance(0), - cycleTimers(0), - debugThreads(0), - nParticles(1000000), // 10^6 - batchSize(0), // default to use nBatches - nBatches(10), - nSteps(10), - nx(10), //speed up early testing - ny(10), - nz(10), -// nx(100), -// ny(100), -// nz(100), - seed(1029384756), - xDom(0), - yDom(0), - zDom(0), - dt(1e-8), - fMax(0.1), - lx(100.0), - ly(100.0), - lz(100.0), - eMin(1e-9), - eMax(20), - nGroups(230), - lowWeightCutoff(0.001), - balanceTallyReplications(1), - fluxTallyReplications(1), - cellTallyReplications(1), - coralBenchmark(0) - {}; + : inputFile(), + crossSectionsOut(""), + boundaryCondition("reflect"), + energySpectrum(""), + loadBalance(0), + cycleTimers(0), + debugThreads(0), + nParticles(1000000), // 10^6 + batchSize(0), // default to use nBatches + nBatches(10), + nSteps(10), + nx(10), // speed up early testing + ny(10), + nz(10), + // nx(100), + // ny(100), + // nz(100), + seed(1029384756), + xDom(0), + yDom(0), + zDom(0), + dt(1e-8), + fMax(0.1), + lx(100.0), + ly(100.0), + lz(100.0), + eMin(1e-9), + eMax(20), + nGroups(230), + lowWeightCutoff(0.001), + balanceTallyReplications(1), + fluxTallyReplications(1), + cellTallyReplications(1), + coralBenchmark(0){}; - std::string inputFile; //!< name of input file - std::string energySpectrum; //!< enble computing and printing energy spectrum via of energy spectrum file - std::string crossSectionsOut; //!< enable or disable printing cross section data to a file - std::string boundaryCondition;//!< specifies boundary conditions - int loadBalance; //!< enable or disable load balancing - int cycleTimers; //!< enable or disable cycle timers - int debugThreads; //!< enable or disable thread debugging lines 
- uint64_t nParticles; //!< number of particles - uint64_t batchSize; //!< number of particles in a batch - uint64_t nBatches; //!< number of batches to start - int nSteps; //!< number of time steps - int nx; //!< number of mesh elements in x-direction - int ny; //!< number of mesh elements in y-direction - int nz; //!< number of mesh elements in z-direction - int seed; //!< random number seed - int xDom; //!< number of MPI ranks in x-direction - int yDom; //!< number of MPI ranks in y-direction - int zDom; //!< number of MPI ranks in z-direction - double dt; //!< time step (seconds) - double fMax; //!< max random fractional displacement of mesh - double lx; //!< size of problem domain in x-direction (cm) - double ly; //!< size of problem domain in y-direction (cm) - double lz; //!< size of problem domain in z-direction (cm) - double eMin; //!< min energy of cross section - double eMax; //!< max energy of cross section - int nGroups; //!< number of groups for cross sections - double lowWeightCutoff; //!< low weight roulette cutoff - int balanceTallyReplications; //!< Number of replications for the balance tallies - int fluxTallyReplications; //!< Number of replications for the scalar flux tally - int cellTallyReplications; //!< Number of replications for the scalar cell tally - int coralBenchmark; //!< enable correctness check for Coral2 benchmark + std::string inputFile; //!< name of input file + std::string energySpectrum; //!< enble computing and printing energy spectrum via of energy spectrum file + std::string crossSectionsOut; //!< enable or disable printing cross section data to a file + std::string boundaryCondition; //!< specifies boundary conditions + int loadBalance; //!< enable or disable load balancing + int cycleTimers; //!< enable or disable cycle timers + int debugThreads; //!< enable or disable thread debugging lines + uint64_t nParticles; //!< number of particles + uint64_t batchSize; //!< number of particles in a batch + uint64_t nBatches; //!< 
number of batches to start + int nSteps; //!< number of time steps + int nx; //!< number of mesh elements in x-direction + int ny; //!< number of mesh elements in y-direction + int nz; //!< number of mesh elements in z-direction + int seed; //!< random number seed + int xDom; //!< number of MPI ranks in x-direction + int yDom; //!< number of MPI ranks in y-direction + int zDom; //!< number of MPI ranks in z-direction + double dt; //!< time step (seconds) + double fMax; //!< max random fractional displacement of mesh + double lx; //!< size of problem domain in x-direction (cm) + double ly; //!< size of problem domain in y-direction (cm) + double lz; //!< size of problem domain in z-direction (cm) + double eMin; //!< min energy of cross section + double eMax; //!< max energy of cross section + int nGroups; //!< number of groups for cross sections + double lowWeightCutoff; //!< low weight roulette cutoff + int balanceTallyReplications; //!< Number of replications for the balance tallies + int fluxTallyReplications; //!< Number of replications for the scalar flux tally + int cellTallyReplications; //!< Number of replications for the scalar cell tally + int coralBenchmark; //!< enable correctness check for Coral2 benchmark }; struct Parameters { - SimulationParameters simulationParams; - std::vector<GeometryParameters> geometryParams; - std::map<std::string, MaterialParameters> materialParams; + SimulationParameters simulationParams; + std::vector<GeometryParameters> geometryParams; + std::map<std::string, MaterialParameters> materialParams; std::map<std::string, CrossSectionParameters> crossSectionParams; }; -Parameters getParameters(int argc, char** argv); -void printParameters(const Parameters& params, std::ostream& out); +Parameters getParameters(int argc, char **argv); +void printParameters(const Parameters &params, std::ostream &out); -std::ostream& operator<<(std::ostream& out, const SimulationParameters& pp); -std::ostream& operator<<(std::ostream& out, const GeometryParameters& pp); -std::ostream& operator<<(std::ostream& out, const MaterialParameters& pp); -std::ostream& operator<<(std::ostream& out,
const CrossSectionParameters& pp); +std::ostream &operator<<(std::ostream &out, const SimulationParameters &pp); +std::ostream &operator<<(std::ostream &out, const GeometryParameters &pp); +std::ostream &operator<<(std::ostream &out, const MaterialParameters &pp); +std::ostream &operator<<(std::ostream &out, const CrossSectionParameters &pp); #endif diff --git a/src/ParticleVault.cc b/src/ParticleVault.cc index e2e1314c..1af3cf7a 100644 --- a/src/ParticleVault.cc +++ b/src/ParticleVault.cc @@ -2,28 +2,6 @@ #include "MC_Processor_Info.hh" #include "Globals.hh" -#if 0 -void ParticleVault:: -cleanVault( int end_index ) -{ - int s1 = end_index; - int s2 = _particles.size(); - - int starting_point = s2 - ( ( s1<(s2-s1)) ? s1 : (s2-s1)); - -#if defined HAVE_OPENMP_TARGET - #pragma omp target teams distribute parallel for thread_limit(64) -#endif - for( int ii = starting_point; ii < s2; ii++ ) - { - qs_assert( _particles[ii-starting_point].species == -1 ); - _particles[ii-starting_point] = _particles[ii]; - _particles[ii].species = -1; - } - - _particles.eraseEnd( _particles.size() - end_index ); -} -#endif void ParticleVault:: collapse( size_t fill_size, ParticleVault* vault2 ) @@ -34,7 +12,7 @@ collapse( size_t fill_size, ParticleVault* vault2 ) this->append( *vault2 ); vault2->clear(); } - else //Fill in what we can untill either vault2 is empty or we have filled this vault + else //Fill in what we can until either vault2 is empty or we have filled this vault { bool notEmpty = false; uint64_t fill = 0; diff --git a/src/ParticleVault.hh b/src/ParticleVault.hh index 2795b805..e64a5f1f 100644 --- a/src/ParticleVault.hh +++ b/src/ParticleVault.hh @@ -1,9 +1,57 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef PARTICLEVAULT_HH #define PARTICLEVAULT_HH #include "MC_Base_Particle.hh" -#include "QS_Vector.hh" +#include "ParticleVault.hh" +#include "MC_Particle.hh" +#include "MC_Time_Info.hh" #include "DeclareMacro.hh" +#include "QS_Vector.hh" #include @@ -15,9 +63,12 @@ public: bool empty() const {return _particles.empty();} // Get the size of the vault. - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL size_t size() const {return _particles.size();} + HOST_DEVICE_SYCL + void setsize(int size) {_particles.setsize(size);} + // Reserve the size for the container of particles. void reserve(size_t n) { @@ -40,11 +91,11 @@ public: const MC_Base_Particle& operator[](size_t n) const {return _particles[n];} // Put a particle into the vault, down casting its class. - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL void pushParticle(MC_Particle &particle); // Put a base particle into the vault. - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL void pushBaseParticle(MC_Base_Particle &base_particle); // Get a base particle from the vault. 
@@ -55,15 +106,15 @@ public: // Get a particle from the vault bool getBaseParticleComm(MC_Base_Particle &particle, int index); - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL bool getParticle(MC_Particle &particle, int index); // Copy a particle back into the vault - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL bool putParticle(MC_Particle particle, int index); // invalidates the particle in the vault at an index - HOST_DEVICE_CUDA - void invalidateParticle( int index ); + HOST_DEVICE_SYCL + void invalidateParticle(int index); #if 0 // Remove all of the invalid particles form the _particles list @@ -83,7 +134,7 @@ private: }; // ----------------------------------------------------------------------- -HOST_DEVICE_CUDA +HOST_DEVICE_SYCL inline void ParticleVault:: pushParticle(MC_Particle &particle) { @@ -92,8 +143,9 @@ pushParticle(MC_Particle &particle) _particles[indx] = base_particle; } + // ----------------------------------------------------------------------- -HOST_DEVICE_CUDA +HOST_DEVICE_SYCL inline void ParticleVault:: pushBaseParticle(MC_Base_Particle &base_particle) { @@ -142,7 +194,7 @@ popParticle(MC_Particle &particle) inline bool ParticleVault:: getBaseParticleComm( MC_Base_Particle &particle, int index ) { - if( size() > index ) + if(size() > index) { particle = _particles[index]; _particles[index].species = -1; @@ -156,29 +208,30 @@ getBaseParticleComm( MC_Base_Particle &particle, int index ) } // ----------------------------------------------------------------------- - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL inline bool ParticleVault:: -getParticle( MC_Particle &particle, int index ) +getParticle(MC_Particle &particle, int index) { - qs_assert( size() > index ); - if( size() > index ) + qs_assert(size() > index); + if(size() > index) { - MC_Base_Particle base_particle( _particles[index] ); - particle = MC_Particle( base_particle ); + MC_Base_Particle base_particle(_particles[index]); + particle = MC_Particle(base_particle); + return true; } return false; } // 
----------------------------------------------------------------------- - HOST_DEVICE_CUDA +HOST_DEVICE_SYCL inline bool ParticleVault:: putParticle(MC_Particle particle, int index) { - qs_assert( size() > index ); - if( size() > index ) + qs_assert(size() > index); + if(size() > index) { - MC_Base_Particle base_particle( particle ); + MC_Base_Particle base_particle(particle); _particles[index] = base_particle; return true; } @@ -186,12 +239,12 @@ putParticle(MC_Particle particle, int index) } // ----------------------------------------------------------------------- - HOST_DEVICE_CUDA +HOST_DEVICE_SYCL inline void ParticleVault:: -invalidateParticle( int index ) +invalidateParticle(int index) { - qs_assert( index >= 0 ); - qs_assert( index < _particles.size() ); + qs_assert(index >= 0); + qs_assert(index < _particles.size()); _particles[index].species = -1; } @@ -207,8 +260,29 @@ eraseSwapParticle(int index) } // ----------------------------------------------------------------------- -HOST_DEVICE -void MC_Load_Particle(MonteCarlo *mcco, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index); +inline HOST_DEVICE +void MC_Load_Particle(MonteCarlo *monteCarlo, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index) +{ + //particleVault.popParticle(mc_particle); + particleVault->getParticle(mc_particle, particle_index); + + // Time to Census + if (mc_particle.time_to_census <= 0.0) + { + mc_particle.time_to_census += monteCarlo->time_info->time_step; + } + + // Age + if (mc_particle.age < 0.0) { mc_particle.age = 0.0; } + +// Energy Group +#ifdef __SYCL_DEVICE_ONLY__ + mc_particle.energy_group = monteCarlo->_nuclearData_d->getEnergyGroup(mc_particle.kinetic_energy); +#else + mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy); +#endif +// printf("file=%s line=%d\n",__FILE__,__LINE__); +} HOST_DEVICE_END #endif diff --git a/src/ParticleVaultContainer.cc b/src/ParticleVaultContainer.cc 
index 80832b4e..b322777d 100644 --- a/src/ParticleVaultContainer.cc +++ b/src/ParticleVaultContainer.cc @@ -1,3 +1,18 @@ +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + #include "ParticleVaultContainer.hh" #include "ParticleVault.hh" #include "SendQueue.hh" @@ -81,11 +96,11 @@ ParticleVaultContainer:: //-------------------------------------------------------------- ParticleVault* ParticleVaultContainer:: -getTaskProcessingVault(uint64_t vaultIndex) +getTaskProcessingVault(uint64_t tallyArray) { -// qs_assert(vaultIndex >= 0); -// qs_assert(vaultIndex < _processingVault.size()); - return _processingVault[vaultIndex]; +// qs_assert(tallyArray >= 0); +// qs_assert(tallyArray < _processingVault.size()); + return _processingVault[tallyArray]; } //-------------------------------------------------------------- @@ -95,11 +110,11 @@ getTaskProcessingVault(uint64_t vaultIndex) //-------------------------------------------------------------- ParticleVault* ParticleVaultContainer:: -getTaskProcessedVault(uint64_t vaultIndex) +getTaskProcessedVault(uint64_t tallyArray) { -// qs_assert(vaultIndex >= 0); -// qs_assert(vaultIndex < _processedVault.size()); - return _processedVault[vaultIndex]; +// qs_assert(tallyArray >= 0); +// qs_assert(tallyArray < _processedVault.size()); + return _processedVault[tallyArray]; } //-------------------------------------------------------------- @@ -131,13 +146,6 @@ getFirstEmptyProcessedVault() //------------getSendQueue-------------------------------------- //Returns a pointer to the Send Queue //-------------------------------------------------------------- -HOST_DEVICE -SendQueue* ParticleVaultContainer:: -getSendQueue() -{ - return this->_sendQueue; -} -HOST_DEVICE_END //-------------------------------------------------------------- //------------sizeProcessing------------------------------------ @@ -337,16 +345,21 @@ addProcessingParticle( MC_Base_Particle &particle, uint64_t &fill_vault_index ) //------------addExtraParticle---------------------------------- //adds a particle to the extra particle vaults (used in kernel) //-------------------------------------------------------------- -HOST_DEVICE 
-void ParticleVaultContainer:: -addExtraParticle( MC_Particle &particle) + + +uint64_t ParticleVaultContainer::getextraVaultIndex() { - uint64_t index = 0; - QS::atomicCaptureAdd( this->_extraVaultIndex, UINT64_C(1), index ); - uint64_t vault = index / this->_vaultSize; - _extraVault[vault]->pushParticle( particle ); + return this->_extraVaultIndex; +} + + +ParticleVault * ParticleVaultContainer::getExtraVault(int index) +{ + + return _extraVault[index]; } -HOST_DEVICE_END + + //-------------------------------------------------------------- //------------cleanExtraVaults---------------------------------- diff --git a/src/ParticleVaultContainer.hh b/src/ParticleVaultContainer.hh index 6626e4c6..249f83e3 100644 --- a/src/ParticleVaultContainer.hh +++ b/src/ParticleVaultContainer.hh @@ -1,20 +1,69 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef PARTICLEVAULTCONTAINER_HH #define PARTICLEVAULTCONTAINER_HH +#include #include "DeclareMacro.hh" - +#include "ParticleVault.hh" +#include "SendQueue.hh" +#include "MemoryControl.hh" +#include "qs_assert.hh" #include "portability.hh" #include "QS_Vector.hh" #include //--------------------------------------------------------------- -// ParticleVaultContainer is a container of ParticleVaults. -// These Vaults are broken down into user defined chunks that can +// ParticleVaultContainer is a container of ParticleVaults. +// These Vaults are broken down into user defined chunks that can // be used to overlap asynchronous MPI with the tracking kernel. // -// Facilities for storing Processing, Processed, and Extra vaults -// are controled by the ParticleVaultContainer. As well as the -// sendQueue, which lists the particles that must be send to +// Facilities for storing Processing, Processed, and Extra vaults +// are controled by the ParticleVaultContainer. 
As well as the +// sendQueue, which lists the particles that must be send to // another process via MPI //-------------------------------------------------------------- @@ -23,92 +72,123 @@ class MC_Particle; class ParticleVault; class SendQueue; -//typedef unsigned long long int uint64_cu; +typedef unsigned long long int uint64_cu; class ParticleVaultContainer { - public: - - //Constructor - ParticleVaultContainer( uint64_t vault_size, - uint64_t num_vaults, uint64_t num_extra_vaults ); - - //Destructor - ~ParticleVaultContainer(); - - //Basic Getters - uint64_t getVaultSize(){ return _vaultSize; } - uint64_t getNumExtraVaults(){ return _numExtraVaults; } - - uint64_t processingSize(){ return _processingVault.size(); } - uint64_t processedSize(){ return _processedVault.size(); } - - //Returns the ParticleVault that is currently pointed too - //by index listed - ParticleVault* getTaskProcessingVault(uint64_t vaultIndex); - ParticleVault* getTaskProcessedVault( uint64_t vaultIndex); - - //Returns the index to the first empty Processed Vault - uint64_t getFirstEmptyProcessedVault(); - - //Returns a pointer to the Send Queue - HOST_DEVICE - SendQueue* getSendQueue(); - HOST_DEVICE_END - - //Counts Particles in all vaults - uint64_t sizeProcessing(); - uint64_t sizeProcessed(); - uint64_t sizeExtra(); - - //Collapses Particles down into lowest amount of vaults as - //needed to hold them removes all but the last parially - //filled vault - void collapseProcessing(); - void collapseProcessed(); - - //Swaps the particles in Processed for the empty vaults in - //Processing - void swapProcessingProcessedVaults(); - - //Adds a particle to the processing particle vault - void addProcessingParticle( MC_Base_Particle &particle, uint64_t &fill_vault_index ); - //Adds a particle to the extra particle vault - HOST_DEVICE - void addExtraParticle( MC_Particle &particle ); - HOST_DEVICE_END - - //Pushes particles from Extra Vaults onto the Processing - //Vault list - void 
cleanExtraVaults(); - - private: - - //The Size of the ParticleVaults (fixed at runtime for - //each run) - uint64_t _vaultSize; - - //The number of Extra Vaults needed based on hueristics - //(fixed at runtime for each run) - uint64_t _numExtraVaults; - - //A running index for the number of particles int the extra - //particle vaults - uint64_t _extraVaultIndex; - - //The send queue - stores particle index and neighbor index - //for any particles that hit (TRANSIT_OFF_PROCESSOR) - SendQueue *_sendQueue; - - //The list of active particle vaults (size - grow-able) - std::vector _processingVault; - - //The list of censused particle vaults (size - grow-able) - std::vector _processedVault; - - //The list of extra particle vaults (size - fixed) - qs_vector _extraVault; - +public: + // Constructor + ParticleVaultContainer(uint64_t vault_size, + uint64_t num_vaults, uint64_t num_extra_vaults); + + // Destructor + ~ParticleVaultContainer(); + + // Basic Getters + uint64_t getVaultSize() { return _vaultSize; } + + HOST_DEVICE + uint64_t getNumExtraVaults() { return _numExtraVaults; } + HOST_DEVICE_END + + uint64_t processingSize() { return _processingVault.size(); } + uint64_t processedSize() { return _processedVault.size(); } + + // Returns the ParticleVault that is currently pointed too + // by index listed + ParticleVault *getTaskProcessingVault(uint64_t tallyArray); + ParticleVault *getTaskProcessedVault(uint64_t tallyArray); + + // Returns the index to the first empty Processed Vault + uint64_t getFirstEmptyProcessedVault(); + + // Returns a pointer to the Send Queue + HOST_DEVICE + SendQueue *getSendQueue(); + HOST_DEVICE_END + + // Counts Particles in all vaults + uint64_t sizeProcessing(); + uint64_t sizeProcessed(); + uint64_t sizeExtra(); + + // Collapses Particles down into lowest amount of vaults as + // needed to hold them removes all but the last parially + // filled vault + void collapseProcessing(); + void collapseProcessed(); + + // Swaps the particles in 
Processed for the empty vaults in + // Processing + void swapProcessingProcessedVaults(); + + // Adds a particle to the processing particle vault + void addProcessingParticle(MC_Base_Particle &particle, uint64_t &fill_vault_index); + // Adds a particle to the extra particle vault + HOST_DEVICE + void addExtraParticle(MC_Particle &particle); + HOST_DEVICE_END + + HOST_DEVICE + void addExtraParticle(MC_Particle &particle, int *tallyArray, int *particleindex); + HOST_DEVICE_END + + uint64_t getextraVaultIndex(); + + ParticleVault *getExtraVault(int index); + + // Pushes particles from Extra Vaults onto the Processing + // Vault list + void cleanExtraVaults(); + +private: + // The Size of the ParticleVaults (fixed at runtime for + // each run) + uint64_t _vaultSize; + + // The number of Extra Vaults needed based on hueristics + //(fixed at runtime for each run) + uint64_t _numExtraVaults; + + // A running index for the number of particles int the extra + // particle vaults + uint64_cu _extraVaultIndex; + + // The send queue - stores particle index and neighbor index + // for any particles that hit (TRANSIT_OFF_PROCESSOR) + SendQueue *_sendQueue; + + // The list of active particle vaults (size - grow-able) + std::vector _processingVault; + + // The list of censused particle vaults (size - grow-able) + std::vector _processedVault; + + // The list of extra particle vaults (size - fixed) + qs_vector _extraVault; }; +//-------------------------------------------------------------- +//------------getSendQueue-------------------------------------- +// Returns a pointer to the Send Queue +//-------------------------------------------------------------- +inline HOST_DEVICE + SendQueue * + ParticleVaultContainer:: + getSendQueue() +{ + return this->_sendQueue; +} +HOST_DEVICE_END + +inline HOST_DEVICE void ParticleVaultContainer:: + addExtraParticle(MC_Particle &particle) +{ + uint64_cu index = 0; + ATOMIC_CAPTURE(this->_extraVaultIndex, (uint64_cu)1, index); + uint64_t vault = 
index / this->_vaultSize; + _extraVault[vault]->pushParticle(particle); +} +HOST_DEVICE_END + #endif
diff --git a/src/PopulationControl.cc.dp.cpp b/src/PopulationControl.cc.dp.cpp
new file mode 100644
index 00000000..285fee77
--- /dev/null
+++ b/src/PopulationControl.cc.dp.cpp
@@ -0,0 +1,172 @@
+#include "PopulationControl.hh"
+#include "MC_Processor_Info.hh"
+#include "MonteCarlo.hh"
+#include "Globals.hh"
+#include "MC_Particle.hh"
+#include "ParticleVaultContainer.hh"
+#include "ParticleVault.hh"
+#include "utilsMpi.hh"
+#include "NVTX_Range.hh"
+#include <cmath> // ceil/floor are used below; the original header name was lost in extraction -- TODO confirm
+
+namespace
+{
+    void PopulationControlGuts(const double splitRRFactor,
+                               uint64_t currentNumParticles,
+                               ParticleVaultContainer* my_particle_vault,
+                               Balance& taskBalance);
+}
+// Splits or roulettes the processing particles toward simulationParams.nParticles, then collapses the processing vaults.
+void PopulationControl(MonteCarlo* monteCarlo, bool loadBalance)
+{
+    NVTX_Range range("PopulationControl");
+
+    uint64_t targetNumParticles = monteCarlo->_params.simulationParams.nParticles;
+    uint64_t globalNumParticles = 0;
+    uint64_t localNumParticles = monteCarlo->_particleVaultContainer->sizeProcessing();
+
+    if (loadBalance)
+    {
+        // If we are parallel, we will have one domain per MPI process. The targetNumParticles is across
+        // all MPI processes, so we need to divide by the number of ranks to get the per-MPI-process targetNumParticles.
+        targetNumParticles = ceil((double)targetNumParticles / (double)mcco->processor_info->num_processors );
+        // NOTE(review): reads the global `mcco`, not the `monteCarlo` argument -- presumably the same object; confirm.
+        // NO LONGER SPLITTING VAULTS BY THREADS
+//        // If we are threaded, targetNumParticles should be divided by the number of threads (tasks) to balance
+//        // the particles across the thread level vaults.
+//        targetNumParticles = ceil((double)targetNumParticles / (double)mcco->processor_info->num_tasks);
+    }
+    else
+    {
+        mpiAllreduce(&localNumParticles, &globalNumParticles, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD);
+    }
+
+    Balance & taskBalance = monteCarlo->_tallies->_balanceTask[0];
+
+    double splitRRFactor = 1.0;
+    if (loadBalance)
+    {
+        const uint64_t currentNumParticles = localNumParticles; // stay 64-bit: an `int` copy truncated large counts
+        if (currentNumParticles != 0)
+            splitRRFactor = (double)targetNumParticles / (double)currentNumParticles;
+        else
+            splitRRFactor = 1.0;
+    }
+    else
+    {
+        if(globalNumParticles != 0)
+            splitRRFactor = (double)targetNumParticles / (double)globalNumParticles;
+    }
+
+    if (splitRRFactor != 1.0) // no need to split if population is already correct.
+        PopulationControlGuts(splitRRFactor, localNumParticles, monteCarlo->_particleVaultContainer, taskBalance);
+
+    monteCarlo->_particleVaultContainer->collapseProcessing();
+
+    return;
+}
+
+// PopulationControlGuts: roulettes (factor < 1) or splits (factor > 1) each of the first currentNumParticles particles.
+namespace
+{
+void PopulationControlGuts(const double splitRRFactor, uint64_t currentNumParticles, ParticleVaultContainer* my_particle_vault, Balance& taskBalance)
+{
+    uint64_t vault_size = my_particle_vault->getVaultSize();
+    uint64_t fill_vault_index = currentNumParticles / vault_size;
+
+    // March backwards through the vault so killed particles don't mess up the indexing
+    for (int64_t particleIndex = currentNumParticles-1; particleIndex >= 0; particleIndex--)
+    {
+        uint64_t vault_index = particleIndex / vault_size;
+
+        ParticleVault& taskProcessingVault = *( my_particle_vault->getTaskProcessingVault(vault_index) );
+
+        uint64_t taskParticleIndex = particleIndex%vault_size;
+
+        MC_Base_Particle &currentParticle = taskProcessingVault[taskParticleIndex];
+        double randomNumber = rngSample(&currentParticle.random_number_seed);
+        if (splitRRFactor < 1)
+        {
+            if (randomNumber > splitRRFactor)
+            {
+                // Kill
+                taskProcessingVault.eraseSwapParticle(taskParticleIndex);
+                taskBalance._rr++;
+            }
+            else
+            {
+                currentParticle.weight /= splitRRFactor;
+            }
+        }
+        else if (splitRRFactor > 1)
+        {
+            // Split: add floor(factor) or floor(factor)-1 copies, picked at random from the fractional part
+            int splitFactor = (int)floor(splitRRFactor);
+            if (randomNumber > (splitRRFactor - splitFactor)) { splitFactor--; }
+
+            currentParticle.weight /= splitRRFactor;
+            MC_Base_Particle splitParticle = currentParticle;
+
+            for (int splitFactorIndex = 0; splitFactorIndex < splitFactor; splitFactorIndex++)
+            {
+                taskBalance._split++;
+
+                splitParticle.random_number_seed = rngSpawn_Random_Number_Seed(
+                    &currentParticle.random_number_seed);
+                splitParticle.identifier = splitParticle.random_number_seed;
+
+                my_particle_vault->addProcessingParticle( splitParticle, fill_vault_index );
+
+            }
+        }
+    }
+}
+} // anonymous namespace
+
+// Roulette low-weight particles relative to the source particle weight: a particle at or below
+// lowWeightCutoff * source_particle_weight survives when its sample <= lowWeightCutoff (weight scaled up), else dies.
+void RouletteLowWeightParticles(MonteCarlo* monteCarlo)
+{
+    NVTX_Range range("RouletteLowWeightParticles");
+
+    const double lowWeightCutoff = monteCarlo->_params.simulationParams.lowWeightCutoff;
+
+    if (lowWeightCutoff > 0.0)
+    {
+
+        uint64_t currentNumParticles = monteCarlo->_particleVaultContainer->sizeProcessing();
+        uint64_t vault_size = monteCarlo->_particleVaultContainer->getVaultSize();
+
+        Balance& taskBalance = monteCarlo->_tallies->_balanceTask[0];
+
+        // March backwards through the vault so killed particles don't mess up the indexing
+        const double source_particle_weight = monteCarlo->source_particle_weight;
+        const double weightCutoff = lowWeightCutoff*source_particle_weight;
+
+        for ( int64_t particleIndex = currentNumParticles-1; particleIndex >= 0; particleIndex--)
+        {
+            uint64_t vault_index = particleIndex / vault_size;
+
+            ParticleVault& taskProcessingVault = *(monteCarlo->_particleVaultContainer->getTaskProcessingVault(vault_index));
+            uint64_t taskParticleIndex = particleIndex%vault_size;
+            MC_Base_Particle &currentParticle = taskProcessingVault[taskParticleIndex];
+
+            if (currentParticle.weight <= weightCutoff)
+            {
+                double randomNumber = rngSample(&currentParticle.random_number_seed);
+                if (randomNumber <= lowWeightCutoff)
+                {
+                    // The particle history continues with an increased weight.
+                    currentParticle.weight /= lowWeightCutoff;
+                }
+                else
+                {
+                    // Kill
+                    taskProcessingVault.eraseSwapParticle(taskParticleIndex);
+                    taskBalance._rr++;
+                }
+            }
+        }
+        monteCarlo->_particleVaultContainer->collapseProcessing();
+    }
+}
diff --git a/src/QS_Vector.hh b/src/QS_Vector.hh index 4b93acc7..a3db9247 100644 --- a/src/QS_Vector.hh +++ b/src/QS_Vector.hh @@ -1,28 +1,73 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + #ifndef QS_VECTOR_HH #define QS_VECTOR_HH #include "DeclareMacro.hh" -#include "QS_atomics.hh" +#include "AtomicMacro.hh" #include "qs_assert.hh" #include "MemoryControl.hh" #include template -class qs_vector +class qs_vector { - public: +public: - qs_vector() : _data(0), _capacity(0), _size(0), _memPolicy(MemoryControl::AllocationPolicy::HOST_MEM), _isOpen(0) {}; + qs_vector() : _data(0), _capacity(0), _size(0), _memPolicy(MemoryControl::AllocationPolicy::HOST_MEM), _isOpen(0){}; - qs_vector(int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM ) + qs_vector(int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM) : _data(0), _capacity(size), _size(size), _memPolicy(memPolicy), _isOpen(0) { _data = MemoryControl::allocate(size, memPolicy); } - qs_vector( int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM ) + qs_vector(int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM) : _data(0), _capacity(size), _size(size), _memPolicy(memPolicy), _isOpen(0) { _data = MemoryControl::allocate(size, memPolicy); @@ -31,32 +76,32 @@ class qs_vector _data[ii] = value; } - qs_vector(const qs_vector& aa ) + qs_vector(const qs_vector &aa ) : _data(0), _capacity(aa._capacity), _size(aa._size), _memPolicy(aa._memPolicy), _isOpen(aa._isOpen) { _data = MemoryControl::allocate(_capacity, _memPolicy); - for (int ii=0; ii<_size; ++ii) + for (int ii = 0; ii < _size; ++ii) _data[ii] = aa._data[ii]; } - + ~qs_vector() - { + { MemoryControl::deallocate(_data, _size, _memPolicy); } /// Needed for copy-swap idiom - void swap(qs_vector& other) + void swap(qs_vector &other) { - std::swap(_data, other._data); + std::swap(_data, other._data); std::swap(_capacity, other._capacity); - std::swap(_size, other._size); + std::swap(_size, other._size); std::swap(_memPolicy, other._memPolicy); - 
std::swap(_isOpen, other._isOpen); + std::swap(_isOpen, other._isOpen); } /// Implement assignment using copy-swap idiom - qs_vector& operator=(const qs_vector& aa) + qs_vector &operator=(const qs_vector &aa) { if (&aa != this) { @@ -66,52 +111,57 @@ class qs_vector return *this; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int get_memPolicy() { - return _memPolicy; + return _memPolicy; } - void push_back( const T& dataElem ) + void push_back(const T &dataElem) { qs_assert( _isOpen ); _data[_size] = dataElem; _size++; } - void Open() { _isOpen = true; } + void Open() { _isOpen = true; } void Close(){ _isOpen = false; } - HOST_DEVICE_CUDA - const T& operator[]( int index ) const + HOST_DEVICE_SYCL + const T& operator[](int index) const { return _data[index]; } - HOST_DEVICE_CUDA - T& operator[]( int index ) + HOST_DEVICE_SYCL + T& operator[](int index) { return _data[index]; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int capacity() const { return _capacity; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int size() const { return _size; } + + void setsize(int size) + { + _size = size; + } T& back() { return _data[_size-1]; } - void reserve( int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM ) + void reserve(int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM) { qs_assert( _capacity == 0 ); _capacity = size; @@ -119,7 +169,7 @@ class qs_vector _data = MemoryControl::allocate(size, memPolicy); } - void resize( int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM ) + void resize(int size, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM) { qs_assert( _capacity == 0 ); _capacity = size; @@ -128,9 +178,9 @@ class qs_vector _data = MemoryControl::allocate(size, memPolicy); } - void resize( int size, const T& value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM ) - { - qs_assert( 
_capacity == 0 ); + void resize(int size, const T &value, MemoryControl::AllocationPolicy memPolicy = MemoryControl::AllocationPolicy::HOST_MEM) + { + qs_assert(_capacity == 0); _capacity = size; _size = size; _memPolicy = memPolicy; @@ -142,57 +192,61 @@ class qs_vector bool empty() const { - return ( _size == 0 ); + return (_size == 0); } - void eraseEnd( int NewEnd ) + void eraseEnd(int NewEnd) { - _size = NewEnd; + _size = NewEnd; } void pop_back() { - _size--; + _size--; } void clear() { - _size = 0; + _size = 0; } - void appendList( int listSize, T* list ) + void appendList(int listSize, T *list ) { - qs_assert( this->_size + listSize < this->_capacity ); + qs_assert(this->_size + listSize < this->_capacity); - int size = _size; - this->_size += listSize; + int size = _size; + this->_size += listSize; - for( int i = size; i < _size; i++ ) - { - _data[i] = list[ i-size ]; - } + for( int i = size; i < _size; i++ ) + { + _data[i] = list[ i-size ]; + } + + } + const T *const outputPointer() + { + return _data; } //Atomically retrieve an availible index then increment that index some amount - HOST_DEVICE_CUDA - int atomic_Index_Inc( int inc ) + HOST_DEVICE_SYCL + int atomic_Index_Inc(int inc) { - int pos; + int pos; - QS::atomicCaptureAdd( _size, inc, pos ); + ATOMIC_CAPTURE(_size, inc, pos); - return pos; + return pos; } - private: - T* _data; + +private: + T *_data; int _capacity; int _size; bool _isOpen; MemoryControl::AllocationPolicy _memPolicy; - }; - #endif
diff --git a/src/Random.cc b/src/Random.cc
new file mode 100644
index 00000000..16f03689
--- /dev/null
+++ b/src/Random.cc
@@ -0,0 +1,7 @@
+// Defines the per-thread engine declared in Random.h; each thread's copy is seeded from std::random_device.
+#include "Random.h"
+
+namespace ts
+{
+thread_local std::mt19937_64 Random::_engine{std::random_device{}()};
+} // namespace ts
diff --git a/src/Random.h b/src/Random.h
new file mode 100644
index 00000000..4abb568a
--- /dev/null
+++ b/src/Random.h
@@ -0,0 +1,33 @@
+#pragma once
+#include <cstdint>
+#include <random>
+
+namespace ts
+{
+// Static-only random source backed by one std::mt19937_64 engine per thread (defined in Random.cc).
+class Random
+{
+public:
+    Random() = delete; // never instantiated; every member is static
+
+    // Returns a uniformly distributed integer in [0, UINT64_MAX].
+    static uint64_t random()
+    {
+        std::uniform_int_distribution<uint64_t> dist{0, UINT64_MAX};
+        return dist(_engine);
+    }
+
+    // Returns a uniformly distributed double in [0, 1).
+    static double drandom()
+    {
+        std::uniform_real_distribution<double> dist{0.0, 1.0};
+        return dist(_engine);
+    }
+
+    // Re-seeds the calling thread's engine; engines of other threads are not touched.
+    static void seed(uint64_t s) { _engine.seed(s); }
+
+private:
+    thread_local static std::mt19937_64 _engine;
+};
+} // namespace ts
diff --git a/src/SendQueue.hh b/src/SendQueue.hh index 11d4ea5b..414eaedd 100644 --- a/src/SendQueue.hh +++ b/src/SendQueue.hh @@ -1,10 +1,55 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef SENDQUEUE_HH #define SENDQUEUE_HH #include "QS_Vector.hh" #include "DeclareMacro.hh" -//Tuple to record which particles need to be sent to which neighbor process during tracking +// Tuple to record which particles need to be sent to which neighbor process during tracking struct sendQueueTuple { int _neighbor; @@ -13,33 +58,86 @@ struct sendQueueTuple class SendQueue { - public: - +public: SendQueue(); - SendQueue( size_t size ); + SendQueue(size_t size); - //Get the total size of the send Queue + // Get the total size of the send Queue size_t size(); - void reserve( size_t size ){ _data.reserve(size, VAR_MEM); } + void reserve(size_t size) { _data.reserve(size, VAR_MEM); } - //get the number of items in send queue going to a specific neighbor - size_t neighbor_size( int neighbor_ ); + // get the number of items in send queue going to a specific neighbor + size_t neighbor_size(int neighbor_); - sendQueueTuple& getTuple( int index_ ); + sendQueueTuple &getTuple(int index_); - //Add items to the send queue in a kernel - HOST_DEVICE_CUDA - void push( int neighbor_, int vault_index_ ); + // Add items to the send queue in a kernel + HOST_DEVICE_SYCL + void push(int neighbor_, int vault_index_); - //Clear send queue before after use + // Clear send queue before after use void clear(); - private: - - //The send queue - stores particle index and neighbor index for any particles that hit (TRANSIT_OFF_PROCESSOR) +private: + // The send queue - stores 
particle index and neighbor index for any particles that hit (TRANSIT_OFF_PROCESSOR) qs_vector _data; - }; +inline SendQueue::SendQueue() +{ +} + +inline SendQueue::SendQueue(size_t size) + : _data(size, VAR_MEM) +{ +} + +// ----------------------------------------------------------------------- +inline size_t SendQueue:: + size() +{ + return _data.size(); +} + +// ----------------------------------------------------------------------- +inline size_t SendQueue:: + neighbor_size(int neighbor_) +{ + size_t sum_n = 0; + for (size_t i = 0; i < _data.size(); i++) + { + if (neighbor_ == _data[i]._neighbor) + sum_n++; + } + return sum_n; +} + +// ----------------------------------------------------------------------- +inline HOST_DEVICE void SendQueue:: + push(int neighbor_, int vault_index_) +{ + size_t indx = _data.atomic_Index_Inc(1); + + _data[indx]._neighbor = neighbor_; + _data[indx]._particleIndex = vault_index_; +} +HOST_DEVICE_END + +// ----------------------------------------------------------------------- +inline void SendQueue:: + clear() +{ + _data.clear(); +} + +// ----------------------------------------------------------------------- +inline sendQueueTuple &SendQueue:: + getTuple(int index_) +{ + qs_assert(index_ >= 0); + qs_assert(index_ < _data.size()); + return _data[index_]; +} + #endif diff --git a/src/Tallies.cc b/src/Tallies.cc index 9a2bb4bf..da812e9d 100644 --- a/src/Tallies.cc +++ b/src/Tallies.cc @@ -1,3 +1,34 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + +SPDX-License-Identifier: BSD-3-Clause +*/ + #include "Tallies.hh" #include "utilsMpi.hh" #include "MC_Time_Info.hh" @@ -9,25 +40,25 @@ #include using std::vector; -void Tallies::CycleInitialize(MonteCarlo* monteCarlo) +void Tallies::CycleInitialize(MonteCarlo *monteCarlo) { } void Tallies::SumTasks(void) { - for (int replication_index = 1; replication_index < _num_balance_replications; replication_index++) - { - _balanceTask[0].Add(_balanceTask[replication_index]); // Add index 1 and greater to index 0 - _balanceTask[replication_index].Reset(); // reset index 1 and greater after addition - } + for (int replication_index = 1; replication_index < _num_balance_replications; replication_index++) + { + _balanceTask[0].Add(_balanceTask[replication_index]); // Add index 1 and greater to index 0 + _balanceTask[replication_index].Reset(); // reset index 1 and greater after addition + } } void Tallies::CycleFinalize(MonteCarlo *monteCarlo) { SumTasks(); // sum the task level data down to index 0 at the end of each cycle - + vector tal; - tal.reserve( 13 ); + tal.reserve(13); tal.push_back(_balanceTask[0]._absorb); tal.push_back(_balanceTask[0]._census); tal.push_back(_balanceTask[0]._escape); @@ -66,7 +97,7 @@ void Tallies::CycleFinalize(MonteCarlo *monteCarlo) uint64_t newStart = _balanceTask[0]._end; - for ( auto balanceIter = 0; balanceIter < _balanceTask.size(); balanceIter++) + for (auto balanceIter = 0; balanceIter < _balanceTask.size(); balanceIter++) { _balanceTask[balanceIter].Reset(); } @@ -74,22 +105,22 @@ void Tallies::CycleFinalize(MonteCarlo *monteCarlo) for (int domainIndex = 0; domainIndex < _scalarFluxDomain.size(); domainIndex++) { - //Sum Cell Tally Replications + // Sum Cell Tally Replications for (int replication_index = 1; replication_index < _num_flux_replications; replication_index++) { - _cellTallyDomain[domainIndex]._task[0].Add( _cellTallyDomain[domainIndex]._task[replication_index]); - 
_cellTallyDomain[domainIndex]._task[replication_index].Reset(); + _cellTallyDomain[domainIndex]._task[0].Add(_cellTallyDomain[domainIndex]._task[replication_index]); + _cellTallyDomain[domainIndex]._task[replication_index].Reset(); } - //Sum Scalar Flux Tally Replications + // Sum Scalar Flux Tally Replications for (int replication_index = 1; replication_index < _num_flux_replications; replication_index++) { _scalarFluxDomain[domainIndex]._task[0].Add(_scalarFluxDomain[domainIndex]._task[replication_index]); - _scalarFluxDomain[domainIndex]._task[replication_index].Reset(); + _scalarFluxDomain[domainIndex]._task[replication_index].Reset(); } - if( monteCarlo->_params.simulationParams.coralBenchmark ) - _fluence.compute( domainIndex, _scalarFluxDomain[domainIndex] ); + if (monteCarlo->_params.simulationParams.coralBenchmark) + _fluence.compute(domainIndex, _scalarFluxDomain[domainIndex]); _cellTallyDomain[domainIndex]._task[0].Reset(); _scalarFluxDomain[domainIndex]._task[0].Reset(); @@ -97,50 +128,48 @@ void Tallies::CycleFinalize(MonteCarlo *monteCarlo) _spectrum.UpdateSpectrum(monteCarlo); } -void Fluence::compute( int domainIndex, ScalarFluxDomain &scalarFluxDomain ) +void Fluence::compute(int domainIndex, ScalarFluxDomain &scalarFluxDomain) { int numCells = scalarFluxDomain._task[0]._cell.size(); - while( this->_domain.size() <= domainIndex ) + while (this->_domain.size() <= domainIndex) { - FluenceDomain *newDomain = new FluenceDomain( numCells ); - this->_domain.push_back( newDomain ); + FluenceDomain *newDomain = new FluenceDomain(numCells); + this->_domain.push_back(newDomain); } - FluenceDomain* fluenceDomain = this->_domain[domainIndex]; + FluenceDomain *fluenceDomain = this->_domain[domainIndex]; - for( int cellIndex = 0; cellIndex < numCells; cellIndex++ ) + for (int cellIndex = 0; cellIndex < numCells; cellIndex++) { int numGroups = scalarFluxDomain._task[0]._cell[cellIndex].size(); - for( int groupIndex = 0; groupIndex < numGroups; groupIndex++ ) + 
for (int groupIndex = 0; groupIndex < numGroups; groupIndex++) { - fluenceDomain->addCell( cellIndex, scalarFluxDomain._task[0]._cell[cellIndex]._group[groupIndex]); + fluenceDomain->addCell(cellIndex, scalarFluxDomain._task[0]._cell[cellIndex]._group[groupIndex]); } } - } void Tallies::PrintSummary(MonteCarlo *monteCarlo) { - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleFinalize); // stop the finalize timer to get report - - if ( monteCarlo->time_info->cycle == 0 ) - { - Print0("%-8s ", "cycle"); - _balanceTask[0].PrintHeader(); - Print0("%14s %14s %14s %14s\n", "scalar_flux", "cycleInit", "cycleTracking", "cycleFinalize"); - } - - Print0("%8i ", monteCarlo->time_info->cycle); - _balanceTask[0].Print(); - double sum = ScalarFluxSum(monteCarlo); - Print0("%14e %14e %14e %14e\n", sum, - MC_FASTTIMER_GET_LASTCYCLE(MC_Fast_Timer::cycleInit), - MC_FASTTIMER_GET_LASTCYCLE(MC_Fast_Timer::cycleTracking), - MC_FASTTIMER_GET_LASTCYCLE(MC_Fast_Timer::cycleFinalize) - ); - - MC_FASTTIMER_START(MC_Fast_Timer::cycleFinalize); // restart the finalize timer + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleFinalize); // stop the finalize timer to get report + + if (monteCarlo->time_info->cycle == 0) + { + Print0("%8s ", "cycle"); + _balanceTask[0].PrintHeader(); + Print0("%14s %14s %14s %14s\n", "scalar_flux", "cycleInit", "cycleTracking", "cycleFinalize"); + } + + Print0("%8i ", monteCarlo->time_info->cycle); + _balanceTask[0].Print(); + double sum = ScalarFluxSum(monteCarlo); + Print0("%14e %14e %14e %14e\n", sum, + MC_FASTTIMER_GET_LASTCYCLE(MC_Fast_Timer::cycleInit), + MC_FASTTIMER_GET_LASTCYCLE(MC_Fast_Timer::cycleTracking), + MC_FASTTIMER_GET_LASTCYCLE(MC_Fast_Timer::cycleFinalize)); + + MC_FASTTIMER_START(MC_Fast_Timer::cycleFinalize); // restart the finalize timer } double Tallies::ScalarFluxSum(MonteCarlo *monteCarlo) @@ -170,59 +199,57 @@ double Tallies::ScalarFluxSum(MonteCarlo *monteCarlo) return sum; } -void Tallies::InitializeTallies( MonteCarlo *monteCarlo, - int 
balance_replications = 1, - int flux_replications = 1, - int cell_replications = 1 - ) +void Tallies::InitializeTallies(MonteCarlo *monteCarlo, + int balance_replications = 1, + int flux_replications = 1, + int cell_replications = 1) { - //Set num replications from input parameters - _num_balance_replications = balance_replications; - _num_flux_replications = flux_replications; + // Set num replications from input parameters + _num_balance_replications = balance_replications; + _num_flux_replications = flux_replications; _num_cellTally_replications = cell_replications; - - //Initialize the balance tally replications - if( _balanceTask.size() == 0 ) + // Initialize the balance tally replications + if (_balanceTask.size() == 0) { - if( _balanceTask.capacity() == 0 ) - { - //Reserve replicas number of balance tallies - _balanceTask.reserve(_num_balance_replications,VAR_MEM); + if (_balanceTask.capacity() == 0) + { + // Reserve replicas number of balance tallies + _balanceTask.reserve(_num_balance_replications, VAR_MEM); } - //Open the qs vectors to allow push back + // Open the qs vectors to allow push back _balanceTask.Open(); - for( int reps = 0; reps < _num_balance_replications; reps++ ) + for (int reps = 0; reps < _num_balance_replications; reps++) { - //Push back a Constructed object onto the qs vector - _balanceTask.push_back( Balance() ); + // Push back a Constructed object onto the qs vector + _balanceTask.push_back(Balance()); } - //Close the qs vectors diss-allowing push back + // Close the qs vectors diss-allowing push back _balanceTask.Close(); } - //Initialize the cellTally - if( _cellTallyDomain.size() == 0 ) + // Initialize the cellTally + if (_cellTallyDomain.size() == 0) { - if( _cellTallyDomain.capacity() == 0 ) - { + if (_cellTallyDomain.capacity() == 0) + { _cellTallyDomain.reserve(monteCarlo->domain.size(), VAR_MEM); - } + } _cellTallyDomain.Open(); for (int domainIndex = 0; domainIndex < monteCarlo->domain.size(); domainIndex++) - { + { 
_cellTallyDomain.push_back(CellTallyDomain(&monteCarlo->domain[domainIndex], _num_cellTally_replications)); - } + } _cellTallyDomain.Close(); } - //Initialize the scalarFluxTally - if( _scalarFluxDomain.size() == 0 ) + // Initialize the scalarFluxTally + if (_scalarFluxDomain.size() == 0) { - if( _scalarFluxDomain.capacity() == 0 ) + if (_scalarFluxDomain.capacity() == 0) { _scalarFluxDomain.reserve(monteCarlo->domain.size(), VAR_MEM); } @@ -230,7 +257,7 @@ void Tallies::InitializeTallies( MonteCarlo *monteCarlo, for (int domainIndex = 0; domainIndex < monteCarlo->domain.size(); domainIndex++) { _scalarFluxDomain.push_back(ScalarFluxDomain(&monteCarlo->domain[domainIndex], - monteCarlo->_nuclearData->_energies.size()-1, + monteCarlo->_nuclearData->_energies.size() - 1, _num_flux_replications)); } _scalarFluxDomain.Close(); diff --git a/src/Tallies.hh b/src/Tallies.hh index 34d39330..41b007fd 100644 --- a/src/Tallies.hh +++ b/src/Tallies.hh @@ -1,3 +1,48 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + #ifndef TALLIES_HH #define TALLIES_HH @@ -15,126 +60,124 @@ #include "DeclareMacro.hh" #include "EnergySpectrum.hh" -//typedef unsigned long long int uint64_cu; +typedef unsigned long long int uint64_cu; class Fluence; struct MC_Tally_Event { - enum Enum - { - Collision, - Facet_Crossing_Transit_Exit, - Census, - Facet_Crossing_Tracking_Error, - Facet_Crossing_Escape, - Facet_Crossing_Reflection, - Facet_Crossing_Communication - }; + enum Enum + { + Collision, + Facet_Crossing_Transit_Exit, + Census, + Facet_Crossing_Tracking_Error, + Facet_Crossing_Escape, + Facet_Crossing_Reflection, + Facet_Crossing_Communication + }; }; class Balance { - public: - - uint64_t _absorb; // Number of particles absorbed - uint64_t _census; // Number of particles that enter census - uint64_t _escape; // Number of particles that escape - uint64_t _collision; // Number of collosions - uint64_t _end; // Number of particles at end of cycle - uint64_t _fission; // Number of fission events - uint64_t _produce; // Number of particles created by collisions - uint64_t _scatter; // Number of scatters - uint64_t _start; // Number of particles at beginning of cycle - uint64_t _source; // Number of particles sourced in - uint64_t _rr; // Number of particles Russian Rouletted in population control - uint64_t _split; // Number of particles split in population control - uint64_t _numSegments; // Number of segements - - Balance() : - _absorb(0), _census(0), _escape(0), _collision(0), 
_end(0), _fission(0), _produce(0), _scatter(0), _start(0), - _source(0), _rr(0), _split(0), _numSegments(0) { } - - ~Balance() {} +public: + uint64_cu _absorb; // Number of particles absorbed + uint64_cu _census; // Number of particles that enter census + uint64_cu _escape; // Number of particles that escape + uint64_cu _collision; // Number of collosions + uint64_cu _end; // Number of particles at end of cycle + uint64_cu _fission; // Number of fission events + uint64_cu _produce; // Number of particles created by collisions + uint64_cu _scatter; // Number of scatters + uint64_cu _start; // Number of particles at beginning of cycle + uint64_cu _source; // Number of particles sourced in + uint64_cu _rr; // Number of particles Russian Rouletted in population control + uint64_cu _split; // Number of particles split in population control + uint64_cu _numSegments; // Number of segements + + Balance() : _absorb(0), _census(0), _escape(0), _collision(0), _end(0), _fission(0), _produce(0), _scatter(0), _start(0), + _source(0), _rr(0), _split(0), _numSegments(0) {} + + ~Balance() {} void PrintHeader() { - Print0("%12s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s %12s", - "start", "source", "rr", "split", - "absorb", "scatter", "fission", "produce", - "collisn", "escape", "census", + Print0("%6s %7s %6s %6s %10s %10s %7s %10s %10s %8s %10s %10s", + "start", "source", "rr", "split", + "absorb", "scatter", "fission", "produce", + "collisn", "escape", "census", "num_seg"); } void Print() { - Print0("%12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 " %12" PRIu64 "", - _start, _source,_rr, _split, - _absorb, _scatter, _fission, _produce, - _collision, _escape, _census, + Print0("%6" PRIu64 " %7" PRIu64 " %6" PRIu64 " %6" PRIu64 " %10" PRIu64 " %10" PRIu64 " %7" PRIu64 " %10" PRIu64 " %10" PRIu64 " %8" PRIu64 " %10" PRIu64 " %10" PRIu64 "", + _start, _source, _rr, _split, + 
_absorb, _scatter, _fission, _produce, + _collision, _escape, _census, _numSegments); } - void Reset() - { - _absorb = _census = _escape = _collision = _end = _fission = _produce = _scatter = _start = _source = - _rr = _split = _numSegments = 0; - } - - void Add(Balance &bal) - { - _absorb += bal._absorb; - _census += bal._census; - _escape += bal._escape; - _collision += bal._collision; - _end += bal._end; - _fission += bal._fission; - _produce += bal._produce; - _scatter += bal._scatter; - _start += bal._start; - _source += bal._source; - _rr += bal._rr; - _split += bal._split; - _numSegments += bal._numSegments; - } + void Reset() + { + _absorb = _census = _escape = _collision = _end = _fission = _produce = _scatter = _start = _source = + _rr = _split = _numSegments = 0; + } + + HOST_DEVICE + void Add(Balance &bal) + { + _absorb += bal._absorb; + _census += bal._census; + _escape += bal._escape; + _collision += bal._collision; + _end += bal._end; + _fission += bal._fission; + _produce += bal._produce; + _scatter += bal._scatter; + _start += bal._start; + _source += bal._source; + _rr += bal._rr; + _split += bal._split; + _numSegments += bal._numSegments; + } }; class ScalarFluxCell { - public: - double* _group; - int _size; - - ScalarFluxCell() : _group(0), _size(0) {} - - ScalarFluxCell(double* storage, int size) - : _group(storage), - _size(size) - { - for( int i = 0; i < _size; i++ ) - { - _group[i] = 0.0; - } - } - - ~ScalarFluxCell() {} - - int size() const {return _size;} -}; +public: + double *_group; + int _size; + ScalarFluxCell() : _group(0), _size(0) {} + + ScalarFluxCell(double *storage, int size) + : _group(storage), + _size(size) + { + for (int i = 0; i < _size; i++) + { + _group[i] = 0.0; + } + } + + ~ScalarFluxCell() {} + + int size() const { return _size; } +}; class CellTallyTask { - public: +public: qs_vector _cell; CellTallyTask() : _cell() {} - CellTallyTask(MC_Domain* domain) + CellTallyTask(MC_Domain *domain) { - if( _cell.capacity() == 0 
) + if (_cell.capacity() == 0) { - _cell.reserve( domain->cell_state.size(), VAR_MEM ); + _cell.reserve(domain->cell_state.size(), VAR_MEM); } _cell.Open(); @@ -153,12 +196,12 @@ class CellTallyTask } } - void Reset() + void Reset() { for (int cellIndex = 0; cellIndex < _cell.size(); cellIndex++) { _cell[cellIndex] = 0.0; - } + } } ~CellTallyTask() {} @@ -166,169 +209,169 @@ class CellTallyTask class ScalarFluxTask { - public: - qs_vector _cell; - BulkStorage _scalarFluxCellStorage; - - ScalarFluxTask() : _cell() {} - - ScalarFluxTask(MC_Domain* domain, int numGroups) - { - if( _cell.capacity() == 0 ) - { - _cell.reserve( domain->cell_state.size(), VAR_MEM ); - _scalarFluxCellStorage.setCapacity(domain->cell_state.size()*numGroups, VAR_MEM); - } - - _cell.Open(); - for (int cellIndex = 0; cellIndex < domain->cell_state.size(); cellIndex++) - { - double* tmp = _scalarFluxCellStorage.getBlock(numGroups); - _cell.push_back(ScalarFluxCell(tmp, numGroups)); - } - _cell.Close(); - } - - void Add(ScalarFluxTask &scalarFluxTask) - { - unsigned int numGroups = _cell[0].size(); - for (int cellIndex = 0; cellIndex < _cell.size(); cellIndex++) - { - for (int groupIndex = 0; groupIndex < numGroups; groupIndex++) - { - _cell[cellIndex]._group[groupIndex] += scalarFluxTask._cell[cellIndex]._group[groupIndex]; - } - } - } - - void Reset() - { - unsigned int numGroups = _cell[0].size(); - for (int cellIndex = 0; cellIndex < _cell.size(); cellIndex++) - { - for (int groupIndex = 0; groupIndex < numGroups; groupIndex++) - { - _cell[cellIndex]._group[groupIndex] = 0.0; - } - } - } - - ~ScalarFluxTask() {} +public: + qs_vector _cell; + BulkStorage _scalarFluxCellStorage; + + ScalarFluxTask() : _cell() {} + + ScalarFluxTask(MC_Domain *domain, int numGroups) + { + if (_cell.capacity() == 0) + { + _cell.reserve(domain->cell_state.size(), VAR_MEM); + _scalarFluxCellStorage.setCapacity(domain->cell_state.size() * numGroups, VAR_MEM); + } + + _cell.Open(); + for (int cellIndex = 0; cellIndex 
< domain->cell_state.size(); cellIndex++) + { + double *tmp = _scalarFluxCellStorage.getBlock(numGroups); + _cell.push_back(ScalarFluxCell(tmp, numGroups)); + } + _cell.Close(); + } + + void Add(ScalarFluxTask &scalarFluxTask) + { + unsigned int numGroups = _cell[0].size(); + for (int cellIndex = 0; cellIndex < _cell.size(); cellIndex++) + { + for (int groupIndex = 0; groupIndex < numGroups; groupIndex++) + { + _cell[cellIndex]._group[groupIndex] += scalarFluxTask._cell[cellIndex]._group[groupIndex]; + } + } + } + + void Reset() + { + unsigned int numGroups = _cell[0].size(); + for (int cellIndex = 0; cellIndex < _cell.size(); cellIndex++) + { + for (int groupIndex = 0; groupIndex < numGroups; groupIndex++) + { + _cell[cellIndex]._group[groupIndex] = 0.0; + } + } + } + + ~ScalarFluxTask() {} }; class CellTallyDomain { - public: - qs_vector _task; - - CellTallyDomain() : _task() {} - - CellTallyDomain(MC_Domain* domain, int cellTally_replications) - { - // Assume OMP_NUM_THREADS tasks - if( _task.capacity() == 0 ) - { - _task.reserve(cellTally_replications, VAR_MEM); - } - _task.Open(); - for (int task_index = 0; task_index < cellTally_replications; task_index++) - { - _task.push_back(CellTallyTask(domain)); - } - _task.Close(); - } - - ~CellTallyDomain() {} +public: + qs_vector _task; + + CellTallyDomain() : _task() {} + + CellTallyDomain(MC_Domain *domain, int cellTally_replications) + { + // Assume OMP_NUM_THREADS tasks + if (_task.capacity() == 0) + { + _task.reserve(cellTally_replications, VAR_MEM); + } + _task.Open(); + for (int task_index = 0; task_index < cellTally_replications; task_index++) + { + _task.push_back(CellTallyTask(domain)); + } + _task.Close(); + } + + ~CellTallyDomain() {} }; class ScalarFluxDomain { - public: - qs_vector _task; - - ScalarFluxDomain() : _task() {} - - ScalarFluxDomain(MC_Domain* domain, int numGroups, int flux_replications) - { - // Assume OMP_NUM_THREADS tasks - if( _task.capacity() == 0 ) - { - 
_task.reserve(flux_replications, VAR_MEM); - } - _task.Open(); - for (int task_index = 0; task_index < flux_replications; task_index++) - { - _task.push_back(ScalarFluxTask(domain, numGroups)); - } - _task.Close(); - } - - ~ScalarFluxDomain() {} +public: + qs_vector _task; + + ScalarFluxDomain() : _task() {} + + ScalarFluxDomain(MC_Domain *domain, int numGroups, int flux_replications) + { + // Assume OMP_NUM_THREADS tasks + if (_task.capacity() == 0) + { + _task.reserve(flux_replications, VAR_MEM); + } + _task.Open(); + for (int task_index = 0; task_index < flux_replications; task_index++) + { + _task.push_back(ScalarFluxTask(domain, numGroups)); + } + _task.Close(); + } + + ~ScalarFluxDomain() {} }; class FluenceDomain { - public: - FluenceDomain( int numCells) : _cell(numCells, 0.0) - {} +public: + FluenceDomain(int numCells) : _cell(numCells, 0.0) + { + } - void addCell( int index, double value ){ _cell[index] += value;} - double getCell( int index ){ return _cell[index]; } - int size(){ return _cell.size(); } - - private: + void addCell(int index, double value) { _cell[index] += value; } + double getCell(int index) { return _cell[index]; } + int size() { return _cell.size(); } + +private: std::vector _cell; }; class Fluence { - public: - Fluence() {}; +public: + Fluence(){}; ~Fluence() { - for( int i = 0; i < _domain.size(); i++ ) + for (int i = 0; i < _domain.size(); i++) { - if( _domain[i] != NULL ) + if (_domain[i] != NULL) delete _domain[i]; } } void compute(int domain, ScalarFluxDomain &scalarFluxDomain); - std::vector _domain; + std::vector _domain; }; class Tallies { - public: - Balance _balanceCumulative; - qs_vector _balanceTask; +public: + Balance _balanceCumulative; + qs_vector _balanceTask; qs_vector _scalarFluxDomain; qs_vector _cellTallyDomain; Fluence _fluence; EnergySpectrum _spectrum; - Tallies( int balRep, int fluxRep, int cellRep, std::string spectrumName, int spectrumSize ) - : _balanceCumulative(), _balanceTask(), + Tallies( int balRep, int 
fluxRep, int cellRep, std::string spectrumName, int spectrumSize ) : _balanceCumulative(), _balanceTask(), _scalarFluxDomain(), _num_balance_replications(balRep), _num_flux_replications(fluxRep), _num_cellTally_replications(cellRep), - _spectrum(spectrumName, spectrumSize) + _spectrum(std::move(spectrumName), spectrumSize) { } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int GetNumBalanceReplications() { return _num_balance_replications; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int GetNumFluxReplications() { return _num_flux_replications; } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL int GetNumCellTallyReplications() { return _num_cellTally_replications; @@ -336,36 +379,36 @@ class Tallies ~Tallies() {} - void InitializeTallies( MonteCarlo *monteCarlo, - int balance_replications, - int flux_replications, - int cell_replications); + void InitializeTallies(MonteCarlo *monteCarlo, + int balance_replications, + int flux_replications, + int cell_replications); - void CycleInitialize(MonteCarlo* monteCarlo); + void CycleInitialize(MonteCarlo *monteCarlo); void SumTasks(); void CycleFinalize(MonteCarlo *mcco); void PrintSummary(MonteCarlo *mcco); - HOST_DEVICE_CUDA + // These atomic operations seem to be working. 
+ HOST_DEVICE_SYCL void TallyScalarFlux(double value, int domain, int task, int cell, int group) { - QS::atomicAdd( _scalarFluxDomain[domain]._task[task]._cell[cell]._group[group], value ); + ATOMIC_ADD( _scalarFluxDomain[domain]._task[task]._cell[cell]._group[group], value ); } - HOST_DEVICE_CUDA + HOST_DEVICE_SYCL void TallyCellValue(double value, int domain, int task, int cell) { - QS::atomicAdd( _cellTallyDomain[domain]._task[task]._cell[cell], value ); + ATOMIC_ADD(_cellTallyDomain[domain]._task[task]._cell[cell], value); } double ScalarFluxSum(MonteCarlo *mcco); - private: +private: int _num_balance_replications; int _num_flux_replications; int _num_cellTally_replications; - }; #endif diff --git a/src/cudaFunctions.cc.dp.cpp b/src/cudaFunctions.cc.dp.cpp new file mode 100644 index 00000000..93e4f6f0 --- /dev/null +++ b/src/cudaFunctions.cc.dp.cpp @@ -0,0 +1,124 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "cudaFunctions.hh" +#include "cudaUtils.hh" +#include + +namespace testname +{ +#if HAVE_SYCL +#include "cudaFunctions.hh" + void WarmUpKernel(sycl::nd_item<3> item_ct1) + { + int global_index = getGlobalThreadID(item_ct1); + if (global_index == 0) + { + } + } +#endif +} + +#if defined(HAVE_SYCL) +void warmup_kernel() +{ + using namespace testname; + sycl_device_queue.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) + { + testname::WarmUpKernel(item_ct1); + }); + sycl_device_queue.wait(); +} +#endif + +#if defined(HAVE_SYCL) +int ThreadBlockLayout(sycl::range<3> &grid, sycl::range<3> &block, + int num_particles) +{ + int run_kernel = 1; + const uint64_t max_block_size = 2147483647; + // const uint64_t threads_per_block = 128; + const uint64_t threads_per_block = 256; + + block[2] = threads_per_block; + block[1] = 1; + block[0] = 1; + + uint64_t num_blocks = num_particles / threads_per_block + ((num_particles % threads_per_block == 0) ? 
0 : 1); + + if (num_blocks == 0) + { + run_kernel = 0; + } + else if (num_blocks <= max_block_size) + { + grid[2] = num_blocks; + grid[1] = 1; + grid[0] = 1; + } + else if (num_blocks <= max_block_size * max_block_size) + { + grid[2] = max_block_size; + grid[1] = 1 + (num_blocks / max_block_size); + grid[0] = 1; + } + else if (num_blocks <= max_block_size * max_block_size * max_block_size) + { + grid[2] = max_block_size; + grid[1] = max_block_size; + grid[0] = 1 + (num_blocks / (max_block_size * max_block_size)); + } + else + { + printf("Error: num_blocks exceeds maximum block specifications. Cannot handle this case yet\n"); + run_kernel = 0; + } + + return run_kernel; +} +#endif diff --git a/src/cudaFunctions.hh b/src/cudaFunctions.hh index f8a8bce8..e1463e9f 100644 --- a/src/cudaFunctions.hh +++ b/src/cudaFunctions.hh @@ -1,14 +1,89 @@ -#ifndef CUDAFUNCTIONS_HH -#define CUDAFUNCTIONS_HH +/* +Modifications Copyright (C) 2023 Intel Corporation +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef HIPFUNCTIONS_HH +#define HIPFUNCTIONS_HH + +#include #include "cudaUtils.hh" #include "DeclareMacro.hh" -#if defined GPU_NATIVE +#if defined(HAVE_SYCL) void warmup_kernel(); -int ThreadBlockLayout( dim3 &grid, dim3 &block, int num_particles ); -DEVICE -int getGlobalThreadID(); +int ThreadBlockLayout(sycl::range<3> &grid, sycl::range<3> &block, + int num_particles); +DEVICE +#endif + +#if defined(HAVE_SYCL) +inline DEVICE int getGlobalThreadID(sycl::nd_item<3> item_ct1) +{ + int blockID = item_ct1.get_group(2) + + item_ct1.get_group(1) * item_ct1.get_group_range(2) + + item_ct1.get_group(0) * item_ct1.get_group_range(2) * + item_ct1.get_group_range(1); + + int threadID = + blockID * (item_ct1.get_local_range(2) * item_ct1.get_local_range(1) * + item_ct1.get_local_range(0)) + + item_ct1.get_local_id(0) * + (item_ct1.get_local_range(2) * item_ct1.get_local_range(1)) + + item_ct1.get_local_id(1) * item_ct1.get_local_range(2) + + item_ct1.get_local_id(2); + return threadID; +} + +inline DEVICE int getLocalThreadID(sycl::nd_item<3> item_ct1) +{ + + int threadID = item_ct1.get_local_id(0) * (item_ct1.get_local_range(2) * + item_ct1.get_local_range(1)) + + item_ct1.get_local_id(1) * item_ct1.get_local_range(2) + + item_ct1.get_local_id(2); + return threadID; +} #endif #endif diff --git a/src/cudaUtils.hh b/src/cudaUtils.hh index 7399ed9d..6bc51485 100644 --- a/src/cudaUtils.hh +++ b/src/cudaUtils.hh @@ -1,26 +1,111 @@ -#ifndef CUDAUTILS_HH -#define 
CUDAUTILS_HH +/* +Modifications Copyright (C) 2023 Intel Corporation -#if defined(HAVE_CUDA) -#include -#include -#include +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef HIPUTILS_HH +#define HIPUTILS_HH + +#if defined(HAVE_SYCL) || defined(HAVE_OPENMP_TARGET) +#include +extern sycl::queue sycl_device_queue; // global variable for device queue +#endif + +#ifdef HAVE_OPENMP_TARGET +#ifdef USE_OPENMP_NO_GPU +#define VAR_MEM MemoryControl::AllocationPolicy::HOST_MEM +#else +#define VAR_MEM MemoryControl::AllocationPolicy::UVM_MEM +#define HAVE_UVM +#endif +#elif HAVE_SYCL +#define VAR_MEM MemoryControl::AllocationPolicy::UVM_MEM +#define HAVE_UVM +#else +#define VAR_MEM MemoryControl::AllocationPolicy::HOST_MEM #endif -enum ExecutionPolicy{ cpu, gpuNative, gpuWithOpenMP }; +enum ExecutionPolicy +{ + cpu, + gpuWithCUDA, + gpuWithOpenMP +}; -inline ExecutionPolicy getExecutionPolicy( int useGPU ) +inline ExecutionPolicy getExecutionPolicy(int useGPU) { ExecutionPolicy execPolicy = ExecutionPolicy::cpu; - if( useGPU ) + if (useGPU) { - #if defined HAVE_CUDA || defined HAVE_HIP - execPolicy = ExecutionPolicy::gpuNative; - #elif defined (HAVE_OPENMP_TARGET) +#if defined(HAVE_SYCL) + execPolicy = ExecutionPolicy::gpuWithCUDA; +#elif defined(HAVE_OPENMP_TARGET) execPolicy = ExecutionPolicy::gpuWithOpenMP; - #endif +#endif } return execPolicy; } + +template +inline void gpuMallocManaged(T **ptr, size_t size, unsigned int flags = 1 /*cudaMemAttachGlobal*/) +{ +#if defined(HAVE_SYCL) +#ifdef UNIFIED_HOST + *ptr = (T *)sycl::malloc_host(size, sycl_device_queue); +#elif defined(UNIFIED_DEVICE) + *ptr = (T *)sycl::malloc_device(size, sycl_device_queue); +#else + *ptr = (T *)sycl::malloc_shared(size, sycl_device_queue); +#endif +#endif +} + +template +inline void gpuFree(T *ptr) +{ +#if defined(HAVE_SYCL) + sycl::free(ptr, sycl_device_queue); +#endif +} #endif diff --git a/src/initMC.cc.dp.cpp b/src/initMC.cc.dp.cpp new file mode 100644 index 00000000..3ab83bf5 --- /dev/null +++ b/src/initMC.cc.dp.cpp @@ -0,0 +1,556 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or 
without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include "initMC.hh" +#include +#include +#include +#include +#include +#include "QS_Vector.hh" +#include "utilsMpi.hh" +#include "MonteCarlo.hh" +#include "MC_Processor_Info.hh" +#include "DecompositionObject.hh" +#include "GlobalFccGrid.hh" +#include "MeshPartition.hh" +#include "CommObject.hh" +#include "SharedMemoryCommObject.hh" +#include "MpiCommObject.hh" +#include "MC_Vector.hh" +#include "NuclearData.hh" +#include "MaterialDatabase.hh" +#include "MC_Time_Info.hh" +#include "Tallies.hh" +#include "MC_Base_Particle.hh" +#include "cudaUtils.hh" +#include "cudaFunctions.hh" +#include "Random.h" + +using std::cout; +using std::endl; +using std::make_pair; +using std::map; +using std::set; +using std::string; +using std::vector; + +namespace +{ + void initGPUInfo(MonteCarlo *monteCarlo); + void initNuclearData(MonteCarlo *monteCarlo, const Parameters ¶ms); + void initMesh(MonteCarlo *monteCarlo, const Parameters ¶ms); + void initTallies(MonteCarlo *monteCarlo, const Parameters ¶ms); + void initTimeInfo(MonteCarlo 
*monteCarlo, const Parameters ¶ms); + void initializeCentersRandomly(int nCenters, + const GlobalFccGrid &grid, + vector ¢ers); + void initializeCentersGrid(double lx, double ly, double lz, + int xDom, int yDom, int zDom, + vector ¢ers); + void consistencyCheck(int myRank, const qs_vector &domain); + void checkCrossSections(MonteCarlo *monteCarlo, const Parameters ¶ms); + +} + +MonteCarlo *initMC(const Parameters ¶ms) +{ + MonteCarlo *monteCarlo; +#ifdef HAVE_UVM + void *ptr; + // in my experiments you need the cudaMemAttachGlobal flag set to make pcie atomics work. + gpuMallocManaged(&ptr, sizeof(MonteCarlo), 1 /*cudaMemAttachGlobal*/); + monteCarlo = new (ptr) MonteCarlo(params); +#else + monteCarlo = new MonteCarlo(params); +#endif + initGPUInfo(monteCarlo); + initTimeInfo(monteCarlo, params); + initNuclearData(monteCarlo, params); + initMesh(monteCarlo, params); + initTallies(monteCarlo, params); + + MC_Base_Particle::Update_Counts(); + + // used when debugging cross sections + checkCrossSections(monteCarlo, params); + + void *ptr_dm, *ptr_dn, *ptr_dmesh; + ptr_dm = (void *)sycl::malloc_device( + monteCarlo->_materialDatabase->_mat.size() * sizeof(Material_d), + sycl_device_queue); + // monteCarlo->_material_d = new(ptr_d) Material_d; + monteCarlo->_material_d = (Material_d *)ptr_dm; + ptr_dn = (void *)sycl::malloc_device(sizeof(NuclearData_d), + sycl_device_queue); + monteCarlo->_nuclearData_d = (NuclearData_d *)ptr_dn; + + ptr_dmesh = (void *)sycl::malloc_device(monteCarlo->domain.size() * + sizeof(MC_Domain_d), + sycl_device_queue); + monteCarlo->domain_d = (MC_Domain_d *)ptr_dmesh; + + return monteCarlo; +} + +namespace +{ + // Init GPU usage information + void initGPUInfo(MonteCarlo *monteCarlo) + { + +#if defined(HAVE_OPENMP_TARGET) + int Ngpus = omp_get_num_devices(); +#elif defined(HAVE_SYCL) + int Ngpus = 1; +#else + int Ngpus = 0; +#endif + + if (Ngpus != 0) + { +#if defined(HAVE_OPENMP_TARGET) || defined(HAVE_SYCL) + 
monteCarlo->processor_info->use_gpu = 1; + int GPUID = monteCarlo->processor_info->rank % Ngpus; + monteCarlo->processor_info->gpu_id = GPUID; + +#if defined(HAVE_OPENMP_TARGET) + omp_set_default_device(GPUID); +#endif +#endif + } +#ifdef USE_OPENMP_NO_GPU + monteCarlo->processor_info->use_gpu = 0; + monteCarlo->processor_info->gpu_id = -1; +#endif + +#ifdef HAVE_SYCL + if (monteCarlo->processor_info->use_gpu) + warmup_kernel(); +#endif + + // printf("monteCarlo->processor_info->use_gpu = %d\n", monteCarlo->processor_info->use_gpu); + } +} + +/// Initializes both the NuclearData and the MaterialDatabase. These +/// two structures are inherently linked since the isotopeGids stored in +/// the MaterialDatabase must correspond to the isotope indices in the +/// NuclearData. +namespace +{ + void initNuclearData(MonteCarlo *monteCarlo, const Parameters ¶ms) + { +#if defined HAVE_UVM + void *ptr1, *ptr2; + ptr1 = calloc(1, sizeof(NuclearData)); + ptr2 = calloc(1, sizeof(MaterialDatabase)); + + monteCarlo->_nuclearData = new (ptr1) NuclearData(params.simulationParams.nGroups, + params.simulationParams.eMin, + params.simulationParams.eMax); + monteCarlo->_materialDatabase = new (ptr2) MaterialDatabase(); +#else + monteCarlo->_nuclearData = new NuclearData(params.simulationParams.nGroups, + params.simulationParams.eMin, + params.simulationParams.eMax); + monteCarlo->_materialDatabase = new MaterialDatabase(); +#endif + + map crossSection; + for (auto crossSectionIter = params.crossSectionParams.begin(); + crossSectionIter != params.crossSectionParams.end(); + crossSectionIter++) + { + const CrossSectionParameters &cp = crossSectionIter->second; + crossSection.insert(make_pair(cp.name, Polynomial(cp.aa, cp.bb, cp.cc, cp.dd, cp.ee))); + } + + int num_isotopes = 0; + int num_materials = 0; + + for (auto matIter = params.materialParams.begin(); matIter != params.materialParams.end(); matIter++) + { + const MaterialParameters &mp = matIter->second; + num_isotopes += 
mp.nIsotopes; + num_materials++; + } + + monteCarlo->_nuclearData->_isotopes.reserve(num_isotopes, VAR_MEM); + monteCarlo->_materialDatabase->_mat.reserve(num_materials, VAR_MEM); + + for (auto matIter = params.materialParams.begin(); + matIter != params.materialParams.end(); matIter++) + { + const MaterialParameters &mp = matIter->second; + Material material(mp.name, mp.mass); + double nuBar = params.crossSectionParams.at(mp.fissionCrossSection).nuBar; + material._iso.reserve(mp.nIsotopes, VAR_MEM); + + for (int iIso = 0; iIso < mp.nIsotopes; ++iIso) + { + int isotopeGid = monteCarlo->_nuclearData->addIsotope( + mp.nReactions, + crossSection.at(mp.fissionCrossSection), + crossSection.at(mp.scatteringCrossSection), + crossSection.at(mp.absorptionCrossSection), + nuBar, + mp.totalCrossSection, + mp.fissionCrossSectionRatio, + mp.scatteringCrossSectionRatio, + mp.absorptionCrossSectionRatio); + + // atomFraction for each isotope is 1/nIsotopes. Treats all + // isotopes as equally prevalent. 
+ material.addIsotope(Isotope(isotopeGid, 1.0 / mp.nIsotopes)); + } + monteCarlo->_materialDatabase->addMaterial(material); + } + } +} + +namespace +{ + void consistencyCheck(int myRank, const qs_vector &domain) + { + if (myRank == 0) + { + cout << "Starting Consistency Check" << endl; + } + unsigned nDomains = domain.size(); + for (int iDomain = 0; iDomain < nDomains; ++iDomain) + { + const MC_Mesh_Domain &mesh = domain[iDomain].mesh; + unsigned nCells = mesh._cellConnectivity.size(); + for (unsigned iCell = 0; iCell < nCells; ++iCell) + { + for (unsigned iFacet = 0; iFacet < 24; ++iFacet) + { + const MC_Location ¤t = + mesh._cellConnectivity[iCell]._facet[iFacet].subfacet.current; + qs_assert(current.cell == iCell); + + const MC_Location &adjacent = + mesh._cellConnectivity[iCell]._facet[iFacet].subfacet.adjacent; + + int jDomain = adjacent.domain; + int jCell = adjacent.cell; + int jFacet = adjacent.facet; + + const Subfacet_Adjacency &backside = domain[jDomain].mesh._cellConnectivity[jCell]._facet[jFacet].subfacet; + + qs_assert(backside.adjacent.domain == iDomain); + qs_assert(backside.adjacent.cell == iCell); + qs_assert(backside.adjacent.facet == iFacet); + } + } + } + if (myRank == 0) + { + cout << "Finished Consistency Check" << endl; + } + } +} + +namespace +{ + void initMesh(MonteCarlo *monteCarlo, const Parameters ¶ms) + { + int nx = params.simulationParams.nx; + int ny = params.simulationParams.ny; + int nz = params.simulationParams.nz; + double lx = params.simulationParams.lx; + double ly = params.simulationParams.ly; + double lz = params.simulationParams.lz; + int xDom = params.simulationParams.xDom; + int yDom = params.simulationParams.yDom; + int zDom = params.simulationParams.zDom; + + int myRank, nRanks; + mpiComm_rank(MPI_COMM_WORLD, &myRank); + mpiComm_size(MPI_COMM_WORLD, &nRanks); + + /* + if(xDom !=1 || yDom!=1 || zDom!=1) + { + std::cout<<"We can only handle 1 domain (and mpi rank) at this time"< myDomainGid = ddc.getAssignedDomainGids(); + 
+ GlobalFccGrid globalGrid(nx, ny, nz, lx, ly, lz); + + int nCenters = nRanks * nDomainsPerRank; + vector domainCenter; + if (xDom == 0 || yDom == 0 || zDom == 0) + initializeCentersRandomly(nCenters, globalGrid, domainCenter); + else + initializeCentersGrid(lx, ly, lz, xDom, yDom, zDom, domainCenter); + qs_assert(domainCenter.size() == nCenters); + + vector partition; + { + int foremanRank = myRank; + for (unsigned ii = 0; ii < myDomainGid.size(); ++ii) + { + partition.push_back(MeshPartition(myDomainGid[ii], ii, foremanRank)); + qs_assert(ddc.getIndex(myDomainGid[ii]) == ii); + } + } + + CommObject *comm = 0; + if (nRanks == 1) + comm = new SharedMemoryCommObject(partition); + else if (nRanks > 1 && nDomainsPerRank == 1) + comm = new MpiCommObject(MPI_COMM_WORLD, ddc); + else + qs_assert(false); + + for (unsigned ii = 0; ii < myDomainGid.size(); ++ii) + { + if (myRank == 0) + { + cout << "Building partition " << myDomainGid[ii] << endl; + } + partition[ii].buildMeshPartition(globalGrid, domainCenter, comm); + } + + mpiBarrier(MPI_COMM_WORLD); + mpiBarrier(MPI_COMM_WORLD); + + delete comm; + + monteCarlo->domain.reserve(myDomainGid.size(), VAR_MEM); + monteCarlo->domain.Open(); + for (unsigned ii = 0; ii < myDomainGid.size(); ++ii) + { + if (myRank == 0) + { + cout << "Building MC_Domain " << ii << endl; + } + monteCarlo->domain.push_back( + MC_Domain(partition[ii], globalGrid, ddc, params, *monteCarlo->_materialDatabase, + params.simulationParams.nGroups)); + } + monteCarlo->domain.Close(); + + if (nRanks == 1) + consistencyCheck(myRank, monteCarlo->domain); + + if (myRank == 0) + { + cout << "Finished initMesh" << endl; + } + } +} + +namespace +{ + void initTallies(MonteCarlo *monteCarlo, const Parameters ¶ms) + { + monteCarlo->_tallies->InitializeTallies( + monteCarlo, + params.simulationParams.balanceTallyReplications, + params.simulationParams.fluxTallyReplications, + params.simulationParams.cellTallyReplications); + } +} + +namespace +{ + void 
initTimeInfo(MonteCarlo *monteCarlo, const Parameters ¶ms) + { + monteCarlo->time_info->time_step = params.simulationParams.dt; + } +} + +namespace +{ + // scatter the centers (somewhat) randomly + void initializeCentersRandomly(int nCenters, + const GlobalFccGrid &grid, + vector ¢ers) + { + set picked; + do + { + Tuple iTuple(ts::Random::drandom() * grid.nx() / 2, + ts::Random::drandom() * grid.ny() / 2, + ts::Random::drandom() * grid.nz() / 2); + + if (!picked.insert(iTuple).second) + continue; + + iTuple += iTuple; // iTuple *= 2; + Long64 iCell = grid.cellTupleToIndex(iTuple); + MC_Vector r = grid.cellCenter(iCell); + centers.push_back(r); + } while (centers.size() < nCenters); + } +} + +namespace +{ + void initializeCentersGrid(double lx, double ly, double lz, + int xDom, int yDom, int zDom, + vector ¢ers) + { + double dx = lx / xDom; + double dy = ly / yDom; + double dz = lz / zDom; + for (int ix = 0; ix < xDom; ++ix) + for (int iy = 0; iy < yDom; ++iy) + for (int iz = 0; iz < zDom; ++iz) + centers.push_back( + MC_Vector((0.5 + ix) * dx, (0.5 + iy) * dy, (0.5 + iz) * dz)); + } +} + +namespace +{ + // This function is useful for debugging but is not called in ordinary + // use of the code. Uncomment the call to this function in initMC() + // if you want to get plot data for the cross sections. + void checkCrossSections(MonteCarlo *monteCarlo, const Parameters ¶ms) + { + if (monteCarlo->_params.simulationParams.crossSectionsOut == "") + return; + + struct XC_Data + { + XC_Data() : absorption(0.), fission(0.), scatter(0.) 
{} + double absorption; + double fission; + double scatter; + }; + + NuclearData *nd = monteCarlo->_nuclearData; + int nGroups = nd->_energies.size() - 1; + vector energy(nGroups); + for (unsigned ii = 0; ii < nGroups; ++ii) + energy[ii] = (nd->_energies[ii] + nd->_energies[ii + 1]) / 2.0; + + MaterialDatabase *matDB = monteCarlo->_materialDatabase; + unsigned nMaterials = matDB->_mat.size(); + + map> xcTable; + + // for each material + for (unsigned iMat = 0; iMat < nMaterials; ++iMat) + { + const string &materialName = matDB->_mat[iMat]._name; + vector &xcVec = xcTable[materialName]; + xcVec.resize(nGroups); + unsigned nIsotopes = matDB->_mat[iMat]._iso.size(); + // for each isotope + for (unsigned iIso = 0; iIso < nIsotopes; ++iIso) + { + int isotopeGid = monteCarlo->_materialDatabase->_mat[iMat]._iso[iIso]._gid; + unsigned nReactions = nd->_isotopes[isotopeGid]._species[0]._reactions.size(); + // for each reaction + for (unsigned iReact = 0; iReact < nReactions; ++iReact) + { + // loop over energies + NuclearDataReaction &reaction = nd->_isotopes[isotopeGid]._species[0]._reactions[iReact]; + // accumulate cross sections by reaction type + for (unsigned iGroup = 0; iGroup < nGroups; ++iGroup) + { + switch (reaction._reactionType) + { + case NuclearDataReaction::Scatter: + xcVec[iGroup].scatter += reaction.getCrossSection(iGroup) / nIsotopes; + break; + case NuclearDataReaction::Absorption: + xcVec[iGroup].absorption += reaction.getCrossSection(iGroup) / nIsotopes; + break; + case NuclearDataReaction::Fission: + xcVec[iGroup].fission += reaction.getCrossSection(iGroup) / nIsotopes; + break; + case NuclearDataReaction::Undefined: + qs_assert(false); + break; + } + } + } + } + } + + FILE *xSec; + + std::string fileName = monteCarlo->_params.simulationParams.crossSectionsOut + ".dat"; + + xSec = fopen(fileName.c_str(), "w"); + + // print cross section data + // first the header + fprintf(xSec, "#group energy"); + for (auto mapIter = xcTable.begin(); mapIter != 
xcTable.end(); ++mapIter) + { + const string &materialName = mapIter->first; + fprintf(xSec, " %s_a %s_f %s_s", materialName.c_str(), materialName.c_str(), materialName.c_str()); + } + fprintf(xSec, "\n"); + + // now the data + for (unsigned ii = 0; ii < nGroups; ++ii) + { + fprintf(xSec, "%u %g", ii, energy[ii]); + for (auto mapIter = xcTable.begin(); mapIter != xcTable.end(); ++mapIter) + { + fprintf(xSec, " %g %g %g", mapIter->second[ii].absorption, mapIter->second[ii].fission, mapIter->second[ii].scatter); + } + fprintf(xSec, "\n"); + } + fclose(xSec); + } +} diff --git a/src/main.cc.dp.cpp b/src/main.cc.dp.cpp new file mode 100644 index 00000000..55d03294 --- /dev/null +++ b/src/main.cc.dp.cpp @@ -0,0 +1,521 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include +#include +#include +#include "utils.hh" +#include "Parameters.hh" +#include "utilsMpi.hh" +#include "MonteCarlo.hh" +#include "initMC.hh" +#include "Tallies.hh" +#include "PopulationControl.hh" +#include "ParticleVaultContainer.hh" +#include "ParticleVault.hh" +#include "MC_Particle_Buffer.hh" +#include "MC_Processor_Info.hh" +#include "MC_Time_Info.hh" +#include "macros.hh" +#include "MC_Fast_Timer.hh" +#include "MC_SourceNow.hh" +#include "SendQueue.hh" +#include "NVTX_Range.hh" +#include "cudaUtils.hh" +#include "cudaFunctions.hh" +#include "qs_assert.hh" +#include "CycleTracking.hh" +#include "CoralBenchmark.hh" +#include "EnergySpectrum.hh" + +#include "git_hash.hh" +#include "git_vers.hh" + +#ifdef HAVE_MPI +#include +#endif +#include "utilsMpi.hh" + +#ifdef HAVE_SYCL +sycl::property_list q_prop{sycl::ext::oneapi::property::queue::discard_events(), sycl::property::queue::in_order()}; +sycl::queue sycl_device_queue(q_prop); +#endif + +void gameOver(); +void cycleInit(bool loadBalance); +void cycleTracking(MonteCarlo *monteCarlo, uint64_cu *, uint64_cu *); +void cycleFinalize(); + +void setGPU() +{ + + int rank; + MPI_Comm comm_mc_world(MPI_COMM_WORLD); + + int Ngpus = 1; + + mpiComm_rank(comm_mc_world, &rank); + int GPUID = rank % Ngpus; +} + +using namespace std; + +MonteCarlo *mcco = NULL; + +int main(int argc, char **argv) +{ + mpiInit(&argc, &argv); + printBanner(GIT_VERS, GIT_HASH); + std::cout << "Loading params" << 
std::endl; + Parameters params = getParameters(argc, argv); + const string &filename = params.simulationParams.inputFile; + ifstream inp_file(filename.c_str()); + if (!inp_file.good()) + { + return -1; + } + std::cout << "Finished loading params" << std::endl; + printParameters(params, cout); + std::cout << "setting GPU" << std::endl; + setGPU(); + +#ifdef HAVE_SYCL + // HOIST INTO SETUP FUNCTION EVENTUALLY + char *devchar = std::getenv("QS_DEVICE"); + std::string devname = (devchar == NULL ? "None" : devchar); + if (devname == "CPU") + { + sycl_device_queue = sycl::cpu_selector{}; + } + else if (devname == "GPU") + { + sycl_device_queue = sycl::gpu_selector{}; + } + else if (devname == "HOST") + { + sycl_device_queue = sycl::host_selector{}; + } + else + { + std::cout << "QS_DEVICE must be CPU, GPU or HOST" << std::endl; + std::abort(); + } + + // DEBUG - REMOVE LATER + if (sycl_device_queue.get_device().is_cpu()) + std::cout << "is cpu" << std::endl; + if (sycl_device_queue.get_device().is_gpu()) + std::cout << "is gpu" << std::endl; + if (sycl_device_queue.get_device().is_host()) + std::cout << "is host" << std::endl; + if (sycl_device_queue.get_device().is_accelerator()) + std::cout << "is accelerator" << std::endl; +#endif + + // mcco stores just about everything. 
+ std::cout << "setting parameters" << std::endl; + mcco = initMC(params); + + int myRank, nRanks; + mpiComm_rank(MPI_COMM_WORLD, &myRank); + + std::cout << "Started copyMaterialDatabase_device" << std::endl; + copyMaterialDatabase_device(mcco); + std::cout << "Finished copyMaterialDatabase_device" << std::endl; + copyNuclearData_device(mcco->_nuclearData, mcco->_nuclearData_d); + std::cout << "Finished copyNuclearData_device" << std::endl; + copyDomainDevice(mcco->_nuclearData->_numEnergyGroups, mcco->domain, mcco->domain_d, mcco->domainSize); + std::cout << "Finished copyDomainDevice" << std::endl; + + mpiBarrier(MPI_COMM_WORLD); + int loadBalance = params.simulationParams.loadBalance; + + MC_FASTTIMER_START(MC_Fast_Timer::main); // this can be done once mcco exist. + + const int nSteps = params.simulationParams.nSteps; + + // allocate arrays to hold counters in pinned memory on the host and on the device. + int replications = mcco->_tallies->GetNumBalanceReplications(); + uint64_cu *tallies; + tallies = (uint64_cu *)sycl::malloc_host(sizeof(uint64_cu) * NUM_TALLIES * + replications, + sycl_device_queue); + uint64_cu *tallies_d; + tallies_d = (uint64_cu *)sycl::malloc_device(sizeof(uint64_cu) * + NUM_TALLIES * replications, + sycl_device_queue); + for (int il = 0; il < replications; il++) + { + for (int j1 = 0; j1 < NUM_TALLIES; j1++) + { + tallies[NUM_TALLIES * il + j1] = 0; + } + } + sycl_device_queue + .memcpy(tallies_d, tallies, + sizeof(uint64_cu) * NUM_TALLIES * replications) + .wait(); + for (int ii = 0; ii < nSteps; ++ii) + { + cycleInit(bool(loadBalance)); + cycleTracking(mcco, tallies, tallies_d); + cycleFinalize(); + + mcco->fast_timer->Last_Cycle_Report( + params.simulationParams.cycleTimers, + mcco->processor_info->rank, + mcco->processor_info->num_processors, + mcco->processor_info->comm_mc_world); + } + + MC_FASTTIMER_STOP(MC_Fast_Timer::main); + + gameOver(); + + coralBenchmarkCorrectness(mcco, params); + + sycl::free(tallies, sycl_device_queue); 
+ sycl::free(tallies_d, sycl_device_queue); + +#ifdef HAVE_UVM + mcco->~MonteCarlo(); + gpuFree(mcco); +#else + delete mcco; +#endif + + mpiFinalize(); + + return 0; +} + +void gameOver() +{ + mcco->fast_timer->Cumulative_Report(mcco->processor_info->rank, + mcco->processor_info->num_processors, + mcco->processor_info->comm_mc_world, + mcco->_tallies->_balanceCumulative._numSegments); + mcco->_tallies->_spectrum.PrintSpectrum(mcco); +} + +void cycleInit(bool loadBalance) +{ + + MC_FASTTIMER_START(MC_Fast_Timer::cycleInit); + + mcco->clearCrossSectionCache(); + + mcco->_tallies->CycleInitialize(mcco); + + mcco->_particleVaultContainer->swapProcessingProcessedVaults(); + + mcco->_particleVaultContainer->collapseProcessed(); + mcco->_particleVaultContainer->collapseProcessing(); + + mcco->_tallies->_balanceTask[0]._start = mcco->_particleVaultContainer->sizeProcessing(); + + mcco->particle_buffer->Initialize(); + + MC_SourceNow(mcco); + + PopulationControl(mcco, loadBalance); // controls particle population + + RouletteLowWeightParticles(mcco); // Delete particles with low statistical weight + + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleInit); +} + +#if defined(HAVE_SYCL) + +void CycleTrackingKernel(MonteCarlo *monteCarlo, int num_particles, ParticleVault *processingVault, ParticleVault *processedVault, uint64_cu *tallies, + sycl::nd_item<3> item_ct1, + uint8_t *local) +{ + int global_index = getGlobalThreadID(item_ct1); + int local_index = getLocalThreadID(item_ct1); + int replications = monteCarlo->_tallies->GetNumBalanceReplications(); + + auto values_l = (int *)local; + if (local_index < replications * NUM_TALLIES) + { + values_l[local_index] = 0; + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + + if (global_index < num_particles) + { + CycleTrackingGuts(monteCarlo, global_index, processingVault, processedVault, &values_l[0]); + } + + item_ct1.barrier(sycl::access::fence_space::local_space); + if (local_index < replications * NUM_TALLIES) + { +#if 
defined(HAVE_SYCL) + ATOMIC_ADD(tallies[local_index], (uint64_cu)values_l[local_index]); +#else + __atomic_fetch_add(&(tallies[local_index]), (uint64_t)values_l[local_index], __ATOMIC_RELAXED); +#endif + } +} +#endif + +void cycleTracking(MonteCarlo *monteCarlo, uint64_cu *tallies, uint64_cu *tallies_d) +{ + MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking); + + bool done = false; + + // Determine whether or not to use GPUs if they are available (set for each MPI rank) + ExecutionPolicy execPolicy = getExecutionPolicy(monteCarlo->processor_info->use_gpu); + + ParticleVaultContainer &my_particle_vault = *(monteCarlo->_particleVaultContainer); + + // Post Inital Receives for Particle Buffer + monteCarlo->particle_buffer->Post_Receive_Particle_Buffer(my_particle_vault.getVaultSize()); + + // Get Test For Done Method (Blocking or non-blocking + MC_New_Test_Done_Method::Enum new_test_done_method = monteCarlo->particle_buffer->new_test_done_method; + + int l5 = 0; + + const int replications = monteCarlo->_tallies->GetNumBalanceReplications(); + + do + { + + int particle_count = 0; // Initialize count of num_particles processed + + while (!done) + { + uint64_t fill_vault = 0; + + for (uint64_t processing_vault = 0; processing_vault < my_particle_vault.processingSize(); processing_vault++) + { + MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking_Kernel); + uint64_t processed_vault = my_particle_vault.getFirstEmptyProcessedVault(); + + ParticleVault *processingVault = my_particle_vault.getTaskProcessingVault(processing_vault); + ParticleVault *processedVault = my_particle_vault.getTaskProcessedVault(processed_vault); + + int numParticles = processingVault->size(); + + if (numParticles != 0) + { + NVTX_Range trackingKernel("cycleTracking_TrackingKernel"); // range ends at end of scope + + // The tracking kernel can run + // * As a cuda kernel + // * As an OpenMP 4.5 parallel loop on the GPU + // * As an OpenMP 3.0 parallel loop on the CPU + // * AS a single thread on the CPU. 
+ + switch (execPolicy) + { + case gpuWithCUDA: + { +#if defined(HAVE_SYCL) + + const size_t N = numParticles; + unsigned int wg_size = 16; + unsigned int num_wgs = (N + wg_size - 1) / wg_size; + + sycl_device_queue.submit([&](sycl::handler &cgh) + { + sycl::accessor + local_acc_ct1( + sycl::range<1>(NUM_TALLIES * replications * sizeof(int)), + cgh); + + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, num_wgs) * + sycl::range<3>(1, 1, wg_size), + sycl::range<3>(1, 1, wg_size)), + [=](sycl::nd_item<3> item_ct1) { + CycleTrackingKernel(monteCarlo, numParticles, + processingVault, processedVault, + tallies_d, item_ct1, + local_acc_ct1.get_pointer()); + }); }) + .wait(); + + sycl_device_queue + .memcpy(tallies, tallies_d, NUM_TALLIES * sizeof(uint64_cu) * replications) + .wait(); + +#endif + } + break; + + case gpuWithOpenMP: + { + + std::cout << " this isn't supported with hip yet " << std::endl; + } + break; + + case cpu: +#include "mc_omp_parallel_for_schedule_static.hh" + for (int particle_index = 0; particle_index < numParticles; particle_index++) + { + CycleTrackingGuts(monteCarlo, particle_index, processingVault, processedVault, (int *)tallies); + } + break; + default: + qs_assert(false); + + } // end switch + + // Add in counters from GPU kernel + for (int il = 0; il < replications; il++) + { + monteCarlo->_tallies->_balanceTask[il]._numSegments += tallies[NUM_TALLIES * il + 0]; + tallies[NUM_TALLIES * il + 0] = 0; + monteCarlo->_tallies->_balanceTask[il]._escape += tallies[NUM_TALLIES * il + 1]; + tallies[NUM_TALLIES * il + 1] = 0; + monteCarlo->_tallies->_balanceTask[il]._census += tallies[NUM_TALLIES * il + 2]; + tallies[NUM_TALLIES * il + 2] = 0; + monteCarlo->_tallies->_balanceTask[il]._collision += tallies[NUM_TALLIES * il + 3]; + tallies[NUM_TALLIES * il + 3] = 0; + monteCarlo->_tallies->_balanceTask[il]._scatter += tallies[NUM_TALLIES * il + 4]; + tallies[NUM_TALLIES * il + 4] = 0; + monteCarlo->_tallies->_balanceTask[il]._absorb += 
tallies[NUM_TALLIES * il + 5]; + tallies[NUM_TALLIES * il + 5] = 0; + monteCarlo->_tallies->_balanceTask[il]._fission += tallies[NUM_TALLIES * il + 6]; + tallies[NUM_TALLIES * il + 6] = 0; + monteCarlo->_tallies->_balanceTask[il]._produce += tallies[NUM_TALLIES * il + 7]; + tallies[NUM_TALLIES * il + 7] = 0; + } + sycl_device_queue + .memcpy(tallies_d, tallies, + sizeof(uint64_cu) * NUM_TALLIES * replications) + .wait(); + } + + particle_count += numParticles; + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking_Kernel); + + MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking_MPI); + + // Next, communicate particles that have crossed onto + // other MPI ranks. + NVTX_Range cleanAndComm("cycleTracking_clean_and_comm"); + + SendQueue &sendQueue = *(my_particle_vault.getSendQueue()); + monteCarlo->particle_buffer->Allocate_Send_Buffer(sendQueue); + + // Move particles from send queue to the send buffers + for (int index = 0; index < sendQueue.size(); index++) + { + sendQueueTuple &sendQueueT = sendQueue.getTuple(index); + MC_Base_Particle mcb_particle; + + processingVault->getBaseParticleComm(mcb_particle, sendQueueT._particleIndex); + + int buffer = monteCarlo->particle_buffer->Choose_Buffer(sendQueueT._neighbor); + if (buffer >= 0) + monteCarlo->particle_buffer->Buffer_Particle(mcb_particle, buffer); + } + + monteCarlo->particle_buffer->Send_Particle_Buffers(); // post MPI sends + + processingVault->clear(); // remove the invalid particles + sendQueue.clear(); + + // Move particles in "extra" vaults into the regular vaults. 
+ my_particle_vault.cleanExtraVaults(); + + // receive any particles that have arrived from other ranks + monteCarlo->particle_buffer->Receive_Particle_Buffers(fill_vault); + + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking_MPI); + + } // for loop on vaults + + MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking_MPI); + + NVTX_Range collapseRange("cycleTracking_Collapse_ProcessingandProcessed"); + my_particle_vault.collapseProcessing(); + my_particle_vault.collapseProcessed(); + collapseRange.endRange(); + + // Test for done - blocking on all MPI ranks + NVTX_Range doneRange("cycleTracking_Test_Done_New"); + done = monteCarlo->particle_buffer->Test_Done_New(new_test_done_method); + doneRange.endRange(); + + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking_MPI); + + } // while not done: Test_Done_New() + + // Everything should be done normally. + done = monteCarlo->particle_buffer->Test_Done_New(MC_New_Test_Done_Method::Blocking); + + } while (!done); + + // Make sure to cancel all pending receive requests + monteCarlo->particle_buffer->Cancel_Receive_Buffer_Requests(); + // Make sure Buffers Memory is Free + monteCarlo->particle_buffer->Free_Buffers(); + + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking); +} + +void cycleFinalize() +{ + MC_FASTTIMER_START(MC_Fast_Timer::cycleFinalize); + + mcco->_tallies->_balanceTask[0]._end = mcco->_particleVaultContainer->sizeProcessed(); + + // Update the cumulative tally data. + mcco->_tallies->CycleFinalize(mcco); + + mcco->time_info->cycle++; + + mcco->particle_buffer->Free_Memory(); + + MC_FASTTIMER_STOP(MC_Fast_Timer::cycleFinalize); +} diff --git a/src/mpi_stubs_internal.hh b/src/mpi_stubs_internal.hh index dffcc612..e7c72794 100644 --- a/src/mpi_stubs_internal.hh +++ b/src/mpi_stubs_internal.hh @@ -1,3 +1,65 @@ +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + +/* +Modifications Copyright (C) 2023 Intel Corporation + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. +3. 
Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS +BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, +OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +SPDX-License-Identifier: BSD-3-Clause +*/ + /*++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++*/ // // Copyright (c) 2012 @@ -15,7 +77,7 @@ // MPI stubs structures to implement mpi calls //---------------------------------------------------------------------------------------------------------------------- -typedef struct _List *pList; // forward declaration for prototypes. +typedef struct _List *pList; // forward declaration for prototypes. 
typedef struct _Listitem *pListitem; typedef uint64_t MPI_Aint; @@ -27,7 +89,6 @@ typedef struct _List int count; } List; - typedef struct _Listitem { void *data; @@ -52,7 +113,7 @@ typedef struct typedef struct { - pListitem listitem; // to allow Req to be removed from list + pListitem listitem; // to allow Req to be removed from list int *buf; int tag; @@ -60,60 +121,61 @@ typedef struct } Req; - typedef struct _Handleitem { - int handle; - struct _Handleitem *next; + int handle = 0; + struct _Handleitem *next = NULL; union { - void *anything; // At least size of void * + void *anything; // At least size of void * Comm comm; Req req; } data; - } Handleitem; -typedef struct MPI_Stubs_Data_struct { - - MPI_Errhandler errhandler; - int headcount; - int itemcount; - int initialized; - - // - // The first block of handle items will be statically allocated. - // Subsequent ones will be added if necessary. - // blocks[0..nblocks-1] are allocated at any given time. - // - // Increase MPI_STUBS_MAX_BLOCKS if you *really* need more active request - // (Although probably something is wrong if you need more than 256k !!!) - // - Handleitem block0[MPI_STUBS_BLOCK_ITEMS]; - Handleitem *(blocks[MPI_STUBS_MAX_BLOCKS]); - int nblocks; - - int need_to_init; - Handleitem *nextfree; - - MPI_Stubs_Data_struct() +typedef struct MPI_Stubs_Data_struct +{ + + MPI_Errhandler errhandler; + int headcount; + int itemcount; + int initialized; + + // + // The first block of handle items will be statically allocated. + // Subsequent ones will be added if necessary. + // blocks[0..nblocks-1] are allocated at any given time. + // + // Increase MPI_STUBS_MAX_BLOCKS if you *really* need more active request + // (Although probably something is wrong if you need more than 256k !!!) 
+ // + Handleitem block0[MPI_STUBS_BLOCK_ITEMS]; + Handleitem *(blocks[MPI_STUBS_MAX_BLOCKS]); + int nblocks; + + int need_to_init; + Handleitem *nextfree; + + MPI_Stubs_Data_struct() + { + this->errhandler = MPI_ERRORS_ARE_FATAL; + this->headcount = 0; + this->itemcount = 0; + this->initialized = 0; + this->nblocks = 0; + this->need_to_init = 1; + this->nextfree = NULL; + for (int index = 0; index < MPI_STUBS_MAX_BLOCKS; index++) { - this->errhandler = MPI_ERRORS_ARE_FATAL; - this->headcount = 0; - this->itemcount = 0; - this->initialized = 0; - this->nblocks = 0; - this->need_to_init = 1; - this->nextfree = NULL; - for (int index=0; indexblocks[index] = NULL; } + this->blocks[index] = NULL; } + } - ~MPI_Stubs_Data_struct() {}; + ~MPI_Stubs_Data_struct(){}; } MPI_Stubs_Data_type; - -#endif // ifndef MPI_STUBS_INTERNAL_H +#endif // ifndef MPI_STUBS_INTERNAL_H diff --git a/src/qs_assert.hh b/src/qs_assert.hh index f05158c6..25db9c39 100644 --- a/src/qs_assert.hh +++ b/src/qs_assert.hh @@ -1,18 +1,27 @@ -#include +/* +Copyright 2019 Advanced Micro Devices -#if defined HAVE_HIP -#define __HIP_PLATFORM_AMD__ -#include -#include -#endif +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ -#if defined __CUDA_ARCH__ || defined __HIP_DEVICE_COMPILE__ -#define qs_assert( cond) \ + +#include + +#ifdef DEBUG +#define qs_assert( cond) \ do \ { \ if (!(cond)) \ { \ - printf("ERROR\n"); \ + printf("file=%s: line=%d ERROR\n",__FILE__,__LINE__); \ } \ } while(0) #else @@ -21,7 +30,6 @@ { \ if (!(cond)) \ { \ - printf("file=%s: line=%d ERROR\n",__FILE__,__LINE__); \ } \ } while(0) #endif diff --git a/src/utilsMpi.cc.dp.cpp b/src/utilsMpi.cc.dp.cpp new file mode 100644 index 00000000..b0d41ba6 --- /dev/null +++ b/src/utilsMpi.cc.dp.cpp @@ -0,0 +1,386 @@ +/* +Copyright 2019 Advanced Micro Devices + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "utilsMpi.hh" +#include +#include // needed for memcpy on some compilers +#include // needed for clock +#include "qs_assert.hh" +#include "macros.hh" +#include "MonteCarlo.hh" +#include "MC_Processor_Info.hh" +#include "Globals.hh" + +#ifdef HAVE_MPI + +void mpiInit( int *argc, char ***argv) +{ +#ifdef HAVE_OPENMP + { // limit scope + char const* const provided_string[4] = \ + {"MPI_THREAD_SINGLE","MPI_THREAD_FUNNELED","MPI_THREAD_SERIALIZED","MPI_THREAD_MULTIPLE"}; + int provided, required = MPI_THREAD_FUNNELED; + + int err = MPI_Init_thread(argc, argv, required, &provided); + qs_assert(err == MPI_SUCCESS); + + int rank = -1; + mpiComm_rank(MPI_COMM_WORLD, &rank); + if (rank == 0) + fprintf(stdout,"MPI Initialized : %s\n", provided_string[provided]); + + if ((required > MPI_THREAD_SINGLE) && (required > provided)) + { + printf("MPI-OpenMP Error.\n\tCode requires %s thread support. 
MPI library provides %s support.\n", + provided_string[required],provided_string[provided]); + qs_assert(false); + } + } // limit scope + +#else + { // limit scope + int err = MPI_Init(argc, argv); + qs_assert(err == MPI_SUCCESS); + } //limit scope + +#endif + +} + + +double mpiWtime( void ) { return MPI_Wtime(); } + +int mpiComm_split ( MPI_Comm comm, int color, int key, MPI_Comm *newcomm) +{ + qs_assert(MPI_Comm_split(comm, color, key, newcomm) == MPI_SUCCESS); + return MPI_SUCCESS; +} + +void mpiComm_rank( MPI_Comm comm, int *rank ) { qs_assert(MPI_Comm_rank(comm, rank) == MPI_SUCCESS); } +void mpiCancel( MPI_Request *request ) { qs_assert(MPI_Cancel(request) == MPI_SUCCESS); } +void mpiTest_cancelled( MPI_Status *status, int *flag ) { qs_assert(MPI_Test_cancelled(status, flag) == MPI_SUCCESS); } +void mpiTest( MPI_Request *request, int *flag, MPI_Status * status) { qs_assert(MPI_Test(request, flag, status) == MPI_SUCCESS); } +void mpiWait( MPI_Request *request, MPI_Status *status ) { qs_assert(MPI_Wait(request, status) == MPI_SUCCESS); } +void mpiComm_size( MPI_Comm comm, int *size ) { qs_assert(MPI_Comm_size(comm, size) == MPI_SUCCESS); } +void mpiBarrier( MPI_Comm comm) { qs_assert(MPI_Barrier(comm) == MPI_SUCCESS); } +void mpiGet_version( int *version, int *subversion ) { qs_assert(MPI_Get_version(version, subversion) == MPI_SUCCESS); } +void mpiFinalize( void ) { qs_assert(MPI_Finalize() == MPI_SUCCESS); } +void mpiAbort( MPI_Comm comm, int errorcode ) { qs_assert(MPI_Abort(comm, errorcode) == MPI_SUCCESS); } +void mpiRequestFree( MPI_Request *request ){qs_assert( MPI_Request_free( request ) == MPI_SUCCESS);} + +void mpiScan( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) + { qs_assert(MPI_Scan(sendbuf, recvbuf, count, datatype, operation, comm) == MPI_SUCCESS); } +void mpiType_commit(MPI_Datatype *datatype ) + { qs_assert(MPI_Type_commit( datatype ) == MPI_SUCCESS); } +void mpiType_contiguous(int count, 
MPI_Datatype old_type, MPI_Datatype *newtype) + { qs_assert(MPI_Type_contiguous(count, old_type, newtype) == MPI_SUCCESS); } +void mpiWaitall( int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses ) + { qs_assert(MPI_Waitall(count, array_of_requests, array_of_statuses) == MPI_SUCCESS); } +void mpiAllreduce ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) + { qs_assert(MPI_Allreduce(sendbuf, recvbuf, count, datatype, operation, comm) == MPI_SUCCESS); } +void mpiIAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request) +#ifdef HAVE_ASYNC_MPI + { qs_assert(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, operation, comm, request) == MPI_SUCCESS); } +#else + { qs_assert(MPI_Allreduce(sendbuf, recvbuf, count, datatype, operation, comm ) == MPI_SUCCESS); } +#endif +void mpiReduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ) + { qs_assert(MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm) == MPI_SUCCESS); } +void mpiGather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) + { qs_assert(MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) == MPI_SUCCESS); } +void mpiBcast( void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) + { qs_assert(MPI_Bcast(buf, count, datatype, root, comm) == MPI_SUCCESS); } +void mpiIrecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request) + { qs_assert(MPI_Irecv(buf, count, datatype, source, tag, comm, request) == MPI_SUCCESS); } +void mpiRecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) + { qs_assert(MPI_Recv(buf, count, datatype, source, tag, comm, status) == MPI_SUCCESS); } +void mpiIsend(void *buf, int 
count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) + { qs_assert(MPI_Isend(buf, count, datatype, dest, tag, comm, request) == MPI_SUCCESS); } +void mpiSend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) + { qs_assert(MPI_Send(buf, count, datatype, dest, tag, comm) == MPI_SUCCESS); } + + // ------------------------------------------------------------------------------- + // ------------------------------------------------------------------------------- +#else // HAVE_MPI is not defined : Serial (non-MPI) implementation of necessary routines + // ------------------------------------------------------------------------------- + // ------------------------------------------------------------------------------- + +#include "mpi_stubs_internal.hh" // This will be our internal C++ structs. + +static Handleitem *init_block(int block, Handleitem *b); +static void init_handles(); +static MPI_Comm mpi_stubs_comm_new(); +static void mpi_stubs_alloc_handle(int *handle, void **data); +static pList mpi_stubs_list_new(); + +static MPI_Stubs_Data_type mpi_stubs_data; + + +// These slot numbers must match the #define of the data type in utilsMpi.hh +static size_t mpi_datatype_sizes[MPI_UNSIGNED_LONG_LONG+1] = +{ + sizeof(char), // slot 0 is not used + sizeof(unsigned char), // slot 1 MPI_Byte + sizeof(int), // slot 2 MPI_Int + sizeof(double), // slot 3 MPI_Double + sizeof(long long int), // slot 4 MPI_Long_Long + sizeof(unsigned long long) // slot 5 MPI_Unsigned_Long_Long +}; + +void mpiReduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ) +{ + if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) + { printf("%s:%d - MPI_Reduce sendbuf or recvbuf is NULL \n", __FILE__, __LINE__); qs_assert(false); } + + if (root != 0) + { printf("%s:%d - MPI_Reduce: bad root = %d\n", __FILE__, __LINE__, root); qs_assert(false); } + + switch (datatype) + { + case MPI_INT: + 
case MPI_LONG_LONG: + case MPI_DOUBLE: + case MPI_UNSIGNED_LONG_LONG: + if ((sendbuf != NULL) && (recvbuf != NULL)) + memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); + break; + default: + printf("%s:%d - MPI_Reduce type (%d) not implemented.", __FILE__, __LINE__,datatype); qs_assert(false); + } +} + +void mpiAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) +{ + if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) + { printf("%s:%d - MPI_Allreduce sendbuf or recvbuf is NULL \n",__FILE__, __LINE__); qs_assert(false); } + + switch (datatype) + { + case MPI_INT: + case MPI_LONG_LONG: + case MPI_DOUBLE: + case MPI_UNSIGNED_LONG_LONG: + if ((sendbuf != NULL) && (recvbuf != NULL)) + memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); + break; + default: + printf("%s:%d - MPI_Allreduce type (%d) not implemented.", __FILE__, __LINE__, datatype); + qs_assert(false); + } +} + +void mpiIAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request) +{ + if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) + { printf("%s:%d - MPI_Allreduce sendbuf or recvbuf is NULL \n",__FILE__, __LINE__); qs_assert(false); } + + switch (datatype) + { + case MPI_INT: + case MPI_LONG_LONG: + case MPI_DOUBLE: + case MPI_UNSIGNED_LONG_LONG: + if ((sendbuf != NULL) && (recvbuf != NULL)) + memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); + break; + default: + printf("%s:%d - MPI_Allreduce type (%d) not implemented.", __FILE__, __LINE__, datatype); + qs_assert(false); + } +} + +void mpiScan( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) +{ + if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) + { printf("%s:%d - MPI_Scan sendbuf or recvbuf is NULL \n",__FILE__, __LINE__); qs_assert(false); } + + switch (datatype) + { + case MPI_INT: + case MPI_LONG_LONG: + 
case MPI_DOUBLE: + case MPI_UNSIGNED_LONG_LONG: + if ((sendbuf != NULL) && (recvbuf != NULL)) + memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); + break; + default: + printf("%s:%d - MPI_Scan type (%d) not implemented.", __FILE__, __LINE__, datatype); + qs_assert(false); + } +} + +void mpiGather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) +{ + if (sendcount != recvcount) + { printf("%s:%d - MPI_Gather sendcount=%d != recvcount=%d\n", __FILE__, __LINE__, sendcount, recvcount); qs_assert(false); } + + if (sendtype != recvtype) + { printf("%s:%d - MPI_Gather sendtype=%d != recvtype=%d\n", __FILE__, __LINE__, sendtype, recvtype); qs_assert(false); } + + if (((sendbuf == NULL) || (recvbuf == NULL)) && (sendcount > 0)) + { printf("%s:%d - MPI_Gather sendbuf or recvbuf is NULL \n", __FILE__, __LINE__); qs_assert(false); } + + if (root != 0) + { fprintf(stderr,"%s:%d - MPI_Gather bad root = %d\n", __FILE__, __LINE__,root); qs_assert(false); } + + switch (recvtype) + { + case MPI_INT: + case MPI_LONG_LONG: + case MPI_DOUBLE: + case MPI_UNSIGNED_LONG_LONG: + if ((sendbuf != NULL) && (recvbuf != NULL)) + memcpy(recvbuf, sendbuf, recvcount * mpi_datatype_sizes[recvtype]); + break; + default: + printf("%s:%d - MPI_Gather type (%d) not implemented.", __FILE__, __LINE__, recvtype); + qs_assert(false); + } +} + +double mpiWtime (void) +{ + double value; + value = (double)clock() / (double)CLOCKS_PER_SEC; + return value; +} + +static Handleitem *init_block(int block, Handleitem *b) +{ + for (int i=0; inext; // Skip over using item 0 + newh->next = NULL; + + mpi_stubs_data.blocks[0] = mpi_stubs_data.block0; + mpi_stubs_data.nblocks = 1; + + for (int i=1; inext; + newh->next = NULL; + + *handle = newh->handle; + *data = &(newh->data); + + return; + } + + /* there is nothing free, so allocate a newh block and add it + * to mpi_stubs_data.blocks[] + */ + + if (nblocks == 
MPI_STUBS_MAX_BLOCKS) + { + fprintf(stderr,"%s:%d - allocate_handle: max %d active handles exceeded\n", + __FILE__, __LINE__, MPI_STUBS_MAX_BLOCKS*MPI_STUBS_BLOCK_ITEMS); + abort(); + } + + MC_MALLOC(mpi_stubs_data.blocks[nblocks], MPI_STUBS_BLOCK_ITEMS, Handleitem); + + newh = init_block(nblocks, mpi_stubs_data.blocks[nblocks]); + + mpi_stubs_data.nextfree = newh->next; + newh->next = NULL; + + *handle = newh->handle; + *data = &(newh->data); + + mpi_stubs_data.nblocks++; // DON'T FORGET THIS!!!! +} + +static pList mpi_stubs_list_new() +{ + pList list = NULL; + + MC_MALLOC(list, 1, List); + + list->head = NULL; + list->tail = NULL; + list->count = 0; + + mpi_stubs_data.headcount++; + return(list); +} + + + +static MPI_Comm mpi_stubs_comm_new() +{ + MPI_Comm chandle; + Comm *cptr; + static int num = 0; + + mpi_stubs_alloc_handle(&chandle,(void **) &cptr); + + cptr->sendlist = mpi_stubs_list_new(); + cptr->recvlist = mpi_stubs_list_new(); + + cptr->num = num++; + cptr->name = NULL; + + return(chandle); +} + +int mpiComm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm) +{ + + if (color == MPI_UNDEFINED) + { + *newcomm = MPI_COMM_NULL; + } + else + { + *newcomm = mpi_stubs_comm_new(); + } + + return(MPI_SUCCESS); +} + + + + +#endif // end #else HAVE_MPI diff --git a/src/utilsMpi.hh b/src/utilsMpi.hh index b80d41cc..c60c1a7c 100644 --- a/src/utilsMpi.hh +++ b/src/utilsMpi.hh @@ -3,123 +3,147 @@ #ifdef HAVE_MPI -#if defined (GNU_PERMISSIVE) +#if defined(GNU_PERMISSIVE) #pragma GCC diagnostic ignored "-fpermissive" #endif #include -#if defined (GNU_PERMISSIVE) +#if defined(GNU_PERMISSIVE) #pragma GCC diagnostic ignored "-pedantic" #endif #ifndef MPI_INT64_T -#define MPI_INT64_T MPI_LONG_LONG +#define MPI_INT64_T MPI_LONG_LONG #endif #ifndef MPI_UINT64_T #define MPI_UINT64_T MPI_UNSIGNED_LONG_LONG #endif -double mpiWtime ( void ); -void mpiTest_cancelled ( MPI_Status *status, int *flag ); -void mpiInit ( int * argc, char *** argv ); -void mpiFinalize ( void ); 
-void mpiComm_rank ( MPI_Comm comm, int *rank ); -void mpiComm_size ( MPI_Comm comm, int *size ); -int mpiComm_split ( MPI_Comm comm, int color, int key, MPI_Comm *newcomm); -void mpiBarrier ( MPI_Comm comm ); -void mpiGet_version ( int *version, int *subversion ); -void mpiReduce ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ); -void mpiGather ( void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); -void mpiBcast ( void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); -void mpiCancel ( MPI_Request *request ); -void mpiWait ( MPI_Request *request, MPI_Status *status ); -void mpiWaitall ( int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses ); -void mpiTest ( MPI_Request *, int *, MPI_Status * ); -void mpiIrecv ( void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); -void mpiRecv ( void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status); -void mpiIsend ( void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); -void mpiSend ( void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); -void mpiType_contiguous( int count, MPI_Datatype old_type, MPI_Datatype *newtype ); -void mpiType_commit ( MPI_Datatype *datatype ) ; -void mpiAllreduce ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ); -void mpiIAllreduce ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request); -void mpiScan ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ); -void mpiAbort ( MPI_Comm comm, int errorcode ); -void mpiRequestFree ( MPI_Request *request ); +double mpiWtime(void); +void mpiTest_cancelled(MPI_Status 
*status, int *flag); +void mpiInit(int *argc, char ***argv); +void mpiFinalize(void); +void mpiComm_rank(MPI_Comm comm, int *rank); +void mpiComm_size(MPI_Comm comm, int *size); +int mpiComm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm); +void mpiBarrier(MPI_Comm comm); +void mpiGet_version(int *version, int *subversion); +void mpiReduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +void mpiGather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); +void mpiBcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm); +void mpiCancel(MPI_Request *request); +void mpiWait(MPI_Request *request, MPI_Status *status); +void mpiWaitall(int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses); +void mpiTest(MPI_Request *, int *, MPI_Status *); +void mpiIrecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request); +void mpiRecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status); +void mpiIsend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request); +void mpiSend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm); +void mpiType_contiguous(int count, MPI_Datatype old_type, MPI_Datatype *newtype); +void mpiType_commit(MPI_Datatype *datatype); +void mpiAllreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm); +void mpiIAllreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request); +void mpiScan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm); +void mpiAbort(MPI_Comm comm, int errorcode); +void mpiRequestFree(MPI_Request *request); // HAVE_MPI not defined, define a 
serial version of MPI that works for us #else #include "qs_assert.hh" -#include -#include - -typedef struct { - int count ; - int MPI_SOURCE ; - int MPI_TAG ; - int MPI_ERROR ; +#include +#include + +typedef struct +{ + int count; + int MPI_SOURCE; + int MPI_TAG; + int MPI_ERROR; } MPI_Status; -typedef int MPI_Datatype ; -typedef int MPI_Comm ; -typedef int MPI_Request ; -typedef int MPI_Op ; +typedef int MPI_Datatype; +typedef int MPI_Comm; +typedef int MPI_Request; +typedef int MPI_Op; // If more datatypes are added here, they must also be added to mpi_datatype_sizes in utilsMpi.cc -#define MPI_BYTE ((MPI_Datatype)1) // MPI official type is 3 -#define MPI_INT ((MPI_Datatype)2) // MPI official type is 6 -#define MPI_DOUBLE ((MPI_Datatype)3) // MPI official type is 11 -#define MPI_LONG_LONG ((MPI_Datatype)4) // MPI official type is 13 -#define MPI_UNSIGNED_LONG_LONG ((MPI_Datatype)5) // MPI official type is 35 +#define MPI_BYTE ((MPI_Datatype)1) // MPI official type is 3 +#define MPI_INT ((MPI_Datatype)2) // MPI official type is 6 +#define MPI_DOUBLE ((MPI_Datatype)3) // MPI official type is 11 +#define MPI_LONG_LONG ((MPI_Datatype)4) // MPI official type is 13 +#define MPI_UNSIGNED_LONG_LONG ((MPI_Datatype)5) // MPI official type is 35 -#define MPI_REQUEST_NULL ((MPI_Request)0) -#define MPI_STATUS_IGNORE ((MPI_Status *)0) -#define MPI_STATUSES_IGNORE ((MPI_Status *)0) +#define MPI_REQUEST_NULL ((MPI_Request)0) +#define MPI_STATUS_IGNORE ((MPI_Status *)0) +#define MPI_STATUSES_IGNORE ((MPI_Status *)0) -#define MPI_INT64_T MPI_LONG_LONG +#define MPI_INT64_T MPI_LONG_LONG #define MPI_UINT64_T MPI_UNSIGNED_LONG_LONG -#define MPI_COMM_WORLD (1) - -#define MPI_MAX (1) -#define MPI_MIN (2) -#define MPI_SUM (3) - -inline void mpiInit ( int * argc, char *** argv ) { return; } -inline void mpiFinalize ( void ) { return; } -inline void mpiCancel ( MPI_Request *request) { return; } -inline void mpiTest_cancelled ( MPI_Status *status, int *flag ) { return; } -inline void 
mpiWait ( MPI_Request *request, MPI_Status *status ) { return; } -inline void mpiWaitall ( int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses ) { return; } -inline void mpiBarrier ( MPI_Comm comm ) { return; } -inline void mpiType_commit ( MPI_Datatype *datatype ) { return; } +#define MPI_COMM_WORLD (1) + +#define MPI_MAX (1) +#define MPI_MIN (2) +#define MPI_SUM (3) + +inline void mpiInit(int *argc, char ***argv) +{ + return; +} +inline void mpiFinalize(void) { return; } +inline void mpiCancel(MPI_Request *request) { return; } +inline void mpiTest_cancelled(MPI_Status *status, int *flag) { return; } +inline void mpiWait(MPI_Request *request, MPI_Status *status) { return; } +inline void mpiWaitall(int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses) { return; } +inline void mpiBarrier(MPI_Comm comm) { return; } +inline void mpiType_commit(MPI_Datatype *datatype) { return; } inline void mpiType_contiguous(int count, MPI_Datatype old_type, MPI_Datatype *newtype) { return; } -inline void mpiComm_rank ( MPI_Comm comm, int *rank ) { *rank = 0; } -inline void mpiComm_size ( MPI_Comm comm, int *size ) { *size = 1; } -inline void mpiGet_version ( int *version, int *subversion ) { *version = 3; *subversion = 0; } -inline void mpiAbort ( MPI_Comm comm, int errorcode ) { fprintf(stderr,"\n\nMPI_Abort called\n\n"); exit(errorcode); } - -inline void mpiTest( MPI_Request *, int *, MPI_Status * ) - { printf ("mpiTest should not be called in serial run\n"); qs_assert(false); } +inline void mpiComm_rank(MPI_Comm comm, int *rank) { *rank = 0; } +inline void mpiComm_size(MPI_Comm comm, int *size) { *size = 1; } +inline void mpiGet_version(int *version, int *subversion) +{ + *version = 3; + *subversion = 0; +} +inline void mpiAbort(MPI_Comm comm, int errorcode) +{ + fprintf(stderr, "\n\nMPI_Abort called\n\n"); + exit(errorcode); +} + +inline void mpiTest(MPI_Request *, int *, MPI_Status *) +{ + printf("mpiTest should not be called in serial 
run\n"); + qs_assert(false); +} inline void mpiIrecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request) - { printf ("mpiIrecv should not be called in serial run\n"); qs_assert(false); } +{ + printf("mpiIrecv should not be called in serial run\n"); + qs_assert(false); +} inline void mpiIsend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) - { printf ("mpiIsend should not be called in serial run\n"); qs_assert(false); } +{ + printf("mpiIsend should not be called in serial run\n"); + qs_assert(false); +} inline void mpiSend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) - { printf ("mpiSend should not be called in serial run\n"); qs_assert(false); } - -inline void mpiBcast( void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm){return;} - -double mpiWtime( void ); -int mpiComm_split( MPI_Comm comm, int color, int key, MPI_Comm *newcomm); -void mpiAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ); -void mpiIAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request); -void mpiScan( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ); -void mpiReduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ); -void mpiGather( void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); - -#endif // end #else HAVE_MPI -#endif // end #ifndef UTILS_MPI_HH +{ + printf("mpiSend should not be called in serial run\n"); + qs_assert(false); +} + +inline void mpiBcast(void *buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) { return; } + +double mpiWtime(void); +int mpiComm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm); 
+void mpiAllreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm); +void mpiIAllreduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request); +void mpiScan(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm); +void mpiReduce(void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm); +void mpiGather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm); + +#endif // end #else HAVE_MPI +#endif // end #ifndef UTILS_MPI_HH From 2ce06058a1b8d89a68d421ef0d8baa0f8fd9ab87 Mon Sep 17 00:00:00 2001 From: skambapugithub Date: Fri, 6 Oct 2023 14:04:46 -0700 Subject: [PATCH 2/4] removed unwanted file --- src/Makefile.dpct | 216 ---------------------------------------------- 1 file changed, 216 deletions(-) delete mode 100644 src/Makefile.dpct diff --git a/src/Makefile.dpct b/src/Makefile.dpct deleted file mode 100644 index 878e02ed..00000000 --- a/src/Makefile.dpct +++ /dev/null @@ -1,216 +0,0 @@ -CC := icpx -fsycl - -LD := $(CC) - -LIB := - -FLAGS := -DHAVE_SYCL -O2 - -TARGET_0_SRC_0 = ./CoralBenchmark.cc -TARGET_0_OBJ_0 = ./CoralBenchmark.o -TARGET_0_FLAG_0 = ${FLAGS} - -TARGET_0_SRC_1 = ./CycleTracking.cc -TARGET_0_OBJ_1 = ./CycleTracking.o -TARGET_0_FLAG_1 = ${FLAGS} - -TARGET_0_SRC_2 = ./DecompositionObject.cc -TARGET_0_OBJ_2 = ./DecompositionObject.o -TARGET_0_FLAG_2 = ${FLAGS} - -TARGET_0_SRC_3 = ./DirectionCosine.cc.dp.cpp -TARGET_0_OBJ_3 = ./DirectionCosine.cc.dp.o -TARGET_0_FLAG_3 = ${FLAGS} - -TARGET_0_SRC_4 = ./EnergySpectrum.cc -TARGET_0_OBJ_4 = ./EnergySpectrum.o -TARGET_0_FLAG_4 = ${FLAGS} - -TARGET_0_SRC_5 = ./GlobalFccGrid.cc.dp.cpp -TARGET_0_OBJ_5 = ./GlobalFccGrid.cc.dp.o -TARGET_0_FLAG_5 = ${FLAGS} - -TARGET_0_SRC_6 = ./GridAssignmentObject.cc.dp.cpp -TARGET_0_OBJ_6 = 
./GridAssignmentObject.cc.dp.o -TARGET_0_FLAG_6 = ${FLAGS} - -TARGET_0_SRC_7 = ./InputBlock.cc -TARGET_0_OBJ_7 = ./InputBlock.o -TARGET_0_FLAG_7 = ${FLAGS} - -TARGET_0_SRC_8 = ./MC_Base_Particle.cc -TARGET_0_OBJ_8 = ./MC_Base_Particle.o -TARGET_0_FLAG_8 = ${FLAGS} - -TARGET_0_SRC_9 = ./MC_Domain.cc.dp.cpp -TARGET_0_OBJ_9 = ./MC_Domain.cc.dp.o -TARGET_0_FLAG_9 = ${FLAGS} - -TARGET_0_SRC_10 = ./MC_Fast_Timer.cc.dp.cpp -TARGET_0_OBJ_10 = ./MC_Fast_Timer.cc.dp.o -TARGET_0_FLAG_10 = ${FLAGS} - -TARGET_0_SRC_11 = ./MC_Particle_Buffer.cc -TARGET_0_OBJ_11 = ./MC_Particle_Buffer.o -TARGET_0_FLAG_11 = ${FLAGS} - -TARGET_0_SRC_12 = ./MeshPartition.cc -TARGET_0_OBJ_12 = ./MeshPartition.o -TARGET_0_FLAG_12 = ${FLAGS} - -TARGET_0_SRC_13 = ./MonteCarlo.cc.dp.cpp -TARGET_0_OBJ_13 = ./MonteCarlo.cc.dp.o -TARGET_0_FLAG_13 = ${FLAGS} - -TARGET_0_SRC_14 = ./MpiCommObject.cc -TARGET_0_OBJ_14 = ./MpiCommObject.o -TARGET_0_FLAG_14 = ${FLAGS} - -TARGET_0_SRC_15 = ./Parameters.cc -TARGET_0_OBJ_15 = ./Parameters.o -TARGET_0_FLAG_15 = ${FLAGS} - -TARGET_0_SRC_16 = ./ParticleVault.cc -TARGET_0_OBJ_16 = ./ParticleVault.o -TARGET_0_FLAG_16 = ${FLAGS} - -TARGET_0_SRC_17 = ./ParticleVaultContainer.cc -TARGET_0_OBJ_17 = ./ParticleVaultContainer.o -TARGET_0_FLAG_17 = ${FLAGS} - -TARGET_0_SRC_18 = ./PopulationControl.cc.dp.cpp -TARGET_0_OBJ_18 = ./PopulationControl.cc.dp.o -TARGET_0_FLAG_18 = ${FLAGS} - -TARGET_0_SRC_19 = ./SharedMemoryCommObject.cc -TARGET_0_OBJ_19 = ./SharedMemoryCommObject.o -TARGET_0_FLAG_19 = ${FLAGS} - -TARGET_0_SRC_20 = ./Tallies.cc -TARGET_0_OBJ_20 = ./Tallies.o -TARGET_0_FLAG_20 = ${FLAGS} - -TARGET_0_SRC_21 = ./cmdLineParser.cc -TARGET_0_OBJ_21 = ./cmdLineParser.o -TARGET_0_FLAG_21 = ${FLAGS} - -TARGET_0_SRC_22 = ./cudaFunctions.cc.dp.cpp -TARGET_0_OBJ_22 = ./cudaFunctions.cc.dp.o -TARGET_0_FLAG_22 = ${FLAGS} - -TARGET_0_SRC_23 = ./initMC.cc.dp.cpp -TARGET_0_OBJ_23 = ./initMC.cc.dp.o -TARGET_0_FLAG_23 = ${FLAGS} - -TARGET_0_SRC_24 = ./main.cc.dp.cpp -TARGET_0_OBJ_24 = 
./main.cc.dp.o -TARGET_0_FLAG_24 = ${FLAGS} - -TARGET_0_SRC_25 = ./parseUtils.cc -TARGET_0_OBJ_25 = ./parseUtils.o -TARGET_0_FLAG_25 = ${FLAGS} - -TARGET_0_SRC_26 = ./utils.cc -TARGET_0_OBJ_26 = ./utils.o -TARGET_0_FLAG_26 = ${FLAGS} - -TARGET_0_SRC_27 = ./utilsMpi.cc.dp.cpp -TARGET_0_OBJ_27 = ./utilsMpi.cc.dp.o -TARGET_0_FLAG_27 = ${FLAGS} - -TARGET_0 := ./qs - -TARGET := ${TARGET_0} - -.PHONY:all clean -OBJS_0 := ${TARGET_0_OBJ_0} ${TARGET_0_OBJ_1} ${TARGET_0_OBJ_2} ${TARGET_0_OBJ_3} ${TARGET_0_OBJ_4} ${TARGET_0_OBJ_5} ${TARGET_0_OBJ_6} ${TARGET_0_OBJ_7} ${TARGET_0_OBJ_8} ${TARGET_0_OBJ_9} ${TARGET_0_OBJ_10} ${TARGET_0_OBJ_11} ${TARGET_0_OBJ_12} ${TARGET_0_OBJ_13} ${TARGET_0_OBJ_14} ${TARGET_0_OBJ_15} ${TARGET_0_OBJ_16} ${TARGET_0_OBJ_17} ${TARGET_0_OBJ_18} ${TARGET_0_OBJ_19} ${TARGET_0_OBJ_20} ${TARGET_0_OBJ_21} ${TARGET_0_OBJ_22} ${TARGET_0_OBJ_23} ${TARGET_0_OBJ_24} ${TARGET_0_OBJ_25} ${TARGET_0_OBJ_26} ${TARGET_0_OBJ_27} -all: $(TARGET) -$(TARGET_0): $(OBJS_0) - $(CC) -o $@ $^ $(LIB) - -$(TARGET_0_OBJ_0):$(TARGET_0_SRC_0) - $(CC) -c ${TARGET_0_SRC_0} -o ${TARGET_0_OBJ_0} $(TARGET_0_FLAG_0) - -$(TARGET_0_OBJ_1):$(TARGET_0_SRC_1) - $(CC) -c ${TARGET_0_SRC_1} -o ${TARGET_0_OBJ_1} $(TARGET_0_FLAG_1) - -$(TARGET_0_OBJ_2):$(TARGET_0_SRC_2) - $(CC) -c ${TARGET_0_SRC_2} -o ${TARGET_0_OBJ_2} $(TARGET_0_FLAG_2) - -$(TARGET_0_OBJ_3):$(TARGET_0_SRC_3) - $(CC) -c ${TARGET_0_SRC_3} -o ${TARGET_0_OBJ_3} $(TARGET_0_FLAG_3) - -$(TARGET_0_OBJ_4):$(TARGET_0_SRC_4) - $(CC) -c ${TARGET_0_SRC_4} -o ${TARGET_0_OBJ_4} $(TARGET_0_FLAG_4) - -$(TARGET_0_OBJ_5):$(TARGET_0_SRC_5) - $(CC) -c ${TARGET_0_SRC_5} -o ${TARGET_0_OBJ_5} $(TARGET_0_FLAG_5) - -$(TARGET_0_OBJ_6):$(TARGET_0_SRC_6) - $(CC) -c ${TARGET_0_SRC_6} -o ${TARGET_0_OBJ_6} $(TARGET_0_FLAG_6) - -$(TARGET_0_OBJ_7):$(TARGET_0_SRC_7) - $(CC) -c ${TARGET_0_SRC_7} -o ${TARGET_0_OBJ_7} $(TARGET_0_FLAG_7) - -$(TARGET_0_OBJ_8):$(TARGET_0_SRC_8) - $(CC) -c ${TARGET_0_SRC_8} -o ${TARGET_0_OBJ_8} $(TARGET_0_FLAG_8) - 
-$(TARGET_0_OBJ_9):$(TARGET_0_SRC_9) - $(CC) -c ${TARGET_0_SRC_9} -o ${TARGET_0_OBJ_9} $(TARGET_0_FLAG_9) - -$(TARGET_0_OBJ_10):$(TARGET_0_SRC_10) - $(CC) -c ${TARGET_0_SRC_10} -o ${TARGET_0_OBJ_10} $(TARGET_0_FLAG_10) - -$(TARGET_0_OBJ_11):$(TARGET_0_SRC_11) - $(CC) -c ${TARGET_0_SRC_11} -o ${TARGET_0_OBJ_11} $(TARGET_0_FLAG_11) - -$(TARGET_0_OBJ_12):$(TARGET_0_SRC_12) - $(CC) -c ${TARGET_0_SRC_12} -o ${TARGET_0_OBJ_12} $(TARGET_0_FLAG_12) - -$(TARGET_0_OBJ_13):$(TARGET_0_SRC_13) - $(CC) -c ${TARGET_0_SRC_13} -o ${TARGET_0_OBJ_13} $(TARGET_0_FLAG_13) - -$(TARGET_0_OBJ_14):$(TARGET_0_SRC_14) - $(CC) -c ${TARGET_0_SRC_14} -o ${TARGET_0_OBJ_14} $(TARGET_0_FLAG_14) - -$(TARGET_0_OBJ_15):$(TARGET_0_SRC_15) - $(CC) -c ${TARGET_0_SRC_15} -o ${TARGET_0_OBJ_15} $(TARGET_0_FLAG_15) - -$(TARGET_0_OBJ_16):$(TARGET_0_SRC_16) - $(CC) -c ${TARGET_0_SRC_16} -o ${TARGET_0_OBJ_16} $(TARGET_0_FLAG_16) - -$(TARGET_0_OBJ_17):$(TARGET_0_SRC_17) - $(CC) -c ${TARGET_0_SRC_17} -o ${TARGET_0_OBJ_17} $(TARGET_0_FLAG_17) - -$(TARGET_0_OBJ_18):$(TARGET_0_SRC_18) - $(CC) -c ${TARGET_0_SRC_18} -o ${TARGET_0_OBJ_18} $(TARGET_0_FLAG_18) - -$(TARGET_0_OBJ_19):$(TARGET_0_SRC_19) - $(CC) -c ${TARGET_0_SRC_19} -o ${TARGET_0_OBJ_19} $(TARGET_0_FLAG_19) - -$(TARGET_0_OBJ_20):$(TARGET_0_SRC_20) - $(CC) -c ${TARGET_0_SRC_20} -o ${TARGET_0_OBJ_20} $(TARGET_0_FLAG_20) - -$(TARGET_0_OBJ_21):$(TARGET_0_SRC_21) - $(CC) -c ${TARGET_0_SRC_21} -o ${TARGET_0_OBJ_21} $(TARGET_0_FLAG_21) - -$(TARGET_0_OBJ_22):$(TARGET_0_SRC_22) - $(CC) -c ${TARGET_0_SRC_22} -o ${TARGET_0_OBJ_22} $(TARGET_0_FLAG_22) - -$(TARGET_0_OBJ_23):$(TARGET_0_SRC_23) - $(CC) -c ${TARGET_0_SRC_23} -o ${TARGET_0_OBJ_23} $(TARGET_0_FLAG_23) - -$(TARGET_0_OBJ_24):$(TARGET_0_SRC_24) - $(CC) -c ${TARGET_0_SRC_24} -o ${TARGET_0_OBJ_24} $(TARGET_0_FLAG_24) - -$(TARGET_0_OBJ_25):$(TARGET_0_SRC_25) - $(CC) -c ${TARGET_0_SRC_25} -o ${TARGET_0_OBJ_25} $(TARGET_0_FLAG_25) - -$(TARGET_0_OBJ_26):$(TARGET_0_SRC_26) - $(CC) -c ${TARGET_0_SRC_26} -o 
${TARGET_0_OBJ_26} $(TARGET_0_FLAG_26) - -$(TARGET_0_OBJ_27):$(TARGET_0_SRC_27) - $(CC) -c ${TARGET_0_SRC_27} -o ${TARGET_0_OBJ_27} $(TARGET_0_FLAG_27) - -clean: - rm -f ${OBJS_0} $(TARGET) From 74c2b14ed3691e69596bb5694075b179e9dc7e4f Mon Sep 17 00:00:00 2001 From: skambapugithub Date: Fri, 6 Oct 2023 14:07:27 -0700 Subject: [PATCH 3/4] removed unwanted files --- src/CollisionEvent.cc | 151 -- src/DirectionCosine.cc | 13 - src/Doxyfile | 1519 ----------------- src/GlobalFccGrid.cc | 160 -- src/GridAssignmentObject.cc | 195 --- src/MCT.cc | 667 -------- src/MC_Adjacent_Facet.cc | 21 - src/MC_Domain.cc | 473 ----- src/MC_Facet_Crossing_Event.cc | 72 - src/MC_Fast_Timer.cc | 161 -- src/MC_Load_Particle.cc | 31 - src/MC_Location.cc | 14 - src/MC_RNG_State.cc | 115 -- src/MC_Segment_Outcome.cc | 249 --- src/MC_SourceNow.cc | 178 -- src/MacroscopicCrossSection.cc | 81 - src/Makefile | 367 ---- src/MonteCarlo.cc | 155 -- src/NuclearData.cc | 256 --- src/PhysicalConstants.cc | 22 - src/PopulationControl.cc | 171 -- src/QS_atomics.hh | 149 -- src/READ.ME.HOW.TO.RUN | 135 -- src/SendQueue.cc | 59 - src/cudaFunctions.cc | 86 - src/gpuPortability.hh | 52 - src/initMC.cc | 485 ------ src/main.cc | 325 ---- src/mc_omp_parallel_for_schedule_static_if.hh | 4 - ..._for_schedule_static_num_physical_cores.hh | 6 - src/utilsMpi.cc | 374 ---- 31 files changed, 6746 deletions(-) delete mode 100644 src/CollisionEvent.cc delete mode 100644 src/DirectionCosine.cc delete mode 100644 src/Doxyfile delete mode 100644 src/GlobalFccGrid.cc delete mode 100644 src/GridAssignmentObject.cc delete mode 100644 src/MCT.cc delete mode 100644 src/MC_Adjacent_Facet.cc delete mode 100644 src/MC_Domain.cc delete mode 100644 src/MC_Facet_Crossing_Event.cc delete mode 100644 src/MC_Fast_Timer.cc delete mode 100644 src/MC_Load_Particle.cc delete mode 100644 src/MC_Location.cc delete mode 100644 src/MC_RNG_State.cc delete mode 100644 src/MC_Segment_Outcome.cc delete mode 100644 src/MC_SourceNow.cc delete mode 
100644 src/MacroscopicCrossSection.cc delete mode 100644 src/Makefile delete mode 100644 src/MonteCarlo.cc delete mode 100644 src/NuclearData.cc delete mode 100644 src/PhysicalConstants.cc delete mode 100644 src/PopulationControl.cc delete mode 100644 src/QS_atomics.hh delete mode 100644 src/READ.ME.HOW.TO.RUN delete mode 100644 src/SendQueue.cc delete mode 100644 src/cudaFunctions.cc delete mode 100644 src/gpuPortability.hh delete mode 100644 src/initMC.cc delete mode 100644 src/main.cc delete mode 100644 src/mc_omp_parallel_for_schedule_static_if.hh delete mode 100644 src/mc_omp_parallel_for_schedule_static_num_physical_cores.hh delete mode 100644 src/utilsMpi.cc diff --git a/src/CollisionEvent.cc b/src/CollisionEvent.cc deleted file mode 100644 index 3ba2b510..00000000 --- a/src/CollisionEvent.cc +++ /dev/null @@ -1,151 +0,0 @@ -#include "CollisionEvent.hh" -#include "MC_Particle.hh" -#include "NuclearData.hh" -#include "DirectionCosine.hh" -#include "MonteCarlo.hh" -#include "MC_Cell_State.hh" -#include "MaterialDatabase.hh" -#include "MacroscopicCrossSection.hh" -#include "MC_Base_Particle.hh" -#include "ParticleVaultContainer.hh" -#include "PhysicalConstants.hh" -#include "DeclareMacro.hh" -#include "QS_atomics.hh" - -#define MAX_PRODUCTION_SIZE 4 - -//---------------------------------------------------------------------------------------------------------------------- -// Routine MC_Collision_Event determines the isotope, reaction and secondary (projectile) -// particle characteristics for a collision event. -// -// Return true if the particle will continue. 
-//---------------------------------------------------------------------------------------------------------------------- - -HOST_DEVICE -void updateTrajectory( double energy, double angle, MC_Particle& particle ) -{ - particle.kinetic_energy = energy; - double cosTheta = angle; - double randomNumber = rngSample(&particle.random_number_seed); - double phi = 2 * 3.14159265 * randomNumber; - double sinPhi = sin(phi); - double cosPhi = cos(phi); - double sinTheta = sqrt((1.0 - (cosTheta*cosTheta))); - particle.direction_cosine.Rotate3DVector(sinTheta, cosTheta, sinPhi, cosPhi); - double speed = (PhysicalConstants::_speedOfLight * - sqrt((1.0 - ((PhysicalConstants::_neutronRestMassEnergy * - PhysicalConstants::_neutronRestMassEnergy) / - ((energy + PhysicalConstants::_neutronRestMassEnergy) * - (energy + PhysicalConstants::_neutronRestMassEnergy)))))); - particle.velocity.x = speed * particle.direction_cosine.alpha; - particle.velocity.y = speed * particle.direction_cosine.beta; - particle.velocity.z = speed * particle.direction_cosine.gamma; - randomNumber = rngSample(&particle.random_number_seed); - particle.num_mean_free_paths = -1.0*log(randomNumber); -} -HOST_DEVICE_END - -HOST_DEVICE - -bool CollisionEvent(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int tally_index) -{ - const MC_Cell_State &cell = monteCarlo->domain[mc_particle.domain].cell_state[mc_particle.cell]; - - int globalMatIndex = cell._material; - - //------------------------------------------------------------------------------------------------------------------ - // Pick the isotope and reaction. 
- //------------------------------------------------------------------------------------------------------------------ - double randomNumber = rngSample(&mc_particle.random_number_seed); - double totalCrossSection = mc_particle.totalCrossSection; - double currentCrossSection = totalCrossSection * randomNumber; - int selectedIso = -1; - int selectedUniqueNumber = -1; - int selectedReact = -1; - int numIsos = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size(); - - for (int isoIndex = 0; isoIndex < numIsos && currentCrossSection >= 0; isoIndex++) - { - int uniqueNumber = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid; - int numReacts = monteCarlo->_nuclearData->getNumberReactions(uniqueNumber); - for (int reactIndex = 0; reactIndex < numReacts; reactIndex++) - { - currentCrossSection -= macroscopicCrossSection(monteCarlo, reactIndex, mc_particle.domain, mc_particle.cell, - isoIndex, mc_particle.energy_group); - if (currentCrossSection < 0) - { - selectedIso = isoIndex; - selectedUniqueNumber = uniqueNumber; - selectedReact = reactIndex; - break; - } - } - } - qs_assert(selectedIso != -1); - - //------------------------------------------------------------------------------------------------------------------ - // Do the collision. 
- //------------------------------------------------------------------------------------------------------------------ - double energyOut[MAX_PRODUCTION_SIZE]; - double angleOut[MAX_PRODUCTION_SIZE]; - int nOut = 0; - double mat_mass = monteCarlo->_materialDatabase->_mat[globalMatIndex]._mass; - - monteCarlo->_nuclearData->_isotopes[selectedUniqueNumber]._species[0]._reactions[selectedReact].sampleCollision( - mc_particle.kinetic_energy, mat_mass, &energyOut[0], &angleOut[0], nOut, &(mc_particle.random_number_seed), MAX_PRODUCTION_SIZE ); - - //-------------------------------------------------------------------------------------------------------------- - // Post-Collision Phase 1: - // Tally the collision - //-------------------------------------------------------------------------------------------------------------- - - // Set the reaction for this particle. - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._collision ); - NuclearDataReaction::Enum reactionType = monteCarlo->_nuclearData->_isotopes[selectedUniqueNumber]._species[0].\ - _reactions[selectedReact]._reactionType; - switch (reactionType) - { - case NuclearDataReaction::Scatter: - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._scatter); - break; - case NuclearDataReaction::Absorption: - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._absorb); - break; - case NuclearDataReaction::Fission: - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[tally_index]._fission); - QS::atomicAdd( monteCarlo->_tallies->_balanceTask[tally_index]._produce, (uint64_t) nOut); - break; - case NuclearDataReaction::Undefined: - printf("reactionType invalid\n"); - qs_assert(false); - } - - if( nOut == 0 ) return false; - - for (int secondaryIndex = 1; secondaryIndex < nOut; secondaryIndex++) - { - // Newly created particles start as copies of their parent - MC_Particle secondaryParticle = mc_particle; - secondaryParticle.random_number_seed = 
rngSpawn_Random_Number_Seed(&mc_particle.random_number_seed); - secondaryParticle.identifier = secondaryParticle.random_number_seed; - updateTrajectory( energyOut[secondaryIndex], angleOut[secondaryIndex], secondaryParticle ); - monteCarlo->_particleVaultContainer->addExtraParticle(secondaryParticle); - } - - updateTrajectory( energyOut[0], angleOut[0], mc_particle); - - // If a fission reaction produces secondary particles we also add the original - // particle to the "extras" that we will handle later. This avoids the - // possibility of a particle doing multiple fission reactions in a single - // kernel invocation and overflowing the extra storage with secondary particles. - if ( nOut > 1 ) - monteCarlo->_particleVaultContainer->addExtraParticle(mc_particle); - - //If we are still tracking this particle the update its energy group - mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy); - - return nOut == 1; -} - -HOST_DEVICE_END - diff --git a/src/DirectionCosine.cc b/src/DirectionCosine.cc deleted file mode 100644 index ded7a30b..00000000 --- a/src/DirectionCosine.cc +++ /dev/null @@ -1,13 +0,0 @@ -#include "DirectionCosine.hh" -#include "MC_RNG_State.hh" -#include "PhysicalConstants.hh" - -void DirectionCosine::Sample_Isotropic(uint64_t *seed) -{ - this->gamma = 1.0 - 2.0*rngSample(seed); - double sine_gamma = sqrt((1.0 - (gamma*gamma))); - double phi = PhysicalConstants::_pi*(2.0*rngSample(seed) - 1.0); - - this->alpha = sine_gamma * cos(phi); - this->beta = sine_gamma * sin(phi); -} diff --git a/src/Doxyfile b/src/Doxyfile deleted file mode 100644 index 8eb36430..00000000 --- a/src/Doxyfile +++ /dev/null @@ -1,1519 +0,0 @@ -# Doxyfile 1.6.1 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] 
-# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = Quicksilver - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. 
Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, -# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English -# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, -# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, -# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. 
-# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = YES - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. - -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. 
This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = NO - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 3 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". 
-# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java -# sources only. Doxygen will then generate output that is more tailored for -# Java. For instance, namespaces will be presented as packages, qualified -# scopes will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources only. Doxygen will then generate output that is more tailored for -# Fortran. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for -# VHDL. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Doxygen selects the parser to use depending on the extension of the files it parses. -# With this tag you can assign which parser to use for a given extension. -# Doxygen has a built-in mapping, but you can override or extend it using this tag. -# The format is ext=language, where ext is a file extension, and language is one of -# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, -# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), -# use: inc=Fortran f=C. 
Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should -# set this tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. -# func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. -# Doxygen will parse them like normal C++ but will assume all classes use public -# instead of private inheritance when no explicit protection keyword is present. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate getter -# and setter methods for a property. Setting this option to YES (the default) -# will make doxygen to replace the get and set methods by a property in the -# documentation. This will only work if the methods are indeed getting or -# setting a simple type. If this is not the case, or you want to show the -# methods anyway, you should set this option to NO. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. 
- -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum -# is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically -# be useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. - -TYPEDEF_HIDES_STRUCT = NO - -# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to -# determine which symbols to keep in memory and which to flush to disk. -# When the cache is full, less often used symbols will be written to disk. -# For small to medium size projects (<1000 input files) the default value is -# probably good enough. For larger projects a too small cache size can cause -# doxygen to be busy swapping symbols to and from disk most of the time -# causing a significant performance penality. -# If the system has enough physical memory increasing the cache will improve the -# performance by keeping more symbols in memory. Note that the value works on -# a logarithmic scale so increasing the size by one will rougly double the -# memory usage. The cache size is given by this formula: -# 2^(16+SYMBOL_CACHE_SIZE). 
The valid range is 0..9, the default is 0, -# corresponding to a cache size of 2^16 = 65536 symbols - -SYMBOL_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. When set to YES local -# methods, which are defined in the implementation section but not in -# the interface are included in the documentation. -# If set to NO (the default) only methods in the interface are included. - -EXTRACT_LOCAL_METHODS = YES - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base -# name of the file that contains the anonymous namespace. By default -# anonymous namespace are hidden. - -EXTRACT_ANON_NSPACES = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. 
-# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. 
- -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. If set to NO (the default) -# the group names will appear in their defined order. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be -# sorted by fully-qualified names, including namespaces. If set to -# NO (the default), the class list will be sorted only by class name, -# not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
-# Note: This option applies only to the class list, not to the -# alphabetical list. - -SORT_BY_SCOPE_NAME = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. -# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. 
- -SHOW_USED_FILES = YES - -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = NO - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. -# This will remove the Files entry from the Quick Index and from the -# Folder Tree View (if specified). The default is YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the -# Namespaces page. -# This will remove the Namespaces entry from the Quick Index -# and from the Folder Tree View (if specified). The default is YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command <command> <input-file>, where <command> is the value of -# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file -# provided by doxygen. Whatever the program writes to standard output -# is used as the file version. See the manual for examples. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by -# doxygen. The layout file controls the global structure of the generated output files -# in an output format independent way. To create the layout file that represents -# doxygen's defaults, run doxygen with the -l option. You can optionally specify a -# file name after the option, if omitted DoxygenLayout.xml will be used as the name -# of the layout file.
- -LAYOUT_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = YES - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for -# functions that are documented, but have no documentation for their parameters -# or return value. If set to NO (the default) doxygen will only warn about -# wrong or incomplete parameter documentation, but not about the absence of -# documentation. - -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text.
Optionally the format may contain -# $version, which will be replaced by the version of the file (if it could -# be obtained via FILE_VERSION_FILTER) - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is -# also the default input encoding. Doxygen uses libiconv (or the iconv built -# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for -# the list of possible encodings. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS = - -# The RECURSIVE tag can be used to specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = NO - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files.
This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix filesystem feature) are excluded -# from the input. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS = - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain images that are included in the documentation (see -# the \image command).
- -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command <filter> <input-file>, where <filter> -# is the value of the INPUT_FILTER tag, and <input-file> is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. -# If FILTER_PATTERNS is specified, this tag will be -# ignored. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. -# Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. -# The filters are a list of the form: -# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). - -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments.
Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. -# Otherwise they will link to the documentation. - -REFERENCES_LINK_SOURCE = YES - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. 
- -ALPHABETICAL_INDEX = NO - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# If the HTML_TIMESTAMP tag is set to YES then the generated HTML -# documentation will contain the timestamp.
- -HTML_TIMESTAMP = NO - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). - -HTML_DYNAMIC_SECTIONS = NO - -# If the GENERATE_DOCSET tag is set to YES, additional index files -# will be generated that can be used as input for Apple's Xcode 3 -# integrated development environment, introduced with OSX 10.5 (Leopard). -# To create a documentation set, doxygen will generate a Makefile in the -# HTML output directory. Running make will produce the docset in that -# directory and running "make install" will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find -# it at startup. -# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. - -GENERATE_DOCSET = NO - -# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the -# feed. A documentation feed provides an umbrella under which multiple -# documentation sets from a single provider (such as a company or product suite) -# can be grouped. 
- -DOCSET_FEEDNAME = "Doxygen generated docs" - -# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that -# should uniquely identify the documentation set bundle. This should be a -# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen -# will append .docset to the name. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output directory. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING -# is used to encode HtmlHelp index (hhk), content (hhc) and project file -# content. - -CHM_INDEX_ENCODING = - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. 
- -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER -# are set, an additional index file will be generated that can be used as input for -# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated -# HTML documentation. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can -# be used to specify the file name of the resulting .qch file. -# The path specified is relative to the HTML output folder. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# http://doc.trolltech.com/qthelpproject.html#namespace - -QHP_NAMESPACE = - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# http://doc.trolltech.com/qthelpproject.html#virtual-folders - -QHP_VIRTUAL_FOLDER = doc - -# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. -# For more information please see -# http://doc.trolltech.com/qthelpproject.html#custom-filters - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the custom filter to add. For more information please see -# Qt Help Project / Custom Filters. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's -# filter section matches. -# Qt Help Project / Filter Attributes. - -QHP_SECT_FILTER_ATTRS = - -# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can -# be used to specify the location of Qt's qhelpgenerator. -# If non-empty doxygen will try to run qhelpgenerator on the generated -# .qhp file. - -QHG_LOCATION = - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it.
- -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. -# If the tag value is set to YES, a side panel will be generated -# containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). -# Windows users are probably better off using the HTML help feature. - -GENERATE_TREEVIEW = NO - -# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, -# and Class Hierarchy pages using a tree view instead of an ordered list. - -USE_INLINE_TREES = NO - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. - -TREEVIEW_WIDTH = 250 - -# Use this tag to change the font size of Latex formulas included -# as images in the HTML documentation. The default is 10. Note that -# when you change the font size after a successful doxygen run you need -# to manually remove any form_*.png images from the HTML output directory -# to force them to be regenerated. - -FORMULA_FONTSIZE = 10 - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript -# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP) -# there is already a search function so this one should typically -# be disabled.
- -SEARCHENGINE = YES - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = YES - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be used to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing!
- -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = YES - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. - -LATEX_HIDE_INDICES = NO - -# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER. - -LATEX_SOURCE_CODE = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimized for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. 
- -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). 
These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. - -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. 
- -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. -# This is useful -# if you want to understand what is going on. -# On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. 
Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_DEFINED tags. - -EXPAND_ONLY_PREDEF = NO - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED = - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED = - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. 
Such -# function macros are typically used for boiler-plate code, and will confuse -# the parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. - -EXTERNAL_GROUPS = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). 
- -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base -# or super classes. Setting the tag to NO turns the diagrams off. Note that -# this option is superseded by the HAVE_DOT option below. This is only a -# fallback. It is recommended to install and use dot, since it yields more -# powerful graphs. - -CLASS_DIAGRAMS = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = YES - -# By default doxygen will write a font called FreeSans.ttf to the output -# directory and reference it in all dot files that doxygen generates. This -# font does not include all possible unicode characters however, so when you need -# these (or just want a differently looking font) you can specify the font name -# using DOT_FONTNAME. 
You need need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. - -DOT_FONTNAME = FreeSans - -# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. -# The default size is 10pt. - -DOT_FONTSIZE = 10 - -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. - -DOT_FONTPATH = - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for groups, showing the direct groups dependencies - -GROUP_GRAPHS = YES - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. - -UML_LOOK = NO - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. 
- -TEMPLATE_RELATIONS = NO - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT options are set to YES then -# doxygen will generate a call dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable call graphs -# for selected functions only using the \callgraph command. - -CALL_GRAPH = YES - -# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then -# doxygen will generate a caller dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable caller -# graphs for selected functions only using the \callergraph command. - -CALLER_GRAPH = YES - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES -# then doxygen will show the dependencies a directory has on other directories -# in a graphical way. The dependency relations are determined by the #include -# relations between the files in the directories. - -DIRECTORY_GRAPH = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. 
Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of -# nodes that will be shown in the graph. If the number of nodes in a graph -# becomes larger than this value, doxygen will truncate the graph, which is -# visualized by representing a node as a red box. Note that doxygen if the -# number of direct children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note -# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. - -DOT_GRAPH_MAX_NODES = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes -# that lay further from the root node will be omitted. Note that setting this -# option to 1 or 2 may greatly reduce the computation time needed for large -# code bases. Also note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. - -MAX_DOT_GRAPH_DEPTH = 0 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not -# seem to support this out of the box. Warning: Depending on the platform used, -# enabling this option may lead to badly anti-aliased labels on the edges of -# a graph (i.e. they become hard to read). 
- -DOT_TRANSPARENT = NO - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) -# support this, this feature is disabled by default. - -DOT_MULTI_TARGETS = NO - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES diff --git a/src/GlobalFccGrid.cc b/src/GlobalFccGrid.cc deleted file mode 100644 index 5d2a32fa..00000000 --- a/src/GlobalFccGrid.cc +++ /dev/null @@ -1,160 +0,0 @@ -#include "GlobalFccGrid.hh" -#include -#include -#include "MC_Vector.hh" -#include "Tuple.hh" - -using std::vector; -using std::min; -using std::max; - -namespace -{ - const vector& getFaceTupleOffset(); -} - - -GlobalFccGrid::GlobalFccGrid(int nx, int ny, int nz, - double lx, double ly, double lz) -: _nx(nx), _ny(ny), _nz(nz), - _lx(lx), _ly(ly), _lz(lz), - _cellTupleToIndex(nx, ny, nz), - _cellIndexToTuple(nx, ny, nz), - _nodeTupleToIndex(nx+1, ny+1, nz+1, 4), - _nodeIndexToTuple(nx+1, ny+1, nz+1, 4) -{ - _dx = _lx/_nx; - _dy = _ly/_ny; - _dz = _lz/_nz; -} - -Long64 GlobalFccGrid::whichCell(const MC_Vector& r) const -{ - int ix = r.x/_dx; - int iy = r.y/_dy; - int iz = r.z/_dz; - return _cellTupleToIndex(ix, iy, iz); -} - - -MC_Vector GlobalFccGrid::cellCenter(Long64 iCell) const -{ - Tuple tt = _cellIndexToTuple(iCell); - MC_Vector r = nodeCoord(Tuple4(tt.x(), tt.y(), tt.z(), 0) ); - r += MC_Vector(_dx/2., _dy/2., _dz/2.); - return r; -} - -const vector& GlobalFccGrid::cornerTupleOffsets() const -{ - static vector offset; - if (offset.size() == 0) - { - offset.reserve(14); - 
offset.push_back(Tuple4(0, 0, 0, 0)); // 0 - offset.push_back(Tuple4(1, 0, 0, 0)); // 1 - offset.push_back(Tuple4(0, 1, 0, 0)); // 2 - offset.push_back(Tuple4(1, 1, 0, 0)); // 3 - offset.push_back(Tuple4(0, 0, 1, 0)); // 4 - offset.push_back(Tuple4(1, 0, 1, 0)); // 5 - offset.push_back(Tuple4(0, 1, 1, 0)); // 6 - offset.push_back(Tuple4(1, 1, 1, 0)); // 7 - offset.push_back(Tuple4(1, 0, 0, 1)); // 8 - offset.push_back(Tuple4(0, 0, 0, 1)); // 9 - offset.push_back(Tuple4(0, 1, 0, 2)); // 10 - offset.push_back(Tuple4(0, 0, 0, 2)); // 11 - offset.push_back(Tuple4(0, 0, 1, 3)); // 12 - offset.push_back(Tuple4(0, 0, 0, 3)); // 13 - } - return offset; -} - -void GlobalFccGrid::getNodeGids(Long64 cellGid, vector& nodeGid) const -{ - if( nodeGid.size() == 0 ) - { - nodeGid.resize(14); - } - - Tuple tt = _cellIndexToTuple(cellGid); - Tuple4 baseNodeTuple = Tuple4(tt.x(), tt.y(), tt.z(), 0); - const vector& cornerTupleOffset = cornerTupleOffsets(); - for (unsigned ii=0; ii<14; ++ii) - nodeGid[ii] = _nodeTupleToIndex(baseNodeTuple + cornerTupleOffset[ii]); -} - -// for faces on the outer surface of the global grid, the returned cell -// gid will be the same as the input cellGid -void GlobalFccGrid::getFaceNbrGids(Long64 cellGid, vector& nbrCellGid) const -{ - if( nbrCellGid.size() == 0 ) - { - nbrCellGid.resize(6); - } - - Tuple cellTuple = _cellIndexToTuple(cellGid); - const vector& faceTupleOffset = getFaceTupleOffset(); - - for (unsigned ii=0; ii<6; ++ii) - { - Tuple faceNbr = cellTuple + faceTupleOffset[ii]; - snapTuple(faceNbr); - nbrCellGid[ii] = _cellTupleToIndex(faceNbr); - } -} - - -MC_Vector GlobalFccGrid::nodeCoord(Long64 index) const -{ - return nodeCoord(_nodeIndexToTuple(index)); -} - -MC_Vector GlobalFccGrid::nodeCoord(const Tuple4& tt) const -{ - vector basisOffset; - basisOffset.reserve(4); - if (basisOffset.size() == 0) - { - basisOffset.push_back(MC_Vector(0., 0., 0. 
)); - basisOffset.push_back(MC_Vector(0., _dy/2.0, _dz/2.0)); - basisOffset.push_back(MC_Vector(_dx/2.0, 0., _dz/2.0)); - basisOffset.push_back(MC_Vector(_dx/2.0, _dy/2.0, 0. )); - } - - double rx = tt.x()*_dx; - double ry = tt.y()*_dy; - double rz = tt.z()*_dz; - - MC_Vector rr = MC_Vector(rx, ry, rz) + basisOffset[tt.b()]; - - return rr; -} - -void GlobalFccGrid::snapTuple(Tuple& tt) const -{ - tt.x() = min(max(0, tt.x()), _nx-1); - tt.y() = min(max(0, tt.y()), _ny-1); - tt.z() = min(max(0, tt.z()), _nz-1); -} - -namespace -{ - const vector& getFaceTupleOffset() - { - static vector faceTupleOffset; - - if (faceTupleOffset.size() == 0) - { - faceTupleOffset.reserve(6); - faceTupleOffset.push_back( Tuple( 1, 0, 0) ); - faceTupleOffset.push_back( Tuple(-1, 0, 0) ); - faceTupleOffset.push_back( Tuple( 0, 1, 0) ); - faceTupleOffset.push_back( Tuple( 0, -1, 0) ); - faceTupleOffset.push_back( Tuple( 0, 0, 1) ); - faceTupleOffset.push_back( Tuple( 0, 0, -1) ); - } - - return faceTupleOffset; - } -} - diff --git a/src/GridAssignmentObject.cc b/src/GridAssignmentObject.cc deleted file mode 100644 index bc8b0be8..00000000 --- a/src/GridAssignmentObject.cc +++ /dev/null @@ -1,195 +0,0 @@ -#include "GridAssignmentObject.hh" -#include -#include -#include -#include "qs_assert.hh" - -#define DIFFSQ(a,b) (MC_Vector(a-b).Dot(MC_Vector(a-b))) - -using std::vector; -using std::queue; -using std::min; -using std::max; -using std::floor; - -/** The present implementation of GridAssignmentObject is judged to be - * sufficiently fast to meet the needs of initial assignment of - * particles to domains. The best way to speed up the code would be to - * more strictly limit the number of cells that are flooded by - * implementing an improved distance calculation in minDist2. - * - * The next best optimization possibility probably involves reducing - * the number of indexToTuple and tupleToIndex calculations (probably - * at the expense of a higher memory footprint. 
-*/ - - -GridAssignmentObject::GridAssignmentObject(const vector& centers) -: _centers(centers) -{ - // This sets the length scale of the grid cells. The value 5 is - // pretty arbitrary. It could just as easily be 1 or 10. If - // necessary it could be made a parameter that is wired out to the - // input deck. - int centersPerCell = 5; - - MC_Vector minCoord = _centers[0]; - MC_Vector maxCoord = _centers[0]; - for (int ii=1; ii<_centers.size(); ++ii) - { - const MC_Vector& iCenter = _centers[ii]; - minCoord.x = min(minCoord.x, iCenter.x); - minCoord.y = min(minCoord.y, iCenter.y); - minCoord.z = min(minCoord.z, iCenter.z); - maxCoord.x = max(maxCoord.x, iCenter.x); - maxCoord.y = max(maxCoord.y, iCenter.y); - maxCoord.z = max(maxCoord.z, iCenter.z); - } - _corner = minCoord; - - // It is possible that all of the centers lie on the x-, y-, or - // z-plane. If so, arbitrarily set the length in that direction to - // 1. - double lx = max(1., (maxCoord.x - minCoord.x)); - double ly = max(1., (maxCoord.y - minCoord.y)); - double lz = max(1., (maxCoord.z - minCoord.z)); - - double x = _centers.size()/centersPerCell/(lx*ly*lz); - x = pow(x, 1.0/3.0); - _nx = max(1., floor(x*lx)); - _ny = max(1., floor(x*ly)); - _nz = max(1., floor(x*lz)); - _dx = lx/_nx; - _dy = ly/_ny; - _dz = lz/_nz; - - int nCells = _nx * _ny * _nz; - - _grid.resize( nCells ); - - for (int ii=0; ii<_centers.size(); ++ii) - { - int iCell = whichCell(_centers[ii]); - _grid[iCell]._myCenters.push_back(ii); - } -} - -int GridAssignmentObject::nearestCenter(const MC_Vector r) -{ - double r2Min = 1e300; - int minCenter = -1; - - addTupleToQueue(whichCellTuple(r)); - - while (_floodQueue.size() > 0) - { - // pop the next cell to check - int iCell = _floodQueue.front(); _floodQueue.pop(); - // if cell is too far away to bother continue. 
- if (minDist2(r, iCell) > r2Min) - continue; - // check all centers in this cell - for (int ii=0; ii<_grid[iCell]._myCenters.size(); ++ii) - { - int iCenter = _grid[iCell]._myCenters[ii]; - - const MC_Vector& rCenter = _centers[iCenter]; - double r2 = DIFFSQ(r, rCenter); - if (r2 == r2Min) - minCenter = min(minCenter, iCenter); - if (r2 < r2Min) - { - r2Min = r2; - minCenter = iCenter; - } - } - // push any unused nbrs to queue. Mark as used. - addNbrsToQueue(iCell); - } - - while (_wetList.size() > 0) - { - _grid[_wetList.front()]._burned = false; - _wetList.pop(); - } - - qs_assert(minCenter >= 0); - return minCenter; -} - - -Tuple GridAssignmentObject::whichCellTuple(const MC_Vector r) const -{ - int ix = (r.x-_corner.x)/_dx; - int iy = (r.y-_corner.y)/_dy; - int iz = (r.z-_corner.z)/_dz; - ix = max(0, ix); - iy = max(0, iy); - iz = max(0, iz); - ix = min(_nx-1, ix); - iy = min(_ny-1, iy); - iz = min(_nz-1, iz); - - return Tuple(ix, iy, iz); -} - -int GridAssignmentObject::whichCell(const MC_Vector r) const -{ - return tupleToIndex(whichCellTuple(r)); -} - -int GridAssignmentObject::tupleToIndex(Tuple tuple) const -{ - return tuple.x() + _nx * (tuple.y() + _ny*tuple.z()); -} - -Tuple GridAssignmentObject::indexToTuple(int index) const -{ - int ix = index % _nx; - index /= _nx; - int iy = index % _ny; - int iz = index / _ny; - return Tuple(ix, iy, iz); -} - -/** Finds a lower bound of the squared distance from the point r to the - * cell with index iCell. As presently implemented this calculation is - * very conservative. We could set a larger lower bound by considering - * the location of the particle within the cell in which it lies. 
*/ -double GridAssignmentObject::minDist2(const MC_Vector r, int iCell) const -{ - Tuple ir = whichCellTuple(r); - Tuple iTuple = indexToTuple(iCell); - - double rx = _dx*(abs(iTuple.x() - ir.x()) - 1); rx = max(0., rx); - double ry = _dy*(abs(iTuple.y() - ir.y()) - 1); ry = max(0., ry); - double rz = _dz*(abs(iTuple.z() - ir.z()) - 1); rz = max(0., rz); - - return rx*rx + ry*ry + rz*rz; -} - -void GridAssignmentObject::addTupleToQueue(Tuple iTuple) -{ - int index = tupleToIndex(iTuple); - if (_grid[index]._burned) - return; - _floodQueue.push(index); - _wetList.push(index); - _grid[index]._burned = true; -} - -void GridAssignmentObject::addNbrsToQueue(int iCell) -{ - Tuple iTuple = indexToTuple(iCell); - iTuple.x() += 1; if (iTuple.x() < _nx) addTupleToQueue(iTuple); - iTuple.x() -= 2; if (iTuple.x() >= 0) addTupleToQueue(iTuple); - iTuple.x() += 1; - - iTuple.y() += 1; if (iTuple.y() < _ny) addTupleToQueue(iTuple); - iTuple.y() -= 2; if (iTuple.y() >= 0) addTupleToQueue(iTuple); - iTuple.y() += 1; - - iTuple.z() += 1; if (iTuple.z() < _nz) addTupleToQueue(iTuple); - iTuple.z() -= 2; if (iTuple.z() >= 0) addTupleToQueue(iTuple); - iTuple.z() += 1; -} diff --git a/src/MCT.cc b/src/MCT.cc deleted file mode 100644 index 3faf3592..00000000 --- a/src/MCT.cc +++ /dev/null @@ -1,667 +0,0 @@ -/// \file -/// Functions to implement tracking of particles through the mesh - -#include "MCT.hh" -#include "Globals.hh" -#include "MonteCarlo.hh" -#include "MC_Nearest_Facet.hh" -#include "MC_Particle.hh" -#include "MC_Domain.hh" -#include "MC_Location.hh" -#include "DirectionCosine.hh" -#include "MC_Distance_To_Facet.hh" -#include "MC_RNG_State.hh" -#include "PhysicalConstants.hh" -#include "DeclareMacro.hh" - -namespace -{ - HOST_DEVICE - MC_Nearest_Facet MCT_Nearest_Facet_3D_G( - MC_Particle *mc_particle, - MC_Domain &domain, - MC_Location &location, - MC_Vector &coordinate, - const DirectionCosine *direction_cosine); - HOST_DEVICE_END - - HOST_DEVICE_CUDA - double 
MCT_Cell_Volume_3D_G_vector_tetDet(const MC_Vector &v0_, - const MC_Vector &v1_, - const MC_Vector &v2_, - const MC_Vector &v3); - - HOST_DEVICE_CUDA - void MCT_Nearest_Facet_3D_G_Move_Particle( - MC_Domain &domain, // input: domain - const MC_Location &location, - MC_Vector &coordinate, // input/output: move this coordinate - double move_factor); // input: multiplication factor for move - - HOST_DEVICE_CUDA - MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest( - int num_facets_per_cell, - MC_Distance_To_Facet *distance_to_facet); - - HOST_DEVICE_CUDA - MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest( - MC_Particle *mc_particle, - MC_Domain *domain, - MC_Location *location, - MC_Vector &coordinate, - int &iteration, // input/output - double &move_factor, // input/output - int num_facets_per_cell, - MC_Distance_To_Facet *distance_to_facet, - int &retry /* output */ ); - - HOST_DEVICE_CUDA - void MCT_Facet_Points_3D_G( - const MC_Domain &domain, // input - int cell, // input - int facet, // input - int num_points_per_facet, // input - int *facet_points /* output */); - - HOST_DEVICE_CUDA - double MCT_Nearest_Facet_3D_G_Distance_To_Segment( - double plane_tolerance, - double facet_normal_dot_direction_cosine, - double A, double B, double C, double D, - const MC_Vector &facet_coords0, - const MC_Vector &facet_coords1, - const MC_Vector &facet_coords2, - const MC_Vector &coordinate, - const DirectionCosine *direction_cosine, - bool allow_enter); - -} - - -/// Calculates the nearest facet of the specified cell to the -/// specified coordinates. -/// -/// \return The minimum distance and facet number. 
- -HOST_DEVICE -MC_Nearest_Facet MCT_Nearest_Facet(MC_Particle *mc_particle, - MC_Location &location, - MC_Vector &coordinate, - const DirectionCosine *direction_cosine, - double distance_threshold, - double current_best_distance, - bool new_segment, - MonteCarlo* monteCarlo ) -{ -// #ifndef BCMN_HAVE_OPENMP -// MC_FASTTIMER_START(MC_Fast_Timer::Nearest_Facet); -// #endif -// - - if (location.domain < 0 || location.cell < 0) - { - qs_assert(false); -// std::string output_string; -// mc_particle->Copy_Particle_To_String(output_string); -// MC_Fatal_Jump( "Bad location value. region: %d domain: %d, cell: %d.\nParticle record\n%s\n", -// location.region, location.domain, location.cell, output_string.c_str()); - } - MC_Domain &domain = monteCarlo->domain[location.domain]; - - MC_Nearest_Facet nearest_facet = - MCT_Nearest_Facet_3D_G(mc_particle, domain, location, coordinate, direction_cosine); - - if (nearest_facet.distance_to_facet < 0) { nearest_facet.distance_to_facet = 0; } - - if (nearest_facet.distance_to_facet >= PhysicalConstants::_hugeDouble) - { - qs_assert(false); -// MC_Warning( "Infinite distance (cell not bound) for location [Reg:%d Local Dom:%d " -// "Global Dom: %d Cell:%d Fac:%d], coordinate (%g %g %g) and direction (%g %g %g).\n", -// location.region, location.domain, -// mcco->region->Global_Domain_Number(location.region, location.domain), -// location.cell, location.facet, -// coordinate.x, coordinate.y, coordinate.z, -// direction_cosine->alpha, direction_cosine->beta, direction_cosine->gamma); -// if ( mc_particle ) -// { -// MC_Warning( "mc_particle.identifier %" PRIu64 "\n", mc_particle->identifier ); -// } - } - -// #ifndef BCMN_HAVE_OPENMP -// MC_FASTTIMER_STOP(MC_Fast_Timer::Nearest_Facet); -// #endif - - return nearest_facet; -} // End MCT_Nearest_Facet - -HOST_DEVICE_END - -/// Generates a random coordinate inside a polyhedral cell. 
- HOST_DEVICE_CUDA -void MCT_Generate_Coordinate_3D_G(uint64_t *random_number_seed, - int domain_num, - int cell, - MC_Vector &coordinate, - MonteCarlo* monteCarlo ) -{ - const MC_Domain &domain = monteCarlo->domain[domain_num]; - - // Determine the cell-center nodal point coordinates. - MC_Vector center = MCT_Cell_Position_3D_G(domain, cell); - - int num_facets = domain.mesh._cellConnectivity[cell].num_facets; - if (num_facets == 0) - { - coordinate.x = coordinate.y = coordinate.z = 0; - return; - } - - double random_number = rngSample(random_number_seed); - double which_volume = random_number * 6.0 * domain.cell_state[cell]._volume; - - // Find the tet to sample from. - double current_volume = 0.0; - int facet_index = -1; - const MC_Vector *point0 = NULL; - const MC_Vector *point1 = NULL; - const MC_Vector *point2 = NULL; - while (current_volume < which_volume) - { - facet_index++; - - if (facet_index == num_facets) { break; } - - int facet_points[3]; - MCT_Facet_Points_3D_G(domain, cell, facet_index, 3, facet_points); - point0 = &domain.mesh._node[facet_points[0]]; - point1 = &domain.mesh._node[facet_points[1]]; - point2 = &domain.mesh._node[facet_points[2]]; - - double subvolume = MCT_Cell_Volume_3D_G_vector_tetDet(*point0, *point1, *point2, center); - current_volume += subvolume; - - } - - // Sample from the tet. - double r1 = rngSample(random_number_seed); - double r2 = rngSample(random_number_seed); - double r3 = rngSample(random_number_seed); - - // Cut and fold cube into prism. - if (r1 + r2 > 1.0) - { - r1 = 1.0 - r1; - r2 = 1.0 - r2; - } - // Cut and fold prism into tetrahedron. - if (r2 + r3 > 1.0) - { - double tmp = r3; - r3 = 1.0 - r1 - r2; - r2 = 1.0 - tmp; - } - else if (r1 + r2 + r3 > 1.0) - { - double tmp = r3; - r3 = r1 + r2 + r3 - 1.0; - r1 = 1.0 - r2 - tmp; - } - - // numbers 1-4 are the barycentric coordinates of the random point. 
- double r4 = 1.0 - r1 - r2 - r3; - - // error check - if ((point0 == NULL) || (point1 == NULL) || (point2 == NULL)) - { - MC_Fatal_Jump( "Programmer Error: points must not be NULL: point0=%p point1=%p point2=%p", - point0, point1, point2); - return; - } - - coordinate.x = ( r4 * center.x + r1 * point0->x + r2 * point1->x + r3 * point2->x ); - coordinate.y = ( r4 * center.y + r1 * point0->y + r2 * point1->y + r3 * point2->y ); - coordinate.z = ( r4 * center.z + r1 * point0->z + r2 * point1->z + r3 * point2->z ); -} - - -/// Returns a coordinate that represents the "center" of the cell. - HOST_DEVICE_CUDA -MC_Vector MCT_Cell_Position_3D_G(const MC_Domain &domain, - int cell_index) -{ - MC_Vector coordinate; - - int num_points = domain.mesh._cellConnectivity[cell_index].num_points; - - for ( int point_index = 0; point_index < num_points; point_index ++ ) - { - int point = domain.mesh._cellConnectivity[cell_index]._point[point_index]; - - coordinate.x += domain.mesh._node[point].x; - coordinate.y += domain.mesh._node[point].y; - coordinate.z += domain.mesh._node[point].z; - } - - double one_over_num_points = 1.0/((double)num_points); - coordinate.x *= one_over_num_points; - coordinate.y *= one_over_num_points; - coordinate.z *= one_over_num_points; - - return coordinate; -} - - -namespace -{ - /// Fills in the facet_points array with the domain local point - /// numbers specified by the cell number and cell-local facet number - /// for a 3DG mesh. - HOST_DEVICE_CUDA - void MCT_Facet_Points_3D_G(const MC_Domain &domain, // input - int cell, // input - int facet, // input - int num_points_per_facet, // input - int *facet_points /* output */) - { - // Determine the domain local points of the facet in the cell for the 2DG or 3DG mesh. 
- for ( int point_index = 0; point_index < num_points_per_facet; point_index++ ) - facet_points[point_index] = domain.mesh._cellConnectivity[cell]._facet[facet].point[point_index]; - } -} - -namespace -{ - /// Calculates the distance from the specified coordinates to the - /// input segment. This is used to track to the faces of a 3D_G - /// mesh. - HOST_DEVICE_CUDA - double MCT_Nearest_Facet_3D_G_Distance_To_Segment(double plane_tolerance, - double facet_normal_dot_direction_cosine, - double A, double B, double C, double D, - const MC_Vector &facet_coords0, - const MC_Vector &facet_coords1, - const MC_Vector &facet_coords2, - const MC_Vector &coordinate, - const DirectionCosine *direction_cosine, - bool allow_enter) - { - double boundingBox_tolerance = 1e-9; - double numerator = -1.0*(A * coordinate.x + - B * coordinate.y + - C * coordinate.z + - D); - - /* Plane equation: numerator = -P(x,y,z) = -(Ax + By + Cz + D) - if: numerator < -1e-8*length(x,y,z) too negative! - if: numerator < 0 && numerator^2 > ( 1e-8*length(x,y,z) )^2 too negative! - reverse inequality since squaring function is decreasing for negative inputs. 
- If numerator is just SLIGHTLY negative, then the particle is just outside of the face */ - - // Filter out too negative distances - if (!allow_enter && numerator < 0.0 && numerator * numerator > plane_tolerance) { - return PhysicalConstants::_hugeDouble; } - - // we have to restrict the solution to within the triangular face - double distance = numerator / facet_normal_dot_direction_cosine; - - // see if the intersection point of the ray and the plane is within the triangular facet - MC_Vector intersection_pt; - intersection_pt.x = coordinate.x + distance * direction_cosine->alpha; - intersection_pt.y = coordinate.y + distance * direction_cosine->beta; - intersection_pt.z = coordinate.z + distance * direction_cosine->gamma; - - // if the point is completely below the triangle, it is not in the triangle -#define IF_POINT_BELOW_CONTINUE(axis) \ - if ( facet_coords0.axis > intersection_pt.axis + boundingBox_tolerance&& \ - facet_coords1.axis > intersection_pt.axis + boundingBox_tolerance && \ - facet_coords2.axis > intersection_pt.axis + boundingBox_tolerance ) { return PhysicalConstants::_hugeDouble; } - -#define IF_POINT_ABOVE_CONTINUE(axis) \ - if ( facet_coords0.axis < intersection_pt.axis - boundingBox_tolerance && \ - facet_coords1.axis < intersection_pt.axis - boundingBox_tolerance && \ - facet_coords2.axis < intersection_pt.axis - boundingBox_tolerance ) { return PhysicalConstants::_hugeDouble; } - - // Is the intersection point inside the triangular facet? Project to 2D and see. 
- - // A^2 + B^2 + C^2 = 1, so max(|A|,|B|,|C|) >= 1/sqrt(3) = 0.577 - // (all coefficients can't be small) - double cross0 = 0, cross1 = 0, cross2 = 0; - if ( C < -0.5 || C > 0.5 ) - { - IF_POINT_BELOW_CONTINUE(x); - IF_POINT_ABOVE_CONTINUE(x); - IF_POINT_BELOW_CONTINUE(y); - IF_POINT_ABOVE_CONTINUE(y); - -#define AB_CROSS_AC(ax,ay,bx,by,cx,cy) ( (bx-ax)*(cy-ay) - (by-ay)*(cx-ax) ) - - cross1 = AB_CROSS_AC(facet_coords0.x, facet_coords0.y, - facet_coords1.x, facet_coords1.y, - intersection_pt.x, intersection_pt.y); - cross2 = AB_CROSS_AC(facet_coords1.x, facet_coords1.y, - facet_coords2.x, facet_coords2.y, - intersection_pt.x, intersection_pt.y); - cross0 = AB_CROSS_AC(facet_coords2.x, facet_coords2.y, - facet_coords0.x, facet_coords0.y, - intersection_pt.x, intersection_pt.y); - - } - else if ( B < -0.5 || B > 0.5 ) - { - IF_POINT_BELOW_CONTINUE(x); - IF_POINT_ABOVE_CONTINUE(x); - IF_POINT_BELOW_CONTINUE(z); - IF_POINT_ABOVE_CONTINUE(z); - - cross1 = AB_CROSS_AC(facet_coords0.z, facet_coords0.x, - facet_coords1.z, facet_coords1.x, - intersection_pt.z, intersection_pt.x); - cross2 = AB_CROSS_AC(facet_coords1.z, facet_coords1.x, - facet_coords2.z, facet_coords2.x, - intersection_pt.z, intersection_pt.x); - cross0 = AB_CROSS_AC(facet_coords2.z, facet_coords2.x, - facet_coords0.z, facet_coords0.x, - intersection_pt.z, intersection_pt.x); - - } - else if ( A < -0.5 || A > 0.5 ) - { - IF_POINT_BELOW_CONTINUE(z); - IF_POINT_ABOVE_CONTINUE(z); - IF_POINT_BELOW_CONTINUE(y); - IF_POINT_ABOVE_CONTINUE(y); - - cross1 = AB_CROSS_AC(facet_coords0.y, facet_coords0.z, - facet_coords1.y, facet_coords1.z, - intersection_pt.y, intersection_pt.z); - cross2 = AB_CROSS_AC(facet_coords1.y, facet_coords1.z, - facet_coords2.y, facet_coords2.z, - intersection_pt.y, intersection_pt.z); - cross0 = AB_CROSS_AC(facet_coords2.y, facet_coords2.z, - facet_coords0.y, facet_coords0.z, - intersection_pt.y, intersection_pt.z); - } - - double cross_tol = 1e-9 * MC_FABS(cross0 + cross1 + cross2); // 
cross product tolerance - - if ( (cross0 > -cross_tol && cross1 > -cross_tol && cross2 > -cross_tol) || - (cross0 < cross_tol && cross1 < cross_tol && cross2 < cross_tol) ) - { - return distance; - } - return PhysicalConstants::_hugeDouble; - } -} - - -/// Reflects the particle off of a reflection boundary. -HOST_DEVICE -void MCT_Reflect_Particle(MonteCarlo *monteCarlo, MC_Particle &particle) -{ - DirectionCosine *direction_cosine = particle.Get_Direction_Cosine(); - MC_Location location = particle.Get_Location(); - - const MC_Domain &domain = location.get_domain(monteCarlo); - const MC_General_Plane &plane = domain.mesh._cellGeometry[location.cell]._facet[location.facet]; - - MC_Vector facet_normal(plane.A, plane.B, plane.C); - - - double dot = 2.0*( direction_cosine->alpha * facet_normal.x + - direction_cosine->beta * facet_normal.y + - direction_cosine->gamma * facet_normal.z ); - - if ( dot > 0 ) // do not reflect a particle that is ALREADY pointing inward - { - // reflect the particle - direction_cosine->alpha -= dot * facet_normal.x; - direction_cosine->beta -= dot * facet_normal.y; - direction_cosine->gamma -= dot * facet_normal.z; - } - - // Calculate the reflected, velocity components. - double particle_speed = particle.velocity.Length(); - particle.velocity.x = particle_speed * particle.direction_cosine.alpha; - particle.velocity.y = particle_speed * particle.direction_cosine.beta; - particle.velocity.z = particle_speed * particle.direction_cosine.gamma; -} -HOST_DEVICE_END - -namespace -{ - /// Loop over all the facets, return the minimum distance. 
- HOST_DEVICE_CUDA - MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest(int num_facets_per_cell, - MC_Distance_To_Facet *distance_to_facet) - { - MC_Nearest_Facet nearest_facet; - - // largest negative distance (smallest magnitude, but negative) - MC_Nearest_Facet nearest_negative_facet; - nearest_negative_facet.distance_to_facet = -PhysicalConstants::_hugeDouble; - - // Determine the facet that is closest to the specified coordinates. - for (int facet_index = 0; facet_index < num_facets_per_cell; facet_index++) - { - if ( distance_to_facet[facet_index].distance > 0.0 ) - { - if ( distance_to_facet[facet_index].distance <= nearest_facet.distance_to_facet ) - { - nearest_facet.distance_to_facet = distance_to_facet[facet_index].distance; - nearest_facet.facet = facet_index; - } - } - else // zero or negative distance - { - if ( distance_to_facet[facet_index].distance > nearest_negative_facet.distance_to_facet ) - { - // smallest in magnitude, but negative - nearest_negative_facet.distance_to_facet = distance_to_facet[facet_index].distance; - nearest_negative_facet.facet = facet_index; - } - } - } - - - if ( nearest_facet.distance_to_facet == PhysicalConstants::_hugeDouble ) - { - if ( nearest_negative_facet.distance_to_facet != -PhysicalConstants::_hugeDouble ) - { - // no positive solution, so allow a negative solution, that had really small magnitude. - nearest_facet.distance_to_facet = nearest_negative_facet.distance_to_facet; - nearest_facet.facet = nearest_negative_facet.facet; - } - } - - return nearest_facet; - } -} - - -namespace -{ - /// Loop over all the facets, return the minimum distance. 
- HOST_DEVICE_CUDA - MC_Nearest_Facet MCT_Nearest_Facet_Find_Nearest(MC_Particle *mc_particle, - MC_Domain *domain, - MC_Location *location, - MC_Vector &coordinate, - int &iteration, // input/output - double &move_factor, // input/output - int num_facets_per_cell, - MC_Distance_To_Facet *distance_to_facet, - int &retry /* output */ ) - { - MC_Nearest_Facet nearest_facet = MCT_Nearest_Facet_Find_Nearest(num_facets_per_cell, distance_to_facet); - - const int max_allowed_segments = 10000000; - - retry = 0; - - if ( mc_particle ) - { - if ( (nearest_facet.distance_to_facet == PhysicalConstants::_hugeDouble && move_factor > 0) || - ( mc_particle->num_segments > max_allowed_segments && nearest_facet.distance_to_facet <= 0.0 ) ) - { - // Could not find a solution, so move the particle towards the center of the cell - // and try again. - MCT_Nearest_Facet_3D_G_Move_Particle(*domain, *location, coordinate, move_factor); - - iteration++; - move_factor *= 2.0; - - if ( move_factor > 1.0e-2 ) - move_factor = 1.0e-2; - - int max_iterations = 10000; - - if ( iteration == max_iterations ) - { - qs_assert(false); // If we start hitting this assertion we can - // come up with a better mitigation plan. - dfr - retry = 0; - - } - else - retry = 1; - - // Allow the distance to the current facet - location->facet = -1; - - } - } - return nearest_facet; - } -} - - - - -namespace -{ - /// Calculates the distance from the specified coordinates to each - /// of the facets of the specified cell in a three-dimensional, - /// unstructured, hexahedral (Type 3D_G) domain, storing the minimum - /// distance and associated facet number. - - HOST_DEVICE - MC_Nearest_Facet MCT_Nearest_Facet_3D_G( - MC_Particle *mc_particle, - MC_Domain &domain, - MC_Location &location, - MC_Vector &coordinate, - const DirectionCosine *direction_cosine) - { - // int my_task_num = mc_particle == NULL ? 
0 : mc_particle->task; - MC_Vector *facet_coords[3]; - int iteration = 0; - double move_factor = 0.5 * PhysicalConstants::_smallDouble; - - // Initialize some data for the unstructured, hexahedral mesh. - int num_facets_per_cell = domain.mesh._cellConnectivity[location.cell].num_facets; - - while (true) // will break out when distance is found - { - // Determine the distance to each facet of the cell. - // (1e-8 * Radius)^2 - double plane_tolerance = 1e-16*(coordinate.x*coordinate.x + - coordinate.y*coordinate.y + - coordinate.z*coordinate.z); - - MC_Distance_To_Facet distance_to_facet[24]; - - for (int facet_index = 0; facet_index < num_facets_per_cell; facet_index++) - { -//to-do mcco->distance_to_facet->task[my_task_num].facet[facet_index].distance = PhysicalConstants::_hugeDouble; - distance_to_facet[facet_index].distance = PhysicalConstants::_hugeDouble; - - MC_General_Plane &plane = domain.mesh._cellGeometry[location.cell]._facet[facet_index]; - - double facet_normal_dot_direction_cosine = - (plane.A * direction_cosine->alpha + - plane.B * direction_cosine->beta + - plane.C * direction_cosine->gamma); - - // Consider only those facets whose outer normals have - // a positive dot product with the direction cosine. - // I.e. the particle is LEAVING the cell. - if (facet_normal_dot_direction_cosine <= 0.0) { continue; } - - /* profiling with gprof showed that putting a call to MC_Facet_Coordinates_3D_G - slowed down the code by about 10%, so we get the facet coords "by hand." 
*/ - int *point = domain.mesh._cellConnectivity[location.cell]._facet[facet_index].point; - facet_coords[0] = &domain.mesh._node[point[0]]; - facet_coords[1] = &domain.mesh._node[point[1]]; - facet_coords[2] = &domain.mesh._node[point[2]]; - - double t = MCT_Nearest_Facet_3D_G_Distance_To_Segment( - plane_tolerance, - facet_normal_dot_direction_cosine, plane.A, plane.B, plane.C, plane.D, - *facet_coords[0], *facet_coords[1], *facet_coords[2], - coordinate, direction_cosine, false); - -//to-do mcco->distance_to_facet->task[my_task_num].facet[facet_index].distance = t; - distance_to_facet[facet_index].distance = t; - } // for facet_index - - int retry = 0; - - MC_Nearest_Facet nearest_facet = MCT_Nearest_Facet_Find_Nearest( - mc_particle, &domain, &location, coordinate, - iteration, move_factor, num_facets_per_cell, -//to-do mcco->distance_to_facet->task[my_task_num].facet, - distance_to_facet, - retry); - - - if (! retry) return nearest_facet; - } // while (true) - } // End MCT_Nearest_Facet_3D_G - - HOST_DEVICE_END - -} // anonymous namespace - -namespace -{ - /// \return 6 times the volume of the tet. - /// - /// subtract v3 from v0, v1 and v2. Then take the triple product of v0, v1 and v2. - HOST_DEVICE_CUDA - double MCT_Cell_Volume_3D_G_vector_tetDet(const MC_Vector &v0_, - const MC_Vector &v1_, - const MC_Vector &v2_, - const MC_Vector &v3) - { - MC_Vector v0(v0_), v1(v1_), v2(v2_); - - v0.x -= v3.x; v0.y -= v3.y; v0.z -= v3.z; - v1.x -= v3.x; v1.y -= v3.y; v1.z -= v3.z; - v2.x -= v3.x; v2.y -= v3.y; v2.z -= v3.z; - - return - v0.z*(v1.x*v2.y - v1.y*v2.x) + - v0.y*(v1.z*v2.x - v1.x*v2.z) + - v0.x*(v1.y*v2.z - v1.z*v2.y); - } -} - - -namespace -{ - /// Move the input particle by a small amount toward the center of the cell. 
- HOST_DEVICE_CUDA - void MCT_Nearest_Facet_3D_G_Move_Particle(MC_Domain &domain, // input: domain - const MC_Location &location, - MC_Vector &coordinate, // input/output: move this coordinate - double move_factor) // input: multiplication factor for move - { - MC_Vector move_to = MCT_Cell_Position_3D_G(domain, location.cell); - - coordinate.x += move_factor * ( move_to.x - coordinate.x ); - coordinate.y += move_factor * ( move_to.y - coordinate.y ); - coordinate.z += move_factor * ( move_to.z - coordinate.z ); - } -} diff --git a/src/MC_Adjacent_Facet.cc b/src/MC_Adjacent_Facet.cc deleted file mode 100644 index d895a202..00000000 --- a/src/MC_Adjacent_Facet.cc +++ /dev/null @@ -1,21 +0,0 @@ -#include "MCT.hh" -#include "MC_Domain.hh" -#include "Globals.hh" -#include "MonteCarlo.hh" -#include "DeclareMacro.hh" - -class MC_Particle; - -HOST_DEVICE - -Subfacet_Adjacency &MCT_Adjacent_Facet(const MC_Location &location, MC_Particle &mc_particle, MonteCarlo* monteCarlo) - -{ - MC_Domain &domain = monteCarlo->domain[location.domain]; - - Subfacet_Adjacency &adjacency =domain.mesh._cellConnectivity[location.cell]._facet[location.facet].subfacet; - - return adjacency; -} - -HOST_DEVICE_END diff --git a/src/MC_Domain.cc b/src/MC_Domain.cc deleted file mode 100644 index ceb418be..00000000 --- a/src/MC_Domain.cc +++ /dev/null @@ -1,473 +0,0 @@ -#include "MC_Domain.hh" -#include -#include -#include -#include - -#include -using std::cout; -using std::endl; - -#include "Globals.hh" -#include "MonteCarlo.hh" -#include "MC_Cell_State.hh" -#include "macros.hh" -#include "MC_RNG_State.hh" -#include "PhysicalConstants.hh" -#include "MeshPartition.hh" -#include "GlobalFccGrid.hh" -#include "DecompositionObject.hh" -#include "MC_Facet_Adjacency.hh" -#include "Parameters.hh" -#include "MaterialDatabase.hh" -#include "MCT.hh" - -using std::vector; -using std::make_pair; -using std::map; -using std::abs; -using std::string; - -namespace -{ - struct FaceInfo - { - 
MC_Subfacet_Adjacency_Event::Enum _event; - CellInfo _cellInfo; - int _nbrIndex; - }; - - - int nodeIndirect[24][3] = { {1, 3, 8}, {3, 7, 8}, {7, 5, 8}, {5, 1, 8}, - {0, 4, 9}, {4, 6, 9}, {6, 2, 9}, {2, 0, 9}, - {3, 2,10}, {2, 6,10}, {6, 7,10}, {7, 3,10}, - {0, 1,11}, {1, 5,11}, {5, 4,11}, {4, 0,11}, - {4, 5,12}, {5, 7,12}, {7, 6,12}, {6, 4,12}, - {0, 2,13}, {2, 3,13}, {3, 1,13}, {1, 0,13} }; - - int opposingFacet[24] = { 7, 6, 5, 4, 3, 2, 1, 0, 12, 15, - 14, 13, 8, 11, 10, 9, 20, 23, 22, 21, - 16, 19, 18, 17}; - - - void bootstrapNodeMap(map& nodeIndexMap, - const MeshPartition& partition, - const GlobalFccGrid& grid); - - void buildCells(qs_vector& cell, - BulkStorage& facetStore, - BulkStorage& pointStore, - const map& nodeIndexMap, - const qs_vector& nbrDomain, - const MeshPartition& partition, - const GlobalFccGrid& grid, - const qs_vector& boundaryCondition); - - void makeFacet(MC_Facet_Adjacency& facet, - const MC_Location& location, - int* nodeIndex, - const vector& faceInfo); - - string findMaterial(const Parameters& params, const MC_Vector& rr); - - qs_vector getBoundaryCondition(const Parameters& params); -} - - -MC_Mesh_Domain::MC_Mesh_Domain(const MeshPartition& meshPartition, const GlobalFccGrid& grid, - const DecompositionObject& ddc, - const qs_vector& boundaryCondition) -: _domainGid(meshPartition.domainGid()) -{ - _nbrDomainGid.resize(meshPartition.nbrDomains().size()); - for (unsigned ii=0; ii< _nbrDomainGid.size(); ++ii) - _nbrDomainGid[ii] = meshPartition.nbrDomains()[ii]; - - - - _nbrRank.reserve(_nbrDomainGid.size(), VAR_MEM); - _nbrRank.Open(); - for (unsigned ii=0; ii<_nbrDomainGid.size(); ++ii) - _nbrRank.push_back(ddc.getRank(_nbrDomainGid[ii])); - _nbrRank.Close(); - map nodeIndexMap; - - bootstrapNodeMap(nodeIndexMap, meshPartition, grid); - - - int totalCells = 0; - for (auto iter=meshPartition.begin(); iter!=meshPartition.end(); ++iter) - { - if (iter->second._domainGid != meshPartition.domainGid()) - continue; - ++totalCells; - } - - 
_connectivityFacetStorage.setCapacity(totalCells*24, VAR_MEM); - _connectivityPointStorage.setCapacity(totalCells*14, VAR_MEM); - - buildCells(_cellConnectivity, _connectivityFacetStorage, _connectivityPointStorage, - nodeIndexMap, _nbrDomainGid, meshPartition, grid, boundaryCondition); - - _node.resize(nodeIndexMap.size(), VAR_MEM); - - for (auto iter=nodeIndexMap.begin(); iter!=nodeIndexMap.end(); ++iter) - { - const Long64& iNodeGid = iter->first; - const int& iNodeIndex = iter->second; - _node[iNodeIndex] = grid.nodeCoord(iNodeGid); - } - - {//limit scope - // initialize _cellGeometry - _cellGeometry.resize(_cellConnectivity.size(), VAR_MEM); - - // First, we need to count up the total number of facets of all - // cells in this domain and initialize the BulkStorage - // of facets (i.e., MC_General_Plane). This code is somewhat - // pedantic since we know all of the cells have 24 facets. - int totalFacets = 0; - for (unsigned iCell=0; iCell<_cellConnectivity.size(); ++iCell) - totalFacets += _cellConnectivity[iCell].num_facets; - _geomFacetStorage.setCapacity(totalFacets, VAR_MEM); - - // Now initialize all of the facets. - for (unsigned iCell=0; iCell<_cellConnectivity.size(); ++iCell) - { - int nFacets = _cellConnectivity[iCell].num_facets; - qs_assert(nFacets == 24); - _cellGeometry[iCell]._facet = _geomFacetStorage.getBlock(nFacets); - _cellGeometry[iCell]._size = nFacets; - for (unsigned jFacet=0; jFacet& nodeIndexMap, - const MeshPartition& partition, - const GlobalFccGrid& grid) - { - map faceCenters; - vector nodeGid; - for (auto iter=partition.begin(); iter!=partition.end(); ++iter) - { - if (iter->second._domainGid != partition.domainGid()) - continue; // skip remote cells - const Long64& iCellGid = iter->first; - grid.getNodeGids(iCellGid, nodeGid); - for (unsigned ii=0; ii<8; ++ii) //yes, 8. Only corners. - nodeIndexMap.insert(make_pair(nodeGid[ii], nodeIndexMap.size())); - for (unsigned ii=8; ii<14; ++ii) // save face centers for later. 
- faceCenters.insert(make_pair(nodeGid[ii], faceCenters.size())); - } - for (auto iter=faceCenters.begin(); iter!=faceCenters.end(); ++iter) - iter->second += nodeIndexMap.size(); - - nodeIndexMap.insert(faceCenters.begin(), faceCenters.end()); - } -} - -namespace -{ - // Setting up the subfacet info is tricky because some data members - // of Subfacet_Adjacency don't always apply. - // * neighbor_index is meaningless for boundary facets and facets that - // are adjacent to cells on the same domain. We choose to set - // neighbor_index to -1 in these cases. - // * adjacent is meaningless for boundary facets. In these cases we - // set adjacent = current. - void buildCells(qs_vector& cell, - BulkStorage& facetStore, - BulkStorage& pointStore, - const map& nodeIndexMap, - const qs_vector& nbrDomain, - const MeshPartition& partition, - const GlobalFccGrid& grid, - const qs_vector& boundaryCondition) - - { - map nbrDomainIndex; // nbrDomainIndex[domainGid] = localNbrIndex; - - for (unsigned ii=0; ii nodeGid; - vector faceNbr; - if( cell.size() == 0 ) - { - cell.reserve(partition.size(), VAR_MEM); - } - cell.Open(); - for (auto iter=partition.begin(); iter!=partition.end(); ++iter) - { - if (iter->second._domainGid != partition.domainGid()) - continue; - - - const Long64& iCellGid = iter->first; - const int& domainIndex = iter->second._domainIndex; - const int& cellIndex = iter->second._cellIndex; - const int& foreman = iter->second._foreman; - qs_assert(domainIndex == partition.domainIndex()); - qs_assert(cellIndex == cell.size()); - - cell.push_back(MC_Facet_Adjacency_Cell()); - MC_Facet_Adjacency_Cell& newCell = cell.back(); - - - newCell._facet = facetStore.getBlock(newCell.num_facets); - newCell._point = pointStore.getBlock(newCell.num_points); - - - - - - - grid.getNodeGids(iCellGid, nodeGid); - for (unsigned ii=0; iisecond; - } - - vector faceInfo(6); - grid.getFaceNbrGids(iCellGid, faceNbr); - for (unsigned ii=0; ii<6; ++ii) - { - auto here = 
partition.findCell(faceNbr[ii]); - qs_assert(here != partition.end()); - const CellInfo& jCellInfo = here->second; - faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Adjacency_Undefined; - faceInfo[ii]._cellInfo = jCellInfo; - faceInfo[ii]._nbrIndex = nbrDomainIndex[jCellInfo._domainGid]; - if (faceNbr[ii] == iCellGid) - faceInfo[ii]._event = boundaryCondition[ii]; - else - { - if (jCellInfo._foreman == foreman) - faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_On_Processor; - else - faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_Off_Processor; -// if (jCellInfo._domainIndex != domainIndex && jCellInfo._foreman == foreman) -// faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_On_Processor; -// if (jCellInfo._foreman != foreman) -// faceInfo[ii]._event = MC_Subfacet_Adjacency_Event::Transit_Off_Processor; - } - } - - MC_Location location(domainIndex, cellIndex, -1); - for (unsigned ii=0; ii& faceInfo) - { - const int& facetId = location.facet; - int faceId = facetId / 4; - - facet.num_points = 3; - facet.point[0] = nodeIndex[nodeIndirect[facetId][0]]; - facet.point[1] = nodeIndex[nodeIndirect[facetId][1]]; - facet.point[2] = nodeIndex[nodeIndirect[facetId][2]]; - facet.subfacet.event = faceInfo[faceId]._event; - facet.subfacet.current = location; - facet.subfacet.adjacent.domain = faceInfo[faceId]._cellInfo._domainIndex; - facet.subfacet.adjacent.cell = faceInfo[faceId]._cellInfo._cellIndex; - facet.subfacet.adjacent.facet = opposingFacet[facetId]; - facet.subfacet.neighbor_index = faceInfo[faceId]._nbrIndex; - facet.subfacet.neighbor_global_domain = faceInfo[faceId]._cellInfo._domainGid; - facet.subfacet.neighbor_foreman = faceInfo[faceId]._cellInfo._foreman; - - // handle special case - if (facet.subfacet.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection || - facet.subfacet.event == MC_Subfacet_Adjacency_Event::Boundary_Escape ) - facet.subfacet.adjacent.facet = facet.subfacet.current.facet; - } -} - -MC_Vector 
findCellCenter(const MC_Facet_Adjacency_Cell& cell, - const qs_vector& node) -{ - // find center of cell - MC_Vector cellCenter(0., 0., 0.); - for ( int iter=0; iter < cell.num_points; iter++) - cellCenter += node[cell._point[iter]]; - cellCenter /= cell.num_points; - return cellCenter; -} - - -// This is messed up. Why doesn't either the cell or the mesh have a -// member function to compute the volume? -double cellVolume(const MC_Facet_Adjacency_Cell& cell, - const qs_vector& node) -{ - // find center of cell - MC_Vector cellCenter(0., 0., 0.); - for ( int iter=0; iter < cell.num_points; iter++) - cellCenter += node[cell._point[iter]]; - cellCenter /= cell.num_points; - - double volume = 0; - for (unsigned iFacet=0; iFacet(materialDatabase._mat[cell_state[ii]._material]._iso.size()); - // The cellNumberDensity scales the crossSections so we choose to - // set this density to 1.0 so that the totalCrossSection will be - // as requested by the user. - cell_state[ii]._cellNumberDensity = 1.0; - - MC_Vector cellCenter = findCellCenter(mesh._cellConnectivity[ii], mesh._node); - cell_state[ii]._id = grid.whichCell(cellCenter) * UINT64_C(0x0100000000); - cell_state[ii]._sourceTally = 0; - } - -} - -void MC_Domain::clearCrossSectionCache(int numEnergyGroups) -{ - for (unsigned ii=0; ii= geom.xMin && rr.x <= geom.xMax) && - (rr.y >= geom.yMin && rr.y <= geom.yMax) && - (rr.z >= geom.zMin && rr.z <= geom.zMax) ) - inside = true; - } - break; - case GeometryParameters::SPHERE: - { - MC_Vector center(geom.xCenter, geom.yCenter, geom.zCenter); - if ( (rr-center).Length() <= geom.radius) - inside = true; - } - - break; - default: - qs_assert(false); - } - return inside; - } -} - - -// Returns the name of the material present at coordinate rr. If -// multiple materials overlap return the last material found. 
-namespace -{ - string findMaterial(const Parameters& params, const MC_Vector& rr) - { - string materialName; - for (unsigned ii=0; ii< params.geometryParams.size(); ++ii) - if (isInside(params.geometryParams[ii], rr)) - materialName = params.geometryParams[ii].materialName; - - qs_assert(materialName.size() > 0); - return materialName; - } -} - - -namespace -{ - qs_vector getBoundaryCondition(const Parameters& params) - { - qs_vector bc(6); - if (params.simulationParams.boundaryCondition == "reflect") - bc = qs_vector(6, MC_Subfacet_Adjacency_Event::Boundary_Reflection); - else if (params.simulationParams.boundaryCondition == "escape") - bc = qs_vector(6, MC_Subfacet_Adjacency_Event::Boundary_Escape); - else if (params.simulationParams.boundaryCondition == "octant") - for (unsigned ii=0; ii<6; ++ii) - { - if (ii % 2 == 0) bc[ii] = MC_Subfacet_Adjacency_Event::Boundary_Escape; - if (ii % 2 == 1) bc[ii] = MC_Subfacet_Adjacency_Event::Boundary_Reflection; - } - else - qs_assert(false); - return bc; - } -} - diff --git a/src/MC_Facet_Crossing_Event.cc b/src/MC_Facet_Crossing_Event.cc deleted file mode 100644 index 2f0bf1b9..00000000 --- a/src/MC_Facet_Crossing_Event.cc +++ /dev/null @@ -1,72 +0,0 @@ -#include "MC_Facet_Crossing_Event.hh" -#include "ParticleVaultContainer.hh" -#include "ParticleVault.hh" -#include "MC_Domain.hh" -#include "Tallies.hh" -#include "MC_Particle.hh" -#include "MC_Facet_Adjacency.hh" -#include "Globals.hh" -#include "MCT.hh" -#include "MC_Particle_Buffer.hh" -#include "DeclareMacro.hh" -#include "macros.hh" -#include "SendQueue.hh" - -//---------------------------------------------------------------------------------------------------------------------- -// Determines whether the particle has been tracked to a facet such that it: -// (i) enters into an adjacent cell -// (ii) escapes across the system boundary (Vacuum BC), or -// (iii) reflects off of the system boundary (Reflection BC). 
-// -//---------------------------------------------------------------------------------------------------------------------- - -HOST_DEVICE - -MC_Tally_Event::Enum MC_Facet_Crossing_Event(MC_Particle &mc_particle, MonteCarlo* monteCarlo, int particle_index, ParticleVault* processingVault) -{ - MC_Location location = mc_particle.Get_Location(); - - Subfacet_Adjacency &facet_adjacency = MCT_Adjacent_Facet(location, mc_particle, monteCarlo); - - if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_On_Processor ) - { - // The particle will enter into an adjacent cell. - mc_particle.domain = facet_adjacency.adjacent.domain; - mc_particle.cell = facet_adjacency.adjacent.cell; - mc_particle.facet = facet_adjacency.adjacent.facet; - mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Transit_Exit; - } - else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Escape ) - { - // The particle will escape across the system boundary. - mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Escape; - } - else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Boundary_Reflection ) - { - // The particle will reflect off of the system boundary. - mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Reflection; - } - else if ( facet_adjacency.event == MC_Subfacet_Adjacency_Event::Transit_Off_Processor ) - { - // The particle will enter into an adjacent cell on a spatial neighbor. - // The neighboring domain is on another processor. 
Set domain local domain on neighbor proc - - mc_particle.domain = facet_adjacency.adjacent.domain; - mc_particle.cell = facet_adjacency.adjacent.cell; - mc_particle.facet = facet_adjacency.adjacent.facet; - mc_particle.last_event = MC_Tally_Event::Facet_Crossing_Communication; - - // Select particle buffer - int neighbor_rank = monteCarlo->domain[facet_adjacency.current.domain].mesh._nbrRank[facet_adjacency.neighbor_index]; - - processingVault->putParticle( mc_particle, particle_index ); - - //Push neighbor rank and mc_particle onto the send queue - monteCarlo->_particleVaultContainer->getSendQueue()->push( neighbor_rank, particle_index ); - - } - - return mc_particle.last_event; -} - -HOST_DEVICE_END diff --git a/src/MC_Fast_Timer.cc b/src/MC_Fast_Timer.cc deleted file mode 100644 index 70aa7bf9..00000000 --- a/src/MC_Fast_Timer.cc +++ /dev/null @@ -1,161 +0,0 @@ -#include "MC_Fast_Timer.hh" -#include -#include "MonteCarlo.hh" -#include "MC_Processor_Info.hh" -#include "Globals.hh" -#include "portability.hh" - -const char *mc_fast_timer_names[MC_Fast_Timer::Num_Timers] = -{ - "main", - "cycleInit", - "cycleTracking", - "cycleTracking_Kernel", - "cycleTracking_MPI", - "cycleTracking_Test_Done", - "cycleFinalize" -}; - -static double mc_std_dev(uint64_t const data[], int const nelm); - -static double mc_std_dev(uint64_t const data[], int const nelm) -{ - uint64_t mean=0.0, sum_deviation=0.0; - - for(int ndx=0; ndx cumulativeClock(MC_Fast_Timer::Num_Timers); - std::vector max_clock(MC_Fast_Timer::Num_Timers); - std::vector min_clock(MC_Fast_Timer::Num_Timers); - std::vector sum_clock(MC_Fast_Timer::Num_Timers); - std::vector std_dev_use(num_ranks); // used to calculate standard deviation - - for ( int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++ ) - { cumulativeClock[timer_index] = this->timers[timer_index].cumulativeClock; } - - mpiReduce(&cumulativeClock[0], &max_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MAX, 0, comm_world); 
- mpiReduce(&cumulativeClock[0], &min_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MIN, 0, comm_world); - mpiReduce(&cumulativeClock[0], &sum_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_SUM, 0, comm_world); - - this->Print_Cumulative_Heading(mpi_rank); - - for ( int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++ ) - { - mpiGather(&cumulativeClock[timer_index], 1, MPI_UINT64_T, &std_dev_use[0], 1, MPI_UINT64_T, 0, comm_world); - - uint64_t ave_clock = sum_clock[timer_index] / num_ranks; - if (mpi_rank == 0) { - fprintf(stdout,"%-25s %12lu %12.3e %12.3e %12.3e %12.3e %12.2f\n", - mc_fast_timer_names[timer_index], - (unsigned long)this->timers[timer_index].numCalls, - (double)min_clock[timer_index], - (double)ave_clock, - (double)max_clock[timer_index], - (double)mc_std_dev(&std_dev_use[0], num_ranks), - (100.0 * ave_clock) / (max_clock[timer_index] + 1.0e-80) ); - } - } - if( mpi_rank == 0 ) - { - int cycleTracking_Index = 2; - fprintf(stdout, "%-25s %12.3e %-25s\n", - "Figure Of Merit", - (numSegments / (max_clock[cycleTracking_Index]*1e-6)), - "[Num Segments / Cycle Tracking Time]" ); - } -} - -void MC_Fast_Timer_Container::Last_Cycle_Report(int report_time, int mpi_rank, int num_ranks, MPI_Comm comm_world) -{ -#ifdef DISABLE_TIMERS - return; -#endif - - if(report_time == 1) - { - fflush(stdout); mpiBarrier(comm_world); - - std::vector lastCycleClock(MC_Fast_Timer::Num_Timers); - std::vector max_clock(MC_Fast_Timer::Num_Timers); - std::vector min_clock(MC_Fast_Timer::Num_Timers); - std::vector sum_clock(MC_Fast_Timer::Num_Timers); - std::vector std_dev_use(num_ranks); // used to calculate standard deviation - - for ( int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++ ) - { - lastCycleClock[timer_index] = this->timers[timer_index].lastCycleClock; - } - - mpiReduce(&lastCycleClock[0], &max_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MAX, 0, comm_world); - mpiReduce(&lastCycleClock[0], 
&min_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_MIN, 0, comm_world); - mpiReduce(&lastCycleClock[0], &sum_clock[0], MC_Fast_Timer::Num_Timers, MPI_UINT64_T, MPI_SUM, 0, comm_world); - - this->Print_Last_Cycle_Heading(mpi_rank); - - for ( int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++ ) - { - mpiGather(&lastCycleClock[timer_index], 1, MPI_UINT64_T, &std_dev_use[0], 1, MPI_UINT64_T, 0, comm_world); - - uint64_t ave_clock = sum_clock[timer_index] / num_ranks; - if (mpi_rank == 0) { - fprintf(stdout,"%-25s %12lu %12.3e %12.3e %12.3e %12.3e %12.2f\n", - mc_fast_timer_names[timer_index], - (unsigned long)this->timers[timer_index].numCalls, - (double)min_clock[timer_index], - (double)ave_clock, - (double)max_clock[timer_index], - (double)mc_std_dev(&std_dev_use[0], num_ranks), - (100.0 * ave_clock) / (max_clock[timer_index] + 1.0e-80) ); - } - } - } - Clear_Last_Cycle_Timers(); -} - -void MC_Fast_Timer_Container::Clear_Last_Cycle_Timers() -{ - for ( int timer_index = 0; timer_index < MC_Fast_Timer::Num_Timers; timer_index++ ) - { - this->timers[timer_index].lastCycleClock = 0; - } -} - diff --git a/src/MC_Load_Particle.cc b/src/MC_Load_Particle.cc deleted file mode 100644 index cc54ea15..00000000 --- a/src/MC_Load_Particle.cc +++ /dev/null @@ -1,31 +0,0 @@ -#include "ParticleVault.hh" -#include "MC_Particle.hh" -#include "MC_Time_Info.hh" -#include "DeclareMacro.hh" - -//---------------------------------------------------------------------------------------------------------------------- -// Copies a single particle from the particle-vault data structure into the active-particle data structure. 
-//---------------------------------------------------------------------------------------------------------------------- - -HOST_DEVICE -void MC_Load_Particle(MonteCarlo *monteCarlo, MC_Particle &mc_particle, ParticleVault *particleVault, int particle_index) -{ - //particleVault.popParticle(mc_particle); - particleVault->getParticle(mc_particle, particle_index); - - // Time to Census - if ( mc_particle.time_to_census <= 0.0 ) - { - mc_particle.time_to_census += monteCarlo->time_info->time_step; - } - - // Age - if (mc_particle.age < 0.0) { mc_particle.age = 0.0; } - - // Energy Group - mc_particle.energy_group = monteCarlo->_nuclearData->getEnergyGroup(mc_particle.kinetic_energy); -// printf("file=%s line=%d\n",__FILE__,__LINE__); - -} -HOST_DEVICE_END - diff --git a/src/MC_Location.cc b/src/MC_Location.cc deleted file mode 100644 index 9d186a06..00000000 --- a/src/MC_Location.cc +++ /dev/null @@ -1,14 +0,0 @@ -#include "MC_Location.hh" -#include "MonteCarlo.hh" -#include "MC_Domain.hh" -#include "DeclareMacro.hh" - -// Return a reference to the domain for this location. - -HOST_DEVICE -const MC_Domain &MC_Location::get_domain(MonteCarlo *mcco) const -{ - return mcco->domain[domain]; -} - -HOST_DEVICE_END diff --git a/src/MC_RNG_State.cc b/src/MC_RNG_State.cc deleted file mode 100644 index 96c37c7e..00000000 --- a/src/MC_RNG_State.cc +++ /dev/null @@ -1,115 +0,0 @@ -#include "MC_RNG_State.hh" -#include "DeclareMacro.hh" - -//---------------------------------------------------------------------------// - -namespace -{ -HOST_DEVICE - // Break a 64 bit state into 2 32 bit ints. - void breakup_uint64( uint64_t uint64_in, - uint32_t& front_bits, uint32_t& back_bits ) - { - front_bits = static_cast( uint64_in >> 32 ); - back_bits = static_cast( uint64_in & 0xffffffff ); - } -HOST_DEVICE_END -} - -//---------------------------------------------------------------------------// - -namespace -{ - // Function sed to hash a 64 bit int into another, unrelated one. 
It - // does this in two 32 bit chuncks. This function uses the algorithm - // from Numerical Recipies in C, 2nd edition: psdes, p. 302. This is - // used to make 64 bit numbers for use as initial states for the 64 - // bit lcg random number generator. -HOST_DEVICE - void pseudo_des( uint32_t& lword, uint32_t& irword ) - { - // This random number generator assumes that type uint32_t is a 32 bit int - // = 1/2 of a 64 bit int. The sizeof operator returns the size in bytes = 8 bits. - - const int NITER = 2; - const uint32_t c1[] = { 0xbaa96887L, 0x1e17d32cL, 0x03bcdc3cL, 0x0f33d1b2L }; - const uint32_t c2[] = { 0x4b0f3b58L, 0xe874f0c3L, 0x6955c5a6L, 0x55a7ca46L}; - - uint32_t ia,ib,iswap,itmph=0,itmpl=0; - - for( int i = 0; i < NITER; i++) - { - ia = ( iswap = irword ) ^ c1[i]; - itmpl = ia & 0xffff; - itmph = ia >> 16; - ib = itmpl*itmpl+ ~(itmph*itmph); - - irword = lword ^ (((ia = (ib >> 16) | - ((ib & 0xffff) << 16)) ^ c2[i])+itmpl*itmph); - - lword=iswap; - } - } -HOST_DEVICE_END -} - -//---------------------------------------------------------------------------// - -namespace -{ - - HOST_DEVICE - // Function used to reconstruct a 64 bit from 2 32 bit ints. - uint64_t reconstruct_uint64( uint32_t front_bits, uint32_t back_bits ) - { - uint64_t reconstructed, temp; - reconstructed = static_cast( front_bits ); - temp = static_cast( back_bits ); - - // shift first bits 32 bits to left - reconstructed = reconstructed << 32; - - // temp must be masked to kill leading 1's. Then 'or' with reconstructed - // to get the last bits in - reconstructed |= (temp & 0x00000000ffffffff); - - return reconstructed; - } - HOST_DEVICE_END -} - -//---------------------------------------------------------------------------// - -namespace -{ -HOST_DEVICE - // Function used to hash a 64 bit int to get an initial state. 
- uint64_t hash_state( uint64_t initial_number ) - { - // break initial number apart into 2 32 bit ints - uint32_t front_bits, back_bits; - breakup_uint64( initial_number, front_bits, back_bits ); - - // hash the bits - pseudo_des( front_bits, back_bits ); - - // put the hashed parts together into 1 64 bit int - return reconstruct_uint64( front_bits, back_bits ); - } -HOST_DEVICE_END -} - -//---------------------------------------------------------------------------------------------------------------------- -// This routine spawns a "child" random number seed from a "parent" random number seed. -//---------------------------------------------------------------------------------------------------------------------- - -HOST_DEVICE -uint64_t rngSpawn_Random_Number_Seed(uint64_t *parent_seed) -{ - uint64_t spawned_seed = hash_state(*parent_seed); - // Bump the parent seed as that is what is expected from the interface. - rngSample(parent_seed); - return spawned_seed; -} - -HOST_DEVICE_END diff --git a/src/MC_Segment_Outcome.cc b/src/MC_Segment_Outcome.cc deleted file mode 100644 index 81325697..00000000 --- a/src/MC_Segment_Outcome.cc +++ /dev/null @@ -1,249 +0,0 @@ -#include "MC_Segment_Outcome.hh" -#include "MC_Nearest_Facet.hh" -#include "MC_Location.hh" -#include "MonteCarlo.hh" -#include "Globals.hh" -#include "MC_Particle.hh" -#include "MC_RNG_State.hh" -#include "MC_Cell_State.hh" -#include "Tallies.hh" -#include "utils.hh" -#include "macros.hh" -#include "MacroscopicCrossSection.hh" -#include "MCT.hh" -#include "PhysicalConstants.hh" -#include "DeclareMacro.hh" - - -HOST_DEVICE -static inline unsigned int MC_Find_Min(const double *array, - int num_elements); -HOST_DEVICE_END - -//-------------------------------------------------------------------------------------------------- -// Routine MC_Segment_Outcome determines whether the next segment of the particle's trajectory will result in: -// (i) collision within the current cell, -// (ii) exiting from the 
current cell, or -// (iii) census at the end of the time step. -//-------------------------------------------------------------------------------------------------- - -HOST_DEVICE -MC_Segment_Outcome_type::Enum MC_Segment_Outcome(MonteCarlo* monteCarlo, MC_Particle &mc_particle, unsigned int &flux_tally_index) -{ - // initialize distances to large number - int number_of_events = 3; - double distance[3]; - distance[0] = distance[1] = distance[2] = 1e80; - - // Calculate the particle speed - double particle_speed = mc_particle.Get_Velocity()->Length(); - - // Force collision if a census event narrowly preempts a collision - int force_collision = 0 ; - if ( mc_particle.num_mean_free_paths < 0.0 ) - { - force_collision = 1 ; - - if ( mc_particle.num_mean_free_paths > -900.0 ) - { -#if 1 - printf(" MC_Segment_Outcome: mc_particle.num_mean_free_paths > -900.0 \n"); - #else - std::string output_string; - MC_Warning( "Forced Collision: num_mean_free_paths < 0 \n" - "Particle record:\n%s", output_string.c_str()); -#endif - } - - mc_particle.num_mean_free_paths = PhysicalConstants::_smallDouble; - } - - // Randomly determine the distance to the next collision - // based upon the composition of the current cell. - double macroscopic_total_cross_section = weightedMacroscopicCrossSection(monteCarlo, 0, - mc_particle.domain, mc_particle.cell, mc_particle.energy_group); - - // Cache the cross section - mc_particle.totalCrossSection = macroscopic_total_cross_section; - if (macroscopic_total_cross_section == 0.0) - { - mc_particle.mean_free_path = PhysicalConstants::_hugeDouble; - } - else - { - mc_particle.mean_free_path = 1.0 / macroscopic_total_cross_section; - } - - if ( mc_particle.num_mean_free_paths == 0.0) - { - // Sample the number of mean-free-paths remaining before - // the next collision from an exponential distribution. 
- double random_number = rngSample(&mc_particle.random_number_seed); - - mc_particle.num_mean_free_paths = -1.0*log(random_number); - } - - // Calculate the distances to collision, nearest facet, and census. - - // Forced collisions do not need to move far. - if (force_collision) - { - distance[MC_Segment_Outcome_type::Collision] = PhysicalConstants::_smallDouble; - } - else - { - distance[MC_Segment_Outcome_type::Collision] = mc_particle.num_mean_free_paths*mc_particle.mean_free_path; - } - - // process census - distance[MC_Segment_Outcome_type::Census] = particle_speed*mc_particle.time_to_census; - - - // DEBUG Turn off threshold for now - double distance_threshold = 10.0 * PhysicalConstants::_hugeDouble; - // Get the current winning distance. - double current_best_distance = PhysicalConstants::_hugeDouble; - - DirectionCosine *direction_cosine = mc_particle.Get_Direction_Cosine(); - - bool new_segment = (mc_particle.num_segments == 0 || - mc_particle.last_event == MC_Tally_Event::Collision); - - MC_Location location(mc_particle.Get_Location()); - - // Calculate the minimum distance to each facet of the cell. - MC_Nearest_Facet nearest_facet; - nearest_facet = MCT_Nearest_Facet(&mc_particle, location, mc_particle.coordinate, - direction_cosine, distance_threshold, current_best_distance, new_segment, monteCarlo); - - mc_particle.normal_dot = nearest_facet.dot_product; - - distance[MC_Segment_Outcome_type::Facet_Crossing] = nearest_facet.distance_to_facet; - - - // Get out of here if the tracker failed to bound this particle's volume. - if (mc_particle.last_event == MC_Tally_Event::Facet_Crossing_Tracking_Error) - { - return MC_Segment_Outcome_type::Facet_Crossing; - } - - // Calculate the minimum distance to the selected events. - - // Force a collision (if required). 
- if ( force_collision == 1 ) - { - distance[MC_Segment_Outcome_type::Facet_Crossing] = PhysicalConstants::_hugeDouble; - distance[MC_Segment_Outcome_type::Census] = PhysicalConstants::_hugeDouble; - distance[MC_Segment_Outcome_type::Collision] = PhysicalConstants::_tinyDouble ; - } - - // we choose our segment outcome here - MC_Segment_Outcome_type::Enum segment_outcome = - (MC_Segment_Outcome_type::Enum) MC_Find_Min(distance, number_of_events); - - if (distance[segment_outcome] < 0) - { - MC_Fatal_Jump( "Negative distances to events are NOT permitted!\n" - "identifier = %" PRIu64 "\n" - "(Collision = %g,\n" - " Facet Crossing = %g,\n" - " Census = %g,\n", - mc_particle.identifier, - distance[MC_Segment_Outcome_type::Collision], - distance[MC_Segment_Outcome_type::Facet_Crossing], - distance[MC_Segment_Outcome_type::Census]); - } - mc_particle.segment_path_length = distance[segment_outcome]; - - mc_particle.num_mean_free_paths -= mc_particle.segment_path_length / mc_particle.mean_free_path; - - // Before using segment_outcome as an index, verify it is valid - if (segment_outcome < 0 || segment_outcome >= MC_Segment_Outcome_type::Max_Number) - { - MC_Fatal_Jump( "segment_outcome '%d' is invalid\n", (int)segment_outcome ); - } - - MC_Tally_Event::Enum SegmentOutcome_to_LastEvent[MC_Segment_Outcome_type::Max_Number] = - { - MC_Tally_Event::Collision, - MC_Tally_Event::Facet_Crossing_Transit_Exit, - MC_Tally_Event::Census, - }; - - mc_particle.last_event = SegmentOutcome_to_LastEvent[segment_outcome]; - - // Set the segment path length to be the minimum of - // (i) the distance to collision in the cell, or - // (ii) the minimum distance to a facet of the cell, or - // (iii) the distance to census at the end of the time step - if (segment_outcome == MC_Segment_Outcome_type::Collision) - { - mc_particle.num_mean_free_paths = 0.0; - } - else if (segment_outcome == MC_Segment_Outcome_type::Facet_Crossing) - { - mc_particle.facet = nearest_facet.facet; - } - else if 
(segment_outcome == MC_Segment_Outcome_type::Census) - { - mc_particle.time_to_census = MC_MIN(mc_particle.time_to_census, 0.0); - } - - // If collision was forced, set mc_particle.num_mean_free_paths = 0 - // so that a new value is randomly selected on next pass. - if (force_collision == 1) { mc_particle.num_mean_free_paths = 0.0; } - - // Do not perform any tallies if the segment path length is zero. - // This only introduces roundoff errors. - if (mc_particle.segment_path_length == 0.0) - { - return segment_outcome; - } - - // Move particle to end of segment, accounting for some physics processes along the segment. - - // Project the particle trajectory along the segment path length. - mc_particle.Move_Particle(mc_particle.direction_cosine, mc_particle.segment_path_length); - - double segment_path_time = (mc_particle.segment_path_length/particle_speed); - - // Decrement the time to census and increment age. - mc_particle.time_to_census -= segment_path_time; - mc_particle.age += segment_path_time; - - // Ensure mc_particle.time_to_census is non-negative. - if (mc_particle.time_to_census < 0.0) - { - mc_particle.time_to_census = 0.0; - } - - // Accumulate the particle's contribution to the scalar flux. 
- monteCarlo->_tallies->TallyScalarFlux(mc_particle.segment_path_length * mc_particle.weight, mc_particle.domain, - flux_tally_index, mc_particle.cell, mc_particle.energy_group); - - return segment_outcome; -} -HOST_DEVICE_END - - - - -HOST_DEVICE -static inline unsigned int MC_Find_Min(const double *array, - int num_elements) -{ - double min = array[0]; - int min_index = 0; - - for (int element_index = 1; element_index < num_elements; ++element_index) - { - if ( array[element_index] < min ) - { - min = array[element_index]; - min_index = element_index; - } - } - - return min_index; -} -HOST_DEVICE_END diff --git a/src/MC_SourceNow.cc b/src/MC_SourceNow.cc deleted file mode 100644 index 55336c97..00000000 --- a/src/MC_SourceNow.cc +++ /dev/null @@ -1,178 +0,0 @@ -#include "MC_SourceNow.hh" -#include "QS_Vector.hh" -#include -#include "utils.hh" -#include "utilsMpi.hh" -#include "MonteCarlo.hh" -#include "MaterialDatabase.hh" -#include "initMC.hh" -#include "Tallies.hh" -#include "ParticleVaultContainer.hh" -#include "ParticleVault.hh" -#include "MC_Processor_Info.hh" -#include "MC_Cell_State.hh" -#include "MC_Time_Info.hh" -#include "MCT.hh" -#include "PhysicalConstants.hh" -#include "macros.hh" -#include "QS_atomics.hh" -#include "NVTX_Range.hh" -#include - -namespace -{ - double Get_Speed_From_Energy(double energy); -} - - -void MC_SourceNow(MonteCarlo *monteCarlo) -{ - NVTX_Range range("MC_Source_Now"); - - std::vector source_rate(monteCarlo->_materialDatabase->_mat.size()); // Get this from user input - - for ( int material_index = 0; material_index < monteCarlo->_materialDatabase->_mat.size(); material_index++ ) - { - std::string name = monteCarlo->_materialDatabase->_mat[material_index]._name; - double sourceRate = monteCarlo->_params.materialParams[name].sourceRate; - source_rate[material_index] = sourceRate; - } - - double local_weight_particles = 0; - - for ( int domain_index = 0; domain_index < monteCarlo->domain.size(); domain_index++ ) - { - MC_Domain 
&domain = monteCarlo->domain[domain_index]; - - for ( int cell_index = 0; cell_index < domain.cell_state.size(); cell_index++ ) - { - MC_Cell_State &cell = domain.cell_state[cell_index]; - double cell_weight_particles = cell._volume * source_rate[cell._material] * monteCarlo->time_info->time_step; - local_weight_particles += cell_weight_particles; - } - } - - double total_weight_particles = 0; - - mpiAllreduce(&local_weight_particles, &total_weight_particles, 1, MPI_DOUBLE, MPI_SUM, monteCarlo->processor_info->comm_mc_world); - - uint64_t num_particles = monteCarlo->_params.simulationParams.nParticles; - double source_fraction = 0.1; - double source_particle_weight = total_weight_particles/(source_fraction * num_particles); - // Store the source particle weight for later use. - monteCarlo->source_particle_weight = source_particle_weight; - - uint64_t vault_size = monteCarlo->_particleVaultContainer->getVaultSize(); - uint64_t processing_index = monteCarlo->_particleVaultContainer->sizeProcessing() / vault_size; - - uint64_t task_index = 0; - uint64_t particle_count = 0; - - // Compute the partial sums on each mpi process. 
- // uint64_t local_num_particles = (int)(local_weight_particles / source_particle_weight); - - for ( int domain_index = 0; domain_index < monteCarlo->domain.size(); domain_index++ ) - { - MC_Domain &domain = monteCarlo->domain[domain_index]; - - for ( int cell_index = 0; cell_index < domain.cell_state.size(); cell_index++ ) - { - MC_Cell_State &cell = domain.cell_state[cell_index]; - double cell_weight_particles = cell._volume * source_rate[cell._material] * monteCarlo->time_info->time_step; - double cell_num_particles_float = cell_weight_particles / source_particle_weight; - int cell_num_particles = (int)cell_num_particles_float; - - //Can Make this parallel - have an optimization from Leopold to add still - for ( int particle_index = 0; particle_index < cell_num_particles; particle_index++ ) - { - MC_Particle particle; - - uint64_t random_number_seed; - - QS::atomicCaptureAdd( cell._sourceTally, UINT64_C(1), random_number_seed ); - - random_number_seed += cell._id; - - particle.random_number_seed = rngSpawn_Random_Number_Seed(&random_number_seed); - particle.identifier = random_number_seed; - - MCT_Generate_Coordinate_3D_G(&particle.random_number_seed, domain_index, cell_index, particle.coordinate, monteCarlo); - - particle.direction_cosine.Sample_Isotropic(&particle.random_number_seed); - - // sample energy uniformly from [eMin, eMax] MeV - particle.kinetic_energy = (monteCarlo->_params.simulationParams.eMax - monteCarlo->_params.simulationParams.eMin)* - rngSample(&particle.random_number_seed) + monteCarlo->_params.simulationParams.eMin; - - double speed = Get_Speed_From_Energy(particle.kinetic_energy); - - particle.velocity.x = speed * particle.direction_cosine.alpha; - particle.velocity.y = speed * particle.direction_cosine.beta; - particle.velocity.z = speed * particle.direction_cosine.gamma; - - particle.domain = domain_index; - particle.cell = cell_index; - particle.task = task_index; - particle.weight = source_particle_weight; - - double randomNumber = 
rngSample(&particle.random_number_seed); - particle.num_mean_free_paths = -1.0*log(randomNumber); - - randomNumber = rngSample(&particle.random_number_seed); - particle.time_to_census = monteCarlo->time_info->time_step * randomNumber; - - MC_Base_Particle base_particle( particle ); - - monteCarlo->_particleVaultContainer->addProcessingParticle( base_particle, processing_index ); - - particle_count++; - - QS::atomicIncrement( monteCarlo->_tallies->_balanceTask[particle.task]._source); - } - } - } - -#if 0 - // Check for duplicate particle random number seeds. - std::vector particle_seeds; - int task_index = 0; - //for ( int task_index = 0; task_index < num_threads; task_index++ ) - { - ParticleVault& particleVault = monteCarlo->_particleVaultContainer->getTaskProcessingVault(task_index); - - uint64_t currentNumParticles = particleVault.size(); - for (int particleIndex = 0; particleIndex < currentNumParticles; particleIndex++) - { - MC_Base_Particle ¤tParticle = particleVault[particleIndex]; - particle_seeds.push_back(currentParticle.random_number_seed); - } - } - - std::sort(particle_seeds.begin(), particle_seeds.end()); - uint64_t num_dupl = 0; - for (size_t pi_index = 0; pi_indexdomain[domainIndex].cell_state[cellIndex]._material; - - double atomFraction = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._atomFraction; - - double microscopicCrossSection = 0.0; - // The cell number density is the fraction of the atoms in cell - // volume of this isotope. We set this (elsewhere) to 1/nIsotopes. 
- // This is a statement that we treat materials as if all of their - // isotopes are present in equal amounts - double cellNumberDensity = monteCarlo->domain[domainIndex].cell_state[cellIndex]._cellNumberDensity; - - int isotopeGid = monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso[isoIndex]._gid; - if ( atomFraction == 0.0 || cellNumberDensity == 0.0) { return 1e-20; } - - if (reactionIndex < 0) - { - // Return total cross section - microscopicCrossSection = monteCarlo->_nuclearData->getTotalCrossSection(isotopeGid, energyGroup); - } - else - { - // Return the reaction cross section - microscopicCrossSection = monteCarlo->_nuclearData->getReactionCrossSection((unsigned int)reactionIndex, - isotopeGid, energyGroup); - } - - return atomFraction * cellNumberDensity * microscopicCrossSection; - -} -HOST_DEVICE_END - - -//---------------------------------------------------------------------------------------------------------------------- -// Routine weightedMacroscopicCrossSection calculates the number-density-weighted -// macroscopic cross section of the collection of isotopes in a cell. -//dfr Weighted is a bit of a misnomer here, since there is no weighting -//applied by this routine. In Mercury we would weight for multiple -//materials in a cell. 
-//---------------------------------------------------------------------------------------------------------------------- -HOST_DEVICE -double weightedMacroscopicCrossSection(MonteCarlo* monteCarlo, int taskIndex, int domainIndex, - int cellIndex, int energyGroup) -{ - double* precomputedCrossSection = - &monteCarlo->domain[domainIndex].cell_state[cellIndex]._total[energyGroup]; - qs_assert (precomputedCrossSection != NULL); - if (*precomputedCrossSection > 0.0) - return *precomputedCrossSection; - - int globalMatIndex = monteCarlo->domain[domainIndex].cell_state[cellIndex]._material; - int nIsotopes = (int)monteCarlo->_materialDatabase->_mat[globalMatIndex]._iso.size(); - double sum = 0.0; - for (int isoIndex = 0; isoIndex < nIsotopes; isoIndex++) - { - sum += macroscopicCrossSection(monteCarlo, -1, domainIndex, cellIndex, - isoIndex, energyGroup); - } - - QS::atomicWrite( *precomputedCrossSection, sum ); - - return sum; -} -HOST_DEVICE_END diff --git a/src/Makefile b/src/Makefile deleted file mode 100644 index 5867c989..00000000 --- a/src/Makefile +++ /dev/null @@ -1,367 +0,0 @@ -# Makefile for Quicksilver - -# Quicksilver is a relatively easy to build code with no external -# dependencies (except MPI and OpenMP). You should be able to build -# Quicksilver on nearly any system by customizing the values of only -# four variables: -# -# CXX The name of the C++ compiler (with path if necessary) -# Quicksilver uses C++11 features, so a C++11 compliant -# compiler should be used. -# -# CXXFLAGS Command line switches to pass to the C++ compiler -# when compiling objects *and* when linking the executable. -# -# CPPFLAGS Command line switches to pass to the compiler *only* -# when compiling objects -# -# LDFLAGS Command line switches to pass to the compiler *only* -# when linking the executable -# -# Any other variable you may see in this Makefile (such as OPTFLAGS, -# OPENMP_FLAGS, CUDA_PATH, etc) are defined for convienience and clarity -# only. 
They do not appear in the build recipes. Only the four -# variables above are used in the build recipes. -# -# -# -# -# Quicksilver recognizes a number of pre-processor macros that -# enable or disable various code features such as MPI, OpenMP, etc. -# The following pre-processor DEFINES are recognized: -# -# -DHAVE_MPI Define HAVE_MPI to enable MPI feartures in -# Quicksilver. If this is not defined, the MPI -# functions will be replaced with stub implmentations -# and the code will run on a single "rank". -# -# -DHAVE_ASYNC_MPI Define this if your MPI has support for non-blocking -# collectives. (Quicksilver will use MPI_Iallreduce -# in the test-for-done algorithm.) -# -# -DHAVE_CUDA Define this to enable the Cuda build. -# -# -DHAVE_HIP Define this to enable the HIP build. Quicksilver assumes -# that HIP is targeting AMD GPUs. -# -# -DHAVE_OPENMP Use this define to generate a code which uses OpenMP -# threads. It will also be necessary to add the appropriate -# compiler flags to CXXFLAGS and LDFLAGS, which vary -# by compiler, such as '-qopenmp -pthread' for Intel, or -# '-fopenmp' for Gnu. Defining HAVE_OPENMP will use -# only features in OpenMP 3.x. -# -# -DHAVE_OPENMP_TARGET -# Use this define to generate OpenMP 4.5 code, -# targeting GPUs. When selecting this option you -# *MUST* specify either TARGET_NVIDIA or TARGET_AMD -# to determine which GPU architecture to use. -# -# -DTARGET_NVIDIA Use this define when building OpenMP target code for -# Nvidia GPUs -# -# -DTARGET_AMD Use this define when building OpenMP target code for -# AMD GPUs -# -# -DDISABLE_TIMERS Quicksilver uses built-in high resolution timers to -# track performance of important high level functions. -# To disable the internal timers (and the timing reports) -# define DISABLE_TIMERS. This can be useful when using a -# profiler or other external timing mechanism. -# -# -DCSTDINT_MISSING Define this if is not available. -# In this case the include file will be used -# as an alternative. 
This was found to be necessary with -# PGI and some Clang compilers. -# -# -DCHRONO_MISSING -# Define this if is not available. -# Normally we use the C++11 high resolution timers for -# internal timing. This requires the include of -# However, if this is not available, then one may specify -# this -D option, and the MPI high resolution timer will -# be used as an alternative. This was found to be necessary -# with some Clang compilers, some older Gnu compilers on BG/Q -# and older Intel compilers. -# -# -DUSE_NVTX Define this for some extra NVProf profiling information. -# It will create regions that can be visualized in NVVP. -# -# -DEXPONENTIAL_TALLY -# Define this to run Cycle Tracking with an exponential -# cell-based tally, in order to partially mimic photon -# transport problems. -# -# ------------------------------------------------------------------------------ - -SHELL = /bin/bash - -# Set your desired C++ compiler and any necessary flags. -# The interpretation of each of these four variables is described above. -# A number of examples for machines we regularly run on are given below. 
- -#AMD with openMP -#ROCM_ROOT = /opt/rocm-5.6.0 -#CXX = /usr/tce/packages/cray-mpich/cray-mpich-8.1.26-rocmcc-5.6.0-cce-16.0.0a-magic/bin/mpicxx -#CXXFLAGS = -O2 -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -fopenmp -#CPPFLAGS = -DHAVE_MPI -DHAVE_OPENMP -DHAVE_OPENMP_TARGET -DTARGET_AMD -I$(ROCM_ROOT)/include -Wno-unused-result -#LDFLAGS = -L$(ROCM_ROOT)/lib -lamdhip64 - -#AMD with HIP -ROCM_ROOT = /opt/rocm-5.6.0 -CXX = /usr/tce/packages/cray-mpich/cray-mpich-8.1.26-rocmcc-5.6.0-cce-16.0.0a-magic/bin/mpicxx -CXXFLAGS = -g -CPPFLAGS = -DHAVE_MPI -DHAVE_HIP -x hip --offload-arch=gfx90a -fgpu-rdc -Wno-unused-result -LDFLAGS = -fgpu-rdc --hip-link --offload-arch=gfx90a - - - -# CCE with OpenMP -#ROCM_ROOT = /opt/rocm-5.6.0 -#CXX=/usr/tce/packages/cray-mpich/cray-mpich-8.1.26-cce-16.0.0-magic/bin/mpicxx -#CXXFLAGS = -g -fopenmp-targets=amdgcn-amd-amdhsa -Xopenmp-target=amdgcn-amd-amdhsa -march=gfx90a -fopenmp -target-accel=amd_gfx90a -Wno-unused-result -#CPPFLAGS = -DHAVE_MPI -DHAVE_OPENMP -DHAVE_OPENMP_TARGET -DTARGET_AMD -I$(ROCM_ROOT)/include -#LDFLAGS = -L$(ROCM_ROOT)/lib -lamdhip64 - - - -############################################################################### -# Very simple GCC build with OpenMP but without MPI. 
-# This works on a Macbook (if gcc is installed) -############################################################################### -# -#OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp -#OPENMP_LDFLAGS = -fopenmp -#OPTFLAGS = -g -O2 -# -#CXX=g++ -#CXXFLAGS = -std=c++11 $(OPTFLAGS) -Wpedantic -#CPPFLAGS = $(OPENMP_FLAGS) -#LDFLAGS = $(OPENMP_LDFLAGS) - - -############################################################################### -### GCC -- with MPI and OpenMP -############################################################################### -#OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp -#OPENMP_LDFLAGS = -fopenmp -#MPI_FLAGS = -DHAVE_MPI -#OPTFLAGS = -g -O2 -# -#CXX=mpicxx -#CXXFLAGS = -std=c++11 $(OPTFLAGS) -Wpedantic -#CPPFLAGS = $(MPI_FLAGS) $(OPENMP_FLAGS) -#LDFLAGS = $(OPENMP_LDFLAGS) - - -############################################################################### -# LLNL LC BG/Q Comilers # -############################################################################### -### BGQ GNU -#OPTFLAGS = -g -O2 -## -#CXX=/usr/local/tools/compilers/ibm/mpicxx-4.8.4 -#CXXFLAGS = -std=c++11 $(OPTFLAGS) -#CPPFLAGS = -DCHRONO_MISSING -DHAVE_MPI -DHAVE_OPENMP -fopenmp -#LDFLAGS = -fopenmp - - -############################################################################### -# OpenMP 4.5 on LLNL CORAL EA nodes -############################################################################### -## Choose one Cuda path -##CUDA_PATH = /usr/local/cuda-8.0 -#CUDA_PATH = /usr/tcetmp/packages/cuda-9.0.176 - -## Choose one of these compilers -#CXX=mpiclang++-gpu -#CXX=mpixlC-gpu - -#OPENMP_OFFLOAD_FLAGS += -DHAVE_OPENMP_TARGET -mcpu=power8 -#OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp ${OPENMP_OFFLOAD_FLAGS} -#OPTFLAGS = -O2 - -#CUDA_FLAGS = -I${CUDA_PATH}/include/ -#CUDA_LDFLAGS = -L${CUDA_PATH}/lib64/ -lcuda -lcudart - -#CXXFLAGS = -std=c++11 $(OPTFLAGS) -#CPPFLAGS = -DHAVE_MPI $(OPENMP_FLAGS) $(CUDA_FLAGS) -#LDFLAGS = $(CUDA_LDFLAGS) - - 
-############################################################################### -# Cuda on LLNL CORAL EA nodes -############################################################################### -## Choose one Cuda path -##CUDA_PATH = /usr/local/cuda-8.0 -#CUDA_PATH = /usr/tcetmp/packages/cuda-9.0.176 - -#HOST_COMPILER = /usr/tce/packages/spectrum-mpi/spectrum-mpi-2017.04.03-xl-beta-2017.09.13/bin/mpixlC - -#OPTFLAGS = -O2 -## Version below for debugging -##OPTFLAGS = -DUSE_NVTX -g -G -lineinfo -O0 - -#CUDA_FLAGS = -I${CUDA_PATH}/include/ -#CUDA_LDFLAGS = -L${CUDA_PATH}/lib64/ -lcuda -lcudart -# -#CXX=$(CUDA_PATH)/bin/nvcc -#CXXFLAGS = -DHAVE_CUDA -std=c++11 $(OPTFLAGS) -Xptxas -v -#CXXFLAGS += -gencode=arch=compute_60,code=\"sm_60,compute_60\" -#CXXFLAGS += --compiler-bindir=$(HOST_COMPILER) -#CPPFLAGS = -x cu -dc -DHAVE_MPI -DHAVE_ASYNC_MPI -#LDFLAGS = $(CUDA_LDFLAGS) -##LDFLAGS += ${CUDA_PATH}/lib64/libnvToolsExt.so - - - - -############################################################################### -# LLNL TOSS GCC + OpenMP (mvapich 2 - version 1.7) [cab] -############################################################################### -#OPTFLAGS = -g -O2 -#OPENMP_FLAGS = -DHAVE_OPENMP -fopenmp -#OPENMP_LDFLAGS = -fopenmp -# -#CXX = /usr/apps/gnu/4.9.3/bin/mpig++ -#CXXFLAGS = -std=c++0x $(OPTFLAGS) -mpi=mvapich2-gnu-1.7 -#CPPFLAGS = -DHAVE_MPI $(OPENMP_FLAGS) -#LDFLAGS = $(OPENMP_LDFLAGS) - -############################################################################### -# LLNL TOSS Intel + OpenMP (mvapich 2 - version 2.1) [quartz] -############################################################################### -#OPENMP_FLAGS = -DHAVE_OPENMP -qopenmp -#OPENMP_LDFLAGS = -qopenmp -#OPTFLAGS = -g -O2 -# -#CXX=/usr/local/bin/mpiicpc-17.0.174 -#CXXFLAGS = -std=c++11 -mpi=mvapich2-intel-2.1 -DHAVE_MPI $(OPENMP_FLAGS) -#CXXFLAGS += -wd1128 -wd64 -wd21 -#LDFLAGS = $(OPENMP_LDFLAGS) - - -############################################################################### -# 
LLNL TOSS Clang (cab) -############################################################################### -#CLANGPATH = /usr/global/tools/clang/chaos_5_x86_64_ib/clang-omp-3.5.0 -#OPTFLAGS = -g -O2 -# -#CXX=${CLANGPATH}/bin/mpiclang++ -#CXXFLAGS = -std=c++11 $(OPTFLAGS) -#CPPFLAGS = -DHAVE_MPI -#LDFLAGS = -Wl,-rpath,${CLANGPATH}/lib - - -############################################################################### -# Trinity Compilers # -# # -# One must 'swap' modules on this machine to access different compilers. # -############################################################################### - -### Defaults to Intel. -#OPTFLAGS = -g -O2 -xmic-avx512 -ipo -#OPENMP_FLAGS = -DHAVE_OPENMP -qopenmp -pthread -#OPENMP_LDFLAGS = -qopenmp -pthread -# -#CXX=CC -#CXXFLAGS = -std=c++11 $(OPTFLAGS) -#CPPFLAGS = -DHAVE_MPI -DCHRONO_MISSING $(OPENMP_FLAGS) -#LDFLAGS = $(OPENMP_LDFLAGS) - - - -################################################################################ -### Below here, it is pitch black. ### -### You are likely to be eaten by a grue. ### -################################################################################ - -#GITVERS := -D'GIT_VERS="$(shell git log -n 1 | grep Date | awk -F " " '{print $$6 "-" $$3 "-" $$4 "-" $$5}')"' -#GITHASH := -D'GIT_HASH="$(shell git log -n 1 | grep commit | awk -F " " '{print $$2}')"' -GITVERS := "$(shell git log -n 1 | grep Date | awk -F " " '{print $$6 "-" $$3 "-" $$4 "-" $$5}')" -GITHASH := "$(shell git log -n 1 | grep commit | awk -F " " '{print $$2}')" - -Quicksilver_EXE=qs - -# clear all suffixes -.SUFFIXES: -# list only those that we use -.SUFFIXES: .cc .o - -.PHONY: DEFAULT clean distclean depend - -# For development purposes, what is working now. 
-SOURCES= \ - CollisionEvent.cc \ - CoralBenchmark.cc \ - CycleTracking.cc \ - DecompositionObject.cc \ - DirectionCosine.cc \ - EnergySpectrum.cc \ - GlobalFccGrid.cc \ - GridAssignmentObject.cc \ - InputBlock.cc \ - MCT.cc \ - MC_Adjacent_Facet.cc \ - MC_Base_Particle.cc \ - MC_Domain.cc \ - MC_Facet_Crossing_Event.cc \ - MC_Fast_Timer.cc \ - MC_Load_Particle.cc \ - MC_Location.cc \ - MC_Particle_Buffer.cc \ - MC_RNG_State.cc \ - MC_Segment_Outcome.cc \ - MC_SourceNow.cc \ - MacroscopicCrossSection.cc \ - MeshPartition.cc \ - MonteCarlo.cc \ - MpiCommObject.cc \ - NuclearData.cc \ - Parameters.cc \ - ParticleVault.cc \ - ParticleVaultContainer.cc \ - PopulationControl.cc \ - SendQueue.cc \ - SharedMemoryCommObject.cc \ - Tallies.cc \ - cmdLineParser.cc \ - cudaFunctions.cc \ - initMC.cc \ - main.cc \ - parseUtils.cc \ - utils.cc \ - utilsMpi.cc - -CC_OBJECTS=$(SOURCES:.cc=.o) - -DEFAULT: ${Quicksilver_EXE} - -git_hash.hh: - echo "#define GIT_HASH \"$(GITHASH)\" "> git_hash.hh - -git_vers.hh: - echo "#define GIT_VERS \"$(GITVERS)\" "> git_vers.hh - -%.o: %.cc - ${CXX} ${CPPFLAGS} ${CXXFLAGS} -c $< -o $@ - -${Quicksilver_EXE}: git_hash.hh git_vers.hh ${CC_OBJECTS} - ${CXX} ${CXXFLAGS} ${LDFLAGS} -o ${Quicksilver_EXE} ${CC_OBJECTS} - -clean: - rm -f *.o git_hash.hh git_vers.hh .depend load.map *.core *.optrpt - -distclean: clean - rm -f ${Quicksilver_EXE} .depend.bak - rm -rf html latex vtune* - -.depend: $(SOURCES) - @touch .depend - @$(MAKE) --no-print-directory depend - -depend: - @echo "Rebuilding dependencies..." - @makedepend -f .depend -Y. 
--$(CXXFLAGS) $(CPPFLAGS)-- $(SOURCES) 2> /dev/null - --include .depend diff --git a/src/MonteCarlo.cc b/src/MonteCarlo.cc deleted file mode 100644 index 915955b4..00000000 --- a/src/MonteCarlo.cc +++ /dev/null @@ -1,155 +0,0 @@ -#include "MonteCarlo.hh" -#include "NuclearData.hh" -#include "MaterialDatabase.hh" -#include "ParticleVaultContainer.hh" -#include "MC_RNG_State.hh" -#include "Tallies.hh" -#include "MC_Processor_Info.hh" -#include "MC_Time_Info.hh" -#include "MC_Particle_Buffer.hh" -#include "MC_Fast_Timer.hh" -#include - -#include "macros.hh" // current location of openMP wrappers. -#include "gpuPortability.hh" - -using std::ceil; - -//---------------------------------------------------------------------------------------------------------------------- -// Construct a MonteCarlo object. -//---------------------------------------------------------------------------------------------------------------------- -MonteCarlo::MonteCarlo(const Parameters& params) -: _params(params), - _nuclearData(NULL) -{ - _nuclearData = 0; - _materialDatabase = 0; - - #if defined (HAVE_UVM) - void *ptr1, *ptr2, *ptr3, *ptr4; - - gpuMallocManaged( &ptr1, sizeof(Tallies) ); - gpuMallocManaged( &ptr2, sizeof(MC_Processor_Info) ); - gpuMallocManaged( &ptr3, sizeof(MC_Time_Info) ); - gpuMallocManaged( &ptr4, sizeof(MC_Fast_Timer_Container) ); - - _tallies = new(ptr1) Tallies( params.simulationParams.balanceTallyReplications, - params.simulationParams.fluxTallyReplications, - params.simulationParams.cellTallyReplications, - params.simulationParams.energySpectrum, - params.simulationParams.nGroups); - processor_info = new(ptr2) MC_Processor_Info(); - time_info = new(ptr3) MC_Time_Info(); - fast_timer = new(ptr4) MC_Fast_Timer_Container(); - #else - _tallies = new Tallies( params.simulationParams.balanceTallyReplications, - params.simulationParams.fluxTallyReplications, - params.simulationParams.cellTallyReplications, - params.simulationParams.energySpectrum, - 
params.simulationParams.nGroups); - processor_info = new MC_Processor_Info(); - time_info = new MC_Time_Info(); - fast_timer = new MC_Fast_Timer_Container(); - #endif - - source_particle_weight = 0.0; - - size_t num_processors = processor_info->num_processors; - size_t num_particles = params.simulationParams.nParticles; - size_t batch_size = params.simulationParams.batchSize; - size_t num_batches = params.simulationParams.nBatches; - - size_t num_particles_on_process = num_particles / num_processors; - - if( num_particles_on_process <= 0 ) - { - MC_Fatal_Jump( "Not enough particles for each process ( Ranks: %d Num Particles: %d ) \n", num_processors, num_particles ); - num_particles_on_process = 1; - } - - if ( batch_size == 0 ) //batch size unset - use num_batches to get batch_size - { - batch_size = (num_particles_on_process / num_batches) + ((num_particles_on_process%num_batches == 0) ? 0 : 1) ; - } - else //batch size explicatly set - use to find num_batches - { - num_batches = num_particles_on_process / batch_size + (( num_particles_on_process%batch_size == 0 ) ? 0 : 1); - } - - size_t vector_size = 0; - - for (auto matIter = params.materialParams.begin(); - matIter != params.materialParams.end(); - matIter++) - { - const MaterialParameters& mp = matIter->second; - double nuBar = params.crossSectionParams.at(mp.fissionCrossSection).nuBar; - size_t nb = ceil( nuBar ); - size_t test_size = nb*( batch_size ); - - if ( test_size > vector_size ) - vector_size = test_size; - } - if ( vector_size == 0 ) - vector_size = 2*batch_size; - - int num_extra_vaults = ( vector_size / batch_size ) + 1; - //Previous definition was not enough extra space for some reason? 
need to determine why still - - #if defined(HAVE_UVM) - void *ptr5, *ptr6; - gpuMallocManaged( &ptr5, sizeof(MC_Particle_Buffer) ); - gpuMallocManaged( &ptr6, sizeof(ParticleVaultContainer) ); - particle_buffer = new(ptr5) MC_Particle_Buffer(this, batch_size); - _particleVaultContainer = new(ptr6) ParticleVaultContainer(batch_size, num_batches, num_extra_vaults); - #else - particle_buffer = new MC_Particle_Buffer(this, batch_size); - _particleVaultContainer = new ParticleVaultContainer(batch_size, num_batches, num_extra_vaults); - #endif - -} - -//---------------------------------------------------------------------------------------------------------------------- -// Destruct a MonteCarlo object. -//---------------------------------------------------------------------------------------------------------------------- -MonteCarlo::~MonteCarlo() -{ - #if defined (HAVE_UVM) - - _nuclearData->~NuclearData(); - _particleVaultContainer->~ParticleVaultContainer(); - _materialDatabase->~MaterialDatabase(); - _tallies->~Tallies(); - processor_info->~MC_Processor_Info(); - time_info->~MC_Time_Info(); - fast_timer->~MC_Fast_Timer_Container(); - particle_buffer->~MC_Particle_Buffer(); - - gpuFree( _nuclearData ); - gpuFree( _particleVaultContainer); - gpuFree( _materialDatabase); - gpuFree( _tallies); - gpuFree( processor_info); - gpuFree( time_info); - gpuFree( fast_timer); - gpuFree( particle_buffer); - - #else - delete _nuclearData; - delete _particleVaultContainer; - delete _materialDatabase; - delete _tallies; - delete processor_info; - delete time_info; - delete fast_timer; - delete particle_buffer; - #endif -} - -void MonteCarlo::clearCrossSectionCache() -{ - int numEnergyGroups = _nuclearData->_numEnergyGroups; - for (unsigned ii=0; ii -#include "MC_RNG_State.hh" -#include "DeclareMacro.hh" -#include "qs_assert.hh" - -using std::log10; -using std::pow; - -// Set the cross section values and reaction type -// Cross sections are scaled to produce the supplied 
reactionCrossSection at 1MeV. -NuclearDataReaction::NuclearDataReaction( - Enum reactionType, double nuBar, const qs_vector& energies, - const Polynomial& polynomial, double reactionCrossSection) -: _crossSection(energies.size()-1, 0., VAR_MEM), - _reactionType(reactionType), - _nuBar(nuBar) -{ - int nGroups = _crossSection.size(); - - for (int ii=0; ii= 1. ) //1 MeV - { - normalization = _crossSection[ii]; - break; - } - qs_assert(normalization > 0.); - - // scale to specified reaction cross section - double scale = reactionCrossSection/normalization; - for (int ii=0; ii &energyOut, - qs_vector &angleOut, uint64_t* seed) -#endif - -HOST_DEVICE - -void NuclearDataReaction::sampleCollision( - double incidentEnergy, double material_mass, double* energyOut, - double* angleOut, int &nOut, uint64_t* seed, int max_production_size) -{ - double randomNumber; - switch(_reactionType) - { - case Scatter: - nOut = 1; - randomNumber = rngSample(seed); - energyOut[0] = incidentEnergy * (1.0 - (randomNumber*(1.0/material_mass))); - randomNumber = rngSample(seed) * 2.0 - 1.0; - angleOut[0] = randomNumber; - break; - case Absorption: - break; - case Fission: - { - int numParticleOut = (int)(_nuBar + rngSample(seed)); - qs_assert( numParticleOut <= max_production_size ); - nOut = numParticleOut; - for (int outIndex = 0; outIndex < numParticleOut; outIndex++) - { - randomNumber = rngSample(seed) / 2.0 + 0.5; - energyOut[outIndex] = (20 * randomNumber*randomNumber); - randomNumber = rngSample(seed) * 2.0 - 1.0; - angleOut[outIndex] = randomNumber; - } - } - break; - case Undefined: - printf("_reactionType invalid\n"); - qs_assert(false); - } -} - -HOST_DEVICE_END - -// Then call this for each reaction to set cross section values -void NuclearDataSpecies::addReaction( - NuclearDataReaction::Enum type, double nuBar, - qs_vector &energies, const Polynomial& polynomial, double reactionCrossSection) -{ - _reactions.Open(); - _reactions.push_back(NuclearDataReaction(type, nuBar, energies, 
polynomial, reactionCrossSection)); - _reactions.Close(); -} - - - -// Set up the energies boundaries of the neutron -NuclearData::NuclearData(int numGroups, double energyLow, double energyHigh) : _energies( numGroups+1,VAR_MEM) -{ - qs_assert (energyLow < energyHigh); - _numEnergyGroups = numGroups; - _energies[0] = energyLow; - _energies[numGroups] = energyHigh; - double logLow = log(energyLow); - double logHigh = log(energyHigh); - double delta = (logHigh - logLow) / (numGroups + 1.0); - for (int energyIndex = 1; energyIndex < numGroups; energyIndex++) - { - double logValue = logLow + delta *energyIndex; - _energies[energyIndex] = exp(logValue); - } -} - -int NuclearData::addIsotope( - int nReactions, - const Polynomial& fissionFunction, - const Polynomial& scatterFunction, - const Polynomial& absorptionFunction, - double nuBar, - double totalCrossSection, - double fissionWeight, double scatterWeight, double absorptionWeight) -{ - _isotopes.Open(); - _isotopes.push_back(NuclearDataIsotope()); - _isotopes.Close(); - - double totalWeight = fissionWeight + scatterWeight + absorptionWeight; - - int nFission = nReactions / 3; - int nScatter = nReactions / 3; - int nAbsorption = nReactions / 3; - switch (nReactions % 3) - { - case 0: - break; - case 1: - ++nScatter; - break; - case 2: - ++nScatter; - ++nFission; - break; - } - - double fissionCrossSection = (totalCrossSection * fissionWeight) / (nFission * totalWeight); - double scatterCrossSection = (totalCrossSection * scatterWeight) / (nScatter * totalWeight); - double absorptionCrossSection = (totalCrossSection * absorptionWeight) / (nAbsorption * totalWeight); - - _isotopes.back()._species[0]._reactions.reserve( nReactions, VAR_MEM); - - for (int ii=0; ii _energies[numEnergies-1]) return numEnergies-1; - - int high = numEnergies-1; - int low = 0; - - while( high != low+1 ) - { - int mid = (high+low)/2; - if( energy < _energies[mid] ) - high = mid; - else - low = mid; - } - - return low; -} -HOST_DEVICE_END - -// 
General routines to help access data lower down -// Return the total cross section for this energy group -HOST_DEVICE -double NuclearData::getTotalCrossSection(unsigned int isotopeIndex, unsigned int group) -{ - qs_assert(isotopeIndex < _isotopes.size()); - int numReacts = (int)_isotopes[isotopeIndex]._species[0]._reactions.size(); - double totalCrossSection = 0.0; - for (int reactIndex = 0; reactIndex < numReacts; reactIndex++) - { - totalCrossSection += _isotopes[isotopeIndex]._species[0]._reactions[reactIndex].getCrossSection(group); - } - return totalCrossSection; -} -HOST_DEVICE_END - -// Return the total cross section for this energy group -HOST_DEVICE -double NuclearData::getReactionCrossSection( - unsigned int reactIndex, unsigned int isotopeIndex, unsigned int group) -{ - qs_assert(isotopeIndex < _isotopes.size()); - qs_assert(reactIndex < _isotopes[isotopeIndex]._species[0]._reactions.size()); - return _isotopes[isotopeIndex]._species[0]._reactions[reactIndex].getCrossSection(group); -} -HOST_DEVICE_END - diff --git a/src/PhysicalConstants.cc b/src/PhysicalConstants.cc deleted file mode 100644 index 92c19ef1..00000000 --- a/src/PhysicalConstants.cc +++ /dev/null @@ -1,22 +0,0 @@ -#include "PhysicalConstants.hh" - - // The values of all physical constants are taken from: - // 2006 CODATA which is located on the web at - // http://physics.nist.gov/cuu/Constants/codata.pdf - - // The units of physical quantities used by the code are: - // Mass - gram (g) - // Length - centimeter (cm) - // Time - second (s) - // Energy - million electron-volts (MeV) : of a particle - // Energy - erg (g cm^2/s^2): in some background calculation - // Temperature - thousand electron-volts (keV) - -const double PhysicalConstants::_neutronRestMassEnergy = 9.395656981095e+2; /* MeV */ -const double PhysicalConstants::_pi = 3.1415926535897932; -const double PhysicalConstants::_speedOfLight = 2.99792458e+10; // cm / s - -// Constants used in math for computer science, roundoff, and 
other reasons -const double PhysicalConstants::_tinyDouble = 1.0e-13; -const double PhysicalConstants::_smallDouble = 1.0e-10; -const double PhysicalConstants::_hugeDouble = 1.0e+75; diff --git a/src/PopulationControl.cc b/src/PopulationControl.cc deleted file mode 100644 index a2e6da63..00000000 --- a/src/PopulationControl.cc +++ /dev/null @@ -1,171 +0,0 @@ -#include "PopulationControl.hh" -#include "MC_Processor_Info.hh" -#include "MonteCarlo.hh" -#include "Globals.hh" -#include "MC_Particle.hh" -#include "ParticleVaultContainer.hh" -#include "ParticleVault.hh" -#include "utilsMpi.hh" -#include "NVTX_Range.hh" -#include - -namespace -{ - void PopulationControlGuts(const double splitRRFactor, - uint64_t currentNumParticles, - ParticleVaultContainer* my_particle_vault, - Balance& taskBalance); -} - -void PopulationControl(MonteCarlo* monteCarlo, bool loadBalance) -{ - NVTX_Range range("PopulationControl"); - - uint64_t targetNumParticles = monteCarlo->_params.simulationParams.nParticles; - uint64_t globalNumParticles = 0; - uint64_t localNumParticles = monteCarlo->_particleVaultContainer->sizeProcessing(); - - if (loadBalance) - { - // If we are parallel, we will have one domain per mpi processs. The targetNumParticles is across - // all MPI processes, so we need to divide by the number or ranks to get the per-mpi-process number targetNumParticles - targetNumParticles = ceil((double)targetNumParticles / (double)mcco->processor_info->num_processors ); - - //NO LONGER SPLITING VAULTS BY THREADS -// // If we are threaded, targetNumParticles should be divided by the number of threads (tasks) to balance -// // the particles across the thread level vaults. 
-// targetNumParticles = ceil((double)targetNumParticles / (double)mcco->processor_info->num_tasks); - } - else - { - mpiAllreduce(&localNumParticles, &globalNumParticles, 1, MPI_UINT64_T, MPI_SUM, MPI_COMM_WORLD); - } - - Balance & taskBalance = monteCarlo->_tallies->_balanceTask[0]; - - double splitRRFactor = 1.0; - if (loadBalance) - { - int currentNumParticles = localNumParticles; - if (currentNumParticles != 0) - splitRRFactor = (double)targetNumParticles / (double)currentNumParticles; - else - splitRRFactor = 1.0; - } - else - { - splitRRFactor = (double)targetNumParticles / (double)globalNumParticles; - } - - if (splitRRFactor != 1.0) // no need to split if population is already correct. - PopulationControlGuts(splitRRFactor, localNumParticles, monteCarlo->_particleVaultContainer, taskBalance); - - monteCarlo->_particleVaultContainer->collapseProcessing(); - - return; -} - - -namespace -{ -void PopulationControlGuts(const double splitRRFactor, uint64_t currentNumParticles, ParticleVaultContainer* my_particle_vault, Balance& taskBalance) -{ - uint64_t vault_size = my_particle_vault->getVaultSize(); - uint64_t fill_vault_index = currentNumParticles / vault_size; - - // March backwards through the vault so killed particles doesn't mess up the indexing - for (int particleIndex = currentNumParticles-1; particleIndex >= 0; particleIndex--) - { - uint64_t vault_index = particleIndex / vault_size; - - ParticleVault& taskProcessingVault = *( my_particle_vault->getTaskProcessingVault(vault_index) ); - - uint64_t taskParticleIndex = particleIndex%vault_size; - - MC_Base_Particle ¤tParticle = taskProcessingVault[taskParticleIndex]; - double randomNumber = rngSample(¤tParticle.random_number_seed); - if (splitRRFactor < 1) - { - if (randomNumber > splitRRFactor) - { - // Kill - taskProcessingVault.eraseSwapParticle(taskParticleIndex); - taskBalance._rr++; - } - else - { - currentParticle.weight /= splitRRFactor; - } - } - else if (splitRRFactor > 1) - { - // Split - int 
splitFactor = (int)floor(splitRRFactor); - if (randomNumber > (splitRRFactor - splitFactor)) { splitFactor--; } - - currentParticle.weight /= splitRRFactor; - MC_Base_Particle splitParticle = currentParticle; - - for (int splitFactorIndex = 0; splitFactorIndex < splitFactor; splitFactorIndex++) - { - taskBalance._split++; - - splitParticle.random_number_seed = rngSpawn_Random_Number_Seed( - ¤tParticle.random_number_seed); - splitParticle.identifier = splitParticle.random_number_seed; - - my_particle_vault->addProcessingParticle( splitParticle, fill_vault_index ); - - } - } - } -} -} // anonymous namespace - - -// Roulette low-weight particles relative to the source particle weight. -void RouletteLowWeightParticles(MonteCarlo* monteCarlo) -{ - NVTX_Range range("RouletteLowWeightParticles"); - - const double lowWeightCutoff = monteCarlo->_params.simulationParams.lowWeightCutoff; - - if (lowWeightCutoff > 0.0) - { - - uint64_t currentNumParticles = monteCarlo->_particleVaultContainer->sizeProcessing(); - uint64_t vault_size = monteCarlo->_particleVaultContainer->getVaultSize(); - - Balance& taskBalance = monteCarlo->_tallies->_balanceTask[0]; - - // March backwards through the vault so killed particles don't mess up the indexing - const double source_particle_weight = monteCarlo->source_particle_weight; - const double weightCutoff = lowWeightCutoff*source_particle_weight; - - for ( int64_t particleIndex = currentNumParticles-1; particleIndex >= 0; particleIndex--) - { - uint64_t vault_index = particleIndex / vault_size; - - ParticleVault& taskProcessingVault = *(monteCarlo->_particleVaultContainer->getTaskProcessingVault(vault_index)); - uint64_t taskParticleIndex = particleIndex%vault_size; - MC_Base_Particle ¤tParticle = taskProcessingVault[taskParticleIndex]; - - if (currentParticle.weight <= weightCutoff) - { - double randomNumber = rngSample(¤tParticle.random_number_seed); - if (randomNumber <= lowWeightCutoff) - { - // The particle history continues with an 
increased weight. - currentParticle.weight /= lowWeightCutoff; - } - else - { - // Kill - taskProcessingVault.eraseSwapParticle(taskParticleIndex); - taskBalance._rr++; - } - } - } - monteCarlo->_particleVaultContainer->collapseProcessing(); - } -} diff --git a/src/QS_atomics.hh b/src/QS_atomics.hh deleted file mode 100644 index 5ca17f9c..00000000 --- a/src/QS_atomics.hh +++ /dev/null @@ -1,149 +0,0 @@ -#ifndef QS_ATOMICS_HH -#define QS_ATOMICS_HH - -#include "gpuPortability.hh" - -// Provides the following atomic functions: -// * QS::atomicWrite(a,b) a=b -// * QS::atomicAdd(a,b) a+=b -// * QS::atomicIncrement(a,b) a++ -// * QS::atomicCaptureAdd(a,b,c) c=a; a+=b -// These all function correctly on hip(AMD), cuda, openMP, and openMP offload. -// -// There is one significant complication that we need to worry about -// when trying to provide device native implementations of atomics on -// hip and cuda. Cuda doesn't allow function overloading based on -// __host__ or __device__ attributes. If you have two functions with -// the same signature, one with __host__ (or undecorated, since -// functions are __host by default) and another with __device__, nvcc -// will produce an error that the function is multiply defined. The -// solution to this problem is to wrap the overloaded functions in a -// check for the __CUDA_ARCH__ macro, which is defined only when -// compiling for the device. See -// https://forums.developer.nvidia.com/t/overloading-host-and-device-function/29601 -// -// On the other hand, hip seems to have no such problem managing -// functions that are overloaded on __host__ or __device__ attributes. -// Hence, we don't have to worry about checking for the device pass on -// a hip build. - - - - -// First, we need to provide some "built-in" atomic signatures that -// the CUDA API doesn't provide. These should only be available in -// the device pass of a CUDA build. HIP provides these signatures. 
-#if defined HAVE_CUDA && defined __CUDA_ARCH__ - -// atomicAdd for uint64_t: -// It is common that unsigned long and unsigned long long are both -// 64-bit integers. In such cases, uint64_t may be defined as -// unsigned long. Unfortunately, nvidia doesn't supply a version of -// atomicAdd that takes unsigned long arguments. As long as unsigned -// long and unsigned long long are the same size, we can get away with -// this kind of nonsense. -static inline __device__ uint64_t atomicAdd(uint64_t* address, uint64_t val) -{ - static_assert(sizeof(uint64_t) == sizeof(unsigned long long), - "type size mismatch"); - return ::atomicAdd(reinterpret_cast(address), val); -} - -// atomicExch for double: -// nvidia doesn't supply a version of atomicExch that takes doubles. -// So, we will roll our own with this somewhat evil hack. -static inline __device__ double atomicExch(double* address, double val) -{ - static_assert(sizeof(double) == sizeof(unsigned long long), - "type size mismatch"); - return __longlong_as_double - ( - ::atomicExch(reinterpret_cast(address), - __double_as_longlong(val)) - ); -} - -#endif //#if defined HAVE_CUDA && defined __CUDA_ARCH__ - - -namespace QS -{ - // First, the versions defined in terms of the native atomic - // functions provided by CUDA and HIP. 
- - // These get built when building for HIP (which QS assumes means AMD), - // or the device pass of a CUDA build - #if defined HAVE_HIP || (defined HAVE_CUDA && defined __CUDA_ARCH__) - - template static inline __device__ - void atomicWrite(T& aa, T bb) - { - atomicExch(&aa, bb); - } - - template static inline __device__ - void atomicAdd(T& aa, T bb) - { - ::atomicAdd(&aa, bb); - } - - template static inline __device__ - void atomicIncrement(T& aa) - { - ::atomicAdd(&aa, 1); - } - - template static inline __device__ - void atomicCaptureAdd(T& aa, T bb, T& cc) - { - cc = ::atomicAdd(&aa, bb); - } - - #endif // #if defined HAVE_HIP || (defined HAVE_CUDA && defined __CUDA_ARCH__) - - - // Now the version defined in terms of omp atomic directives. Note - // that these apply to both CPU and GPU (i.e., target) code. These - // also supply implementations for CPU builds without openMP. - // Obviously, these functions aren't actually atomic without - // openMP. That's OK since without openMP quicksilver can't need - // atomics on the CPU since it has no way run multiple threads in - // the same address space. - - // These get build for everything *except* the device pass of a CUDA - // build. - #if ! (defined HAVE_CUDA && defined __CUDA_ARCH__) - - template static inline - void atomicWrite(T& aa, T bb) - { - #pragma omp atomic write - aa = bb; - } - - template static inline - void atomicAdd(T& aa, T bb) - { - #pragma omp atomic - aa += bb; - } - - template static inline - void atomicIncrement(T& aa) - { - #pragma omp atomic update - aa++; - } - - template static inline - void atomicCaptureAdd(T& aa, T bb, T& cc) - { - #pragma omp atomic capture - {cc = aa; aa += bb;} - } - - #endif // #if ! 
(defined HAVE_CUDA && defined __CUDA_ARCH__) - -} // namespace QS - -#endif // #ifndef QS_ATOMICS_HH diff --git a/src/READ.ME.HOW.TO.RUN b/src/READ.ME.HOW.TO.RUN deleted file mode 100644 index 24e2a8e8..00000000 --- a/src/READ.ME.HOW.TO.RUN +++ /dev/null @@ -1,135 +0,0 @@ - -Last Updated 2016-06-07 - -Initial Release Instructions for running. - -------------------------------------------------------------------------------- -Running The Default Problem - -There is a default problem built into the executable, which will run if -no input is specified. This may be run sequentially, with MPI, with threads, -or hybrid MPI + Threads. - -One can adjust the default problem by specifying command line arguments. To -see a list of arguments, run qs --help, which will give a short one line -description for each argument. More complete documentation will be provided -in a later release of this application. - -As an example, one may increase the number of particles, and hence the -amount of work to be done, by specifying the --nParticles option. The -following is an example of an MPI run with 4 mpi processes and 1,000,000 -particles. - - srun -n4 ./qs --nParticles=1000000 - - -------------------------------------------------------------------------------- -Running From An Input Deck: - -The Quicksilver development team is generating standard test cases as input -files. These reside in the 'Input' subdirectory. - -A useful one to start with is homogeneousProblem_v3.inp - -While the default test case has a particle source in a specific material and -location with the mesth, this homogeneous test problem is simplified and has -sourcing throughout one material across the entire mesh. This helps to -create a problem that is more load balanced from the start of the run. - -Both of these problems are of interest (and others), and suit different -studies. - -Note, the specific homogeneousProblem_v3.inp test problem is designed to -use MPI_THREAD_MULTIPLE, and requires this support. 
If the MPI library -does not support this, it may be disabled by setting the mpiThreadMultiple -option within the deck to 0.[ mpiThreadMultiple has ben disable in QS ] - -Also, that particular test problem requires the user to specify the -number of I, J, and K ranks such that their product equals the number -of MPI processes requested: Thus a 4 mpi process run of this input deck -would look like: - - srun -n4 ./qs -i Input/homogeneousProblem_v3.inp -I 2 -J 2 -K 1 - -And an 8 MPI process run could look like: - - srun -n8 ./qs -i Input/homogeneousProblem_v3.inp -I 2 -J 2 -K 2 - - -------------------------------------------------------------------------------- -A note on running with threads: - -If the code is compiled with threads, the code will run with threads in -addition to MPI. The OMP_NUM_THREADS env var will be used to set the -number of threads. If one wishes to run without threads one may either -compile the code without threads ore set OMP_NUM_THREADS to 1. - -------------------------------------------------------------------------------- -A note on the output generated: - -The Preamble: - -At the start of the run, the code generates a preamble of how it is running. -It looks simlar to - - MPI Initialized : MPI_THREAD_MULTIPLE - Copyright (c) 2016 - Lawrence Livermore National Security, LLC - All Rights Reserved - Quicksilver Version : 2016-Jun-3-14:30:10 - Quicksilver Git Hash : 6e6c03436f491c760d173c5c5fda681589f22ec4 - MPI Version : 3.0 - Number of MPI ranks : 8 - Number of OpenMP Threads: 4 - Number of OpenMP CPUs : 8 - -This is useful information, and includes information abot the MPI Thread -run mode, the number of MPI processes, the number of threads per MPI process, -the number of available 'cores' OpenMP believes are available, and -the specific Quicksilver version that is running. - -The Problem Definition: - -After the preamble, the code outputs the problem definition. 
This is -quite useful, as a feature of Quicksilver, is that this section may -be placed into a file and then used as input on subsequent runs. This -is how we created the Input test decks discussed above. - -The simulation section is composed of the following sections which may -be placed into a file: Simulation, Geometry, Material, CrossSection. -There may be more than one of each section, if for instance there are -multiple materials or geometries in the problem. - - -The Run Output - -As the code runs, it prints output each run step, these are various tallies, -such as start number of particles, absorbed number of particles, and more. -It also includes time spent in the three main phases of each run step: cycle -initialize, cycle tracking, and cycle finalize. - -It can be useful to plot these values using a spread sheet to compare various -run modes for correctness and for relative performance. - -There is also, at the end of the run, a coarse breakdown of time spent overall -in the above mentioned three code phases, as well as a few other sub timings -from cycle tracking. - -------------------------------------------------------------------------------- -A note on asserts: - -Asserts are used to stop the code when it is run incorrectly, the input is -invalid, or an error is detected. Read the assert message to see if it may -be remedied quickly. As a for instance, if the above mentioned test problem -homogeneousProblem_v3.inp is run with a code that was not compiled with -OpenMP thread, - - qs: utilsMpi.cc:35: void mpiInit(int*, char***, Parameters*): - Assertion `false' failed. - User requested mpiThreadMultiple support in a non-threaded code build - -This is because that specific test problem attempts to initialize the -MPI library with support for MPI_THREAD_MULTIPLE, which requires a -threaded executable, and an MPI library which supports this feature. 
- --------------------------------------------------------------------------------- diff --git a/src/SendQueue.cc b/src/SendQueue.cc deleted file mode 100644 index cbbc2a81..00000000 --- a/src/SendQueue.cc +++ /dev/null @@ -1,59 +0,0 @@ -#include "SendQueue.hh" -#include "QS_Vector.hh" - -SendQueue::SendQueue() -{} - -SendQueue::SendQueue( size_t size ) -: _data( size, VAR_MEM ) -{} - - -// ----------------------------------------------------------------------- -size_t SendQueue:: -size() -{ - return _data.size(); -} - -// ----------------------------------------------------------------------- -size_t SendQueue:: -neighbor_size( int neighbor_ ) -{ - size_t sum_n=0; - for( size_t i = 0; i < _data.size(); i++ ) - { - if( neighbor_ == _data[i]._neighbor ) - sum_n++; - } - return sum_n; -} - -// ----------------------------------------------------------------------- -HOST_DEVICE -void SendQueue:: -push( int neighbor_, int vault_index_ ) -{ - size_t indx = _data.atomic_Index_Inc(1); - - _data[indx]._neighbor = neighbor_; - _data[indx]._particleIndex = vault_index_; -} -HOST_DEVICE_END - -// ----------------------------------------------------------------------- -void SendQueue:: -clear() -{ - _data.clear(); -} - -// ----------------------------------------------------------------------- -sendQueueTuple& SendQueue:: -getTuple( int index_ ) -{ - qs_assert( index_ >= 0 ); - qs_assert( index_ < _data.size() ); - return _data[index_]; -} - diff --git a/src/cudaFunctions.cc b/src/cudaFunctions.cc deleted file mode 100644 index fcffaee6..00000000 --- a/src/cudaFunctions.cc +++ /dev/null @@ -1,86 +0,0 @@ -#include "gpuPortability.hh" -#include "cudaFunctions.hh" -#include "cudaUtils.hh" -#include - -namespace -{ -#if defined GPU_NATIVE - __global__ void trivialKernel() - { - int global_index = getGlobalThreadID(); - if( global_index == 0) - { - } - } -#endif -} - -#if defined GPU_NATIVE -void warmup_kernel() -{ - trivialKernel<<<1, 1>>>(); - gpuDeviceSynchronize(); -} -#endif - 
-#if defined GPU_NATIVE -int ThreadBlockLayout( dim3 &grid, dim3 &block, int num_particles ) -{ - int run_kernel = 1; - const uint64_t max_block_size = 65535; - const uint64_t threads_per_block = 128; - - block.x = threads_per_block; - block.y = 1; - block.z = 1; - - uint64_t num_blocks = num_particles / threads_per_block + ( ( num_particles%threads_per_block == 0 ) ? 0 : 1 ); - - if( num_blocks == 0 ) - { - run_kernel = 0; - } - else if( num_blocks <= max_block_size ) - { - grid.x = num_blocks; - grid.y = 1; - grid.z = 1; - } - else if( num_blocks <= max_block_size*max_block_size ) - { - grid.x = max_block_size; - grid.y = 1 + (num_blocks / max_block_size ); - grid.z = 1; - } - else if( num_blocks <= max_block_size*max_block_size*max_block_size ) - { - grid.x = max_block_size; - grid.y = max_block_size; - grid.z = 1 + (num_blocks / (max_block_size*max_block_size)); - } - else - { - printf("Error: num_blocks exceeds maximum block specifications. Cannot handle this case yet\n"); - run_kernel = 0; - } - - return run_kernel; -} -#endif - -#if defined GPU_NATIVE -DEVICE -int getGlobalThreadID() -{ - int blockID = blockIdx.x + - blockIdx.y * gridDim.x + - blockIdx.z * gridDim.x * gridDim.y; - - int threadID = blockID * (blockDim.x * blockDim.y * blockDim.z) + - threadIdx.z * ( blockDim.x * blockDim.y ) + - threadIdx.y * blockDim.x + - threadIdx.x; - return threadID; -} -#endif diff --git a/src/gpuPortability.hh b/src/gpuPortability.hh deleted file mode 100644 index 58a6ae8e..00000000 --- a/src/gpuPortability.hh +++ /dev/null @@ -1,52 +0,0 @@ -#ifndef GPUPORTABILITY_HH -#define GPUPORTABILITY_HH - -#if defined __CUDACC__ || defined TARGET_NVIDIA - #define __DO_CUDA - #define __PREFIX cuda - #define HAVE_UVM - #include - #include - #include -#elif defined __HIPCC__ || defined TARGET_AMD - #define __DO_HIP - #define __PREFIX hip - #define HAVE_UVM - #define __HIP_PLATFORM_AMD__ - #include -#else - #define __PREFIX invalid -#endif - -#if defined HAVE_CUDA || defined 
HAVE_HIP - #define GPU_NATIVE -#endif - - -#ifdef __DO_CUDA -#endif - -#ifdef __DO_HIP -#endif - -#if defined HAVE_UVM - #define VAR_MEM MemoryControl::AllocationPolicy::UVM_MEM -#else - #define VAR_MEM MemoryControl::AllocationPolicy::HOST_MEM -#endif - -#define CONCAT_(A, B) A ## B -#define CONCAT(A1, B1) CONCAT_(A1, B1) - -#define gpuMallocManaged CONCAT(__PREFIX, MallocManaged) -#define gpuFree CONCAT(__PREFIX, Free) -#define gpuDeviceSynchronize CONCAT(__PREFIX, DeviceSynchronize) -#define gpuGetDeviceCount CONCAT(__PREFIX, GetDeviceCount) -#define gpuSetDevice CONCAT(__PREFIX, SetDevice) -#define gpuPeekAtLastError CONCAT(__PREFIX, PeekAtLastError) - - -#undef __DO_CUDA -#undef __DO_HIP - -#endif // #ifndef GPUPORTABILITY_HH diff --git a/src/initMC.cc b/src/initMC.cc deleted file mode 100644 index e0051e97..00000000 --- a/src/initMC.cc +++ /dev/null @@ -1,485 +0,0 @@ -#include "initMC.hh" -#include -#include -#include -#include -#include -#include "QS_Vector.hh" -#include "utilsMpi.hh" -#include "MonteCarlo.hh" -#include "MC_Processor_Info.hh" -#include "DecompositionObject.hh" -#include "GlobalFccGrid.hh" -#include "MeshPartition.hh" -#include "CommObject.hh" -#include "SharedMemoryCommObject.hh" -#include "MpiCommObject.hh" -#include "MC_Vector.hh" -#include "NuclearData.hh" -#include "MaterialDatabase.hh" -#include "MC_Time_Info.hh" -#include "Tallies.hh" -#include "MC_Base_Particle.hh" -#include "gpuPortability.hh" -#include "cudaUtils.hh" -#include "cudaFunctions.hh" - -using std::vector; -using std::string; -using std::set; -using std::cout; -using std::endl; -using std::map; -using std::make_pair; - -namespace -{ - void initGPUInfo(MonteCarlo* monteCarlo); - void initNuclearData(MonteCarlo* monteCarlo, const Parameters& params); - void initMesh(MonteCarlo* monteCarlo, const Parameters& params); - void initTallies(MonteCarlo* monteCarlo, const Parameters& params); - void initTimeInfo(MonteCarlo* monteCarlo, const Parameters& params); - void 
initializeCentersRandomly(int nCenters, - const GlobalFccGrid& grid, - vector& centers); - void initializeCentersGrid(double lx, double ly, double lz, - int xDom, int yDom, int zDom, - vector& centers); - void consistencyCheck(int myRank, const qs_vector& domain); - void checkCrossSections(MonteCarlo* monteCarlo, const Parameters& params); - -} - -MonteCarlo* initMC(const Parameters& params) -{ - MonteCarlo* monteCarlo; - #ifdef HAVE_UVM - void* ptr; - gpuMallocManaged( &ptr, sizeof(MonteCarlo) ); - monteCarlo = new(ptr) MonteCarlo(params); - #else - monteCarlo = new MonteCarlo(params); - #endif - initGPUInfo(monteCarlo); - initTimeInfo(monteCarlo, params); - initNuclearData(monteCarlo, params); - initMesh(monteCarlo, params); - initTallies(monteCarlo, params); - - MC_Base_Particle::Update_Counts(); - - // used when debugging cross sections - checkCrossSections(monteCarlo, params); - return monteCarlo; -} - -namespace -{ -//Init GPU usage information - void initGPUInfo( MonteCarlo* monteCarlo) - { - #if defined HAVE_OPENMP_TARGET - int Ngpus = omp_get_num_devices(); - #elif defined GPU_NATIVE - int Ngpus; - gpuGetDeviceCount(&Ngpus); - #else - int Ngpus = 0; - #endif - - if( Ngpus != 0 ) - { - #if defined HAVE_OPENMP_TARGET || defined GPU_NATIVE - monteCarlo->processor_info->use_gpu = 1; - int GPUID = monteCarlo->processor_info->rank%Ngpus; - monteCarlo->processor_info->gpu_id = GPUID; - - #if defined HAVE_OPENMP_TARGET - omp_set_default_device(GPUID); - #endif - - #if defined GPU_NATIVE - gpuSetDevice(GPUID); - //cudaDeviceSetLimit( cudaLimitStackSize, 64*1024 ); - #endif - #endif - } - else - { - monteCarlo->processor_info->use_gpu = 0; - monteCarlo->processor_info->gpu_id = -1; - } - -#ifdef GPU_NATIVE - if( monteCarlo->processor_info->use_gpu ) - warmup_kernel(); -#endif - - //printf("monteCarlo->processor_info->use_gpu = %d\n", monteCarlo->processor_info->use_gpu); - - } -} - - -/// Initializes both the NuclearData and the MaterialDatabase. 
These -/// two structures are inherently linked since the isotopeGids stored in -/// the MaterialDatabase must correspond to the isotope indices in the -/// NuclearData. -namespace -{ - void initNuclearData(MonteCarlo* monteCarlo, const Parameters& params) - { - #if defined HAVE_UVM - void *ptr1, *ptr2; - gpuMallocManaged( &ptr1, sizeof(NuclearData) ); - gpuMallocManaged( &ptr2, sizeof(MaterialDatabase) ); - - monteCarlo->_nuclearData = new(ptr1) NuclearData(params.simulationParams.nGroups, - params.simulationParams.eMin, - params.simulationParams.eMax); - monteCarlo->_materialDatabase = new(ptr2) MaterialDatabase(); - #else - monteCarlo->_nuclearData = new NuclearData(params.simulationParams.nGroups, - params.simulationParams.eMin, - params.simulationParams.eMax); - monteCarlo->_materialDatabase = new MaterialDatabase(); - #endif - - map crossSection; - for (auto crossSectionIter = params.crossSectionParams.begin(); - crossSectionIter != params.crossSectionParams.end(); - crossSectionIter++) - { - const CrossSectionParameters& cp = crossSectionIter->second; - crossSection.insert(make_pair(cp.name, Polynomial(cp.aa, cp.bb, cp.cc, cp.dd, cp.ee))); - } - - int num_isotopes = 0; - int num_materials = 0; - - for( auto matIter = params.materialParams.begin(); matIter != params.materialParams.end(); matIter++ ) - { - const MaterialParameters& mp = matIter->second; - num_isotopes += mp.nIsotopes; - num_materials++; - } - - monteCarlo->_nuclearData->_isotopes.reserve( num_isotopes, VAR_MEM ); - monteCarlo->_materialDatabase->_mat.reserve( num_materials, VAR_MEM ); - - for (auto matIter = params.materialParams.begin(); - matIter != params.materialParams.end(); matIter++) - { - const MaterialParameters& mp = matIter->second; - Material material(mp.name, mp.mass); - double nuBar = params.crossSectionParams.at(mp.fissionCrossSection).nuBar; - material._iso.reserve( mp.nIsotopes, VAR_MEM ); - - for (int iIso=0; iIso_nuclearData->addIsotope( - mp.nReactions, - 
crossSection.at(mp.fissionCrossSection), - crossSection.at(mp.scatteringCrossSection), - crossSection.at(mp.absorptionCrossSection), - nuBar, - mp.totalCrossSection, - mp.fissionCrossSectionRatio, - mp.scatteringCrossSectionRatio, - mp.absorptionCrossSectionRatio); - - // atomFraction for each isotope is 1/nIsotopes. Treats all - // isotopes as equally prevalent. - material.addIsotope(Isotope(isotopeGid, 1.0/mp.nIsotopes)); - } - monteCarlo->_materialDatabase->addMaterial(material); - } - } -} - -namespace -{ - void consistencyCheck(int myRank, const qs_vector& domain) - { - if (myRank == 0) { cout << "Starting Consistency Check" < myDomainGid = ddc.getAssignedDomainGids(); - - GlobalFccGrid globalGrid(nx, ny, nz, lx, ly, lz); - - int nCenters = nRanks*nDomainsPerRank; - vector domainCenter; - if (xDom == 0 && yDom == 0 && zDom == 0) - initializeCentersRandomly(nCenters, globalGrid, domainCenter); - else - initializeCentersGrid(lx, ly, lz, xDom, yDom, zDom, domainCenter); - - qs_assert(domainCenter.size() == nCenters); - - vector partition; - { - int foremanRank = myRank; - for (unsigned ii=0; ii 1 && nDomainsPerRank == 1) - comm = new MpiCommObject(MPI_COMM_WORLD, ddc); - else - qs_assert(false); - - for (unsigned ii=0; iidomain.reserve(myDomainGid.size(),VAR_MEM); - monteCarlo->domain.Open(); - for (unsigned ii=0; iidomain.push_back( - MC_Domain(partition[ii], globalGrid, ddc, params, *monteCarlo->_materialDatabase, - params.simulationParams.nGroups)); - } - monteCarlo->domain.Close(); - - if (nRanks == 1) - consistencyCheck(myRank, monteCarlo->domain); - - if (myRank == 0) { cout << "Finished initMesh" <_tallies->InitializeTallies( - monteCarlo, - params.simulationParams.balanceTallyReplications, - params.simulationParams.fluxTallyReplications, - params.simulationParams.cellTallyReplications - ); - } -} - -namespace -{ - void initTimeInfo(MonteCarlo* monteCarlo, const Parameters& params) - { - monteCarlo->time_info->time_step = params.simulationParams.dt; - } -} 
- -namespace -{ - // scatter the centers (somewhat) randomly - void initializeCentersRandomly(int nCenters, - const GlobalFccGrid& grid, - vector& centers) - { - set picked; - do - { - Tuple iTuple(drand48()*grid.nx()/2, - drand48()*grid.ny()/2, - drand48()*grid.nz()/2); - - if (!picked.insert(iTuple).second) - continue; - - iTuple += iTuple; // iTuple *= 2; - Long64 iCell = grid.cellTupleToIndex(iTuple); - MC_Vector r = grid.cellCenter(iCell); - centers.push_back(r); - } while (centers.size() < nCenters); - } -} - -namespace -{ - void initializeCentersGrid(double lx, double ly, double lz, - int xDom, int yDom, int zDom, - vector& centers) - { - double dx = lx/xDom; - double dy = ly/yDom; - double dz = lz/zDom; - for (int ix=0; ix_params.simulationParams.crossSectionsOut == "" ) return; - - struct XC_Data - { - XC_Data() : absorption(0.), fission(0.), scatter(0.){} - double absorption; - double fission; - double scatter; - }; - - NuclearData* nd = monteCarlo->_nuclearData; - int nGroups = nd->_energies.size() - 1; - vector energy(nGroups); - for (unsigned ii=0; ii_energies[ii] + nd->_energies[ii+1])/2.0; - - - MaterialDatabase* matDB = monteCarlo->_materialDatabase; - unsigned nMaterials = matDB->_mat.size(); - - map > xcTable; - - - // for each material - for (unsigned iMat=0; iMat_mat[iMat]._name; - vector& xcVec = xcTable[materialName]; - xcVec.resize(nGroups); - unsigned nIsotopes = matDB->_mat[iMat]._iso.size(); - // for each isotope - for (unsigned iIso=0; iIso_materialDatabase->_mat[iMat]._iso[iIso]._gid; - unsigned nReactions = nd->_isotopes[isotopeGid]._species[0]._reactions.size(); - // for each reaction - for (unsigned iReact=0; iReact_isotopes[isotopeGid]._species[0]._reactions[iReact]; - // accumulate cross sections by reaction type - for (unsigned iGroup=0; iGroup_params.simulationParams.crossSectionsOut + ".dat"; - - xSec = fopen( fileName.c_str(), "w" ); - - // print cross section data - // first the header - fprintf(xSec, "#group energy"); - for 
(auto mapIter=xcTable.begin(); mapIter!=xcTable.end(); ++mapIter) - { - const string& materialName = mapIter->first; - fprintf(xSec, " %s_a %s_f %s_s", materialName.c_str(), materialName.c_str(), materialName.c_str()); - } - fprintf(xSec,"\n"); - - // now the data - for (unsigned ii=0; iisecond[ii].absorption, mapIter->second[ii].fission, mapIter->second[ii].scatter); - } - fprintf(xSec, "\n"); - } - fclose( xSec ); - } -} diff --git a/src/main.cc b/src/main.cc deleted file mode 100644 index 765ef62f..00000000 --- a/src/main.cc +++ /dev/null @@ -1,325 +0,0 @@ -#include -#include "utils.hh" -#include "Parameters.hh" -#include "utilsMpi.hh" -#include "MonteCarlo.hh" -#include "initMC.hh" -#include "Tallies.hh" -#include "PopulationControl.hh" -#include "ParticleVaultContainer.hh" -#include "ParticleVault.hh" -#include "MC_Particle_Buffer.hh" -#include "MC_Processor_Info.hh" -#include "MC_Time_Info.hh" -#include "macros.hh" -#include "MC_Fast_Timer.hh" -#include "MC_SourceNow.hh" -#include "SendQueue.hh" -#include "NVTX_Range.hh" -#include "cudaUtils.hh" -#include "cudaFunctions.hh" -#include "qs_assert.hh" -#include "CycleTracking.hh" -#include "CoralBenchmark.hh" -#include "EnergySpectrum.hh" - -#include "git_hash.hh" -#include "git_vers.hh" - -void gameOver(); -void cycleInit( bool loadBalance ); -void cycleTracking(MonteCarlo* monteCarlo); -void cycleFinalize(); - -using namespace std; - -MonteCarlo *mcco = NULL; - -int main(int argc, char** argv) -{ - mpiInit(&argc, &argv); - printBanner(GIT_VERS, GIT_HASH); - - Parameters params = getParameters(argc, argv); - printParameters(params, cout); - - // mcco stores just about everything. - mcco = initMC(params); - - int loadBalance = params.simulationParams.loadBalance; - - MC_FASTTIMER_START(MC_Fast_Timer::main); // this can be done once mcco exist. 
- - const int nSteps = params.simulationParams.nSteps; - - for (int ii=0; iifast_timer->Last_Cycle_Report( - params.simulationParams.cycleTimers, - mcco->processor_info->rank, - mcco->processor_info->num_processors, - mcco->processor_info->comm_mc_world ); - } - - - MC_FASTTIMER_STOP(MC_Fast_Timer::main); - - gameOver(); - - coralBenchmarkCorrectness(mcco, params); - -#ifdef HAVE_UVM - mcco->~MonteCarlo(); - gpuFree( mcco ); -#else - delete mcco; -#endif - - mpiFinalize(); - - return 0; -} - -void gameOver() -{ - mcco->fast_timer->Cumulative_Report(mcco->processor_info->rank, - mcco->processor_info-> num_processors, - mcco->processor_info->comm_mc_world, - mcco->_tallies->_balanceCumulative._numSegments); - mcco->_tallies->_spectrum.PrintSpectrum(mcco); -} - -void cycleInit( bool loadBalance ) -{ - - MC_FASTTIMER_START(MC_Fast_Timer::cycleInit); - - mcco->clearCrossSectionCache(); - - mcco->_tallies->CycleInitialize(mcco); - - mcco->_particleVaultContainer->swapProcessingProcessedVaults(); - - mcco->_particleVaultContainer->collapseProcessed(); - mcco->_particleVaultContainer->collapseProcessing(); - - mcco->_tallies->_balanceTask[0]._start = mcco->_particleVaultContainer->sizeProcessing(); - - mcco->particle_buffer->Initialize(); - - MC_SourceNow(mcco); - - PopulationControl(mcco, loadBalance); // controls particle population - - RouletteLowWeightParticles(mcco); // Delete particles with low statistical weight - - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleInit); -} - - -#if defined GPU_NATIVE - -GLOBAL void CycleTrackingKernel( MonteCarlo* monteCarlo, int num_particles, ParticleVault* processingVault, ParticleVault* processedVault ) -{ - int global_index = getGlobalThreadID(); - - if( global_index < num_particles ) - { - CycleTrackingGuts( monteCarlo, global_index, processingVault, processedVault ); - } -} - -#endif - -void cycleTracking(MonteCarlo *monteCarlo) -{ - MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking); - - bool done = false; - - //Determine whether or not 
to use GPUs if they are available (set for each MPI rank) - ExecutionPolicy execPolicy = getExecutionPolicy( monteCarlo->processor_info->use_gpu ); - - ParticleVaultContainer &my_particle_vault = *(monteCarlo->_particleVaultContainer); - - //Post Inital Receives for Particle Buffer - monteCarlo->particle_buffer->Post_Receive_Particle_Buffer( my_particle_vault.getVaultSize() ); - - //Get Test For Done Method (Blocking or non-blocking - MC_New_Test_Done_Method::Enum new_test_done_method = monteCarlo->particle_buffer->new_test_done_method; - - do - { - int particle_count = 0; // Initialize count of num_particles processed - - while ( !done ) - { - uint64_t fill_vault = 0; - - for ( uint64_t processing_vault = 0; processing_vault < my_particle_vault.processingSize(); processing_vault++ ) - { - MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking_Kernel); - uint64_t processed_vault = my_particle_vault.getFirstEmptyProcessedVault(); - - ParticleVault *processingVault = my_particle_vault.getTaskProcessingVault(processing_vault); - ParticleVault *processedVault = my_particle_vault.getTaskProcessedVault(processed_vault); - - int numParticles = processingVault->size(); - - if ( numParticles != 0 ) - { - NVTX_Range trackingKernel("cycleTracking_TrackingKernel"); // range ends at end of scope - - // The tracking kernel can run - // * As a cuda kernel - // * As an OpenMP 4.5 parallel loop on the GPU - // * As an OpenMP 3.0 parallel loop on the CPU - // * AS a single thread on the CPU. 
- switch (execPolicy) - { - case gpuNative: - { - #if defined (GPU_NATIVE) - dim3 grid(1,1,1); - dim3 block(1,1,1); - int runKernel = ThreadBlockLayout( grid, block, numParticles); - - //Call Cycle Tracking Kernel - if( runKernel ) - CycleTrackingKernel<<>>( monteCarlo, numParticles, processingVault, processedVault ); - - //Synchronize the stream so that memory is copied back before we begin MPI section - gpuPeekAtLastError(); - gpuDeviceSynchronize(); - #endif - } - break; - - case gpuWithOpenMP: - { - int nthreads=128; - if (numParticles < 64*56 ) - nthreads = 64; - int nteams = (numParticles + nthreads - 1 ) / nthreads; - nteams = nteams > 1 ? nteams : 1; - #ifdef HAVE_OPENMP_TARGET - #pragma omp target enter data map(to:monteCarlo[0:1]) - #pragma omp target enter data map(to:processingVault[0:1]) - #pragma omp target enter data map(to:processedVault[0:1]) - #pragma omp target teams distribute parallel for num_teams(nteams) thread_limit(128) - #endif - for ( int particle_index = 0; particle_index < numParticles; particle_index++ ) - { - CycleTrackingGuts( monteCarlo, particle_index, processingVault, processedVault ); - } - #ifdef HAVE_OPENMP_TARGET - #pragma omp target exit data map(from:monteCarlo[0:1]) - #pragma omp target exit data map(from:processingVault[0:1]) - #pragma omp target exit data map(from:processedVault[0:1]) - #endif - } - break; - - case cpu: - #include "mc_omp_parallel_for_schedule_static.hh" - for ( int particle_index = 0; particle_index < numParticles; particle_index++ ) - { - CycleTrackingGuts( monteCarlo, particle_index, processingVault, processedVault ); - } - break; - default: - qs_assert(false); - } // end switch - } - - particle_count += numParticles; - - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking_Kernel); - - MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking_MPI); - - // Next, communicate particles that have crossed onto - // other MPI ranks. 
- NVTX_Range cleanAndComm("cycleTracking_clean_and_comm"); - - SendQueue &sendQueue = *(my_particle_vault.getSendQueue()); - monteCarlo->particle_buffer->Allocate_Send_Buffer( sendQueue ); - - //Move particles from send queue to the send buffers - for ( int index = 0; index < sendQueue.size(); index++ ) - { - sendQueueTuple& sendQueueT = sendQueue.getTuple( index ); - MC_Base_Particle mcb_particle; - - processingVault->getBaseParticleComm( mcb_particle, sendQueueT._particleIndex ); - - int buffer = monteCarlo->particle_buffer->Choose_Buffer(sendQueueT._neighbor ); - monteCarlo->particle_buffer->Buffer_Particle(mcb_particle, buffer ); - } - - monteCarlo->particle_buffer->Send_Particle_Buffers(); // post MPI sends - - processingVault->clear(); //remove the invalid particles - sendQueue.clear(); - - // Move particles in "extra" vaults into the regular vaults. - my_particle_vault.cleanExtraVaults(); - - // receive any particles that have arrived from other ranks - monteCarlo->particle_buffer->Receive_Particle_Buffers( fill_vault ); - - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking_MPI); - - } // for loop on vaults - - MC_FASTTIMER_START(MC_Fast_Timer::cycleTracking_MPI); - - NVTX_Range collapseRange("cycleTracking_Collapse_ProcessingandProcessed"); - my_particle_vault.collapseProcessing(); - my_particle_vault.collapseProcessed(); - collapseRange.endRange(); - - - //Test for done - blocking on all MPI ranks - NVTX_Range doneRange("cycleTracking_Test_Done_New"); - done = monteCarlo->particle_buffer->Test_Done_New( new_test_done_method ); - doneRange.endRange(); - - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking_MPI); - - } // while not done: Test_Done_New() - - // Everything should be done normally. 
- done = monteCarlo->particle_buffer->Test_Done_New( MC_New_Test_Done_Method::Blocking ); - - } while ( !done ); - - //Make sure to cancel all pending receive requests - monteCarlo->particle_buffer->Cancel_Receive_Buffer_Requests(); - //Make sure Buffers Memory is Free - monteCarlo->particle_buffer->Free_Buffers(); - - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleTracking); -} - - -void cycleFinalize() -{ - MC_FASTTIMER_START(MC_Fast_Timer::cycleFinalize); - - mcco->_tallies->_balanceTask[0]._end = mcco->_particleVaultContainer->sizeProcessed(); - - // Update the cumulative tally data. - mcco->_tallies->CycleFinalize(mcco); - - mcco->time_info->cycle++; - - mcco->particle_buffer->Free_Memory(); - - MC_FASTTIMER_STOP(MC_Fast_Timer::cycleFinalize); -} - diff --git a/src/mc_omp_parallel_for_schedule_static_if.hh b/src/mc_omp_parallel_for_schedule_static_if.hh deleted file mode 100644 index eb15598c..00000000 --- a/src/mc_omp_parallel_for_schedule_static_if.hh +++ /dev/null @@ -1,4 +0,0 @@ -#if defined(HAVE_OPENMP) - #pragma omp parallel for schedule (static) MC_OMP_PARALLEL_FOR_IF_CONDITION -#endif - diff --git a/src/mc_omp_parallel_for_schedule_static_num_physical_cores.hh b/src/mc_omp_parallel_for_schedule_static_num_physical_cores.hh deleted file mode 100644 index ba24c05e..00000000 --- a/src/mc_omp_parallel_for_schedule_static_num_physical_cores.hh +++ /dev/null @@ -1,6 +0,0 @@ -#if defined(HAVE_OPENMP) - int num_physical_cores = mc_get_num_physical_procs(); - if ((mcco->processor_info->rank == 0) && (mcco->_params.simulationParams.debugThreads >= 2)) - { printf("OpenMP Looping over %d cores\n",num_physical_cores); } - #pragma omp parallel for schedule (static) num_threads(num_physical_cores) -#endif diff --git a/src/utilsMpi.cc b/src/utilsMpi.cc deleted file mode 100644 index 6934a952..00000000 --- a/src/utilsMpi.cc +++ /dev/null @@ -1,374 +0,0 @@ -#include "utilsMpi.hh" -#include -#include // needed for memcpy on some compilers -#include // needed for clock -#include 
"qs_assert.hh" -#include "macros.hh" -#include "MonteCarlo.hh" -#include "MC_Processor_Info.hh" -#include "Globals.hh" - - -#ifdef HAVE_MPI - -void mpiInit( int *argc, char ***argv) -{ - -#ifdef HAVE_OPENMP - { // limit scope - char const* const provided_string[4] = \ - {"MPI_THREAD_SINGLE","MPI_THREAD_FUNNELED","MPI_THREAD_SERIALIZED","MPI_THREAD_MULTIPLE"}; - int provided, required = MPI_THREAD_FUNNELED; - - int err = MPI_Init_thread(argc, argv, required, &provided); - qs_assert(err == MPI_SUCCESS); - - int rank = -1; - mpiComm_rank(MPI_COMM_WORLD, &rank); - if (rank == 0) - fprintf(stdout,"MPI Initialized : %s\n", provided_string[provided]); - - if ((required > MPI_THREAD_SINGLE) && (required > provided)) - { - printf("MPI-OpenMP Error.\n\tCode requires %s thread support. MPI library provides %s support.\n", - provided_string[required],provided_string[provided]); - qs_assert(false); - } - } // limit scope - -#else - { // limit scope - int err = MPI_Init(argc, argv); - qs_assert(err == MPI_SUCCESS); - } //limit scope - -#endif - -} - - -double mpiWtime( void ) { return MPI_Wtime(); } - -int mpiComm_split ( MPI_Comm comm, int color, int key, MPI_Comm *newcomm) -{ - qs_assert(MPI_Comm_split(comm, color, key, newcomm) == MPI_SUCCESS); - return MPI_SUCCESS; -} - -void mpiComm_rank( MPI_Comm comm, int *rank ) { qs_assert(MPI_Comm_rank(comm, rank) == MPI_SUCCESS); } -void mpiCancel( MPI_Request *request ) { qs_assert(MPI_Cancel(request) == MPI_SUCCESS); } -void mpiTest_cancelled( MPI_Status *status, int *flag ) { qs_assert(MPI_Test_cancelled(status, flag) == MPI_SUCCESS); } -void mpiTest( MPI_Request *request, int *flag, MPI_Status * status) { qs_assert(MPI_Test(request, flag, status) == MPI_SUCCESS); } -void mpiWait( MPI_Request *request, MPI_Status *status ) { qs_assert(MPI_Wait(request, status) == MPI_SUCCESS); } -void mpiComm_size( MPI_Comm comm, int *size ) { qs_assert(MPI_Comm_size(comm, size) == MPI_SUCCESS); } -void mpiBarrier( MPI_Comm comm) { 
qs_assert(MPI_Barrier(comm) == MPI_SUCCESS); } -void mpiGet_version( int *version, int *subversion ) { qs_assert(MPI_Get_version(version, subversion) == MPI_SUCCESS); } -void mpiFinalize( void ) { qs_assert(MPI_Finalize() == MPI_SUCCESS); } -void mpiAbort( MPI_Comm comm, int errorcode ) { qs_assert(MPI_Abort(comm, errorcode) == MPI_SUCCESS); } -void mpiRequestFree( MPI_Request *request ){qs_assert( MPI_Request_free( request ) == MPI_SUCCESS);} - -void mpiScan( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) - { qs_assert(MPI_Scan(sendbuf, recvbuf, count, datatype, operation, comm) == MPI_SUCCESS); } -void mpiType_commit(MPI_Datatype *datatype ) - { qs_assert(MPI_Type_commit( datatype ) == MPI_SUCCESS); } -void mpiType_contiguous(int count, MPI_Datatype old_type, MPI_Datatype *newtype) - { qs_assert(MPI_Type_contiguous(count, old_type, newtype) == MPI_SUCCESS); } -void mpiWaitall( int count, MPI_Request *array_of_requests, MPI_Status *array_of_statuses ) - { qs_assert(MPI_Waitall(count, array_of_requests, array_of_statuses) == MPI_SUCCESS); } -void mpiAllreduce ( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) - { qs_assert(MPI_Allreduce(sendbuf, recvbuf, count, datatype, operation, comm) == MPI_SUCCESS); } -void mpiIAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request) -#ifdef HAVE_ASYNC_MPI - { qs_assert(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, operation, comm, request) == MPI_SUCCESS); } -#else - { qs_assert(MPI_Allreduce(sendbuf, recvbuf, count, datatype, operation, comm ) == MPI_SUCCESS); } -#endif -void mpiReduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ) - { qs_assert(MPI_Reduce(sendbuf, recvbuf, count, datatype, op, root, comm) == MPI_SUCCESS); } -void mpiGather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void 
*recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) - { qs_assert(MPI_Gather(sendbuf, sendcount, sendtype, recvbuf, recvcount, recvtype, root, comm) == MPI_SUCCESS); } -void mpiBcast( void* buf, int count, MPI_Datatype datatype, int root, MPI_Comm comm) - { qs_assert(MPI_Bcast(buf, count, datatype, root, comm) == MPI_SUCCESS); } -void mpiIrecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Request *request) - { qs_assert(MPI_Irecv(buf, count, datatype, source, tag, comm, request) == MPI_SUCCESS); } -void mpiRecv(void *buf, int count, MPI_Datatype datatype, int source, int tag, MPI_Comm comm, MPI_Status *status) - { qs_assert(MPI_Recv(buf, count, datatype, source, tag, comm, status) == MPI_SUCCESS); } -void mpiIsend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm, MPI_Request *request) - { qs_assert(MPI_Isend(buf, count, datatype, dest, tag, comm, request) == MPI_SUCCESS); } -void mpiSend(void *buf, int count, MPI_Datatype datatype, int dest, int tag, MPI_Comm comm) - { qs_assert(MPI_Send(buf, count, datatype, dest, tag, comm) == MPI_SUCCESS); } - - // ------------------------------------------------------------------------------- - // ------------------------------------------------------------------------------- -#else // HAVE_MPI is not defined : Serial (non-MPI) implementation of necessary routines - // ------------------------------------------------------------------------------- - // ------------------------------------------------------------------------------- - -#include "mpi_stubs_internal.hh" // This will be our internal C++ structs. 
- -static Handleitem *init_block(int block, Handleitem *b); -static void init_handles(); -static MPI_Comm mpi_stubs_comm_new(); -static void mpi_stubs_alloc_handle(int *handle, void **data); -static pList mpi_stubs_list_new(); - -static MPI_Stubs_Data_type mpi_stubs_data; - - -// These slot numbers must match the #define of the data type in utilsMpi.hh -static size_t mpi_datatype_sizes[MPI_UNSIGNED_LONG_LONG+1] = -{ - sizeof(char), // slot 0 is not used - sizeof(unsigned char), // slot 1 MPI_Byte - sizeof(int), // slot 2 MPI_Int - sizeof(double), // slot 3 MPI_Double - sizeof(long long int), // slot 4 MPI_Long_Long - sizeof(unsigned long long) // slot 5 MPI_Unsigned_Long_Long -}; - -void mpiReduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op op, int root, MPI_Comm comm ) -{ - if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) - { printf("%s:%d - MPI_Reduce sendbuf or recvbuf is NULL \n", __FILE__, __LINE__); qs_assert(false); } - - if (root != 0) - { printf("%s:%d - MPI_Reduce: bad root = %d\n", __FILE__, __LINE__, root); qs_assert(false); } - - switch (datatype) - { - case MPI_INT: - case MPI_LONG_LONG: - case MPI_DOUBLE: - case MPI_UNSIGNED_LONG_LONG: - memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); - break; - default: - printf("%s:%d - MPI_Reduce type (%d) not implemented.", __FILE__, __LINE__,datatype); qs_assert(false); - } -} - -void mpiAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) -{ - if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) - { printf("%s:%d - MPI_Allreduce sendbuf or recvbuf is NULL \n",__FILE__, __LINE__); qs_assert(false); } - - switch (datatype) - { - case MPI_INT: - case MPI_LONG_LONG: - case MPI_DOUBLE: - case MPI_UNSIGNED_LONG_LONG: - memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); - break; - default: - printf("%s:%d - MPI_Allreduce type (%d) not implemented.", __FILE__, __LINE__, datatype); - 
qs_assert(false); - } -} - -void mpiIAllreduce( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm, MPI_Request *request) -{ - if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) - { printf("%s:%d - MPI_Allreduce sendbuf or recvbuf is NULL \n",__FILE__, __LINE__); qs_assert(false); } - - switch (datatype) - { - case MPI_INT: - case MPI_LONG_LONG: - case MPI_DOUBLE: - case MPI_UNSIGNED_LONG_LONG: - memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); - break; - default: - printf("%s:%d - MPI_Allreduce type (%d) not implemented.", __FILE__, __LINE__, datatype); - qs_assert(false); - } -} - -void mpiScan( void *sendbuf, void *recvbuf, int count, MPI_Datatype datatype, MPI_Op operation, MPI_Comm comm ) -{ - if (((sendbuf == NULL) || (recvbuf == NULL)) && (count > 0)) - { printf("%s:%d - MPI_Scan sendbuf or recvbuf is NULL \n",__FILE__, __LINE__); qs_assert(false); } - - switch (datatype) - { - case MPI_INT: - case MPI_LONG_LONG: - case MPI_DOUBLE: - case MPI_UNSIGNED_LONG_LONG: - memcpy(recvbuf, sendbuf, count * mpi_datatype_sizes[datatype]); - break; - default: - printf("%s:%d - MPI_Scan type (%d) not implemented.", __FILE__, __LINE__, datatype); - qs_assert(false); - } -} - -void mpiGather(void *sendbuf, int sendcount, MPI_Datatype sendtype, void *recvbuf, int recvcount, MPI_Datatype recvtype, int root, MPI_Comm comm) -{ - if (sendcount != recvcount) - { printf("%s:%d - MPI_Gather sendcount=%d != recvcount=%d\n", __FILE__, __LINE__, sendcount, recvcount); qs_assert(false); } - - if (sendtype != recvtype) - { printf("%s:%d - MPI_Gather sendtype=%d != recvtype=%d\n", __FILE__, __LINE__, sendtype, recvtype); qs_assert(false); } - - if (((sendbuf == NULL) || (recvbuf == NULL)) && (sendcount > 0)) - { printf("%s:%d - MPI_Gather sendbuf or recvbuf is NULL \n", __FILE__, __LINE__); qs_assert(false); } - - if (root != 0) - { fprintf(stderr,"%s:%d - MPI_Gather bad root = %d\n", __FILE__, __LINE__,root); 
qs_assert(false); } - - switch (recvtype) - { - case MPI_INT: - case MPI_LONG_LONG: - case MPI_DOUBLE: - case MPI_UNSIGNED_LONG_LONG: - memcpy(recvbuf, sendbuf, recvcount * mpi_datatype_sizes[recvtype]); - break; - default: - printf("%s:%d - MPI_Gather type (%d) not implemented.", __FILE__, __LINE__, recvtype); - qs_assert(false); - } -} - -#include - -double mpiWtime (void) -{ - using t = std::chrono::high_resolution_clock; - auto c = t::now().time_since_epoch().count(); - auto n = t::period::num; - auto d = t::period::den; - double r = static_cast(c)/static_cast(d)*static_cast(n); - return r; -} - -static Handleitem *init_block(int block, Handleitem *b) -{ - for (int i=0; inext; // Skip over using item 0 - newh->next = NULL; - - mpi_stubs_data.blocks[0] = mpi_stubs_data.block0; - mpi_stubs_data.nblocks = 1; - - for (int i=1; inext; - newh->next = NULL; - - *handle = newh->handle; - *data = &(newh->data); - - return; - } - - /* there is nothing free, so allocate a newh block and add it - * to mpi_stubs_data.blocks[] - */ - - if (nblocks == MPI_STUBS_MAX_BLOCKS) - { - fprintf(stderr,"%s:%d - allocate_handle: max %d active handles exceeded\n", - __FILE__, __LINE__, MPI_STUBS_MAX_BLOCKS*MPI_STUBS_BLOCK_ITEMS); - abort(); - } - - MC_MALLOC(mpi_stubs_data.blocks[nblocks], MPI_STUBS_BLOCK_ITEMS, Handleitem); - - newh = init_block(nblocks, mpi_stubs_data.blocks[nblocks]); - - mpi_stubs_data.nextfree = newh->next; - newh->next = NULL; - - *handle = newh->handle; - *data = &(newh->data); - - mpi_stubs_data.nblocks++; // DON'T FORGET THIS!!!! 
-} - -static pList mpi_stubs_list_new() -{ - pList list = NULL; - - MC_MALLOC(list, 1, List); - - list->head = NULL; - list->tail = NULL; - list->count = 0; - - mpi_stubs_data.headcount++; - return(list); -} - - - -static MPI_Comm mpi_stubs_comm_new() -{ - MPI_Comm chandle; - Comm *cptr; - static int num = 0; - - mpi_stubs_alloc_handle(&chandle,(void **) &cptr); - - cptr->sendlist = mpi_stubs_list_new(); - cptr->recvlist = mpi_stubs_list_new(); - - cptr->num = num++; - cptr->name = NULL; - - return(chandle); -} - -int mpiComm_split(MPI_Comm comm, int color, int key, MPI_Comm *newcomm) -{ - - if (color == MPI_UNDEFINED) - { - *newcomm = MPI_COMM_NULL; - } - else - { - *newcomm = mpi_stubs_comm_new(); - } - - return(MPI_SUCCESS); -} - - - - -#endif // end #else HAVE_MPI From cb5f0505af2448b70a6254d518b56d208e5f2111 Mon Sep 17 00:00:00 2001 From: skambapugithub Date: Fri, 6 Oct 2023 14:23:09 -0700 Subject: [PATCH 4/4] moved SYCL version in to dir named SYCL --- CMakeLists.txt => SYCL/CMakeLists.txt | 0 {Examples => SYCL/Examples}/AllAbsorb/allAbsorb.inp | 0 {Examples => SYCL/Examples}/AllEscape/allEscape.inp | 0 {Examples => SYCL/Examples}/AllScattering/scatteringOnly.inp | 0 .../Examples}/CORAL2_Benchmark/Problem1/00_README.TXT | 0 .../Examples}/CORAL2_Benchmark/Problem1/Coral2_P1.inp | 0 .../Examples}/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp | 0 .../Examples}/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp | 0 {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/P1_04t.sh | 0 {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/P1_16t.sh | 0 {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/P1_64t.sh | 0 .../Examples}/CORAL2_Benchmark/Problem2/00_README.TXT | 0 .../Examples}/CORAL2_Benchmark/Problem2/Coral2_P2.inp | 0 .../Examples}/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp | 0 .../Examples}/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp | 0 {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem2/P2_64t.sh | 0 {Examples => SYCL/Examples}/CTS2_Benchmark/00_README.TXT 
| 0 {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2.inp | 0 {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2_1.inp | 0 {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2_36.inp | 0 {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2_scaling.sh | 0 {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem.inp | 0 {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v3.inp | 0 .../Examples}/Homogeneous/homogeneousProblem_v3_wq.inp | 0 .../Examples}/Homogeneous/homogeneousProblem_v4_tm.inp | 0 .../Examples}/Homogeneous/homogeneousProblem_v4_ts.inp | 0 .../Examples}/Homogeneous/homogeneousProblem_v5_ts.inp | 0 .../Examples}/Homogeneous/homogeneousProblem_v7_ts.inp | 0 .../Examples}/Homogeneous/quicksilver_aprun_trinity_01.sh | 0 .../Examples}/Homogeneous/quicksilver_aprun_trinity_02.sh | 0 .../Examples}/Homogeneous/quicksilver_aprun_trinity_04.sh | 0 .../Examples}/Homogeneous/quicksilver_slurm_rzalast_01.sh | 0 .../Examples}/Homogeneous/quicksilver_slurm_rzgenie_01.sh | 0 .../Examples}/Homogeneous/run.homogeneousProblem_v4.rzoz7.x | 0 {Examples => SYCL/Examples}/NoCollisions/no.collisions.inp | 0 {Examples => SYCL/Examples}/NoFission/noFission.inp | 0 {Examples => SYCL/Examples}/NonFlatXC/NonFlatXC.inp | 0 LICENSE.md => SYCL/LICENSE.md | 0 README.md => SYCL/README.md | 0 {src => SYCL/src}/AtomicMacro.hh | 0 {src => SYCL/src}/BulkStorage.hh | 0 {src => SYCL/src}/CollisionEvent.hh | 0 {src => SYCL/src}/CommObject.hh | 0 {src => SYCL/src}/CoralBenchmark.cc | 0 {src => SYCL/src}/CoralBenchmark.hh | 0 {src => SYCL/src}/CycleTracking.cc | 0 {src => SYCL/src}/CycleTracking.hh | 0 {src => SYCL/src}/DeclareMacro.hh | 0 {src => SYCL/src}/DecompositionObject.cc | 0 {src => SYCL/src}/DecompositionObject.hh | 0 {src => SYCL/src}/DirectionCosine.cc.dp.cpp | 0 {src => SYCL/src}/DirectionCosine.hh | 0 {src => SYCL/src}/EnergySpectrum.cc | 0 {src => SYCL/src}/EnergySpectrum.hh | 0 {src => SYCL/src}/FacetPair.hh | 0 {src => SYCL/src}/GlobalFccGrid.cc.dp.cpp | 0 {src => 
SYCL/src}/GlobalFccGrid.hh | 0 {src => SYCL/src}/Globals.hh | 0 {src => SYCL/src}/GridAssignmentObject.cc.dp.cpp | 0 {src => SYCL/src}/GridAssignmentObject.hh | 0 {src => SYCL/src}/IndexToTuple.hh | 0 {src => SYCL/src}/IndexToTuple4.hh | 0 {src => SYCL/src}/InputBlock.cc | 0 {src => SYCL/src}/InputBlock.hh | 0 {src => SYCL/src}/Long64.hh | 0 {src => SYCL/src}/MCT.hh | 0 {src => SYCL/src}/MC_Base_Particle.cc | 0 {src => SYCL/src}/MC_Base_Particle.hh | 0 {src => SYCL/src}/MC_Cell_State.hh | 0 {src => SYCL/src}/MC_Distance_To_Facet.hh | 0 {src => SYCL/src}/MC_Domain.cc.dp.cpp | 0 {src => SYCL/src}/MC_Domain.hh | 0 {src => SYCL/src}/MC_Facet_Adjacency.hh | 0 {src => SYCL/src}/MC_Facet_Crossing_Event.hh | 0 {src => SYCL/src}/MC_Facet_Geometry.hh | 0 {src => SYCL/src}/MC_Fast_Timer.cc.dp.cpp | 0 {src => SYCL/src}/MC_Fast_Timer.hh | 0 {src => SYCL/src}/MC_Location.hh | 0 {src => SYCL/src}/MC_Nearest_Facet.hh | 0 {src => SYCL/src}/MC_Particle.hh | 0 {src => SYCL/src}/MC_Particle_Buffer.cc | 0 {src => SYCL/src}/MC_Particle_Buffer.hh | 0 {src => SYCL/src}/MC_Processor_Info.hh | 0 {src => SYCL/src}/MC_RNG_State.hh | 0 {src => SYCL/src}/MC_Segment_Outcome.hh | 0 {src => SYCL/src}/MC_SourceNow.hh | 0 {src => SYCL/src}/MC_Time_Info.hh | 0 {src => SYCL/src}/MC_Vector.hh | 0 {src => SYCL/src}/MacroscopicCrossSection.hh | 0 {src => SYCL/src}/MaterialDatabase.hh | 0 {src => SYCL/src}/MemoryControl.hh | 0 {src => SYCL/src}/MeshPartition.cc | 0 {src => SYCL/src}/MeshPartition.hh | 0 {src => SYCL/src}/MonteCarlo.cc.dp.cpp | 0 {src => SYCL/src}/MonteCarlo.hh | 0 {src => SYCL/src}/MpiCommObject.cc | 0 {src => SYCL/src}/MpiCommObject.hh | 0 {src => SYCL/src}/NVTX_Range.hh | 0 {src => SYCL/src}/NuclearData.hh | 0 {src => SYCL/src}/Parameters.cc | 0 {src => SYCL/src}/Parameters.hh | 0 {src => SYCL/src}/ParticleVault.cc | 0 {src => SYCL/src}/ParticleVault.hh | 0 {src => SYCL/src}/ParticleVaultContainer.cc | 0 {src => SYCL/src}/ParticleVaultContainer.hh | 0 {src => 
SYCL/src}/PhysicalConstants.hh | 0 {src => SYCL/src}/PopulationControl.cc.dp.cpp | 0 {src => SYCL/src}/PopulationControl.hh | 0 {src => SYCL/src}/QS_Vector.hh | 0 {src => SYCL/src}/Random.cc | 0 {src => SYCL/src}/Random.h | 0 {src => SYCL/src}/SendQueue.hh | 0 {src => SYCL/src}/SharedMemoryCommObject.cc | 0 {src => SYCL/src}/SharedMemoryCommObject.hh | 0 {src => SYCL/src}/Tallies.cc | 0 {src => SYCL/src}/Tallies.hh | 0 {src => SYCL/src}/Tuple.hh | 0 {src => SYCL/src}/Tuple4.hh | 0 {src => SYCL/src}/Tuple4ToIndex.hh | 0 {src => SYCL/src}/TupleToIndex.hh | 0 {src => SYCL/src}/cmdLineParser.cc | 0 {src => SYCL/src}/cmdLineParser.hh | 0 {src => SYCL/src}/cudaFunctions.cc.dp.cpp | 0 {src => SYCL/src}/cudaFunctions.hh | 0 {src => SYCL/src}/cudaUtils.hh | 0 {src => SYCL/src}/initMC.cc.dp.cpp | 0 {src => SYCL/src}/initMC.hh | 0 {src => SYCL/src}/macros.hh | 0 {src => SYCL/src}/main.cc.dp.cpp | 0 {src => SYCL/src}/mc_omp_critical.hh | 0 {src => SYCL/src}/mc_omp_parallel_for_schedule_static.hh | 0 {src => SYCL/src}/memUtils.hh | 0 {src => SYCL/src}/mpi_stubs.hh | 0 {src => SYCL/src}/mpi_stubs_internal.hh | 0 {src => SYCL/src}/parseUtils.cc | 0 {src => SYCL/src}/parseUtils.hh | 0 {src => SYCL/src}/portability.hh | 0 {src => SYCL/src}/qs_assert.hh | 0 {src => SYCL/src}/utils.cc | 0 {src => SYCL/src}/utils.hh | 0 {src => SYCL/src}/utilsMpi.cc.dp.cpp | 0 {src => SYCL/src}/utilsMpi.hh | 0 142 files changed, 0 insertions(+), 0 deletions(-) rename CMakeLists.txt => SYCL/CMakeLists.txt (100%) rename {Examples => SYCL/Examples}/AllAbsorb/allAbsorb.inp (100%) rename {Examples => SYCL/Examples}/AllEscape/allEscape.inp (100%) rename {Examples => SYCL/Examples}/AllScattering/scatteringOnly.inp (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/00_README.TXT (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/Coral2_P1.inp (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp (100%) rename {Examples => 
SYCL/Examples}/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/P1_04t.sh (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/P1_16t.sh (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem1/P1_64t.sh (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem2/00_README.TXT (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem2/Coral2_P2.inp (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp (100%) rename {Examples => SYCL/Examples}/CORAL2_Benchmark/Problem2/P2_64t.sh (100%) rename {Examples => SYCL/Examples}/CTS2_Benchmark/00_README.TXT (100%) rename {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2.inp (100%) rename {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2_1.inp (100%) rename {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2_36.inp (100%) rename {Examples => SYCL/Examples}/CTS2_Benchmark/CTS2_scaling.sh (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v3.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v3_wq.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v4_tm.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v4_ts.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v5_ts.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/homogeneousProblem_v7_ts.inp (100%) rename {Examples => SYCL/Examples}/Homogeneous/quicksilver_aprun_trinity_01.sh (100%) rename {Examples => SYCL/Examples}/Homogeneous/quicksilver_aprun_trinity_02.sh (100%) rename {Examples => SYCL/Examples}/Homogeneous/quicksilver_aprun_trinity_04.sh (100%) rename {Examples => SYCL/Examples}/Homogeneous/quicksilver_slurm_rzalast_01.sh (100%) rename {Examples => 
SYCL/Examples}/Homogeneous/quicksilver_slurm_rzgenie_01.sh (100%) rename {Examples => SYCL/Examples}/Homogeneous/run.homogeneousProblem_v4.rzoz7.x (100%) rename {Examples => SYCL/Examples}/NoCollisions/no.collisions.inp (100%) rename {Examples => SYCL/Examples}/NoFission/noFission.inp (100%) rename {Examples => SYCL/Examples}/NonFlatXC/NonFlatXC.inp (100%) rename LICENSE.md => SYCL/LICENSE.md (100%) rename README.md => SYCL/README.md (100%) rename {src => SYCL/src}/AtomicMacro.hh (100%) rename {src => SYCL/src}/BulkStorage.hh (100%) rename {src => SYCL/src}/CollisionEvent.hh (100%) rename {src => SYCL/src}/CommObject.hh (100%) rename {src => SYCL/src}/CoralBenchmark.cc (100%) rename {src => SYCL/src}/CoralBenchmark.hh (100%) rename {src => SYCL/src}/CycleTracking.cc (100%) rename {src => SYCL/src}/CycleTracking.hh (100%) rename {src => SYCL/src}/DeclareMacro.hh (100%) rename {src => SYCL/src}/DecompositionObject.cc (100%) rename {src => SYCL/src}/DecompositionObject.hh (100%) rename {src => SYCL/src}/DirectionCosine.cc.dp.cpp (100%) rename {src => SYCL/src}/DirectionCosine.hh (100%) rename {src => SYCL/src}/EnergySpectrum.cc (100%) rename {src => SYCL/src}/EnergySpectrum.hh (100%) rename {src => SYCL/src}/FacetPair.hh (100%) rename {src => SYCL/src}/GlobalFccGrid.cc.dp.cpp (100%) rename {src => SYCL/src}/GlobalFccGrid.hh (100%) rename {src => SYCL/src}/Globals.hh (100%) rename {src => SYCL/src}/GridAssignmentObject.cc.dp.cpp (100%) rename {src => SYCL/src}/GridAssignmentObject.hh (100%) rename {src => SYCL/src}/IndexToTuple.hh (100%) rename {src => SYCL/src}/IndexToTuple4.hh (100%) rename {src => SYCL/src}/InputBlock.cc (100%) rename {src => SYCL/src}/InputBlock.hh (100%) rename {src => SYCL/src}/Long64.hh (100%) rename {src => SYCL/src}/MCT.hh (100%) rename {src => SYCL/src}/MC_Base_Particle.cc (100%) rename {src => SYCL/src}/MC_Base_Particle.hh (100%) rename {src => SYCL/src}/MC_Cell_State.hh (100%) rename {src => SYCL/src}/MC_Distance_To_Facet.hh (100%) rename 
{src => SYCL/src}/MC_Domain.cc.dp.cpp (100%) rename {src => SYCL/src}/MC_Domain.hh (100%) rename {src => SYCL/src}/MC_Facet_Adjacency.hh (100%) rename {src => SYCL/src}/MC_Facet_Crossing_Event.hh (100%) rename {src => SYCL/src}/MC_Facet_Geometry.hh (100%) rename {src => SYCL/src}/MC_Fast_Timer.cc.dp.cpp (100%) rename {src => SYCL/src}/MC_Fast_Timer.hh (100%) rename {src => SYCL/src}/MC_Location.hh (100%) rename {src => SYCL/src}/MC_Nearest_Facet.hh (100%) rename {src => SYCL/src}/MC_Particle.hh (100%) rename {src => SYCL/src}/MC_Particle_Buffer.cc (100%) rename {src => SYCL/src}/MC_Particle_Buffer.hh (100%) rename {src => SYCL/src}/MC_Processor_Info.hh (100%) rename {src => SYCL/src}/MC_RNG_State.hh (100%) rename {src => SYCL/src}/MC_Segment_Outcome.hh (100%) rename {src => SYCL/src}/MC_SourceNow.hh (100%) rename {src => SYCL/src}/MC_Time_Info.hh (100%) rename {src => SYCL/src}/MC_Vector.hh (100%) rename {src => SYCL/src}/MacroscopicCrossSection.hh (100%) rename {src => SYCL/src}/MaterialDatabase.hh (100%) rename {src => SYCL/src}/MemoryControl.hh (100%) rename {src => SYCL/src}/MeshPartition.cc (100%) rename {src => SYCL/src}/MeshPartition.hh (100%) rename {src => SYCL/src}/MonteCarlo.cc.dp.cpp (100%) rename {src => SYCL/src}/MonteCarlo.hh (100%) rename {src => SYCL/src}/MpiCommObject.cc (100%) rename {src => SYCL/src}/MpiCommObject.hh (100%) rename {src => SYCL/src}/NVTX_Range.hh (100%) rename {src => SYCL/src}/NuclearData.hh (100%) rename {src => SYCL/src}/Parameters.cc (100%) rename {src => SYCL/src}/Parameters.hh (100%) rename {src => SYCL/src}/ParticleVault.cc (100%) rename {src => SYCL/src}/ParticleVault.hh (100%) rename {src => SYCL/src}/ParticleVaultContainer.cc (100%) rename {src => SYCL/src}/ParticleVaultContainer.hh (100%) rename {src => SYCL/src}/PhysicalConstants.hh (100%) rename {src => SYCL/src}/PopulationControl.cc.dp.cpp (100%) rename {src => SYCL/src}/PopulationControl.hh (100%) rename {src => SYCL/src}/QS_Vector.hh (100%) rename {src => 
SYCL/src}/Random.cc (100%) rename {src => SYCL/src}/Random.h (100%) rename {src => SYCL/src}/SendQueue.hh (100%) rename {src => SYCL/src}/SharedMemoryCommObject.cc (100%) rename {src => SYCL/src}/SharedMemoryCommObject.hh (100%) rename {src => SYCL/src}/Tallies.cc (100%) rename {src => SYCL/src}/Tallies.hh (100%) rename {src => SYCL/src}/Tuple.hh (100%) rename {src => SYCL/src}/Tuple4.hh (100%) rename {src => SYCL/src}/Tuple4ToIndex.hh (100%) rename {src => SYCL/src}/TupleToIndex.hh (100%) rename {src => SYCL/src}/cmdLineParser.cc (100%) rename {src => SYCL/src}/cmdLineParser.hh (100%) rename {src => SYCL/src}/cudaFunctions.cc.dp.cpp (100%) rename {src => SYCL/src}/cudaFunctions.hh (100%) rename {src => SYCL/src}/cudaUtils.hh (100%) rename {src => SYCL/src}/initMC.cc.dp.cpp (100%) rename {src => SYCL/src}/initMC.hh (100%) rename {src => SYCL/src}/macros.hh (100%) rename {src => SYCL/src}/main.cc.dp.cpp (100%) rename {src => SYCL/src}/mc_omp_critical.hh (100%) rename {src => SYCL/src}/mc_omp_parallel_for_schedule_static.hh (100%) rename {src => SYCL/src}/memUtils.hh (100%) rename {src => SYCL/src}/mpi_stubs.hh (100%) rename {src => SYCL/src}/mpi_stubs_internal.hh (100%) rename {src => SYCL/src}/parseUtils.cc (100%) rename {src => SYCL/src}/parseUtils.hh (100%) rename {src => SYCL/src}/portability.hh (100%) rename {src => SYCL/src}/qs_assert.hh (100%) rename {src => SYCL/src}/utils.cc (100%) rename {src => SYCL/src}/utils.hh (100%) rename {src => SYCL/src}/utilsMpi.cc.dp.cpp (100%) rename {src => SYCL/src}/utilsMpi.hh (100%) diff --git a/CMakeLists.txt b/SYCL/CMakeLists.txt similarity index 100% rename from CMakeLists.txt rename to SYCL/CMakeLists.txt diff --git a/Examples/AllAbsorb/allAbsorb.inp b/SYCL/Examples/AllAbsorb/allAbsorb.inp similarity index 100% rename from Examples/AllAbsorb/allAbsorb.inp rename to SYCL/Examples/AllAbsorb/allAbsorb.inp diff --git a/Examples/AllEscape/allEscape.inp b/SYCL/Examples/AllEscape/allEscape.inp similarity index 100% rename from 
Examples/AllEscape/allEscape.inp rename to SYCL/Examples/AllEscape/allEscape.inp diff --git a/Examples/AllScattering/scatteringOnly.inp b/SYCL/Examples/AllScattering/scatteringOnly.inp similarity index 100% rename from Examples/AllScattering/scatteringOnly.inp rename to SYCL/Examples/AllScattering/scatteringOnly.inp diff --git a/Examples/CORAL2_Benchmark/Problem1/00_README.TXT b/SYCL/Examples/CORAL2_Benchmark/Problem1/00_README.TXT similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/00_README.TXT rename to SYCL/Examples/CORAL2_Benchmark/Problem1/00_README.TXT diff --git a/Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp b/SYCL/Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp rename to SYCL/Examples/CORAL2_Benchmark/Problem1/Coral2_P1.inp diff --git a/Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp b/SYCL/Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp rename to SYCL/Examples/CORAL2_Benchmark/Problem1/Coral2_P1_1.inp diff --git a/Examples/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp b/SYCL/Examples/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp rename to SYCL/Examples/CORAL2_Benchmark/Problem1/Coral2_P1_4096.inp diff --git a/Examples/CORAL2_Benchmark/Problem1/P1_04t.sh b/SYCL/Examples/CORAL2_Benchmark/Problem1/P1_04t.sh similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/P1_04t.sh rename to SYCL/Examples/CORAL2_Benchmark/Problem1/P1_04t.sh diff --git a/Examples/CORAL2_Benchmark/Problem1/P1_16t.sh b/SYCL/Examples/CORAL2_Benchmark/Problem1/P1_16t.sh similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/P1_16t.sh rename to SYCL/Examples/CORAL2_Benchmark/Problem1/P1_16t.sh diff --git a/Examples/CORAL2_Benchmark/Problem1/P1_64t.sh 
b/SYCL/Examples/CORAL2_Benchmark/Problem1/P1_64t.sh similarity index 100% rename from Examples/CORAL2_Benchmark/Problem1/P1_64t.sh rename to SYCL/Examples/CORAL2_Benchmark/Problem1/P1_64t.sh diff --git a/Examples/CORAL2_Benchmark/Problem2/00_README.TXT b/SYCL/Examples/CORAL2_Benchmark/Problem2/00_README.TXT similarity index 100% rename from Examples/CORAL2_Benchmark/Problem2/00_README.TXT rename to SYCL/Examples/CORAL2_Benchmark/Problem2/00_README.TXT diff --git a/Examples/CORAL2_Benchmark/Problem2/Coral2_P2.inp b/SYCL/Examples/CORAL2_Benchmark/Problem2/Coral2_P2.inp similarity index 100% rename from Examples/CORAL2_Benchmark/Problem2/Coral2_P2.inp rename to SYCL/Examples/CORAL2_Benchmark/Problem2/Coral2_P2.inp diff --git a/Examples/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp b/SYCL/Examples/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp similarity index 100% rename from Examples/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp rename to SYCL/Examples/CORAL2_Benchmark/Problem2/Coral2_P2_1.inp diff --git a/Examples/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp b/SYCL/Examples/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp similarity index 100% rename from Examples/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp rename to SYCL/Examples/CORAL2_Benchmark/Problem2/Coral2_P2_4096.inp diff --git a/Examples/CORAL2_Benchmark/Problem2/P2_64t.sh b/SYCL/Examples/CORAL2_Benchmark/Problem2/P2_64t.sh similarity index 100% rename from Examples/CORAL2_Benchmark/Problem2/P2_64t.sh rename to SYCL/Examples/CORAL2_Benchmark/Problem2/P2_64t.sh diff --git a/Examples/CTS2_Benchmark/00_README.TXT b/SYCL/Examples/CTS2_Benchmark/00_README.TXT similarity index 100% rename from Examples/CTS2_Benchmark/00_README.TXT rename to SYCL/Examples/CTS2_Benchmark/00_README.TXT diff --git a/Examples/CTS2_Benchmark/CTS2.inp b/SYCL/Examples/CTS2_Benchmark/CTS2.inp similarity index 100% rename from Examples/CTS2_Benchmark/CTS2.inp rename to SYCL/Examples/CTS2_Benchmark/CTS2.inp diff --git a/Examples/CTS2_Benchmark/CTS2_1.inp 
b/SYCL/Examples/CTS2_Benchmark/CTS2_1.inp similarity index 100% rename from Examples/CTS2_Benchmark/CTS2_1.inp rename to SYCL/Examples/CTS2_Benchmark/CTS2_1.inp diff --git a/Examples/CTS2_Benchmark/CTS2_36.inp b/SYCL/Examples/CTS2_Benchmark/CTS2_36.inp similarity index 100% rename from Examples/CTS2_Benchmark/CTS2_36.inp rename to SYCL/Examples/CTS2_Benchmark/CTS2_36.inp diff --git a/Examples/CTS2_Benchmark/CTS2_scaling.sh b/SYCL/Examples/CTS2_Benchmark/CTS2_scaling.sh similarity index 100% rename from Examples/CTS2_Benchmark/CTS2_scaling.sh rename to SYCL/Examples/CTS2_Benchmark/CTS2_scaling.sh diff --git a/Examples/Homogeneous/homogeneousProblem.inp b/SYCL/Examples/Homogeneous/homogeneousProblem.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem.inp diff --git a/Examples/Homogeneous/homogeneousProblem_v3.inp b/SYCL/Examples/Homogeneous/homogeneousProblem_v3.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem_v3.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem_v3.inp diff --git a/Examples/Homogeneous/homogeneousProblem_v3_wq.inp b/SYCL/Examples/Homogeneous/homogeneousProblem_v3_wq.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem_v3_wq.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem_v3_wq.inp diff --git a/Examples/Homogeneous/homogeneousProblem_v4_tm.inp b/SYCL/Examples/Homogeneous/homogeneousProblem_v4_tm.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem_v4_tm.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem_v4_tm.inp diff --git a/Examples/Homogeneous/homogeneousProblem_v4_ts.inp b/SYCL/Examples/Homogeneous/homogeneousProblem_v4_ts.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem_v4_ts.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem_v4_ts.inp diff --git a/Examples/Homogeneous/homogeneousProblem_v5_ts.inp 
b/SYCL/Examples/Homogeneous/homogeneousProblem_v5_ts.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem_v5_ts.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem_v5_ts.inp diff --git a/Examples/Homogeneous/homogeneousProblem_v7_ts.inp b/SYCL/Examples/Homogeneous/homogeneousProblem_v7_ts.inp similarity index 100% rename from Examples/Homogeneous/homogeneousProblem_v7_ts.inp rename to SYCL/Examples/Homogeneous/homogeneousProblem_v7_ts.inp diff --git a/Examples/Homogeneous/quicksilver_aprun_trinity_01.sh b/SYCL/Examples/Homogeneous/quicksilver_aprun_trinity_01.sh similarity index 100% rename from Examples/Homogeneous/quicksilver_aprun_trinity_01.sh rename to SYCL/Examples/Homogeneous/quicksilver_aprun_trinity_01.sh diff --git a/Examples/Homogeneous/quicksilver_aprun_trinity_02.sh b/SYCL/Examples/Homogeneous/quicksilver_aprun_trinity_02.sh similarity index 100% rename from Examples/Homogeneous/quicksilver_aprun_trinity_02.sh rename to SYCL/Examples/Homogeneous/quicksilver_aprun_trinity_02.sh diff --git a/Examples/Homogeneous/quicksilver_aprun_trinity_04.sh b/SYCL/Examples/Homogeneous/quicksilver_aprun_trinity_04.sh similarity index 100% rename from Examples/Homogeneous/quicksilver_aprun_trinity_04.sh rename to SYCL/Examples/Homogeneous/quicksilver_aprun_trinity_04.sh diff --git a/Examples/Homogeneous/quicksilver_slurm_rzalast_01.sh b/SYCL/Examples/Homogeneous/quicksilver_slurm_rzalast_01.sh similarity index 100% rename from Examples/Homogeneous/quicksilver_slurm_rzalast_01.sh rename to SYCL/Examples/Homogeneous/quicksilver_slurm_rzalast_01.sh diff --git a/Examples/Homogeneous/quicksilver_slurm_rzgenie_01.sh b/SYCL/Examples/Homogeneous/quicksilver_slurm_rzgenie_01.sh similarity index 100% rename from Examples/Homogeneous/quicksilver_slurm_rzgenie_01.sh rename to SYCL/Examples/Homogeneous/quicksilver_slurm_rzgenie_01.sh diff --git a/Examples/Homogeneous/run.homogeneousProblem_v4.rzoz7.x 
b/SYCL/Examples/Homogeneous/run.homogeneousProblem_v4.rzoz7.x similarity index 100% rename from Examples/Homogeneous/run.homogeneousProblem_v4.rzoz7.x rename to SYCL/Examples/Homogeneous/run.homogeneousProblem_v4.rzoz7.x diff --git a/Examples/NoCollisions/no.collisions.inp b/SYCL/Examples/NoCollisions/no.collisions.inp similarity index 100% rename from Examples/NoCollisions/no.collisions.inp rename to SYCL/Examples/NoCollisions/no.collisions.inp diff --git a/Examples/NoFission/noFission.inp b/SYCL/Examples/NoFission/noFission.inp similarity index 100% rename from Examples/NoFission/noFission.inp rename to SYCL/Examples/NoFission/noFission.inp diff --git a/Examples/NonFlatXC/NonFlatXC.inp b/SYCL/Examples/NonFlatXC/NonFlatXC.inp similarity index 100% rename from Examples/NonFlatXC/NonFlatXC.inp rename to SYCL/Examples/NonFlatXC/NonFlatXC.inp diff --git a/LICENSE.md b/SYCL/LICENSE.md similarity index 100% rename from LICENSE.md rename to SYCL/LICENSE.md diff --git a/README.md b/SYCL/README.md similarity index 100% rename from README.md rename to SYCL/README.md diff --git a/src/AtomicMacro.hh b/SYCL/src/AtomicMacro.hh similarity index 100% rename from src/AtomicMacro.hh rename to SYCL/src/AtomicMacro.hh diff --git a/src/BulkStorage.hh b/SYCL/src/BulkStorage.hh similarity index 100% rename from src/BulkStorage.hh rename to SYCL/src/BulkStorage.hh diff --git a/src/CollisionEvent.hh b/SYCL/src/CollisionEvent.hh similarity index 100% rename from src/CollisionEvent.hh rename to SYCL/src/CollisionEvent.hh diff --git a/src/CommObject.hh b/SYCL/src/CommObject.hh similarity index 100% rename from src/CommObject.hh rename to SYCL/src/CommObject.hh diff --git a/src/CoralBenchmark.cc b/SYCL/src/CoralBenchmark.cc similarity index 100% rename from src/CoralBenchmark.cc rename to SYCL/src/CoralBenchmark.cc diff --git a/src/CoralBenchmark.hh b/SYCL/src/CoralBenchmark.hh similarity index 100% rename from src/CoralBenchmark.hh rename to SYCL/src/CoralBenchmark.hh diff --git 
a/src/CycleTracking.cc b/SYCL/src/CycleTracking.cc similarity index 100% rename from src/CycleTracking.cc rename to SYCL/src/CycleTracking.cc diff --git a/src/CycleTracking.hh b/SYCL/src/CycleTracking.hh similarity index 100% rename from src/CycleTracking.hh rename to SYCL/src/CycleTracking.hh diff --git a/src/DeclareMacro.hh b/SYCL/src/DeclareMacro.hh similarity index 100% rename from src/DeclareMacro.hh rename to SYCL/src/DeclareMacro.hh diff --git a/src/DecompositionObject.cc b/SYCL/src/DecompositionObject.cc similarity index 100% rename from src/DecompositionObject.cc rename to SYCL/src/DecompositionObject.cc diff --git a/src/DecompositionObject.hh b/SYCL/src/DecompositionObject.hh similarity index 100% rename from src/DecompositionObject.hh rename to SYCL/src/DecompositionObject.hh diff --git a/src/DirectionCosine.cc.dp.cpp b/SYCL/src/DirectionCosine.cc.dp.cpp similarity index 100% rename from src/DirectionCosine.cc.dp.cpp rename to SYCL/src/DirectionCosine.cc.dp.cpp diff --git a/src/DirectionCosine.hh b/SYCL/src/DirectionCosine.hh similarity index 100% rename from src/DirectionCosine.hh rename to SYCL/src/DirectionCosine.hh diff --git a/src/EnergySpectrum.cc b/SYCL/src/EnergySpectrum.cc similarity index 100% rename from src/EnergySpectrum.cc rename to SYCL/src/EnergySpectrum.cc diff --git a/src/EnergySpectrum.hh b/SYCL/src/EnergySpectrum.hh similarity index 100% rename from src/EnergySpectrum.hh rename to SYCL/src/EnergySpectrum.hh diff --git a/src/FacetPair.hh b/SYCL/src/FacetPair.hh similarity index 100% rename from src/FacetPair.hh rename to SYCL/src/FacetPair.hh diff --git a/src/GlobalFccGrid.cc.dp.cpp b/SYCL/src/GlobalFccGrid.cc.dp.cpp similarity index 100% rename from src/GlobalFccGrid.cc.dp.cpp rename to SYCL/src/GlobalFccGrid.cc.dp.cpp diff --git a/src/GlobalFccGrid.hh b/SYCL/src/GlobalFccGrid.hh similarity index 100% rename from src/GlobalFccGrid.hh rename to SYCL/src/GlobalFccGrid.hh diff --git a/src/Globals.hh b/SYCL/src/Globals.hh similarity index 
100% rename from src/Globals.hh rename to SYCL/src/Globals.hh diff --git a/src/GridAssignmentObject.cc.dp.cpp b/SYCL/src/GridAssignmentObject.cc.dp.cpp similarity index 100% rename from src/GridAssignmentObject.cc.dp.cpp rename to SYCL/src/GridAssignmentObject.cc.dp.cpp diff --git a/src/GridAssignmentObject.hh b/SYCL/src/GridAssignmentObject.hh similarity index 100% rename from src/GridAssignmentObject.hh rename to SYCL/src/GridAssignmentObject.hh diff --git a/src/IndexToTuple.hh b/SYCL/src/IndexToTuple.hh similarity index 100% rename from src/IndexToTuple.hh rename to SYCL/src/IndexToTuple.hh diff --git a/src/IndexToTuple4.hh b/SYCL/src/IndexToTuple4.hh similarity index 100% rename from src/IndexToTuple4.hh rename to SYCL/src/IndexToTuple4.hh diff --git a/src/InputBlock.cc b/SYCL/src/InputBlock.cc similarity index 100% rename from src/InputBlock.cc rename to SYCL/src/InputBlock.cc diff --git a/src/InputBlock.hh b/SYCL/src/InputBlock.hh similarity index 100% rename from src/InputBlock.hh rename to SYCL/src/InputBlock.hh diff --git a/src/Long64.hh b/SYCL/src/Long64.hh similarity index 100% rename from src/Long64.hh rename to SYCL/src/Long64.hh diff --git a/src/MCT.hh b/SYCL/src/MCT.hh similarity index 100% rename from src/MCT.hh rename to SYCL/src/MCT.hh diff --git a/src/MC_Base_Particle.cc b/SYCL/src/MC_Base_Particle.cc similarity index 100% rename from src/MC_Base_Particle.cc rename to SYCL/src/MC_Base_Particle.cc diff --git a/src/MC_Base_Particle.hh b/SYCL/src/MC_Base_Particle.hh similarity index 100% rename from src/MC_Base_Particle.hh rename to SYCL/src/MC_Base_Particle.hh diff --git a/src/MC_Cell_State.hh b/SYCL/src/MC_Cell_State.hh similarity index 100% rename from src/MC_Cell_State.hh rename to SYCL/src/MC_Cell_State.hh diff --git a/src/MC_Distance_To_Facet.hh b/SYCL/src/MC_Distance_To_Facet.hh similarity index 100% rename from src/MC_Distance_To_Facet.hh rename to SYCL/src/MC_Distance_To_Facet.hh diff --git a/src/MC_Domain.cc.dp.cpp 
b/SYCL/src/MC_Domain.cc.dp.cpp similarity index 100% rename from src/MC_Domain.cc.dp.cpp rename to SYCL/src/MC_Domain.cc.dp.cpp diff --git a/src/MC_Domain.hh b/SYCL/src/MC_Domain.hh similarity index 100% rename from src/MC_Domain.hh rename to SYCL/src/MC_Domain.hh diff --git a/src/MC_Facet_Adjacency.hh b/SYCL/src/MC_Facet_Adjacency.hh similarity index 100% rename from src/MC_Facet_Adjacency.hh rename to SYCL/src/MC_Facet_Adjacency.hh diff --git a/src/MC_Facet_Crossing_Event.hh b/SYCL/src/MC_Facet_Crossing_Event.hh similarity index 100% rename from src/MC_Facet_Crossing_Event.hh rename to SYCL/src/MC_Facet_Crossing_Event.hh diff --git a/src/MC_Facet_Geometry.hh b/SYCL/src/MC_Facet_Geometry.hh similarity index 100% rename from src/MC_Facet_Geometry.hh rename to SYCL/src/MC_Facet_Geometry.hh diff --git a/src/MC_Fast_Timer.cc.dp.cpp b/SYCL/src/MC_Fast_Timer.cc.dp.cpp similarity index 100% rename from src/MC_Fast_Timer.cc.dp.cpp rename to SYCL/src/MC_Fast_Timer.cc.dp.cpp diff --git a/src/MC_Fast_Timer.hh b/SYCL/src/MC_Fast_Timer.hh similarity index 100% rename from src/MC_Fast_Timer.hh rename to SYCL/src/MC_Fast_Timer.hh diff --git a/src/MC_Location.hh b/SYCL/src/MC_Location.hh similarity index 100% rename from src/MC_Location.hh rename to SYCL/src/MC_Location.hh diff --git a/src/MC_Nearest_Facet.hh b/SYCL/src/MC_Nearest_Facet.hh similarity index 100% rename from src/MC_Nearest_Facet.hh rename to SYCL/src/MC_Nearest_Facet.hh diff --git a/src/MC_Particle.hh b/SYCL/src/MC_Particle.hh similarity index 100% rename from src/MC_Particle.hh rename to SYCL/src/MC_Particle.hh diff --git a/src/MC_Particle_Buffer.cc b/SYCL/src/MC_Particle_Buffer.cc similarity index 100% rename from src/MC_Particle_Buffer.cc rename to SYCL/src/MC_Particle_Buffer.cc diff --git a/src/MC_Particle_Buffer.hh b/SYCL/src/MC_Particle_Buffer.hh similarity index 100% rename from src/MC_Particle_Buffer.hh rename to SYCL/src/MC_Particle_Buffer.hh diff --git a/src/MC_Processor_Info.hh 
b/SYCL/src/MC_Processor_Info.hh similarity index 100% rename from src/MC_Processor_Info.hh rename to SYCL/src/MC_Processor_Info.hh diff --git a/src/MC_RNG_State.hh b/SYCL/src/MC_RNG_State.hh similarity index 100% rename from src/MC_RNG_State.hh rename to SYCL/src/MC_RNG_State.hh diff --git a/src/MC_Segment_Outcome.hh b/SYCL/src/MC_Segment_Outcome.hh similarity index 100% rename from src/MC_Segment_Outcome.hh rename to SYCL/src/MC_Segment_Outcome.hh diff --git a/src/MC_SourceNow.hh b/SYCL/src/MC_SourceNow.hh similarity index 100% rename from src/MC_SourceNow.hh rename to SYCL/src/MC_SourceNow.hh diff --git a/src/MC_Time_Info.hh b/SYCL/src/MC_Time_Info.hh similarity index 100% rename from src/MC_Time_Info.hh rename to SYCL/src/MC_Time_Info.hh diff --git a/src/MC_Vector.hh b/SYCL/src/MC_Vector.hh similarity index 100% rename from src/MC_Vector.hh rename to SYCL/src/MC_Vector.hh diff --git a/src/MacroscopicCrossSection.hh b/SYCL/src/MacroscopicCrossSection.hh similarity index 100% rename from src/MacroscopicCrossSection.hh rename to SYCL/src/MacroscopicCrossSection.hh diff --git a/src/MaterialDatabase.hh b/SYCL/src/MaterialDatabase.hh similarity index 100% rename from src/MaterialDatabase.hh rename to SYCL/src/MaterialDatabase.hh diff --git a/src/MemoryControl.hh b/SYCL/src/MemoryControl.hh similarity index 100% rename from src/MemoryControl.hh rename to SYCL/src/MemoryControl.hh diff --git a/src/MeshPartition.cc b/SYCL/src/MeshPartition.cc similarity index 100% rename from src/MeshPartition.cc rename to SYCL/src/MeshPartition.cc diff --git a/src/MeshPartition.hh b/SYCL/src/MeshPartition.hh similarity index 100% rename from src/MeshPartition.hh rename to SYCL/src/MeshPartition.hh diff --git a/src/MonteCarlo.cc.dp.cpp b/SYCL/src/MonteCarlo.cc.dp.cpp similarity index 100% rename from src/MonteCarlo.cc.dp.cpp rename to SYCL/src/MonteCarlo.cc.dp.cpp diff --git a/src/MonteCarlo.hh b/SYCL/src/MonteCarlo.hh similarity index 100% rename from src/MonteCarlo.hh rename to 
SYCL/src/MonteCarlo.hh diff --git a/src/MpiCommObject.cc b/SYCL/src/MpiCommObject.cc similarity index 100% rename from src/MpiCommObject.cc rename to SYCL/src/MpiCommObject.cc diff --git a/src/MpiCommObject.hh b/SYCL/src/MpiCommObject.hh similarity index 100% rename from src/MpiCommObject.hh rename to SYCL/src/MpiCommObject.hh diff --git a/src/NVTX_Range.hh b/SYCL/src/NVTX_Range.hh similarity index 100% rename from src/NVTX_Range.hh rename to SYCL/src/NVTX_Range.hh diff --git a/src/NuclearData.hh b/SYCL/src/NuclearData.hh similarity index 100% rename from src/NuclearData.hh rename to SYCL/src/NuclearData.hh diff --git a/src/Parameters.cc b/SYCL/src/Parameters.cc similarity index 100% rename from src/Parameters.cc rename to SYCL/src/Parameters.cc diff --git a/src/Parameters.hh b/SYCL/src/Parameters.hh similarity index 100% rename from src/Parameters.hh rename to SYCL/src/Parameters.hh diff --git a/src/ParticleVault.cc b/SYCL/src/ParticleVault.cc similarity index 100% rename from src/ParticleVault.cc rename to SYCL/src/ParticleVault.cc diff --git a/src/ParticleVault.hh b/SYCL/src/ParticleVault.hh similarity index 100% rename from src/ParticleVault.hh rename to SYCL/src/ParticleVault.hh diff --git a/src/ParticleVaultContainer.cc b/SYCL/src/ParticleVaultContainer.cc similarity index 100% rename from src/ParticleVaultContainer.cc rename to SYCL/src/ParticleVaultContainer.cc diff --git a/src/ParticleVaultContainer.hh b/SYCL/src/ParticleVaultContainer.hh similarity index 100% rename from src/ParticleVaultContainer.hh rename to SYCL/src/ParticleVaultContainer.hh diff --git a/src/PhysicalConstants.hh b/SYCL/src/PhysicalConstants.hh similarity index 100% rename from src/PhysicalConstants.hh rename to SYCL/src/PhysicalConstants.hh diff --git a/src/PopulationControl.cc.dp.cpp b/SYCL/src/PopulationControl.cc.dp.cpp similarity index 100% rename from src/PopulationControl.cc.dp.cpp rename to SYCL/src/PopulationControl.cc.dp.cpp diff --git a/src/PopulationControl.hh 
b/SYCL/src/PopulationControl.hh similarity index 100% rename from src/PopulationControl.hh rename to SYCL/src/PopulationControl.hh diff --git a/src/QS_Vector.hh b/SYCL/src/QS_Vector.hh similarity index 100% rename from src/QS_Vector.hh rename to SYCL/src/QS_Vector.hh diff --git a/src/Random.cc b/SYCL/src/Random.cc similarity index 100% rename from src/Random.cc rename to SYCL/src/Random.cc diff --git a/src/Random.h b/SYCL/src/Random.h similarity index 100% rename from src/Random.h rename to SYCL/src/Random.h diff --git a/src/SendQueue.hh b/SYCL/src/SendQueue.hh similarity index 100% rename from src/SendQueue.hh rename to SYCL/src/SendQueue.hh diff --git a/src/SharedMemoryCommObject.cc b/SYCL/src/SharedMemoryCommObject.cc similarity index 100% rename from src/SharedMemoryCommObject.cc rename to SYCL/src/SharedMemoryCommObject.cc diff --git a/src/SharedMemoryCommObject.hh b/SYCL/src/SharedMemoryCommObject.hh similarity index 100% rename from src/SharedMemoryCommObject.hh rename to SYCL/src/SharedMemoryCommObject.hh diff --git a/src/Tallies.cc b/SYCL/src/Tallies.cc similarity index 100% rename from src/Tallies.cc rename to SYCL/src/Tallies.cc diff --git a/src/Tallies.hh b/SYCL/src/Tallies.hh similarity index 100% rename from src/Tallies.hh rename to SYCL/src/Tallies.hh diff --git a/src/Tuple.hh b/SYCL/src/Tuple.hh similarity index 100% rename from src/Tuple.hh rename to SYCL/src/Tuple.hh diff --git a/src/Tuple4.hh b/SYCL/src/Tuple4.hh similarity index 100% rename from src/Tuple4.hh rename to SYCL/src/Tuple4.hh diff --git a/src/Tuple4ToIndex.hh b/SYCL/src/Tuple4ToIndex.hh similarity index 100% rename from src/Tuple4ToIndex.hh rename to SYCL/src/Tuple4ToIndex.hh diff --git a/src/TupleToIndex.hh b/SYCL/src/TupleToIndex.hh similarity index 100% rename from src/TupleToIndex.hh rename to SYCL/src/TupleToIndex.hh diff --git a/src/cmdLineParser.cc b/SYCL/src/cmdLineParser.cc similarity index 100% rename from src/cmdLineParser.cc rename to SYCL/src/cmdLineParser.cc diff --git 
a/src/cmdLineParser.hh b/SYCL/src/cmdLineParser.hh similarity index 100% rename from src/cmdLineParser.hh rename to SYCL/src/cmdLineParser.hh diff --git a/src/cudaFunctions.cc.dp.cpp b/SYCL/src/cudaFunctions.cc.dp.cpp similarity index 100% rename from src/cudaFunctions.cc.dp.cpp rename to SYCL/src/cudaFunctions.cc.dp.cpp diff --git a/src/cudaFunctions.hh b/SYCL/src/cudaFunctions.hh similarity index 100% rename from src/cudaFunctions.hh rename to SYCL/src/cudaFunctions.hh diff --git a/src/cudaUtils.hh b/SYCL/src/cudaUtils.hh similarity index 100% rename from src/cudaUtils.hh rename to SYCL/src/cudaUtils.hh diff --git a/src/initMC.cc.dp.cpp b/SYCL/src/initMC.cc.dp.cpp similarity index 100% rename from src/initMC.cc.dp.cpp rename to SYCL/src/initMC.cc.dp.cpp diff --git a/src/initMC.hh b/SYCL/src/initMC.hh similarity index 100% rename from src/initMC.hh rename to SYCL/src/initMC.hh diff --git a/src/macros.hh b/SYCL/src/macros.hh similarity index 100% rename from src/macros.hh rename to SYCL/src/macros.hh diff --git a/src/main.cc.dp.cpp b/SYCL/src/main.cc.dp.cpp similarity index 100% rename from src/main.cc.dp.cpp rename to SYCL/src/main.cc.dp.cpp diff --git a/src/mc_omp_critical.hh b/SYCL/src/mc_omp_critical.hh similarity index 100% rename from src/mc_omp_critical.hh rename to SYCL/src/mc_omp_critical.hh diff --git a/src/mc_omp_parallel_for_schedule_static.hh b/SYCL/src/mc_omp_parallel_for_schedule_static.hh similarity index 100% rename from src/mc_omp_parallel_for_schedule_static.hh rename to SYCL/src/mc_omp_parallel_for_schedule_static.hh diff --git a/src/memUtils.hh b/SYCL/src/memUtils.hh similarity index 100% rename from src/memUtils.hh rename to SYCL/src/memUtils.hh diff --git a/src/mpi_stubs.hh b/SYCL/src/mpi_stubs.hh similarity index 100% rename from src/mpi_stubs.hh rename to SYCL/src/mpi_stubs.hh diff --git a/src/mpi_stubs_internal.hh b/SYCL/src/mpi_stubs_internal.hh similarity index 100% rename from src/mpi_stubs_internal.hh rename to 
SYCL/src/mpi_stubs_internal.hh diff --git a/src/parseUtils.cc b/SYCL/src/parseUtils.cc similarity index 100% rename from src/parseUtils.cc rename to SYCL/src/parseUtils.cc diff --git a/src/parseUtils.hh b/SYCL/src/parseUtils.hh similarity index 100% rename from src/parseUtils.hh rename to SYCL/src/parseUtils.hh diff --git a/src/portability.hh b/SYCL/src/portability.hh similarity index 100% rename from src/portability.hh rename to SYCL/src/portability.hh diff --git a/src/qs_assert.hh b/SYCL/src/qs_assert.hh similarity index 100% rename from src/qs_assert.hh rename to SYCL/src/qs_assert.hh diff --git a/src/utils.cc b/SYCL/src/utils.cc similarity index 100% rename from src/utils.cc rename to SYCL/src/utils.cc diff --git a/src/utils.hh b/SYCL/src/utils.hh similarity index 100% rename from src/utils.hh rename to SYCL/src/utils.hh diff --git a/src/utilsMpi.cc.dp.cpp b/SYCL/src/utilsMpi.cc.dp.cpp similarity index 100% rename from src/utilsMpi.cc.dp.cpp rename to SYCL/src/utilsMpi.cc.dp.cpp diff --git a/src/utilsMpi.hh b/SYCL/src/utilsMpi.hh similarity index 100% rename from src/utilsMpi.hh rename to SYCL/src/utilsMpi.hh