diff --git a/.github/workflows/build-ecmech/action.yml b/.github/workflows/build-ecmech/action.yml index 93fdd55..7b605f4 100644 --- a/.github/workflows/build-ecmech/action.yml +++ b/.github/workflows/build-ecmech/action.yml @@ -17,7 +17,7 @@ runs: steps: - name: Install ECMech run: | - git clone --single-branch --branch v0.3.4 --depth 1 ${{ inputs.ecmech-repo }} ${{ inputs.ecmech-dir }}; + git clone --single-branch --branch v0.4.1 --depth 1 ${{ inputs.ecmech-repo }} ${{ inputs.ecmech-dir }}; cd ${{ inputs.ecmech-dir }}; git submodule init; git submodule update; diff --git a/.github/workflows/build-exaconstit/action.yml b/.github/workflows/build-exaconstit/action.yml index 2204c6e..40fb849 100644 --- a/.github/workflows/build-exaconstit/action.yml +++ b/.github/workflows/build-exaconstit/action.yml @@ -1,6 +1,9 @@ name: build-exaconstit inputs: + python-exe: + description: 'python binary location' + required: true raja-dir: description: 'raja install location working dir' required: true @@ -42,6 +45,7 @@ runs: -DECMECH_DIR=${{ inputs.ecmech-dir }} \ -DSNLS_DIR=${{ inputs.snls-dir }} \ -DCMAKE_BUILD_TYPE=Release \ + -DPYTHON_EXECUTABLE=${{ inputs.python-exe }} \ -DENABLE_SNLS_V03=ON \ -DENABLE_TESTS=ON make -j3; diff --git a/.github/workflows/build-raja/action.yml b/.github/workflows/build-raja/action.yml index 97ef45e..81e597d 100644 --- a/.github/workflows/build-raja/action.yml +++ b/.github/workflows/build-raja/action.yml @@ -14,7 +14,7 @@ runs: steps: - name: Install RAJA run: | - git clone --single-branch --branch v2022.10.5 --depth 1 ${{ inputs.raja-repo }} ${{ inputs.raja-dir }}; + git clone --single-branch --branch v2024.07.0 --depth 1 ${{ inputs.raja-repo }} ${{ inputs.raja-dir }}; cd ${{ inputs.raja-dir }}; git submodule init; git submodule update; diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 02e2ef1..723c1e9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,8 +11,8 @@ on: # Note the SNLS top dir is no longer where SNLS's source is located within ecmech # rather it's the top directory of ecmech. env: - HYPRE_ARCHIVE: v2.26.0.tar.gz - HYPRE_TOP_DIR: hypre-2.26.0 + HYPRE_ARCHIVE: v2.30.0.tar.gz + HYPRE_TOP_DIR: hypre-2.30.0 METIS_ARCHIVE: metis-5.1.0.tar.gz METIS_TOP_DIR: metis-5.1.0 MFEM_TOP_DIR: mfem-exaconstit @@ -30,7 +30,7 @@ jobs: builds-and-tests: strategy: matrix: - os: [ubuntu-20.04] + os: [ubuntu-latest] target: [release] mpi: [parallel] build-system: [cmake] @@ -40,15 +40,16 @@ jobs: steps: - name: checkout exaconstit - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: path: ${{ env.EXACONSTIT_TOP_DIR }} fetch-depth: 0 # Our tests require python so install it and numpy - - name: Set up Python 3.8 - uses: actions/setup-python@v2 + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + id: py310 with: - python-version: 3.8 + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip @@ -58,7 +59,7 @@ jobs: # TODO: It would be nice to have only one step, e.g. with a dedicated # action, but I (@adrienbernede) don't see how at the moment. 
- name: get MPI (Linux) - if: matrix.mpi == 'parallel' && matrix.os == 'ubuntu-20.04' + if: matrix.mpi == 'parallel' && matrix.os == 'ubuntu-latest' run: | sudo apt-get install mpich libmpich-dev export MAKE_CXX_FLAG="MPICXX=mpic++" @@ -68,10 +69,10 @@ jobs: - name: cache raja id: raja-cache if: matrix.mpi == 'parallel' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.RAJA_TOP_DIR }} - key: ${{ runner.os }}-build-${{ env.RAJA_TOP_DIR }}-v2.01 + key: ${{ runner.os }}-build-${{ env.RAJA_TOP_DIR }}-v2.02 - name: get raja if: matrix.mpi == 'parallel' && steps.raja-cache.outputs.cache-hit != 'true' @@ -84,10 +85,10 @@ jobs: - name: cache ecmech id: ecmech-cache if: matrix.mpi == 'parallel' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.ECMECH_TOP_DIR }} - key: ${{ runner.os }}-build-${{ env.ECMECH_TOP_DIR }}-v2.01 + key: ${{ runner.os }}-build-${{ env.ECMECH_TOP_DIR }}-v2.02 - name: get ecmech if: matrix.mpi == 'parallel' && steps.ecmech-cache.outputs.cache-hit != 'true' @@ -101,10 +102,10 @@ jobs: - name: cache hypre id: hypre-cache if: matrix.mpi == 'parallel' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.HYPRE_TOP_DIR }} - key: ${{ runner.os }}-build-${{ env.HYPRE_TOP_DIR }}-v2.01 + key: ${{ runner.os }}-build-${{ env.HYPRE_TOP_DIR }}-v2.02 - name: get hypre if: matrix.mpi == 'parallel' && steps.hypre-cache.outputs.cache-hit != 'true' @@ -118,7 +119,7 @@ jobs: - name: cache metis id: metis-cache if: matrix.mpi == 'parallel' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.METIS_TOP_DIR }} key: ${{ runner.os }}-build-${{ env.METIS_TOP_DIR }}-v2.01 @@ -136,10 +137,10 @@ jobs: - name: cache mfem id: mfem-cache if: matrix.mpi == 'parallel' - uses: actions/cache@v2 + uses: actions/cache@v4 with: path: ${{ env.MFEM_TOP_DIR }} - key: ${{ runner.os }}-build-${{ env.MFEM_TOP_DIR }}-v2.03 + key: ${{ runner.os }}-build-${{ env.MFEM_TOP_DIR }}-v2.04 - name: install mfem if: matrix.mpi == 'parallel' && steps.mfem-cache.outputs.cache-hit != 'true' @@ -154,6 +155,7 @@ jobs: - name: build uses: ./.github/workflows/build-exaconstit with: + python-exe: '${{ steps.py310.outputs.python-path }}' raja-dir: '${{ github.workspace }}/${{ env.RAJA_TOP_DIR}}/install_dir/lib/cmake/raja/' mfem-dir: '${{ github.workspace }}/${{ env.MFEM_TOP_DIR }}/install_dir/lib/cmake/mfem/' ecmech-dir: '${{ github.workspace }}/${{ env.ECMECH_TOP_DIR }}/install_dir/' diff --git a/CMakeLists.txt b/CMakeLists.txt index a905337..a266736 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ # see README.md for copyright and license information # -cmake_minimum_required(VERSION 3.1) +cmake_minimum_required(VERSION 3.21) if("${CMAKE_VERSION}" VERSION_GREATER 3.3) cmake_policy(SET CMP0057 NEW) endif() @@ -17,7 +17,7 @@ endif() enable_language(C) -set(CMAKE_CXX_STANDARD 14) +set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) diff --git a/README.md b/README.md index fce13cd..c64e1c9 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,8 @@ # ExaConstit App -Updated: June. 10, 2022 +Updated: Feb. 6, 2025 -Version 0.7.0 +Version 0.8.0 # Description: A principal purpose of this code app is to probe the deformation response of polycrystalline materials; for example, in homogenization to obtain bulk constitutive properties of metals. This is a nonlinear quasi-static, implicit solid mechanics code built on the MFEM library based on an updated Lagrangian formulation (velocity based). 
@@ -50,7 +50,7 @@ Several small examples that you can run are found in the ```test/data``` directo The ```scripts/postprocessing``` directory contains several useful post-processing tools. The ```macro_stress_strain_plot.py``` file can be used to generate macroscopic stress strain plots. An example script ```adios2_example.py``` is provided as example for how to make use of the ```ADIOS2``` post-processing files if ```MFEM``` was compiled with ```ADIOS2``` support. It's highly recommended to install ```MFEM``` with this library if you plan to be doing a lot of post-processing of data in python. -A set of scripts to perform lattice strain calculations similar to those found in powder diffraction type experiments can be found in the ```scripts/postprocessing``` directory. The appropriate python scripts are: `adios2_extraction.py`, `strain_Xtal_to_Sample.py`, and `calc_lattice_strain.py`. In order to use these scripts, one needs to run with the `light_up=true` option set in the `Visualization` table of your simulation option file. +A set of scripts to perform lattice strain calculations similar to those found in powder diffraction type experiments can be found in the ```scripts/postprocessing``` directory. The appropriate python scripts are: `adios2_extraction.py`, `strain_Xtal_to_Sample.py`, and `calc_lattice_strain.py`. In order to use these scripts, one needs to run with the `light_up=true` option set in the `Visualization` table of your simulation option file. Alternatively, if you just use the `light_up` option and provide the necessary parameters as defined in the `src/options.toml` file, a set of in-situ lattice strain calculations will be performed during the simulation. The cost of these in-situ calculations is fairly nominal, and they are generally advisable when performing large-scale simulations where this data is desirable. # Workflow Examples @@ -65,17 +65,22 @@ The other workflow is based on a UQ workflow for metal additive manufacturing th * Conduit and ADIOS2 supply output support. ZLIB allows MFEM to read in gzip mesh files or save data as being compressed. * You'll need to use the exaconstit-dev branch of MFEM found on this fork of MFEM: https://github.com/rcarson3/mfem.git * We do plan on upstreaming the necessary changes needed for ExaConstit into the master branch of MFEM, so you'll no longer be required to do this + * Version 0.8.0 of ExaConstit is compatible with the following mfem hash: + 31b42daa3cdddeff04ce3f59befa769b262facd7 + or + 29a8e15382682babe0f5c993211caa3008e1ec96 * Version 0.7.0 of Exaconstit is compatible with the following mfem hash 78a95570971c5278d6838461da6b66950baea641 * Version 0.6.0 of ExaConstit is compatible with the following mfem hash 1b31e07cbdc564442a18cfca2c8d5a4b037613f0 * Version 0.5.0 of ExaConstit required 5ebca1fc463484117c0070a530855f8cbc4d619e -* ExaCMech is required for ExaConstit to be built and can be obtained at https://github.com/LLNL/ExaCMech.git and now requires the develop branch. ExaCMech depends internally on SNLS, from https://github.com/LLNL/SNLS.git. We depend on v0.3.4 of ExaCMech as of this point in time. + * ExaCMech is required for ExaConstit to be built and can be obtained at https://github.com/LLNL/ExaCMech.git and now requires the develop branch. ExaCMech depends internally on SNLS, from https://github.com/LLNL/SNLS.git. We depend on v0.4.1 of ExaCMech as of this point in time. + * GPU-builds of ExaCMech >= v0.4.1 and thus ExaConstit now require the RAJA Portability Suite (RAJA, Umpire, and CHAI) to compile and run on the GPU. We currently leverage the `v2024.07.0` tag for all of the RAJA Portability Suite repos. A sketch of the extra cmake package hints this implies for a GPU build is shown below.
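  As a rough guide, a GPU build ends up pointing ExaConstit's cmake at each Portability Suite package explicitly, mirroring the `unix_gpu_cuda_install_example.sh` and `unix_gpu_hip_install_example.sh` scripts added in this change. The `*_ROOT` variables below are placeholders for wherever you installed each package, and the `lib` vs `lib64` suffix depends on your install layout, so treat this as a sketch rather than an exact command:

```bash
# Placeholder install prefixes -- substitute your own paths.
CAMP_ROOT=/path/to/camp/install_dir
RAJA_ROOT=/path/to/RAJA/install_dir
UMPIRE_ROOT=/path/to/Umpire/install_dir
CHAI_ROOT=/path/to/CHAI/install_dir
ECMECH_ROOT=/path/to/ExaCMech/install_dir

# Extra package hints a GPU build of ExaConstit passes on top of the usual flags.
cmake .. \
  -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp \
  -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \
  -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \
  -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \
  -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \
  -DECMECH_DIR=${ECMECH_ROOT} \
  -DSNLS_DIR=${ECMECH_ROOT}
```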
* For versions of ExaCMech >= 0.3.3, you'll need to add `-DENABLE_SNLS_V03=ON` to the cmake commands as a number of cmake changes were made to that library and SNLS. * RAJA is required for ExaConstit to be built and should be the same one that ExaCMech and MFEM are built with. It can be obtained at https://github.com/LLNL/RAJA. Currently, RAJA >= 2022.10.x is required for ExaConstit due to a dependency update in MFEMv4.5. -* An example install bash script for unix systems can be found in ```scripts/install/unix_install_example.sh```. This is provided as an example of how to install ExaConstit and its dependencies, but it is not guaranteed to work on every system. A CUDA version of that script is also included in that folder, and only minor modifications are required if using a version of Cmake >= 3.18.*. In those cases ```CUDA_ARCH``` has been changed to ```CMAKE_CUDA_ARCHITECTURES```. You'll also need to look up what you're CUDA architecture compute capability is set to and modify that within the script. Currently, it is set to ```sm_70``` which is associated with the Volta architecture. +* An example install bash script for unix systems can be found in ```scripts/install/unix_install_example.sh```. This is provided as an example of how to install ExaConstit and its dependencies, but it is not guaranteed to work on every system. A CUDA version of that script is also included in that folder (`unix_gpu_cuda_install_example.sh`), and only minor modifications are required if using a version of CMake >= 3.18.*. In those cases ```CUDA_ARCH``` has been changed to ```CMAKE_CUDA_ARCHITECTURES```. You'll also need to look up what your CUDA architecture compute capability is set to and modify that within the script. Currently, it is set to ```sm_70```, which is associated with the Volta architecture. We also have a HIP version included in that folder (`unix_gpu_hip_install_example.sh`). It's based on an LLNL El Capitan-like system build, so things might need tweaking for other AMD GPU machines. * Create a build directory and cd into there -* Run ```cmake .. 
-DENABLE_MPI=ON -DENABLE_FORTRAN=OFF -DMFEM_DIR{mfem's installed cmake location} -DBLT_SOURCE_DIR=${BLT cloned location if not located in cmake directory} -DECMECH_DIR=${ExaCMech installed cmake location} -DRAJA_DIR={RAJA installed location} -DSNLS_DIR={SNLS installed cmake location}``` * Run ```make -j 4``` diff --git a/cmake/CMakeBasics.cmake b/cmake/CMakeBasics.cmake index dbb50f6..6e36490 100644 --- a/cmake/CMakeBasics.cmake +++ b/cmake/CMakeBasics.cmake @@ -4,7 +4,7 @@ set(PACKAGE_BUGREPORT "carson16@llnl.gov") set(EXACONSTIT_VERSION_MAJOR 0) -set(EXACONSTIT_VERSION_MINOR 7) +set(EXACONSTIT_VERSION_MINOR 8) set(EXACONSTIT_VERSION_PATCH \"0\") set(HEADER_INCLUDE_DIR @@ -51,7 +51,7 @@ macro(exaconstit_fill_depends_list) foreach( _dep ${arg_DEPENDS_ON}) string(TOUPPER ${_dep} _ucdep) - if (ENABLE_${_ucdep} OR ${_ucdep}_FOUND) + if (ENABLE_${_ucdep} OR ${_ucdep}_FOUND OR ${_dep}_FOUND) list(APPEND ${arg_LIST_NAME} ${_dep}) endif() endforeach() diff --git a/cmake/blt b/cmake/blt index 5a792c1..fb4246b 160000 --- a/cmake/blt +++ b/cmake/blt @@ -1 +1 @@ -Subproject commit 5a792c1775e7a7628d84dcde31652a689f1df7b5 +Subproject commit fb4246b8bae74c3d7291bef9698fd38863844680 diff --git a/cmake/thirdpartylibraries/FindRAJA.cmake b/cmake/thirdpartylibraries/FindRAJA.cmake deleted file mode 100644 index 4def976..0000000 --- a/cmake/thirdpartylibraries/FindRAJA.cmake +++ /dev/null @@ -1,62 +0,0 @@ -############################################################################### -# -# Setup RAJA -# This file defines: -# RAJA_FOUND - If RAJA was found -# RAJA_INCLUDE_DIRS - The RAJA include directories -# RAJA_LIBRARY - The RAJA library - -# first Check for RAJA_DIR - -if(NOT RAJA_DIR) - MESSAGE(FATAL_ERROR "Could not find RAJA. RAJA support needs explicit RAJA_DIR") -endif() - -if (NOT RAJA_CONFIG_CMAKE) - set(RAJA_CONFIG_CMAKE "${RAJA_DIR}/share/raja/cmake/raja-config.cmake") -endif() -if (EXISTS "${RAJA_CONFIG_CMAKE}") - include("${RAJA_CONFIG_CMAKE}") -endif() -if (NOT RAJA_RELEASE_CMAKE) - set(RAJA_RELEASE_CMAKE "${RAJA_DIR}/share/raja/cmake/raja-release.cmake") -endif() -if (EXISTS "${RAJA_RELEASE_CMAKE}") - include("${RAJA_RELEASE_CMAKE}") -endif() - -find_package(RAJA REQUIRED) - -if(camp_DIR AND (RAJA_VERSION_MINOR GREATER 10 OR RAJA_VERSION_MAJOR GREATER 0)) - find_package(camp REQUIRED - NO_DEFAULT_PATH - PATHS ${camp_DIR} - ${camp_DIR}/lib/cmake/camp - ) - set(ENABLE_CAMP ON CACHE BOOL "") -endif() - -if(RAJA_CONFIG_LOADED) - if(ENABLE_OPENMP) - set(BLT_CXX_FLAGS "${BLT_CXX_FLAGS} -fopenmp" CACHE PATH "") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp" CACHE STRING "" FORCE) - endif() -endif() - -get_property(RAJA_INCLUDE_DIRS TARGET RAJA PROPERTY INTERFACE_INCLUDE_DIRECTORIES) -include_directories(${RAJA_INCLUDE_DIRS}) - -find_library( RAJA_LIBRARY NAMES RAJA libRAJA - PATHS ${RAJA_LIB_DIR} ${RAJA_DIR}/../../../lib/ - NO_DEFAULT_PATH - NO_CMAKE_ENVIRONMENT_PATH - NO_CMAKE_PATH - NO_SYSTEM_ENVIRONMENT_PATH - NO_CMAKE_SYSTEM_PATH) - -include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments and set RAJA_FOUND to TRUE -# if all listed variables are TRUE -find_package_handle_standard_args(RAJA DEFAULT_MSG - RAJA_INCLUDE_DIRS - RAJA_LIBRARY ) diff --git a/cmake/thirdpartylibraries/FindSNLS.cmake b/cmake/thirdpartylibraries/FindSNLS.cmake deleted file mode 100644 index 52a50bf..0000000 --- a/cmake/thirdpartylibraries/FindSNLS.cmake +++ /dev/null @@ -1,46 +0,0 @@ -############################################################################### -# -# Setup SNLS -# This 
file defines: -# SNLS_FOUND - If SNLS was found -# SNLS_INCLUDE_DIRS - The SNLS include directories - -# first Check for SNLS_DIR - -if(NOT SNLS_DIR) - MESSAGE(FATAL_ERROR "Could not find SNLS. SNLS support needs explicit SNLS_DIR") -endif() - -# SNLS's installed cmake config target is lower case -if (ENABLE_SNLS_V03) - set(snls_DIR "${SNLS_DIR}/share/snls/cmake/" ) - list(APPEND CMAKE_PREFIX_PATH ${snls_DIR}) - - find_package(snls REQUIRED) - - set (SNLS_FOUND ${snls_FOUND} CACHE STRING "") - - set(SNLS_LIBRARIES snls) - - set(SNLS_DEPENDS) - MESSAGE("SNLS RAJA_PERF_SUITE" ${SNLS_USE_RAJA_PERF_SUITE}) - blt_list_append(TO SNLS_DEPENDS ELEMENTS chai raja umpire camp IF ${SNLS_USE_RAJA_PERF_SUITE}) - -else() - #find includes - find_path( SNLS_INCLUDE_DIRS SNLS_lup_solve.h - PATHS ${SNLS_DIR}/include/ ${SNLS_DIR} - NO_DEFAULT_PATH - NO_CMAKE_ENVIRONMENT_PATH - NO_CMAKE_PATH - NO_SYSTEM_ENVIRONMENT_PATH - NO_CMAKE_SYSTEM_PATH) - - include(FindPackageHandleStandardArgs) - # handle the QUIETLY and REQUIRED arguments and set SNLS_FOUND to TRUE - # if all listed variables are TRUE - find_package_handle_standard_args(SNLS DEFAULT_MSG - SNLS_INCLUDE_DIRS - ) -endif() - diff --git a/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake b/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake index f416378..d38ab81 100644 --- a/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake +++ b/cmake/thirdpartylibraries/SetupThirdPartyLibraries.cmake @@ -1,9 +1,13 @@ # Provide backwards compatibility for *_PREFIX options set(_tpls - mfem + camp raja + umpire + chai + fmt snls exacmech + mfem caliper) foreach(_tpl ${_tpls}) @@ -35,6 +39,15 @@ else() message(FATAL_ERROR "MFEM_DIR was not provided. It is needed to find MFEM.") endif() +################################ +# RAJA +################################ + +if (RAJA_DIR) + find_package(RAJA REQUIRED CONFIG PATHS ${RAJA_DIR}) +else() + message(FATAL_ERROR "RAJA_DIR was not provided. It is needed to find RAJA.") +endif() ################################ # ExaCMech @@ -55,40 +68,58 @@ else() endif() ################################ -# RAJA +# SNLS ################################ -if (DEFINED RAJA_DIR) - include(cmake/thirdpartylibraries/FindRAJA.cmake) - if (RAJA_FOUND) - blt_register_library( NAME raja - TREAT_INCLUDES_AS_SYSTEM ON - INCLUDES ${RAJA_INCLUDE_DIRS} - LIBRARIES ${RAJA_LIBRARY} - DEPENDS_ON camp) +if (SNLS_DIR) + find_package(SNLS REQUIRED CONFIG PATHS ${SNLS_DIR}) + set_target_properties(snls PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${SNLS_INCLUDE_DIRS}") +endif() + +if(SNLS_USE_RAJA_PORT_SUITE) +################################ +# camp +################################ + + if (CAMP_DIR) + find_package(camp REQUIRED CONFIG PATHS ${CAMP_DIR}) else() - message(FATAL_ERROR "Unable to find RAJA with given path ${RAJA_DIR}") + message(FATAL_ERROR "CAMP_DIR was not provided. It is needed to find CAMP.") endif() -else() - message(FATAL_ERROR "RAJA_DIR was not provided. 
It is needed to find RAJA.") -endif() ################################ -# SNLS +# chai ################################ -if (SNLS_DIR) - include(cmake/thirdpartylibraries/FindSNLS.cmake) - if (SNLS_FOUND) - blt_register_library( NAME snls - TREAT_INCLUDES_AS_SYSTEM ON - INCLUDES ${SNLS_INCLUDE_DIRS} - LIBRARIES ${SNLS_LIBRARIES} - DEPENDS_ON ${SNLS_DEPENDS}) + if (CHAI_DIR) + set(umpire_DIR ${UMPIRE_DIR}) + set(raja_DIR ${RAJA_DIR}) + set(fmt_DIR ${FMT_DIR}) + find_package(chai REQUIRED CONFIG PATHS ${CHAI_DIR}) else() - message(FATAL_ERROR "Unable to find SNLS with given path ${SNLS_DIR}") + message(FATAL_ERROR "CHAI_DIR was not provided. It is needed to find CHAI.") endif() -endif() + +################################ +# fmt +################################ + + if (FMT_DIR) + find_package(fmt CONFIG PATHS ${FMT_DIR}) + else() + message(WARNING "FMT_DIR was not provided. This is a requirement for camp as of v2024.02.0. Ignore this warning if using older versions of the RAJA Portability Suite") + endif() + +################################ +# UMPIRE +################################ + + if (DEFINED UMPIRE_DIR) + find_package(umpire REQUIRED CONFIG PATHS ${UMPIRE_DIR}) + else() + message(FATAL_ERROR "UMPIRE_DIR was not provided. It is needed to find UMPIRE.") + endif() +endif() # End SNLS_USE_RAJA_PORT_SUITE check ################################ # Caliper diff --git a/scripts/install/unix_cpu_intel_install_example.sh b/scripts/install/unix_cpu_intel_install_example.sh new file mode 100644 index 0000000..6ad5e0a --- /dev/null +++ b/scripts/install/unix_cpu_intel_install_example.sh @@ -0,0 +1,280 @@ +#!/usr/bin/bash +# For ease all of this should be run in its own directory +# Build and run this in $SCRATCH/csm3_builds/ + +SCRIPT=$(readlink -f "$0") +BASE_DIR=$(dirname "$SCRIPT") + +# On macs the above two lines won't work but can be replaced with this line +# BASE_DIR=$(cd "$(dirname "$0")"; pwd -P) + +module load intel/2023.2.1-magic +module load CMake/3.26.3 +module load python/3.12 +module list + +CC="/usr/tce/packages/intel/intel-2023.2.1-magic/bin/icx" +CXX="/usr/tce/packages/intel/intel-2023.2.1-magic/bin/icpx" +MPICXX="/usr/tce/packages/mvapich2/mvapich2-2.3.7-intel-2023.2.1-magic/bin/mpicxx" +MPICC="/usr/tce/packages/mvapich2/mvapich2-2.3.7-intel-2023.2.1-magic/bin/mpicc" +PYTHON_EXE="/usr/apps/python-3.12.2/bin/python" + +#Build raja +if [ ! -d "camp" ]; then + git clone https://github.com/LLNL/camp.git -b v2024.07.0 + cd ${BASE_DIR}/camp + git submodule init + git submodule update + + if [ ! -d "build" ]; then + mkdir build + cd ${BASE_DIR}/camp/build + rm -rf * + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DRAJA_TIMER=chrono \ + -DENABLE_OPENMP=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DENABLE_CUDA=OFF |& tee my_camp_config + make -j 2 |& tee my_camp_build + make install |& tee my_camp_install + fi +fi + +OLCF_CAMP_ROOT=${BASE_DIR}/camp/install_dir/ + +cd ${BASE_DIR} + +#exit +if [ ! -d "RAJA" ]; then + git clone https://github.com/LLNL/RAJA.git -b v2024.07.0 + cd ${BASE_DIR}/RAJA + git submodule init + git submodule update + cd ${BASE_DIR}/RAJA + if [ ! 
-d "build" ]; then + mkdir build + cd ${BASE_DIR}/RAJA/build + rm -rf * + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DRAJA_ENABLE_TESTS=OFF \ + -DRAJA_ENABLE_EXAMPLES=OFF \ + -DRAJA_ENABLE_BENCHMARKS=OFF \ + -DRAJA_TIMER=chrono \ + -DENABLE_OPENMP=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DENABLE_CUDA=OFF \ + -Dcamp_DIR=${OLCF_CAMP_ROOT} |& tee my_raja_config + make -j 4 |& tee my_raja_build + make install |& tee my_raja_install + fi +fi + +OLCF_RAJA_ROOT=${BASE_DIR}/RAJA/install_dir/ + +echo ${OLCF_RAJA_ROOT} + +cd ${BASE_DIR} +if [ ! -d "ExaCMech" ]; then + # Clone the repo + git clone https://github.com/LLNL/ExaCMech.git + cd ${BASE_DIR}/ExaCMech + # Checkout the branch that has the HIP features on it + git checkout develop + # Update all the various submodules + git submodule init && git submodule update + if [ ! -d "${BASE_DIR}/ExaCMech/build" ]; then + mkdir build + cd ${BASE_DIR}/ExaCMech/build + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_MINIAPPS=OFF \ + -DENABLE_OPENMP=OFF \ + -DRAJA_DIR=${OLCF_RAJA_ROOT}/lib/cmake/raja/ \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DENABLE_CUDA=OFF \ + -Dcamp_DIR=${OLCF_CAMP_ROOT}/lib/cmake/camp |& tee my_exacmech_config + + make -j 4 |& tee my_exacmech_build + make install |& tee my_exacmech_install + fi +fi +cd ${BASE_DIR} + +# Now to build our MFEM dependencies +# First let's install Hypre v2.23.0 +cd ${BASE_DIR} +if [ ! -d "hypre" ]; then + + git clone https://github.com/hypre-space/hypre.git --branch v2.30.0 --single-branch + cd ${BASE_DIR}/hypre/ + mkdir build + cd ${BASE_DIR}/hypre/build + rm -rf * + # Based on their install instructions + # This should work on most systems + # Hypre's default suggestions of just using configure don't always work + cmake ../src -DCMAKE_INSTALL_PREFIX=../src/hypre/ \ + -DWITH_MPI=TRUE \ + -DCMAKE_C_COMPILER=${MPICC} \ + -DCMAKE_CXX_COMPILER=${MPICXX} \ + -DCMAKE_Fortran_COMPILER=${MPIFORT} \ + -DCMAKE_BUILD_TYPE=Release \ + |& tee my_hypre_config + + make -j 4 |& tee my_hypre_build + make install |& tee my_hypre_install + + cd ${BASE_DIR}/hypre/src/hypre + OLCF_HYPRE_ROOT="$(pwd)" + +else + + echo " hypre already built " + OLCF_HYPRE_ROOT=${BASE_DIR}/hypre/src/hypre + +fi + +cd ${BASE_DIR} + +if [ ! -d "metis-5.1.0" ]; then + + curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz + tar -xzf metis-5.1.0.tar.gz + rm metis-5.1.0.tar.gz + cd metis-5.1.0 + mkdir install_dir + make config prefix=${BASE_DIR}/metis-5.1.0/install_dir/ CC=${CC} CXX=${CXX} |& tee my_metis_config + make -j 4 |& tee my_metis_build + make install |& tee my_metis_install + cd ${BASE_DIR}/metis-5.1.0/install_dir/ + OLCF_METIS_ROOT="$(pwd)" +else + + echo " metis-5.1.0 already built " + OLCF_METIS_ROOT=${BASE_DIR}/metis-5.1.0/install_dir/ + +fi + +cd ${BASE_DIR} +if [ ! -d "ADIOS2" ]; then + # Clone the repo + git clone https://github.com/ornladios/ADIOS2.git + cd ${BASE_DIR}/ADIOS2 + # Checkout the branch that has the HIP features on it + git checkout v2.10.1 + # Update all the various submodules + git submodule init && git submodule update + + cd ${BASE_DIR} + if [ ! 
-d "${BASE_DIR}/ADIOS2/build" ]; then + cd ${BASE_DIR}/ADIOS2 + mkdir build + cd ${BASE_DIR}/ADIOS2/build + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DADIOS2_USE_MPI=ON \ + -DADIOS2_USE_Blosc2=OFF \ + -DADIOS2_USE_BZip2=OFF \ + -DADIOS2_USE_ZeroMQ=OFF \ + -DADIOS2_USE_Endian_Reverse=OFF \ + -DADIOS2_USE_Fortran=OFF \ + -DADIOS2_USE_Python=OFF \ + -DADIOS2_USE_HDF5=OFF \ + -DADIOS2_USE_MPI=ON \ + -DADIOS2_USE_PNG=OFF \ + -DBUILD_SHARED_LIBS=ON \ + -DADIOS2_USE_SZ=OFF \ + -DADIOS2_USE_ZFP=OFF + + + make -j 4 |& tee my_adios2_build + make install |& tee my_adios2_install + fi +fi + +cd ${BASE_DIR} + +if [ ! -d "mfem" ]; then + git clone https://github.com/rcarson3/mfem.git + cd ${BASE_DIR}/mfem/ + git checkout exaconstit-dev + if [ ! -d "build" ]; then + mkdir build + fi + cd ${BASE_DIR}/mfem/build + LOCAL_CMAKE_MFEM="$(which cmake)" + echo "NOTE: MFEM: cmake = $LOCAL_CMAKE_MFEM" + #All the options + cmake ../ -DMFEM_USE_MPI=YES -DMFEM_USE_SIMD=NO\ + -DCMAKE_CXX_COMPILER=${MPICXX} \ + -DMETIS_DIR=${OLCF_METIS_ROOT} \ + -DHYPRE_DIR=${OLCF_HYPRE_ROOT} \ + -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DMFEM_USE_OPENMP=OFF \ + -DMFEM_USE_RAJA=YES \ + -DRAJA_DIR:PATH=${OLCF_RAJA_ROOT} \ + -DMFEM_USE_ZLIB=YES \ + -DMFEM_USE_ADIOS2=ON \ + -DADIOS2_DIR=${BASE_DIR}/ADIOS2/install_dir/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DRAJA_REQUIRED_PACKAGES="camp" \ + -DMFEM_USE_CAMP=ON \ + -Dcamp_DIR:PATH=${OLCF_CAMP_ROOT}/lib/cmake/camp/ \ + -DCMAKE_CXX_STANDARD=14 \ + -DCMAKE_BUILD_TYPE=Release \ + |& tee my_mfem_config + + make -j 4 |& tee my_mfem_build + make install |& tee my_mfem_install +fi + +cd ${BASE_DIR} + +if [ ! -d "ExaConstit" ]; then + git clone https://github.com/llnl/ExaConstit.git + cd ${BASE_DIR}/ExaConstit/ + git checkout exaconstit-dev + git submodule init && git submodule update + + cd ${BASE_DIR}/ExaConstit/ + if [ ! -d "build" ]; then + mkdir build + fi + + cd ${BASE_DIR}/ExaConstit/build && rm -rf * + LOCAL_CMAKE_MFEM="$(which cmake)" + echo "NOTE: ExaConstit: cmake = $LOCAL_CMAKE_MFEM" + + cmake ../ -DCMAKE_C_COMPILER=${MPICC} \ + -DCMAKE_CXX_COMPILER=${MPICXX} \ + -DENABLE_TESTS=ON \ + -DENABLE_OPENMP=OFF \ + -DENABLE_FORTRAN=OFF \ + -DPYTHON_EXECUTABLE=${PYTHON_EXE} \ + -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/lib/cmake/mfem/ \ + -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir/ \ + -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir/ \ + -DENABLE_SNLS_V03=ON \ + -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DRAJA_DIR:PATH=${OLCF_RAJA_ROOT}/lib/cmake/raja/ \ + -DCMAKE_BUILD_TYPE=Release \ + -Dcamp_DIR=${OLCF_CAMP_ROOT}/lib/cmake/camp |& tee my_exconstit_config + + make -j 4|& tee my_exconstit_build + +fi diff --git a/scripts/install/unix_gpu_cuda_install_example.sh b/scripts/install/unix_gpu_cuda_install_example.sh new file mode 100644 index 0000000..b9edc10 --- /dev/null +++ b/scripts/install/unix_gpu_cuda_install_example.sh @@ -0,0 +1,476 @@ +#!/usr/bin/bash +# For ease all of this should be run in its own directory + +SCRIPT=$(readlink -f "$0") +BASE_DIR=$(dirname "$SCRIPT") + +echo $BASH_VERSION + +# This is a bit system dependent but for El Capitan-like systems the below should work +# You should be able to modify it to work for your own system easily enough. +# Most of the options are defined by the first set of bash variables defined +# below. 
You'll likely need to modify the ROCM_BASE, MPIHOME, and then the various +# MPI/linker flags +# While this is largely targeted towards AMD GPU builds, you can probably update +# it easily enough for a NVidia GPU build of things... +module load cmake/3.29.2 clang/ibm-14.0.5 cuda/11.8.0 + +CLANG_BASE="/usr/tce/packages/clang/clang-ibm-14.0.5/" +NVCC_BASE="" +CC="${CLANG_BASE}/bin/clang" +CXX="${CLANG_BASE}/bin/clang++" + + +GCC_HOME="/usr/tce/packages/gcc/gcc-11.2.1" +CUDA_VER="11.8.0" +CUDA_TOOLKIT_ROOT_DIR="/usr/tce/packages/cuda/cuda-${CUDA_VER}" +NVCC="${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" + +BLT_EXE_LINKER_FLAGS="-L${GCC_HOME}/rh/usr/lib/gcc/ppc64le-redhat-linux/11 -Wl,-rpath,${GCC_HOME}/rh/usr/lib/gcc/ppc64le-redhat-linux/11" + +MPIHOME="/usr/tce/packages/spectrum-mpi/spectrum-mpi-rolling-release-clang-ibm-14.0.5/" +MPICXX="${MPIHOME}/bin/mpicxx" +MPICC="${MPIHOME}/bin/mpicc" +MPIFORT="${MPIHOME}/bin/mpifort" +CUDAON="ON" +OPENMP_ON="OFF" +GPU_TARGETS="70" +CXX_FLAGS="-fPIC -std=c++17 --gcc-toolchain=${GCC_HOME}" +CUDA_FLAGS="-restrict --expt-extended-lambda -Xcompiler --gcc-toolchain=${GCC_HOME} -Xnvlink --suppress-stack-size-warning -std=c++17" + +PYTHON_EXE="/usr/tce/packages/python/python-3.8.2/bin/python3" +# Various build options for our various libaries +UMPIRE_ENABLE_TOOLS="ON" +UMPIRE_ENABLE_BACKTRACE="ON" +UMPIRE_ENABLE_BACKTRACE_SYMBOLS="ON" +# On V100s turn this off +CHAI_DISABLE_RM="OFF" +# Only for MI300a s other systems we need to turn this off +CHAI_THIN_GPU_ALLOCATE="OFF" +CHAI_ENABLE_PINNED="ON" +CHAI_ENABLE_PICK="ON" +CHAI_DEBUG="OFF" +CHAI_ENABLE_GPU_SIMULATION_MODE="OFF" +CHAI_ENABLE_UM="ON" +CHAI_ENABLE_MANAGED_PTR="ON" +CHAI_ENABLE_MANAGED_PTR_ON_GPU="ON" + +#Build camp +if [ ! -d "camp" ]; then + git clone https://github.com/LLNL/camp.git -b v2024.07.0 + cd ${BASE_DIR}/camp + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/camp/build_cuda" ]; then + cd ${BASE_DIR}/camp + mkdir build_cuda + cd ${BASE_DIR}/camp/build_cuda + rm -rf * + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \ + -DENABLE_CUDA=${CUDAON} + make -j 2 + make install +fi + +CAMP_ROOT=${BASE_DIR}/camp/install_dir_cuda/ +echo ${CAMP_ROOT} +cd ${BASE_DIR} + +#exit +if [ ! -d "RAJA" ]; then + git clone https://github.com/LLNL/RAJA.git -b v2024.07.0 + cd ${BASE_DIR}/RAJA + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! 
-d "${BASE_DIR}/RAJA/build_cuda" ]; then + cd ${BASE_DIR}/RAJA + mkdir build_cuda + cd ${BASE_DIR}/RAJA/build_cuda + rm -rf * + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DRAJA_ENABLE_TESTS=OFF \ + -DRAJA_ENABLE_EXAMPLES=OFF \ + -DRAJA_ENABLE_BENCHMARKS=OFF \ + -DRAJA_ENABLE_REPRODUCERS=OFF \ + -DRAJA_ENABLE_EXERCISES=OFF \ + -DRAJA_ENABLE_VECTORIZATION=OFF \ + -DRAJA_ENABLE_DOCUMENTATION=OFF \ + -DRAJA_USE_DOUBLE=ON \ + -DRAJA_USE_BARE_PTR=ON \ + -DRAJA_TIMER=chrono \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DENABLE_CUDA=${CUDAON} \ + -Dcamp_DIR=${CAMP_ROOT} + make -j 4 + make install +fi + +RAJA_ROOT=${BASE_DIR}/RAJA/install_dir_cuda/ +echo ${RAJA_ROOT} +cd ${BASE_DIR} + +if [ ! -d "Umpire" ]; then + git clone https://github.com/LLNL/Umpire.git -b v2024.07.0 + cd ${BASE_DIR}/Umpire + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/Umpire/build_cuda" ]; then + cd ${BASE_DIR}/Umpire + mkdir build_cuda + cd ${BASE_DIR}/Umpire/build_cuda + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DENABLE_MPI=OFF \ + -DUMPIRE_ENABLE_C=OFF \ + -DENABLE_FORTRAN=OFF \ + -DENABLE_GMOCK=OFF \ + -DUMPIRE_ENABLE_IPC_SHARED_MEMORY=OFF \ + -DUMPIRE_ENABLE_TOOLS=${UMPIRE_ENABLE_TOOLS} \ + -DUMPIRE_ENABLE_BACKTRACE=${UMPIRE_ENABLE_BACKTRACE} \ + -DUMPIRE_ENABLE_BACKTRACE_SYMBOLS=${UMPIRE_ENABLE_BACKTRACE_SYMBOLS} \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DENABLE_CUDA=${CUDAON} \ + -Dcamp_DIR=${CAMP_ROOT} + + make -j 4 + make install +fi + +UMPIRE_ROOT=${BASE_DIR}/Umpire/install_dir_cuda/ +echo ${UMPIRE_ROOT} +cd ${BASE_DIR} + +if [ ! -d "CHAI" ]; then + git clone https://github.com/LLNL/CHAI.git -b v2024.07.0 + cd ${BASE_DIR}/CHAI + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! 
-d "${BASE_DIR}/CHAI/build_cuda" ]; then + cd ${BASE_DIR}/CHAI + mkdir build_cuda + cd ${BASE_DIR}/CHAI/build_cuda + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_EXAMPLES=OFF \ + -DENABLE_DOCS=OFF \ + -DENABLE_GMOCK=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DENABLE_MPI=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DENABLE_CUDA=${CUDAON} \ + -DCHAI_ENABLE_RAJA_PLUGIN=ON \ + -DCHAI_ENABLE_RAJA_NESTED_TEST=OFF \ + -DCHAI_ENABLE_PINNED=${CHAI_ENABLE_PINNED} \ + -DCHAI_DISABLE_RM=${CHAI_DISABLE_RM} \ + -DCHAI_THIN_GPU_ALLOCATE=${CHAI_THIN_GPU_ALLOCATE} \ + -DCHAI_ENABLE_PICK=${CHAI_ENABLE_PICK} \ + -DCHAI_DEBUG=${CHAI_DEBUG} \ + -DCHAI_ENABLE_GPU_SIMULATION_MODE=${CHAI_ENABLE_GPU_SIMULATION_MODE} \ + -DCHAI_ENABLE_UM=${CHAI_ENABLE_UM} \ + -DCHAI_ENABLE_MANAGED_PTR=${CHAI_ENABLE_MANAGED_PTR} \ + -DCHAI_ENABLE_MANAGED_PTR_ON_GPU=${CHAI_ENABLE_MANAGED_PTR_ON_GPU} \ + -Dfmt_DIR=${UMPIRE_ROOT} \ + -Dumpire_DIR=${UMPIRE_ROOT} \ + -DRAJA_DIR=${RAJA_ROOT} \ + -Dcamp_DIR=${CAMP_ROOT} + make -j 4 + make install +fi + +CHAI_ROOT=${BASE_DIR}/CHAI/install_dir_cuda/ +echo ${CHAI_ROOT} +cd ${BASE_DIR} + +if [ ! -d "ExaCMech" ]; then + # Clone the repo + git clone https://github.com/LLNL/ExaCMech.git + cd ${BASE_DIR}/ExaCMech + # Checkout the branch that has the HIP features on it + git checkout develop + # Update all the various submodules + git submodule init && git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/ExaCMech/build_cuda" ]; then + cd ${BASE_DIR}/ExaCMech + mkdir build_cuda + cd ${BASE_DIR}/ExaCMech/build_cuda + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_MINIAPPS=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DENABLE_CUDA=${CUDAON} \ + -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \ + -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \ + -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \ + -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \ + -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp + + make -j 4 + make install +fi + +ECMECH_ROOT=${BASE_DIR}/ExaCMech/install_dir_cuda/ +echo ${ECMECH_ROOT} +cd ${BASE_DIR} + +# Now to build our MFEM dependencies +# First let's install Hypre v2.23.0 +cd ${BASE_DIR} +if [ ! -d "hypre" ]; then + git clone https://github.com/hypre-space/hypre.git --branch v2.32.0 --single-branch +fi +cd ${BASE_DIR} +if [ ! 
-d "${BASE_DIR}/hypre/build_cuda" ]; then + cd ${BASE_DIR}/hypre/ + mkdir build_cuda + cd ${BASE_DIR}/hypre/build_cuda + rm -rf * + # Based on their install instructions + # This should work on most systems + # Hypre's default suggestions of just using configure don't always work + cmake ../src -DCMAKE_INSTALL_PREFIX=../src/hypre_hip/ \ + -DCMAKE_C_COMPILER=${CC} \ + -DMPI_CXX_COMPILER=${MPICXX} \ + -DMPI_C_COMPILER=${MPICC} \ + -DCMAKE_BUILD_TYPE=Release \ + |& tee my_hypre_config + + make -j 4 |& tee my_hypre_build + make install |& tee my_hypre_install + + cd ${BASE_DIR}/hypre/src/hypre_hip + HYPRE_ROOT="$(pwd)" + +else + + echo " hypre already built " + HYPRE_ROOT=${BASE_DIR}/hypre/src/hypre_hip + +fi + +cd ${BASE_DIR} + +if [ ! -d "metis-5.1.0" ]; then + + curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz + tar -xzf metis-5.1.0.tar.gz + rm metis-5.1.0.tar.gz +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/metis-5.1.0/install_dir_cuda" ]; then + cd ${BASE_DIR}/metis-5.1.0 + mkdir install_dir_cuda + make distclean + make config prefix=${BASE_DIR}/metis-5.1.0/install_dir_cuda/ CC=${CC} CXX=${CXX} |& tee my_metis_config + make -j 4 |& tee my_metis_build + make install |& tee my_metis_install + cd ${BASE_DIR}/metis-5.1.0/install_dir_cuda/ + METIS_ROOT="$(pwd)" +else + echo " metis-5.1.0 already built " + METIS_ROOT=${BASE_DIR}/metis-5.1.0/install_dir_cuda/ +fi + +# cd ${BASE_DIR} +# if [ ! -d "ADIOS2" ]; then +# # Clone the repo +# git clone https://github.com/ornladios/ADIOS2.git +# cd ${BASE_DIR}/ADIOS2 +# # Checkout the branch that has the HIP features on it +# git checkout v2.10.0 +# # Update all the various submodules +# git submodule init && git submodule update +# fi +# cd ${BASE_DIR} +# if [ ! -d "${BASE_DIR}/ADIOS2/build_cuda" ]; then +# cd ${BASE_DIR}/ADIOS2 +# mkdir build_cuda +# cd ${BASE_DIR}/ADIOS2/build_cuda +# rm -rf * + +# cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ +# -DCMAKE_BUILD_TYPE=Release \ +# -DCMAKE_C_COMPILER=${CC} \ +# -DCMAKE_CXX_COMPILER=${CXX} \ +# -DADIOS2_USE_MPI=ON \ +# -DADIOS2_USE_Blosc2=OFF \ +# -DADIOS2_USE_BZip2=OFF \ +# -DADIOS2_USE_ZeroMQ=OFF \ +# -DADIOS2_USE_Endian_Reverse=OFF \ +# -DADIOS2_USE_Fortran=OFF \ +# -DADIOS2_USE_Python=ON \ +# -DPYTHON_EXECUTABLE=${PYTHON_EXE} \ +# -DADIOS2_USE_HDF5=OFF \ +# -DADIOS2_USE_MPI=ON \ +# -DADIOS2_USE_PNG=OFF \ +# -DBUILD_SHARED_LIBS=ON \ +# -DADIOS2_USE_SZ=OFF \ +# -DADIOS2_USE_ZFP=OFF + + +# make -j 16 |& tee my_adios2_build +# make install |& tee my_adios2_install +# fi + + +cd ${BASE_DIR} + +if [ ! -d "mfem" ]; then + git clone https://github.com/rcarson3/mfem.git + cd ${BASE_DIR}/mfem/ + git checkout exaconstit-dev +fi + +cd ${BASE_DIR} + +if [ ! 
-d "${BASE_DIR}/mfem/build_cuda" ]; then + mkdir ${BASE_DIR}/mfem/build_cuda + cd ${BASE_DIR}/mfem/build_cuda + LOCAL_CMAKE_MFEM="$(which cmake)" + echo "NOTE: MFEM: cmake = $LOCAL_CMAKE_MFEM" + #All the options + cmake ../ -DMFEM_USE_MPI=YES -DMFEM_USE_SIMD=NO\ + -DMETIS_DIR=${METIS_ROOT} \ + -DHYPRE_DIR=${HYPRE_ROOT} \ + -DMFEM_USE_RAJA=YES \ + -DRAJA_DIR:PATH=${RAJA_ROOT} \ + -DRAJA_REQUIRED_PACKAGES="camp" \ + -DMFEM_USE_CAMP=ON \ + -Dcamp_DIR:PATH=${CAMP_ROOT}/lib/cmake/camp/ \ + -DMFEM_USE_OPENMP=${OPENMP_ON} \ + -DMFEM_USE_ZLIB=YES \ + -DCMAKE_INSTALL_PREFIX=../install_dir_cuda/ \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DENABLE_CUDA=${CUDAON} \ + -DMFEM_USE_CUDA=${CUDAON} \ + -DCMAKE_BUILD_TYPE=Release \ + |& tee my_mfem_config + # -DMFEM_USE_MAGMA=ON \ + # -DMAGMA_DIR=${BASE_DIR}/magma/install_dir/ \ + # -DMFEM_USE_ADIOS2=ON \ + # -DADIOS2_DIR=${BASE_DIR}/ADIOS2/install_dir_cuda/ \ + + make -j 16 |& tee my_mfem_build + make install |& tee my_mfem_install +fi + +cd ${BASE_DIR} + +# : << 'END_COMMENT' +if [ ! -d "ExaConstit" ]; then + git clone https://github.com/llnl/ExaConstit.git + cd ${BASE_DIR}/ExaConstit/ + git checkout exaconstit-dev + git submodule init && git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/ExaConstit/build_cuda" ]; then + cd ${BASE_DIR}/ExaConstit/ + mkdir build_cuda + + cd ${BASE_DIR}/ExaConstit/build_cuda #&& rm -rf * + LOCAL_CMAKE_MFEM="$(which cmake)" + echo "NOTE: ExaConstit: cmake = $LOCAL_CMAKE_MFEM" + + cmake ../ \ + -DCMAKE_CXX_STANDARD=17 \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${CXX} \ + -DMPI_CXX_COMPILER=${MPICXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_CUDA_FLAGS="${CUDA_FLAGS}" \ + -DBLT_EXE_LINKER_FLAGS="${BLT_EXE_LINKER_FLAGS}" \ + -DCMAKE_CUDA_COMPILER=${NVCC} \ + -DCMAKE_CUDA_HOST_COMPILER=${CXX} \ + -DCMAKE_CUDA_ARCHITECTURES=${GPU_TARGETS} \ + -DENABLE_CUDA=${CUDAON} \ + -DPYTHON_EXECUTABLE=${PYTHON_EXE} \ + -DENABLE_TESTS=ON \ + -DENABLE_OPENMP=OFF \ + -DENABLE_FORTRAN=OFF \ + -DENABLE_SNLS_V03=ON \ + -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DRAJA_DIR:PATH=${RAJA_ROOT}/lib/cmake/raja/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DMFEM_DIR=${BASE_DIR}/mfem/install_dir_cuda/lib/cmake/mfem/ \ + -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir_cuda/ \ + -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir_cuda/ \ + -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \ + -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \ + -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \ + -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \ + -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp |& tee my_exconstit_config + + make -j 4|& tee my_exconstit_build +fi +###END_COMMENT diff --git a/scripts/install/unix_gpu_hip_install_example.sh b/scripts/install/unix_gpu_hip_install_example.sh new file mode 100644 index 0000000..fa15503 --- /dev/null +++ b/scripts/install/unix_gpu_hip_install_example.sh @@ -0,0 +1,462 @@ +#!/usr/bin/bash +# For ease all of this should be run in its own directory + +SCRIPT=$(readlink -f "$0") +BASE_DIR=$(dirname "$SCRIPT") + +echo $BASH_VERSION + +# This is a bit system dependent but for El Capitan-like systems the below should work +# You should be able to modify it to work for your own system easily enough. +# Most of the options are defined by the first set of bash variables defined +# below. 
You'll likely need to modify the ROCM_BASE, MPIHOME, and then the various +# MPI/linker flags +# While this is largely targeted towards AMD GPU builds, you can probably update +# it easily enough for a NVidia GPU build of things... +module load cmake/3.29.2 rocmcc/6.3.1-magic rocm/6.3.1 cray-mpich/8.1.31 + +ROCM_BASE="/usr/tce/packages/rocmcc/rocmcc-6.3.1-magic/" +CC="${ROCM_BASE}/bin/amdclang" +CXX="${ROCM_BASE}/bin/amdclang++" +HIPCC="${ROCM_BASE}/bin/hipcc" +MPIHOME="/usr/tce/packages/cray-mpich/cray-mpich-8.1.31-rocmcc-6.3.1-magic/" +MPILIBHOME="/opt/cray/pe/mpich/8.1.31/gtl/lib" +MPIAMDHOME="/opt/cray/pe/mpich/8.1.31/ofi/amd/6.0/lib" +MPICRAYFLAGS="-Wl,-rpath,/opt/cray/libfabric/2.1/lib64:/opt/cray/pe/pmi/6.1.15/lib:/opt/cray/pe/pals/1.2.12/lib:/opt/rocm-6.3.1/llvm/lib -lxpmem" +MPICXX="$MPIHOME/bin/mpicxx" +MPICC="$MPIHOME/bin/mpicc" +MPIFORT="$MPIHOME/bin/mpifort" +ROCMON="ON" +OPENMP_ON="OFF" +LOC_ROCM_ARCH="gfx942" +GPU_TARGETS="gfx942" +AMDGPU_TARGETS="gfx942" +CXX_FLAGS="-fPIC -std=c++17 -munsafe-fp-atomics" + +EXE_LINK_FLAGS="--hip-link -lroctx64 -Wl,-rpath,${MPIAMDHOME} ${MPICRAYFLAGS} -L${MPILIBHOME} -lmpi_gtl_hsa -Wl,-rpath,${MPILIBHOME}" +PYTHON_EXE="/usr/tce/packages/python/python-3.9.12/bin/python3" +# Various build options for our various libaries +UMPIRE_ENABLE_TOOLS="ON" +UMPIRE_ENABLE_BACKTRACE="ON" +UMPIRE_ENABLE_BACKTRACE_SYMBOLS="ON" +# On V100s turn this off +CHAI_DISABLE_RM="ON" +# Only for MI300a s other systems we need to turn this off +CHAI_THIN_GPU_ALLOCATE="ON" +CHAI_ENABLE_PINNED="ON" +CHAI_ENABLE_PICK="ON" +CHAI_DEBUG="OFF" +CHAI_ENABLE_GPU_SIMULATION_MODE="OFF" +CHAI_ENABLE_UM="ON" +CHAI_ENABLE_MANAGED_PTR="ON" +CHAI_ENABLE_MANAGED_PTR_ON_GPU="ON" + +#Build camp +if [ ! -d "camp" ]; then + git clone https://github.com/LLNL/camp.git -b v2024.07.0 + cd ${BASE_DIR}/camp + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/camp/build_hip" ]; then + cd ${BASE_DIR}/camp + mkdir build_hip + cd ${BASE_DIR}/camp/build_hip + rm -rf * + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_OPENMP=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DENABLE_HIP=$ROCMON + make -j 2 + make install +fi + +CAMP_ROOT=${BASE_DIR}/camp/install_dir_hip/ +echo ${CAMP_ROOT} +cd ${BASE_DIR} + +#exit +if [ ! -d "RAJA" ]; then + git clone https://github.com/LLNL/RAJA.git -b v2024.07.0 + cd ${BASE_DIR}/RAJA + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! 
-d "${BASE_DIR}/RAJA/build_hip" ]; then + cd ${BASE_DIR}/RAJA + mkdir build_hip + cd ${BASE_DIR}/RAJA/build_hip + rm -rf * + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DRAJA_ENABLE_TESTS=OFF \ + -DRAJA_ENABLE_EXAMPLES=OFF \ + -DRAJA_ENABLE_BENCHMARKS=OFF \ + -DRAJA_ENABLE_REPRODUCERS=OFF \ + -DRAJA_ENABLE_EXERCISES=OFF \ + -DRAJA_ENABLE_VECTORIZATION=OFF \ + -DRAJA_ENABLE_DOCUMENTATION=OFF \ + -DRAJA_USE_DOUBLE=ON \ + -DRAJA_USE_BARE_PTR=ON \ + -DRAJA_TIMER=chrono \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DENABLE_HIP=${ROCMON} \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DHIP_CXX_COMPILER=${HIPCC} \ + -Dcamp_DIR=${CAMP_ROOT} + make -j 4 + make install +fi + +RAJA_ROOT=${BASE_DIR}/RAJA/install_dir_hip/ +echo ${RAJA_ROOT} +cd ${BASE_DIR} + +if [ ! -d "Umpire" ]; then + git clone https://github.com/LLNL/Umpire.git -b v2024.07.0 + cd ${BASE_DIR}/Umpire + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/Umpire/build_hip" ]; then + cd ${BASE_DIR}/Umpire + mkdir build_hip + cd ${BASE_DIR}/Umpire/build_hip + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DENABLE_MPI=OFF \ + -DUMPIRE_ENABLE_C=OFF \ + -DENABLE_FORTRAN=OFF \ + -DENABLE_GMOCK=OFF \ + -DUMPIRE_ENABLE_IPC_SHARED_MEMORY=OFF \ + -DUMPIRE_ENABLE_TOOLS=${UMPIRE_ENABLE_TOOLS} \ + -DUMPIRE_ENABLE_BACKTRACE=${UMPIRE_ENABLE_BACKTRACE} \ + -DUMPIRE_ENABLE_BACKTRACE_SYMBOLS=${UMPIRE_ENABLE_BACKTRACE_SYMBOLS} \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DENABLE_HIP=${ROCMON} \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DHIP_CXX_COMPILER=${HIPCC} \ + -Dcamp_DIR=${CAMP_ROOT} + + make -j 4 + make install +fi + +UMPIRE_ROOT=${BASE_DIR}/Umpire/install_dir_hip/ +echo ${UMPIRE_ROOT} +cd ${BASE_DIR} + +if [ ! -d "CHAI" ]; then + git clone https://github.com/LLNL/CHAI.git -b v2024.07.0 + cd ${BASE_DIR}/CHAI + git submodule init + git submodule update +fi +cd ${BASE_DIR} +if [ ! 
-d "${BASE_DIR}/CHAI/build_hip" ]; then + cd ${BASE_DIR}/CHAI + mkdir build_hip + cd ${BASE_DIR}/CHAI/build_hip + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_EXAMPLES=OFF \ + -DENABLE_DOCS=OFF \ + -DENABLE_GMOCK=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DENABLE_MPI=OFF \ + -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DENABLE_HIP=${ROCMON} \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DHIP_CXX_COMPILER=${HIPCC} \ + -DCHAI_ENABLE_RAJA_PLUGIN=ON \ + -DCHAI_ENABLE_RAJA_NESTED_TEST=OFF \ + -DCHAI_ENABLE_PINNED=${CHAI_ENABLE_PINNED} \ + -DCHAI_DISABLE_RM=${CHAI_DISABLE_RM} \ + -DCHAI_THIN_GPU_ALLOCATE=${CHAI_THIN_GPU_ALLOCATE} \ + -DCHAI_ENABLE_PICK=${CHAI_ENABLE_PICK} \ + -DCHAI_DEBUG=${CHAI_DEBUG} \ + -DCHAI_ENABLE_GPU_SIMULATION_MODE=${CHAI_ENABLE_GPU_SIMULATION_MODE} \ + -DCHAI_ENABLE_UM=${CHAI_ENABLE_UM} \ + -DCHAI_ENABLE_MANAGED_PTR=${CHAI_ENABLE_MANAGED_PTR} \ + -DCHAI_ENABLE_MANAGED_PTR_ON_GPU=${CHAI_ENABLE_MANAGED_PTR_ON_GPU} \ + -Dfmt_DIR=${UMPIRE_ROOT} \ + -Dumpire_DIR=${UMPIRE_ROOT} \ + -DRAJA_DIR=${RAJA_ROOT} \ + -Dcamp_DIR=${CAMP_ROOT} + make -j 4 + make install +fi + +CHAI_ROOT=${BASE_DIR}/CHAI/install_dir_hip/ +echo ${CHAI_ROOT} +cd ${BASE_DIR} + +if [ ! -d "ExaCMech" ]; then + # Clone the repo + git clone https://github.com/LLNL/ExaCMech.git + cd ${BASE_DIR}/ExaCMech + # Checkout the branch that has the HIP features on it + git checkout develop + # Update all the various submodules + git submodule init && git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/ExaCMech/build_hip" ]; then + cd ${BASE_DIR}/ExaCMech + mkdir build_hip + cd ${BASE_DIR}/ExaCMech/build_hip + rm -rf * + + cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DENABLE_TESTS=OFF \ + -DENABLE_MINIAPPS=OFF \ + -DENABLE_OPENMP=${OPENMP_ON} \ + -DBUILD_SHARED_LIBS=OFF \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DENABLE_HIP=$ROCMON \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DHIP_CXX_COMPILER=${HIPCC} \ + -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \ + -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \ + -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \ + -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \ + -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp + + make -j 4 + make install +fi + +ECMECH_ROOT=${BASE_DIR}/ExaCMech/install_dir_hip/ +echo ${ECMECH_ROOT} +cd ${BASE_DIR} + +# Now to build our MFEM dependencies +# First let's install Hypre v2.23.0 +cd ${BASE_DIR} +if [ ! -d "hypre" ]; then + git clone https://github.com/hypre-space/hypre.git --branch v2.32.0 --single-branch +fi +cd ${BASE_DIR} +if [ ! 
-d "${BASE_DIR}/hypre/build_hip" ]; then + cd ${BASE_DIR}/hypre/ + mkdir build_hip + cd ${BASE_DIR}/hypre/build_hip + rm -rf * + # Based on their install instructions + # This should work on most systems + # Hypre's default suggestions of just using configure don't always work + cmake ../src -DCMAKE_INSTALL_PREFIX=../src/hypre_hip/ \ + -DCMAKE_C_COMPILER=${CC} \ + -DMPI_CXX_COMPILER=${MPICXX} \ + -DMPI_C_COMPILER=${MPICC} \ + -DCMAKE_BUILD_TYPE=Release \ + |& tee my_hypre_config + + make -j 4 |& tee my_hypre_build + make install |& tee my_hypre_install + + cd ${BASE_DIR}/hypre/src/hypre_hip + HYPRE_ROOT="$(pwd)" + +else + + echo " hypre already built " + HYPRE_ROOT=${BASE_DIR}/hypre/src/hypre_hip + +fi + +cd ${BASE_DIR} + +if [ ! -d "metis-5.1.0" ]; then + + curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz + tar -xzf metis-5.1.0.tar.gz + rm metis-5.1.0.tar.gz +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/metis-5.1.0/install_dir_hip" ]; then + cd ${BASE_DIR}/metis-5.1.0 + mkdir install_dir_hip + make distclean + make config prefix=${BASE_DIR}/metis-5.1.0/install_dir_hip/ CC=${CC} CXX=${CXX} |& tee my_metis_config + make -j 4 |& tee my_metis_build + make install |& tee my_metis_install + cd ${BASE_DIR}/metis-5.1.0/install_dir_hip/ + METIS_ROOT="$(pwd)" +else + echo " metis-5.1.0 already built " + METIS_ROOT=${BASE_DIR}/metis-5.1.0/install_dir_hip/ +fi + +# cd ${BASE_DIR} +# if [ ! -d "ADIOS2" ]; then +# # Clone the repo +# git clone https://github.com/ornladios/ADIOS2.git +# cd ${BASE_DIR}/ADIOS2 +# # Checkout the branch that has the HIP features on it +# git checkout v2.10.0 +# # Update all the various submodules +# git submodule init && git submodule update +# fi +# cd ${BASE_DIR} +# if [ ! -d "${BASE_DIR}/ADIOS2/build_hip" ]; then +# cd ${BASE_DIR}/ADIOS2 +# mkdir build_hip +# cd ${BASE_DIR}/ADIOS2/build_hip +# rm -rf * + +# cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ +# -DCMAKE_BUILD_TYPE=Release \ +# -DCMAKE_C_COMPILER=${CC} \ +# -DCMAKE_CXX_COMPILER=${CXX} \ +# -DADIOS2_USE_MPI=ON \ +# -DADIOS2_USE_Blosc2=OFF \ +# -DADIOS2_USE_BZip2=OFF \ +# -DADIOS2_USE_ZeroMQ=OFF \ +# -DADIOS2_USE_Endian_Reverse=OFF \ +# -DADIOS2_USE_Fortran=OFF \ +# -DADIOS2_USE_Python=ON \ +# -DPYTHON_EXECUTABLE=${PYTHON_EXE} \ +# -DADIOS2_USE_HDF5=OFF \ +# -DADIOS2_USE_MPI=ON \ +# -DADIOS2_USE_PNG=OFF \ +# -DBUILD_SHARED_LIBS=ON \ +# -DADIOS2_USE_SZ=OFF \ +# -DADIOS2_USE_ZFP=OFF + + +# make -j 16 |& tee my_adios2_build +# make install |& tee my_adios2_install +# fi + + +cd ${BASE_DIR} + +if [ ! -d "mfem" ]; then + git clone https://github.com/rcarson3/mfem.git + cd ${BASE_DIR}/mfem/ + git checkout exaconstit-dev +fi + +cd ${BASE_DIR} + +if [ ! 
-d "${BASE_DIR}/mfem/build_hip" ]; then + mkdir ${BASE_DIR}/mfem/build_hip + cd ${BASE_DIR}/mfem/build_hip + LOCAL_CMAKE_MFEM="$(which cmake)" + echo "NOTE: MFEM: cmake = $LOCAL_CMAKE_MFEM" + #All the options + cmake ../ -DMFEM_USE_MPI=YES -DMFEM_USE_SIMD=NO\ + -DMETIS_DIR=${METIS_ROOT} \ + -DHYPRE_DIR=${HYPRE_ROOT} \ + -DMFEM_USE_RAJA=YES \ + -DRAJA_DIR:PATH=${RAJA_ROOT} \ + -DRAJA_REQUIRED_PACKAGES="camp" \ + -DMFEM_USE_CAMP=ON \ + -Dcamp_DIR:PATH=${CAMP_ROOT}/lib/cmake/camp/ \ + -DMFEM_USE_OPENMP=${OPENMP_ON} \ + -DMFEM_USE_ZLIB=YES \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DMPI_CXX_COMPILER=${MPICXX} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_INSTALL_PREFIX=../install_dir_hip/ \ + -DCMAKE_CXX_STANDARD=17 \ + -DMFEM_USE_HIP=${ROCMON} \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DHIP_ARCH=${LOC_ROCM_ARCH} \ + -DGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DHIP_CXX_COMPILER=${HIPCC} \ + |& tee my_mfem_config + # -DMFEM_USE_MAGMA=ON \ + # -DMAGMA_DIR=${BASE_DIR}/magma/install_dir/ \ + # -DMFEM_USE_ADIOS2=ON \ + # -DADIOS2_DIR=${BASE_DIR}/ADIOS2/install_dir_hip/ \ + + make -j 16 |& tee my_mfem_build + make install |& tee my_mfem_install +fi + +cd ${BASE_DIR} + +# : << 'END_COMMENT' +if [ ! -d "ExaConstit" ]; then + git clone https://github.com/llnl/ExaConstit.git + cd ${BASE_DIR}/ExaConstit/ + git checkout exaconstit-dev + git submodule init && git submodule update +fi +cd ${BASE_DIR} +if [ ! -d "${BASE_DIR}/ExaConstit/build_hip" ]; then + cd ${BASE_DIR}/ExaConstit/ + mkdir build_hip + + cd ${BASE_DIR}/ExaConstit/build_hip #&& rm -rf * + LOCAL_CMAKE_MFEM="$(which cmake)" + echo "NOTE: ExaConstit: cmake = $LOCAL_CMAKE_MFEM" + + cmake ../ -DCMAKE_C_COMPILER=${CC} \ + -DCMAKE_CXX_COMPILER=${HIPCC} \ + -DMPI_CXX_COMPILER=${MPICXX} \ + -DHIP_CXX_COMPILER=${HIPCC} \ + -DCMAKE_CXX_FLAGS="${CXX_FLAGS}" \ + -DCMAKE_EXE_LINKER_FLAGS="${EXE_LINK_FLAGS}" \ + -DPYTHON_EXECUTABLE=${PYTHON_EXE} \ + -DENABLE_TESTS=ON \ + -DENABLE_OPENMP=OFF \ + -DENABLE_FORTRAN=OFF \ + -DENABLE_HIP=${ROCMON} \ + -DENABLE_SNLS_V03=ON \ + -DCMAKE_INSTALL_PREFIX=../install_dir/ \ + -DRAJA_DIR:PATH=${RAJA_ROOT}/lib/cmake/raja/ \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_HIP_ARCHITECTURES=${LOC_ROCM_ARCH} \ + -DGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DAMDGPU_TARGETS=${LOCM_ROCM_ARCH} \ + -DMFEM_DIR=${BASE_DIR}/mfem/install_dir_hip/lib/cmake/mfem/ \ + -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir_hip/ \ + -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir_hip/ \ + -DFMT_DIR=${UMPIRE_ROOT}/lib64/cmake/fmt \ + -DUMPIRE_DIR=${UMPIRE_ROOT}/lib64/cmake/umpire \ + -DRAJA_DIR=${RAJA_ROOT}/lib/cmake/raja \ + -DCHAI_DIR=${CHAI_ROOT}/lib/cmake/chai \ + -DCAMP_DIR=${CAMP_ROOT}/lib/cmake/camp |& tee my_exconstit_config + + make -j 4|& tee my_exconstit_build +fi +###END_COMMENT diff --git a/scripts/install/unix_gpu_install_example.sh b/scripts/install/unix_gpu_install_example.sh deleted file mode 100644 index d25e11c..0000000 --- a/scripts/install/unix_gpu_install_example.sh +++ /dev/null @@ -1,203 +0,0 @@ -#!/bin/bash -# The below is a bash script that should work on most UNIX systems to download all of ExaConstit and its dependencies -# and then install them. 
-# -# For ease all of this should be run in its own directory -SCRIPT=$(readlink -f "$0") -BASE_DIR=$(dirname "$SCRIPT") -#change this to the cuda compute capability for your gpu -# LOC_CUDA_ARCH='sm_70' -#CMAKE_CUDA_ARCHITECTURES drops the sm_ aspect of the cuda compute capability -LOC_CUDA_ARCH='70' - -# If you are using SPACK or have another module like system to set-up your developer environment -# you'll want to load up the necessary compilers and devs environments -# In other words make sure what ever MPI you want is loaded, C++, C, and Fortran compilers are loaded, and -# a cmake version b/t 3.12 and 3.18. - -# Build raja -if [ ! -d "raja" ]; then - git clone --recursive https://github.com/llnl/raja.git --branch v2022.10.5 --single-branch - cd ${BASE_DIR}/raja - # Instantiate all the submodules - git submodule init - git submodule update - # Build everything - mkdir build - cd ${BASE_DIR}/raja/build/ - # GPU build - cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \ - -DENABLE_OPENMP=OFF \ - -DENABLE_CUDA=ON \ - -DRAJA_TIMER=chrono \ - -DCMAKE_CUDA_ARCHITECTURESmbly=${LOC_CUDA_ARCH} \ - -DENABLE_TESTS=OFF \ - -DCMAKE_BUILD_TYPE=Release - make -j 4 - make install -else - - echo " RAJA already built " - -fi - -# Now to build ExaCMech -cd ${BASE_DIR} - -if [ ! -d "ExaCMech" ]; then - - git clone https://github.com/LLNL/ExaCMech.git --single-branch - cd ${BASE_DIR}/ExaCMech - # Instantiate all the submodules - git submodule init - git submodule update - # Build everything - mkdir build - cd ${BASE_DIR}/ExaCMech/build - # GPU build - cmake ../ -DCMAKE_INSTALL_PREFIX=../install_dir/ \ - -DRAJA_DIR=${BASE_DIR}/raja/install_dir/lib/cmake/raja/ \ - -DENABLE_OPENMP=OFF \ - -DENABLE_CUDA=ON \ - -DENABLE_TESTS=OFF \ - -DENABLE_MINIAPPS=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CUDA_ARCHITECTURESmbly=${LOC_CUDA_ARCH} \ - -DBUILD_SHARED_LIBS=OFF - make -j 4 - make install -else - - echo " ExaCMech already built " - -fi - -# Now to build our MFEM dependencies -# First let's install Hypre v2.20.0 -cd ${BASE_DIR} -if [ ! -d "hypre" ]; then - - git clone https://github.com/hypre-space/hypre.git --branch v2.26.0 --single-branch - cd ${BASE_DIR}/hypre/src - # Based on their install instructions - # This should work on most systems - # Hypre's default suggestions of just using configure don't always work - ./configure CC=mpicc CXX=mpicxx FC=mpif90 - make -j 4 - make install - cd hypre - HYPRE_DIR="$(pwd)" - -else - - echo " hypre already built " - HYPRE_DIR=${BASE_DIR}/hypre/src/hypre - -fi - -# Now to install metis-5.1.0 -# It appears that there are some minor differences in performance between metis-4 and metis-5 -# If you'd like to install metis-4 instead here's the commands needed -# uncomment the below and then comment the metis-5 commands -# cd ${BASE_DIR} -# curl -o metis-4.0.3.tar.gz http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/OLD/metis-4.0.3.tar.gz -# tar -xzf metis-4.0.3.tar.gz -# rm metis-4.0.3.tar.gz -# cd metis-4.0.3 -# make -# METIS_DIR="$(pwd)" -# metis-5 install down below -cd ${BASE_DIR} - -if [ ! 
-d "metis-5.1.0" ]; then - - curl -o metis-5.1.0.tar.gz https://mfem.github.io/tpls/metis-5.1.0.tar.gz tar -xzf metis-5.1.0.tar.gz - rm metis-5.1.0.tar.gz - cd metis-5.1.0 - mkdir install_dir - make config prefix=${BASE_DIR}/metis-5.1.0/install_dir/ - make -j 4 - make install - cd ${BASE_DIR}/metis-5.1.0/install_dir/ - METIS_DIR="$(pwd)" -else - - echo " metis-5.1.0 already built " - METIS_DIR=${BASE_DIR}/metis-5.1.0/install_dir/ - -fi - -# If you want anyother MFEM options installed like Conduit, ADIOS2, or etc. install them now -# We can now install MFEM with relevant data for ExaConstit - -cd ${BASE_DIR} - -if [ ! -d "mfem" ]; then - - git clone https://github.com/rcarson3/mfem.git --branch exaconstit-dev --single-branch - cd ${BASE_DIR}/mfem/ - mkdir build - cd ${BASE_DIR}/mfem/build/ - # All the options - cmake ../ -DMFEM_USE_MPI=ON -DMFEM_USE_SIMD=OFF\ - -DMETIS_DIR=${METIS_DIR} \ - -DHYPRE_DIR=${HYPRE_DIR} \ - -DCMAKE_INSTALL_PREFIX=../install_dir/ \ - -DMFEM_USE_CUDA=ON \ - -DCMAKE_CUDA_ARCHITECTURESmbly=${LOC_CUDA_ARCH} \ - -DMFEM_USE_OPENMP=OFF \ - -DMFEM_USE_RAJA=ON -DRAJA_DIR=${BASE_DIR}/raja/install_dir/ \ - -DCMAKE_BUILD_TYPE=Release - # The below are the relevant lines needed for ADIOS2 and conduit. You'll want to put them - # before the -DCMAKE_BUILD_TYPE call - # -DMFEM_USE_ADIOS2=ON -DADIOS2_DIR=${ADIOS2_DIR} \ - # -DMFEM_USE_CONDUIT=ON -DConduit_REQUIRED_PACKAGES=HDF5 -DCONDUIT_DIR=${CONDUIT_DIR} \ - # -DHDF5_ROOT:PATH=${HDF5_DIR} \ - make -j 4 - make install - -else - - echo " MFEM already built " - -fi - -#We can finally install ExaConstit -cd ${BASE_DIR} - -if [ ! -d "ExaConstit" ]; then - - git clone https://github.com/LLNL/ExaConstit.git - cd ${BASE_DIR}/ExaConstit/ - # Instantiate all the submodules - git submodule init - git submodule update - # Build everything - mkdir build - cd ${BASE_DIR}/ExaConstit/build/ - - cmake ../ -DENABLE_MPI=ON -DENABLE_FORTRAN=ON \ - -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/lib/cmake/mfem/ \ - -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir/ \ - -DRAJA_DIR=${BASE_DIR}/raja/install_dir/lib/cmake/raja/ \ - -DSNLS_DIR=${BASE_DIR}/ExaCMech/install_dir/ \ - -DENABLE_SNLS_V03=ON \ - -DCMAKE_BUILD_TYPE=Release \ - -DENABLE_CUDA=ON \ - -DCMAKE_CUDA_ARCHITECTURESmbly=${LOC_CUDA_ARCH} \ - -DENABLE_TESTS=ON - # Sometimes the cmake systems can be a bit difficult and not properly find the MFEM installed location - # using the above. If that's the case the below should work: - # -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/ \ - - make -j 4 - # Check and make sure everything installed correctly by running the test suite - make test - -else - - echo " ExaConstit already built " - -fi - -# ExaConstit is now installed diff --git a/scripts/install/unix_install_example.sh b/scripts/install/unix_install_example.sh index 9ef9a58..f31a759 100644 --- a/scripts/install/unix_install_example.sh +++ b/scripts/install/unix_install_example.sh @@ -6,6 +6,11 @@ SCRIPT=$(readlink -f "$0") BASE_DIR=$(dirname "$SCRIPT") +# Set this to your location of python +# for example PYTHON_EXE for an anaconda build of python +# on a mac might be somewhere like: +PYTHON_EXE="/Users/USER/anaconda3/bin/python" + # If you are using SPACK or have another module like system to set-up your developer environment # you'll want to load up the necessary compilers and devs environments # In other words make sure what ever MPI you want is loaded, C++, C, and Fortran compilers are loaded, and @@ -13,7 +18,7 @@ BASE_DIR=$(dirname "$SCRIPT") # Build raja if [ ! 
-d "raja" ]; then - git clone --recursive https://github.com/llnl/raja.git --branch v2022.10.5 --single-branch + git clone --recursive https://github.com/llnl/raja.git --branch v2024.07.0 --single-branch cd ${BASE_DIR}/raja # Instantiate all the submodules git submodule init @@ -69,7 +74,7 @@ fi cd ${BASE_DIR} if [ ! -d "hypre" ]; then - git clone https://github.com/hypre-space/hypre.git --branch v2.26.0 --single-branch + git clone https://github.com/hypre-space/hypre.git --branch v2.30.0 --single-branch cd ${BASE_DIR}/hypre/src # Based on their install instructions # This should work on most systems @@ -168,6 +173,7 @@ if [ ! -d "ExaConstit" ]; then cd ${BASE_DIR}/ExaConstit/build/ cmake ../ -DENABLE_MPI=ON -DENABLE_FORTRAN=ON \ + -DPYTHON_EXECUTABLE=${PYTHON_EXE} \ -DMFEM_DIR=${BASE_DIR}/mfem/install_dir/lib/cmake/mfem/ \ -DECMECH_DIR=${BASE_DIR}/ExaCMech/install_dir/ \ -DRAJA_DIR=${BASE_DIR}/raja/install_dir/lib/cmake/raja/ \ diff --git a/scripts/meshing/CMakeLists.txt b/scripts/meshing/CMakeLists.txt index fd5ff26..d0330e2 100644 --- a/scripts/meshing/CMakeLists.txt +++ b/scripts/meshing/CMakeLists.txt @@ -3,24 +3,32 @@ set(MESHING_DEPENDS ) #SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DDEBUG") #SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG") -exaconstit_fill_depends_list(LIST_NAME MESHING_DEPENDS - DEPENDS_ON mfem mpi) +set(MESHING_DEPENDS) -if(ENABLE_OPENMP) - list(APPEND MESHING_DEPENDS openmp) -endif() +exaconstit_fill_depends_list(LIST_NAME MESHING_DEPENDS + DEPENDS_ON mfem mpi) -if(ENABLE_CUDA) - list(APPEND MESHING_DEPENDS cuda CUDA::cublas CUDA::cusparse) +if (${BLT_VERSION} VERSION_GREATER_EQUAL 0.6.0) + if(ENABLE_CUDA) + list(APPEND MESHING_DEPENDS blt::cuda_runtime blt::cuda) + endif() + if(ENABLE_OPENMP) + list(APPEND MESHING_DEPENDS blt::openmp) + endif() +else() + if(ENABLE_CUDA) + list(APPEND MESHING_DEPENDS cuda cuda_runtime) + endif() + if(ENABLE_OPENMP) + list(APPEND MESHING_DEPENDS openmp) + endif() endif() if(ENABLE_HIP) list(APPEND MESHING_DEPENDS blt::hip blt::hip_runtime) endif() -if(ENABLE_CALIPER) - list(APPEND MESHING_DEPENDS caliper) -endif() +message("-- MESHING_DEPENDS: ${MESHING_DEPENDS}") blt_add_executable(NAME mesh_generator SOURCES mesh_generator.cpp diff --git a/scripts/postprocessing/xtal_light_up/Cargo.lock b/scripts/postprocessing/xtal_light_up/Cargo.lock new file mode 100644 index 0000000..1e98946 --- /dev/null +++ b/scripts/postprocessing/xtal_light_up/Cargo.lock @@ -0,0 +1,396 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "anyhow" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "indoc" +version = "2.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" + +[[package]] +name = "inventory" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f958d3d68f4167080a18141e10381e7634563984a537f2a49a30fd8e53ac5767" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "matrixmultiply" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +dependencies = [ + "autocfg", + "rawpointer", +] + +[[package]] +name = "memoffset" +version = "0.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" +dependencies = [ + "autocfg", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", + "libm", +] + 
+[[package]] +name = "numpy" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec170733ca37175f5d75a5bea5911d6ff45d2cd52849ce98b685394e4f2f37f4" +dependencies = [ + "libc", + "ndarray", + "num-complex", + "num-integer", + "num-traits", + "pyo3", + "rustc-hash", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" +dependencies = [ + "anyhow", + "cfg-if", + "indoc", + "inventory", + "libc", + "memoffset", + "parking_lot", + "portable-atomic", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" +dependencies = [ + "heck", + "proc-macro2", + "pyo3-build-config", + "quote", + "syn", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "redox_syscall" +version = "0.5.3" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +dependencies = [ + "bitflags", +] + +[[package]] +name = "rustc-hash" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + +[[package]] +name = "syn" +version = "2.0.71" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b146dcf730474b4bcd16c311627b31ede9ab149045db4d6088b3becaea046462" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4873307b7c257eddcb50c9bedf158eb669578359fb28428bef438fec8e6ba7c2" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + 
+[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "xtal_light_up" +version = "0.1.0" +dependencies = [ + "anyhow", + "num-traits", + "numpy", + "pyo3", +] diff --git a/scripts/postprocessing/xtal_light_up/Cargo.toml b/scripts/postprocessing/xtal_light_up/Cargo.toml new file mode 100644 index 0000000..b9313f3 --- /dev/null +++ b/scripts/postprocessing/xtal_light_up/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "xtal_light_up" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +anyhow = {version="1.0"} +# rayon = {version="1.10"} +numpy = {version = "0.21", optional=false} +pyo3 = { version = "0.21", optional=false, features = ["abi3-py39", "extension-module","anyhow", "multiple-pymethods",] } + +num-traits = {version = "0.2.14", features = ["libm"]} +[features] +# python = ["numpy", "pyo3"] + +[lib] +name = "xtal_light_up" +crate-type = ["cdylib", "rlib"] + +[profile.release] +opt-level = 3 +codegen-units = 1 +lto = true + +[profile.test] +opt-level = 1 +debug = false +lto = true +incremental = false +codegen-units = 1 diff --git a/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py b/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py new file mode 100644 index 0000000..b0075eb --- /dev/null +++ b/scripts/postprocessing/xtal_light_up/light_up_py/fiber_calcs_rank.py @@ -0,0 +1,258 @@ +# Python standard library imports +import argparse +import os +import time + +# third-party library imports +import numpy as np +import adios2 + +#local imports +import xtal_light_up.xtal_light_up as xlup + +def fiber_calc_ranks(args): + ts_total = time.time() + # path to ExaConstit ADIOS2 binary-pack (.bp) + in_dir = args["in_dir"] + if not os.path.exists(in_dir) : + raise Warning('Input directory not found, aborting!') + + # save directory for script outputs, recursively create directory if it doesn't exist + out_dir = args["out_dir"] + if not os.path.exists(out_dir) : + print('Output directory not found, created at %s' % out_dir) + os.makedirs(out_dir) + else : + print('Output directory found, proceeding.') + + # int - number of resource sets used for the simulation + # IMPORTANT: must set this correctly to process all elements in the simulation + nranks = args["nranks"] # 48 + + # open ADIOS2 binary-pack (.bp) file + ### fh = adios2.open(in_dir , 'r' , engine_type = 'BP4') # this doesn't work in my adios2 install - seems they've removed open + fh = adios2.FileReader(in_dir) + + # list of variables stored in adios2 file + init_vars = fh.available_variables() + + # total number of cycles saved off (+ initial step at time = 0) + # if stride > 1 in options.toml, this number will be (# of ExaConstit steps / stride) + 1 + ### fh.steps() # doesn't work after switching to FileReader (see above) + steps = fh.num_steps() + + #%% Extract connectivity information - needed for other variables. (NO INPUTS HERE) + + con1d = list() + index = np.zeros((nranks , 2) , dtype = np.int32) + iend = 0 + + # Get the initial end node vertices and connectivity array for everything. + # ADIOS2 doesn't save higher order values, so only have the end nodes. + # Many repeat vertices, since these are saved off for each element. + for i in range(nranks) : + + if (i == 0) : + + # Pull out connectivity information. 
+ con = fh.read('connectivity' , block_id = i) + + con1d.append(con[:,1]) + con = con[:,1::] + + # # Can uncomment to also pull out vertices. + # vert = fh.read('vertices' , block_id = i) + + # # Can uncomment to also pull out grain IDs ('ElementAttribute'). + # grain = fh.read('ElementAttribute' , block_id = i) + # grain = grain[con[:,1]] + + else : + + # Pull out connectivity information. + tmp = fh.read('connectivity' , block_id = i) + con1d.append(tmp[:,1]) + + # Connectivity is local to resource set rather than global, so increment to global. + tmp = tmp + np.max(con) + con = np.vstack((con , tmp[:,1::])) + + # # Can uncomment to also pull out vertices. + # tmp = fh.read('vertices' , block_id = i) + # vert = np.vstack((vert , tmp)) + + # # Can uncomment to also pull out grain IDs ('ElementAttribute'). + # tmp = fh.read('ElementAttribute' , block_id = i) + # grain = np.hstack((grain , tmp[con1d[i]])) + + del tmp + + # indexing variable that will be used later on + index[i,0] = iend + iend = con.shape[0] + index[i,1] = iend + + # # Can uncomment to convert grain IDs to int32. + # grain = np.int32(grain) + + conshape = np.copy(con.shape) + + # list of variables to save off (can view available variables in init_vars in next block below) + # different variables are stored in different ways - not all variables are supported by this script + # this script should work for any variables that are saved off for every element - some examples of working variables are given below + # vars_out = [ + # 'ElementVolume' , + # 'LatticeOrientation' , + # 'ShearRate' , + # 'Stress' , + # 'XtalElasticStrain' + # ] + + # initialize + + hkl = np.zeros((4, 3)) + hkl[0, :] = [1,1,1] + hkl[1, :] = [2,0,0] + hkl[2, :] = [2,2,0] + hkl[3, :] = [3,1,1] + + s_dir = np.asarray([0.0,0.0,1.0]) + + top = fh.read('ElementVolume' , block_id = 0) + + # If we want per element quantities then uncomment below block + # elem_vols = np.empty((steps, conshape[0])) + # strains = np.empty((steps, conshape[0], 6)) + # in_fibers = np.zeros((hkl.shape[0], steps, strains.shape[1]), dtype=bool) + # direct_stiffness = np.zeros((steps - 1, conshape[0])) + # tay_fact = np.zeros((steps - 1, conshape[0])) + # eps_rate = np.zeros((steps - 1, conshape[0])) + + lattice_strains = np.zeros((hkl.shape[0], steps)) + lattice_vols = np.zeros_like(lattice_strains) + lattice_dir_stiff = np.zeros((hkl.shape[0], steps-1)) + lattice_tay_fact = np.zeros((hkl.shape[0], steps-1)) + lattice_eps_rate = np.zeros((hkl.shape[0], steps-1)) + + total_volume = np.zeros(steps) + + print("Processing all variables") + for ii in range(nranks): + print("Starting rank update: " + str(ii + 1)) + isize = con1d[ii].shape[0] * conshape[1] + + # Read all of the data in + ev_local = np.ascontiguousarray(fh.read('ElementVolume', start = [0], count = [isize], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize))[:, con1d[ii]]) + + # Provide info later related to RVE size so can see how many elements are + # actually used in the fiber calculations + total_volume += np.sum(ev_local, axis=1) + + xtal_oris_local = arr = np.ascontiguousarray(fh.read('LatticeOrientation', start = [0, 0], count = [isize, 4], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize, 4))[:, con1d[ii], :]) + + elas_strain_local = np.ascontiguousarray(fh.read('XtalElasticStrain', start = [0, 0], count = [isize, 6], step_selection = [0 , steps] , block_id = ii).reshape((steps, isize, 6))[:, con1d[ii], :]) + + stress_local = np.ascontiguousarray(fh.read('Stress', start = [0, 0], 
count = [isize, 6], step_selection = [0 , steps - 1] , block_id = ii).reshape((steps - 1, isize, 6))[:, con1d[ii], :]) + + top = fh.read('ShearRate' , block_id = 0) + gdots_local = np.ascontiguousarray(fh.read('ShearRate', start = [0, 0], count = [isize, top.shape[1]], step_selection = [0 , steps - 1] , block_id = ii).reshape((steps - 1, isize, top.shape[1]))[:, con1d[ii], :]) + + in_fibers_local = np.zeros((hkl.shape[0], steps, elas_strain_local.shape[1]), dtype=bool) + + xlup.calc_within_fibers(xtal_oris_local, s_dir, hkl, 3.60, np.deg2rad(5.0), in_fibers_local) + in_fiber_local1 = np.ascontiguousarray(in_fibers_local[:,1:steps,:]) + ev_local1 = np.ascontiguousarray(ev_local[1:steps,:]) + + # All of our local calculations + xlup.strain_lattice2sample(xtal_oris_local, elas_strain_local) + xlup.calc_lattice_strains(elas_strain_local, s_dir, ev_local, in_fibers_local, lattice_strains, lattice_vols, True) + xlup.calc_directional_stiffness_lattice_fiber(stress_local, elas_strain_local[1:steps,:,:], lattice_dir_stiff, ev_local1, in_fiber_local1, True) + xlup.calc_taylor_factors_lattice_fiber(gdots_local, lattice_tay_fact, lattice_eps_rate, ev_local1, in_fiber_local1, True) + + # If we want per element quantities then uncomment below block + # direct_stiffness_local = np.zeros_like(ev_local[1:steps, :]) + # tay_fact_local = np.zeros_like(ev_local[1:steps, :]) + # eps_rate_local = np.zeros_like(ev_local[1:steps, :]) + # xlup.calc_directional_stiffness(stress_local, elas_strain_local[1:steps,:,:], direct_stiffness_local) + # xlup.calc_taylor_factors(gdots_local, tay_fact_local, eps_rate_local) + + # elem_vols[:, index[ii,0]:index[ii,1]] = ev_local + # strains[:, index[ii,0]:index[ii,1], :] = elas_strain_local + # in_fibers[:, :, index[ii,0]:index[ii,1]] = in_fibers_local + # direct_stiffness[:, index[ii,0]:index[ii,1]] = direct_stiffness_local + # tay_fact[:, index[ii,0]:index[ii,1]] = tay_fact_local + # eps_rate[:, index[ii,0]:index[ii,1]] = eps_rate_local + + fh.close() + + # Print data out and then update all of them for the mean values + print("HKLs used:") + print(hkl.T) + print("Total Volume") + print(total_volume) + print("Lattice strains:") + lattice_strains = lattice_strains / lattice_vols + print(lattice_strains.T) + print("Lattice taylor factor:") + lattice_tay_fact = lattice_tay_fact / lattice_vols[:,1:steps] + print(lattice_tay_fact.T) + print("Lattice plastic deformation rate:") + lattice_eps_rate = lattice_eps_rate / lattice_vols[:,1:steps] + print(lattice_eps_rate.T) + print("Lattice directional stiffness:") + lattice_dir_stiff = lattice_dir_stiff / lattice_vols[:,1:steps] + print(lattice_dir_stiff.T) + print("Lattice volumes:") + print(lattice_vols.T) + + out_basename = args["out_basename"] + out_lattice_quants = out_basename + "lattice_avg_quants" + out_file = os.path.join(out_dir, out_lattice_quants) + np.savez_compressed(out_file, lattice_strains=lattice_strains, lattice_tay_fact=lattice_tay_fact, lattice_eps_rate=lattice_eps_rate, lattice_dir_stiff=lattice_dir_stiff, lattice_vols=lattice_vols, hkls=hkl, total_volume=total_volume) + + # If we want per element quantities then uncomment below block to save off + # in_fibers = in_fibers.astype(np.uint8) + # out_lightup_processed = out_basename + "light_up_processed.bp" + # out_file = os.path.join(out_dir, out_lightup_processed) + + # with adios2.Stream(out_file, "w") as s: + # s.write("ElementVolumes", elem_vols, shape=elem_vols.shape, start=[0,0], count=elem_vols.shape) + # s.write("HKLs", hkl, shape=hkl.shape, start=[0,0], 
count=hkl.shape) + # s.write("FiberElements", in_fibers, shape=in_fibers.shape, start=[0,0,0], count=in_fibers.shape) + # s.write("Strains", strains, shape=strains.shape, start=[0,0,0], count=strains.shape) + # s.write("DirectionalModulus", direct_stiffness, shape=direct_stiffness.shape, start=[0,0], count=direct_stiffness.shape) + # s.write("TaylorFactor", tay_fact, shape=tay_fact.shape, start=[0,0], count=tay_fact.shape) + # s.write("DpEff", eps_rate, shape=eps_rate.shape, start=[0,0], count=eps_rate.shape) + + tf_total = time.time() + print('%.3f seconds to process %s.' % (tf_total - ts_total, "all items")) + +if (__name__ == '__main__') : + + parser = argparse.ArgumentParser( + description = 'Extract specified variables from ExaConstit ADIOS2 outputs.' + ) + parser.add_argument('-in_dir' , + help = 'path to exaconstit.bp' , + type = str, + default='./example_lightup.bp', + ) + parser.add_argument('-out_dir' , + help = 'directory to save script outputs' , + type = str, + default='./outdir') + parser.add_argument('-out_basename' , + help = 'basename for outputs typically would be rve name with underscore after it' , + type = str, + default='rve_') + parser.add_argument('-nranks' , + help = 'number of resource sets used for the simulation (IMPORTANT)' , + type = int, + default=int(1)) + + args = parser.parse_args() + print(args) + + args_dict = vars(args) + fiber_calc_ranks(args_dict) + diff --git a/scripts/postprocessing/xtal_light_up/pyxtallu.py b/scripts/postprocessing/xtal_light_up/pyxtallu.py new file mode 100644 index 0000000..cebc38d --- /dev/null +++ b/scripts/postprocessing/xtal_light_up/pyxtallu.py @@ -0,0 +1,237 @@ +import numpy as np +import xtal_light_up.xtal_light_up as xlup + +from hexrd import rotations as rot +from hexrd import material , valunits + +from hexrd.matrixutil import \ + columnNorm, unitVector, \ + skewMatrixOfVector, findDuplicateVectors, \ + multMatArray, nullSpace + +def make_matl(mat_name , sgnum , lparms , hkl_ssq_max = 50 , dmin_angstroms = 0.5) : + """ + + Parameters + ---------- + mat_name : str + label for material. + sgnum : int + space group number for material. + lparms : list of floats + lattice parameters in angstroms. + hkl_ssq_max : int, optional + maximum hkl sum of squares (peak upper bound). The default is 50. + dmin_angstroms : float, optional + minimum d-spacing in angstroms (alt peak upper bound). The default is 0.6. + + """ + + matl = material.Material(mat_name) + matl.sgnum = sgnum + matl.latticeParameters = lparms + matl.hklMax = hkl_ssq_max + matl.dmin = valunits.valWUnit('lp' , 'length' , dmin_angstroms , 'angstrom') + + nhkls = len(matl.planeData.exclusions) + matl.planeData.set_exclusions(np.zeros(nhkls , dtype = bool)) + + return matl + +def applySym(vec, qsym, csFlag=False, cullPM=False, tol=rot.cnst.sqrt_epsf): + """ + Apply symmetry group to a single 3-vector (columnar) argument. + + csFlag : centrosymmetry flag + cullPM : cull +/- flag + """ + nsym = qsym.shape[1] + Rsym = rot.rotMatOfQuat(qsym) + if nsym == 1: + Rsym = np.array([Rsym, ]) + allhkl = multMatArray( + Rsym, np.tile(vec, (nsym, 1, 1)) + ).swapaxes(1, 2).reshape(nsym, 3).T + + if csFlag: + allhkl = np.hstack([allhkl, -1*allhkl]) + eqv, uid = findDuplicateVectors(allhkl, tol=tol, equivPM=cullPM) + + return allhkl[np.ix_(list(range(3)), uid)] + +def distanceToFiber(c, s, q, qsym, **kwargs): + """ + Calculate symmetrically reduced distance to orientation fiber. + + Parameters + ---------- + c : TYPE + DESCRIPTION. + s : TYPE + DESCRIPTION. + q : TYPE + DESCRIPTION. 
+ qsym : TYPE + DESCRIPTION. + **kwargs : TYPE + DESCRIPTION. + + Raises + ------ + RuntimeError + DESCRIPTION. + + Returns + ------- + d : TYPE + DESCRIPTION. + + """ + csymFlag = False + B = np.eye(3) + + arglen = len(kwargs) + + if len(c) != 3 or len(s) != 3: + raise RuntimeError('c and/or s are not 3-vectors') + + # argument handling + if arglen > 0: + argkeys = list(kwargs.keys()) + for i in range(arglen): + if argkeys[i] == 'centrosymmetry': + csymFlag = kwargs[argkeys[i]] + elif argkeys[i] == 'bmatrix': + B = kwargs[argkeys[i]] + else: + raise RuntimeError("keyword arg \'%s\' is not recognized" + % (argkeys[i])) + + c = unitVector(np.dot(B, np.asarray(c))) + s = unitVector(np.asarray(s).reshape(3, 1)) + + nq = q.shape[1] # number of quaternions + rmats = rot.rotMatOfQuat(q) # (nq, 3, 3) + + csym = applySym(c, qsym, csymFlag) # (3, m) + m = csym.shape[1] # multiplicity + + if nq == 1: + rc = np.dot(rmats, csym) # apply q's to c's + sdotrc = np.dot(s.T, rc).max() + else: + rc = multMatArray( + rmats, np.tile(csym, (nq, 1, 1)) + ) # apply q's to c's + + sdotrc = np.dot( + s.T, + rc.swapaxes(1, 2).reshape(nq*m, 3).T + ).reshape(nq, m).max(1) + + d = rot.arccosSafe(np.array(sdotrc)) + + print(d) + + return d + + +# help(xlup) + +lattice_orientations = np.zeros((1,1,4)) +strains = np.zeros((1,1,6)) + +lattice_orientations[:,:,0] = 1.0 / np.sqrt(1.5) +lattice_orientations[:,:,1] = 0.5 / np.sqrt(1.5) +lattice_orientations[:,:,2] = 0.5 / np.sqrt(1.5) + +strains[:,:,0] = 1.0 +strains[:,:,1] = 2.0 +strains[:,:,2] = 3.0 +strains[:,:,3] = 1.0 +strains[:,:,4] = 2.0 +strains[:,:,5] = 3.0 + +xlup.strain_lattice2sample(lattice_orientations, strains) + +gdots = np.zeros((1,1,12)) +gdots[:,:,0] = 1e-4 +gdots[:,:,1] = 1e-4 +gdots[:,:,2] = -1e-4 +tay_factor = np.zeros((1,1)) + +n_arr = (1 / np.sqrt(3)) * np.array([ + [ 1 , 1 , 1] , + [ 1 , 1 , 1] , + [ 1 , 1 , 1] , + [-1 , 1 , 1] , + [-1 , 1 , 1] , + [-1 , 1 , 1] , + [-1 , -1 , 1] , + [-1 , -1 , 1] , + [-1 , -1 , 1] , + [ 1 , -1 , 1] , + [ 1 , -1 , 1] , + [ 1 , -1 , 1] + ]) + +s_arr = (1 / np.sqrt(2)) * np.array([ + [ 0 , 1 , -1] , + [-1 , 0 , 1] , + [ 1 , -1 , 0] , + [-1 , 0 , -1] , + [ 0 , -1 , 1] , + [ 1 , 1 , 0] , + [ 0 , -1 , -1] , + [ 1 , 0 , 1] , + [-1 , 1 , 0] , + [ 1 , 0 , -1] , + [ 0 , 1 , 1] , + [-1 , -1 , 0] + ]) + +# calculate FCC Schmid tensor +m_arr = np.zeros((n_arr.shape[0] , s_arr.shape[1], n_arr.shape[1])) +for ii in range(m_arr.shape[0]) : + m_arr[ii, :, :] = np.tensordot(s_arr[ii] , n_arr[ii] , axes = 0) + m_arr[ii, :, :] = 0.5 * (m_arr[ii, :, :] + m_arr[ii, :, :].T) +m_arr = np.reshape(m_arr, (n_arr.shape[0] , s_arr.shape[1] * n_arr.shape[1])) +# calculate DpEff from shear strain rates on each slip system +def dp_eff(gdots) : + + Dp = np.sum(m_arr * gdots[: , None] , axis = 0).reshape((3,3)) + + # Dp = Lp + Lp.T - np.diag(Lp.diagonal()) + + return np.sqrt((2 / 3) * np.tensordot(Dp , Dp)) + +dp_effs = np.zeros_like(tay_factor) + +xlup.calc_taylor_factors(gdots, tay_factor, dp_effs) + +# get material symmetry - here is a simple example for IN625 +matl = make_matl(mat_name = 'FCC' , sgnum = 225 , lparms = [3.60 ,]) +pd = matl.planeData + +hkl = np.zeros((4, 3)) +hkl[0, :] = [1,1,1] +hkl[1, :] = [2,0,0] +hkl[2, :] = [2,2,0] +hkl[3, :] = [3,1,1] +s_dir = np.asarray([0.0,0.0,1.0]) +quats = np.swapaxes(lattice_orientations, 0, 2) +for jj in range(4): + # compute crystal direction from planeData + c_dir = np.atleast_2d(np.dot(pd.latVecOps['B'] , hkl[jj, :].T)).T + distance = distanceToFiber(c_dir, s_dir, quats, pd.getQSym()) + +in_fibers = 
np.zeros((hkl.shape[0], 1, 1), dtype=bool)
+
+xlup.calc_within_fibers(lattice_orientations, s_dir, hkl, 3.60, np.deg2rad(5.0), in_fibers)
+
+# # compute distance from quaternions to crystallographic fiber
+# distance = np.degrees(rot.distanceToFiber(c_dir , s_dir , quats , pd.getQSym()))
+
+# # filter for distances within specified distance bound
+# in_fiber = np.where(distance < distance_bnd)[0]
+# in_fiber_arr[jj] = in_fiber
diff --git a/scripts/postprocessing/xtal_light_up/setup.py b/scripts/postprocessing/xtal_light_up/setup.py
new file mode 100644
index 0000000..46aa8be
--- /dev/null
+++ b/scripts/postprocessing/xtal_light_up/setup.py
@@ -0,0 +1,15 @@
+from setuptools import setup
+from setuptools_rust import Binding, RustExtension
+
+import site
+import sys
+site.ENABLE_USER_SITE = "--user" in sys.argv[1:]
+
+setup(
+    name="xtal_light_up",
+    version="0.1",
+    rust_extensions=[RustExtension("xtal_light_up.xtal_light_up", binding=Binding.PyO3,debug=False)],
+    packages=["xtal_light_up"],
+    # rust extensions are not zip safe, just like C-extensions.
+    zip_safe=False,
+)
\ No newline at end of file
diff --git a/scripts/postprocessing/xtal_light_up/src/lib.rs b/scripts/postprocessing/xtal_light_up/src/lib.rs
new file mode 100644
index 0000000..279d9fb
--- /dev/null
+++ b/scripts/postprocessing/xtal_light_up/src/lib.rs
@@ -0,0 +1,7 @@
+extern crate anyhow;
+extern crate numpy;
+extern crate pyo3;
+extern crate num_traits as libnum;
+
+// pub mod xtal_light_up;
+pub mod pyxtal_light_up;
\ No newline at end of file
diff --git a/scripts/postprocessing/xtal_light_up/src/pyxtal_light_up/math.rs b/scripts/postprocessing/xtal_light_up/src/pyxtal_light_up/math.rs
new file mode 100644
index 0000000..dea75cf
--- /dev/null
+++ b/scripts/postprocessing/xtal_light_up/src/pyxtal_light_up/math.rs
@@ -0,0 +1,406 @@
+#![allow(dead_code)]
+use libnum::{Float, NumAssignOps, NumOps, One, Zero};
+
+/// Dot product of two vectors
+/// vec1 and vec2 both have lengths NDIM
+pub fn dot_prod<const NDIM: usize, F>(vec1: &[F], vec2: &[F]) -> F
+where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(vec1.len() >= NDIM);
+    assert!(vec2.len() >= NDIM);
+
+    let mut dot_prod: F = F::zero();
+    for i in 0..NDIM {
+        dot_prod += vec1[i] * vec2[i];
+    }
+
+    dot_prod
+}
+
+/// Cross product of two vectors and only using first 3 components
+/// vec1 and vec2 both have lengths NDIM
+pub fn cross_prod<const NDIM: usize, F>(vec1: &[F], vec2: &[F]) -> [F; 3]
+where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(vec1.len() >= 3);
+    assert!(vec2.len() >= 3);
+
+    let mut cross_prod = [F::zero(); 3];
+
+    cross_prod[0] = vec1[1] * vec2[2] - vec1[2] * vec2[1];
+    cross_prod[1] = vec1[2] * vec2[0] - vec1[0] * vec2[2];
+    cross_prod[2] = vec1[0] * vec2[1] - vec1[1] * vec2[0];
+
+    cross_prod
+}
+
+// Might want a stable norm eventually
+/// Takes the L2 norm of a vector
+/// where vec has length NDIM
+pub fn norm<const NDIM: usize, F>(vec: &[F]) -> F
+where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(vec.len() >= NDIM);
+    F::sqrt(dot_prod::<NDIM, F>(vec, vec))
+}
+
+/// Takes the norm of the columns of a matrix
+/// where the matrix has dimensions NDIM x MDIM
+/// The values are stored in norm_vec which is of length m
+pub fn norm_column<const NDIM: usize, const MDIM: usize, F>(
+    matrix: &[[F; MDIM]],
+    norm_vec: &mut [F],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix.len() >= NDIM);
+    assert!(norm_vec.len() >= MDIM);
+    // Initialize this to have the squared values of the first row
+    for i_m in 0..MDIM {
+        norm_vec[i_m] = matrix[0][i_m] * matrix[0][i_m];
+    }
+
+    // Accumulate the results across all remaining rows
+    for i_n in 1..NDIM {
+        for j_m in 0..MDIM {
+            norm_vec[j_m] += matrix[i_n][j_m] * matrix[i_n][j_m];
+        }
+    }
+
+    // Calculate the norm for each column
+    for item in norm_vec.iter_mut().take(MDIM) {
+        *item = F::sqrt(*item);
+    }
+}
+
+/// Outer product of two vectors
+/// over writes value in supplied matrix
+/// matrix = vec1 \otimes vec2
+/// vec1 has length NDIM
+/// vec2 has length MDIM
+/// matrix has dimensions NDIM x MDIM
+pub fn outer_prod<const NDIM: usize, const MDIM: usize, F>(
+    vec1: &[F],
+    vec2: &[F],
+    matrix: &mut [[F; MDIM]],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix.len() >= NDIM);
+    assert!(vec1.len() >= NDIM);
+    assert!(vec2.len() >= MDIM);
+
+    for i_n in 0..NDIM {
+        for j_m in 0..MDIM {
+            matrix[i_n][j_m] = vec1[i_n] * vec2[j_m];
+        }
+    }
+}
+
+/// Adds a scaled outer product to supplied matrix
+/// matrix += \scale * vec1 \otimes vec2
+/// vec1 has length NDIM
+/// vec2 has length MDIM
+/// scale is an Option type and if supplied None defaults to a value of 1.0
+/// matrix has dimensions NDIM x MDIM
+pub fn outer_prod_add_scale<const NDIM: usize, const MDIM: usize, F>(
+    vec1: &[F],
+    vec2: &[F],
+    scale: Option<F>,
+    matrix: &mut [[F; MDIM]],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix.len() >= NDIM);
+    assert!(vec1.len() >= NDIM);
+    assert!(vec2.len() >= MDIM);
+
+    let alpha = if let Some(x) = scale { x } else { F::one() };
+
+    for i_n in 0..NDIM {
+        for j_m in 0..MDIM {
+            matrix[i_n][j_m] += alpha * vec1[i_n] * vec2[j_m];
+        }
+    }
+}
+
+/// Matrix vector product
+/// matrix has dimensions NDIM x MDIM
+/// vec has dimensions MDIM
+/// prod has dimensions NDIM
+pub fn mat_vec_mult<const NDIM: usize, const MDIM: usize, F>(
+    matrix: &[[F; MDIM]],
+    vec: &[F],
+    prod: &mut [F],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix.len() >= NDIM);
+    assert!(vec.len() >= MDIM);
+    assert!(prod.len() >= NDIM);
+    for i_n in 0..NDIM {
+        prod[i_n] = F::zero();
+        for j_m in 0..MDIM {
+            prod[i_n] += matrix[i_n][j_m] * vec[j_m];
+        }
+    }
+}
+
+/// Matrix transpose vector product
+/// matrix has dimensions NDIM x MDIM
+/// vec has dimensions NDIM
+/// prod has dimensions MDIM
+pub fn mat_t_vec_mult<const NDIM: usize, const MDIM: usize, F>(
+    matrix: &[[F; MDIM]],
+    vec: &[F],
+    prod: &mut [F],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix.len() >= NDIM);
+    assert!(vec.len() >= NDIM);
+    assert!(prod.len() >= MDIM);
+
+    for i_m in 0..MDIM {
+        prod[i_m] = F::zero();
+        for j_n in 0..NDIM {
+            prod[i_m] += matrix[j_n][i_m] * vec[j_n];
+        }
+    }
+}
+
+/// Upper triangle matrix vector product
+/// matrix is an upper triangle matrix
+/// (values below the diagonal are assumed zero)
+/// matrix has dimensions NDIM x MDIM
+/// vec has dimensions MDIM
+/// prod has dimensions NDIM
+/// NDIM <= MDIM
+pub fn upper_tri_mat_vec_mult<const NDIM: usize, const MDIM: usize, F>(
+    matrix: &[[F; MDIM]],
+    vec: &[F],
+    prod: &mut [F],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(NDIM <= MDIM);
+    assert!(matrix.len() >= NDIM);
+    assert!(vec.len() >= MDIM);
+    assert!(prod.len() >= NDIM);
+
+    for i_n in 0..NDIM {
+        prod[i_n] = F::zero();
+        for j_m in i_n..MDIM {
+            prod[i_n] += matrix[i_n][j_m] * vec[j_m];
+        }
+    }
+}
+
+/// Upper triangle matrix transpose vector product
+/// matrix is an upper triangle matrix
+/// (values below the diagonal are assumed zero)
+/// matrix has dimensions NDIM x MDIM
+/// vec has dimensions MDIM
+/// prod has dimensions NDIM
+/// NDIM <= MDIM
+pub fn upper_tri_mat_t_vec_mult<const NDIM: usize, const MDIM: usize, F>(
+    matrix: &[[F; MDIM]],
+    vec: &[F],
+    prod: &mut [F],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(NDIM <= MDIM);
+    assert!(matrix.len() >= NDIM);
+    assert!(vec.len() >= MDIM);
+    assert!(prod.len() >= NDIM);
+
+    for i_n in 0..NDIM {
+        prod[i_n] = F::zero();
+        // M_ji * a_j = p_i
+        // Only go down to and including the diagonal
+        for j_m in 0..=i_n {
+            prod[i_n] += matrix[j_m][i_n] * vec[j_m];
+        }
+    }
+}
+
+/// Matrix-matrix multiplication
+/// matrix1 has dimensions LDIM x NDIM
+/// matrix2 has dimensions NDIM x MDIM
+/// prod_matrix has dimensions LDIM x MDIM
+/// This function will either accumulate the values of
+/// the multiplication on the product,
+/// or it will zero out the product ahead of time
+/// depending on the compile time flag.
+pub fn mat_mat_mult<
+    const LDIM: usize,
+    const NDIM: usize,
+    const MDIM: usize,
+    const ZERO_OUT: bool,
+    F,
+>(
+    matrix1: &[[F; NDIM]],
+    matrix2: &[[F; MDIM]],
+    prod_matrix: &mut [[F; MDIM]],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix1.len() >= LDIM);
+    assert!(matrix2.len() >= NDIM);
+    assert!(prod_matrix.len() >= LDIM);
+
+    if ZERO_OUT {
+        for item in prod_matrix.iter_mut().take(LDIM) {
+            for val in item.iter_mut() {
+                *val = F::zero();
+            }
+        }
+    }
+
+    //prod_matrix_ik = matrix1_ij * matrix2_jk
+    for i_l in 0..LDIM {
+        for j_n in 0..NDIM {
+            for k_m in 0..MDIM {
+                prod_matrix[i_l][k_m] += matrix1[i_l][j_n] * matrix2[j_n][k_m];
+            }
+        }
+    }
+}
+
+/// Matrix transpose-matrix multiplication
+/// matrix1 has dimensions NDIM x LDIM
+/// matrix2 has dimensions NDIM x MDIM
+/// prod_matrix has dimensions LDIM x MDIM
+/// This function will either accumulate the values of
+/// the multiplication on the product,
+/// or it will zero out the product ahead of time
+/// depending on the compile time flag.
+pub fn mat_t_mat_mult<
+    const LDIM: usize,
+    const NDIM: usize,
+    const MDIM: usize,
+    const ZERO_OUT: bool,
+    F,
+>(
+    matrix1: &[[F; LDIM]],
+    matrix2: &[[F; MDIM]],
+    prod_matrix: &mut [[F; MDIM]],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix1.len() >= NDIM);
+    assert!(matrix2.len() >= NDIM);
+    assert!(prod_matrix.len() >= LDIM);
+
+    if ZERO_OUT {
+        for item in prod_matrix.iter_mut().take(LDIM) {
+            for val in item.iter_mut() {
+                *val = F::zero();
+            }
+        }
+    }
+
+    //prod_matrix_ik = matrix1_ji * matrix2_jk
+    for j_n in 0..NDIM {
+        for i_l in 0..LDIM {
+            for k_m in 0..MDIM {
+                prod_matrix[i_l][k_m] += matrix1[j_n][i_l] * matrix2[j_n][k_m];
+            }
+        }
+    }
+}
+
+/// Matrix-matrix transpose multiplication
+/// matrix1 has dimensions LDIM x NDIM
+/// matrix2 has dimensions MDIM x NDIM
+/// prod_matrix has dimensions LDIM x MDIM
+/// This function will either accumulate the values of
+/// the multiplication on the product,
+/// or it will zero out the product ahead of time
+/// depending on the compile time flag.
+pub fn mat_mat_t_mult<
+    const LDIM: usize,
+    const NDIM: usize,
+    const MDIM: usize,
+    const ZERO_OUT: bool,
+    F,
+>(
+    matrix1: &[[F; NDIM]],
+    matrix2: &[[F; NDIM]],
+    prod_matrix: &mut [[F; MDIM]],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(matrix1.len() >= LDIM);
+    assert!(matrix2.len() >= MDIM);
+    assert!(prod_matrix.len() >= LDIM);
+
+    if ZERO_OUT {
+        for item in prod_matrix.iter_mut().take(LDIM) {
+            for val in item.iter_mut() {
+                *val = F::zero();
+            }
+        }
+    }
+
+    //prod_matrix_ik = matrix1_ij * matrix2_kj
+    for i_l in 0..LDIM {
+        for k_m in 0..MDIM {
+            for j_n in 0..NDIM {
+                prod_matrix[i_l][k_m] += matrix1[i_l][j_n] * matrix2[k_m][j_n];
+            }
+        }
+    }
+}
+
+/// Performs the triple product operation
+/// needed to rotate a matrix of NDIM x NDIM
+/// by a rotation matrix
+/// The product matrix is zeroed out in this operation
+/// Transpose does the operation:
+/// prod_matrix_il = rot_matrix_ji matrix_jk rot_matrix_kl
+/// prod_matrix = rot_matrix^t * matrix * rot_matrix
+/// Non-transpose operation does:
+/// prod_matrix_il = rot_matrix_ij matrix_jk rot_matrix_lk
+/// prod_matrix = rot_matrix * matrix * rot_matrix^T
+pub fn rotate_matrix<const NDIM: usize, const TRANSPOSE: bool, F>(
+    rot_matrix: &[[F; NDIM]],
+    matrix: &[[F; NDIM]],
+    prod_matrix: &mut [[F; NDIM]],
+) where
+    F: Float + Zero + One + NumAssignOps + NumOps + core::fmt::Debug,
+{
+    assert!(rot_matrix.len() >= NDIM);
+    assert!(matrix.len() >= NDIM);
+    assert!(prod_matrix.len() >= NDIM);
+
+    // zero things out first
+    for item in prod_matrix.iter_mut().take(NDIM) {
+        for val in item.iter_mut() {
+            *val = F::zero();
+        }
+    }
+
+    // Now for matrix multiplication
+    for i_n in 0..NDIM {
+        for j_n in 0..NDIM {
+            for k_n in 0..NDIM {
+                for l_n in 0..NDIM {
+                    if TRANSPOSE {
+                        // This is rot_matrix_ji matrix_jk rot_matrix_kl
+                        prod_matrix[i_n][l_n] +=
+                            rot_matrix[j_n][i_n] * matrix[j_n][k_n] * rot_matrix[k_n][l_n];
+                    } else {
+                        // This is rot_matrix_ij matrix_jk rot_matrix_lk
+                        prod_matrix[i_n][l_n] +=
+                            rot_matrix[i_n][j_n] * matrix[j_n][k_n] * rot_matrix[l_n][k_n];
+                    }
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/scripts/postprocessing/xtal_light_up/src/pyxtal_light_up/mod.rs b/scripts/postprocessing/xtal_light_up/src/pyxtal_light_up/mod.rs
new file mode 100644
index 0000000..717ca96
--- /dev/null
+++ b/scripts/postprocessing/xtal_light_up/src/pyxtal_light_up/mod.rs
@@ -0,0 +1,718 @@
+use numpy::{PyArray1, PyArray2, PyArray3, PyArrayMethods};
+use pyo3::{pymodule, Bound, types::PyModule, PyResult, Python};
+
+
+// use crate::xtal_light_up;
+pub(crate) mod math;
+use math::*;
+
+#[pymodule]
+fn xtal_light_up(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()> {
+    #[pyfn(m)]
+    #[pyo3(name = "strain_lattice2sample")]
+    fn strain_lattice2sample<'py>(
+        lattice_orientations: &Bound<'_, PyArray3<f64>>,
+        strains: &Bound<'_, PyArray3<f64>>,
+    ) -> anyhow::Result<()> {
+        let xtal_o = lattice_orientations.readonly();
+        let xtal_orientations = xtal_o.as_slice()?;
+        let mut xtal_s = strains.readwrite();
+        let xtal_strains = xtal_s.as_slice_mut()?;
+
+        // let xtal_ori_chunks = xtal_orientations.chunks_exact(4);
+        // let mut xtal_strain_chunks = xtal_strains.chunks_exact_mut(6);
+        // zip(xtal_orientations.chunks_exact(4)).
+        // |(strain, _quats)|
+        // xtal_strains.par_chunks_exact_mut(6).enumerate().for_each(|(i, strain)| { strain[0] = 0.0_f64;
+        // // Zipping quats isn't the most obvious if we wanted chunks of things
+        // // in parallel at least...
+ // // This at least gets us around the compiler issue + // let quats : &[f64] = { + // let start = i * 4; + // let end = (i + 1) * 4; + // &xtal_orientations[start..end] + // }; + // }); + + xtal_strains.chunks_exact_mut(6).zip(xtal_orientations.chunks_exact(4)).for_each(|(strain_vec, quat)| { + rotate_strain_to_sample(quat, strain_vec); + }); + Ok(()) + } + + + #[pyfn(m)] + #[pyo3(name = "calc_taylor_factors")] + fn calc_taylor_factors<'py>( + shear_rates: &Bound<'_, PyArray3>, + taylor_factors: &Bound<'_, PyArray2>, + eff_plastic_def_rate: &Bound<'_, PyArray2>, + ) -> anyhow::Result<()> { + let gamma_dots = shear_rates.readonly(); + let gammadots = gamma_dots.as_slice()?; + let mut tay_factors = taylor_factors.readwrite(); + let tayfacs = tay_factors.as_slice_mut()?; + let mut eff_pl_def_rate = eff_plastic_def_rate.readwrite(); + let eps_rate = eff_pl_def_rate.as_slice_mut()?; + + let eps_rate_gdots = eps_rate.iter_mut().zip(gammadots.chunks_exact(12)); + tayfacs.iter_mut().zip(eps_rate_gdots).for_each(|(tayfac, eps_rate_gdot)| { + (*tayfac, *eps_rate_gdot.0) = calc_taylor_factor(eps_rate_gdot.1); + }); + Ok(()) + } + + #[pyfn(m)] + #[pyo3(name = "calc_directional_stiffness")] + fn calc_directional_stiffness<'py>( + cauchy_stress: &Bound<'_, PyArray3>, + sample_strain: &Bound<'_, PyArray3>, + directional_stiffness: &Bound<'_, PyArray2>, + ) -> anyhow::Result<()> { + + let cauchy = cauchy_stress.readonly(); + let stress = cauchy.as_slice()?; + let sstrain = sample_strain.readonly(); + let strain = sstrain.as_slice()?; + + let mut dir_stiffness = directional_stiffness.readwrite(); + let dir_stiff = dir_stiffness.as_slice_mut()?; + + let stress_strains = stress.chunks_exact(6).zip(strain.chunks_exact(6)); + + dir_stiff.iter_mut().zip(stress_strains).for_each(|(dir_stiff, stress_strains)| { + if f64::abs(stress_strains.1[2]) < f64::EPSILON { + *dir_stiff = 0.0_f64 + } + else { + *dir_stiff = stress_strains.0[2] / stress_strains.1[2]; + } + }); + Ok(()) + } + + #[pyfn(m)] + #[pyo3(name = "calc_lattice_strains")] + fn calc_lattice_strains<'py>( + sample_strains: &Bound<'_, PyArray3>, + sample_dir: &Bound<'_, PyArray1>, + elem_vols: &Bound<'_, PyArray2>, + in_fibers: &Bound<'_, PyArray3>, + lattice_strains: &Bound<'_, PyArray2>, + lattice_volumes: &Bound<'_, PyArray2>, + per_rank_update: bool + ) -> anyhow::Result<()> { + let num_elems: usize; + let num_steps: usize; + + let s_in_fibers = in_fibers.readonly(); + { + let in_fiber_av = s_in_fibers.as_array(); + (_, num_steps, num_elems) = in_fiber_av.dim(); + } + + let in_fiber_hkls = s_in_fibers.as_slice()?; + let s_strains = sample_strains.readonly(); + let strains = s_strains.as_slice()?; + + let mut s_lattice_strains = lattice_strains.readwrite(); + let lat_strains = s_lattice_strains.as_slice_mut()?; + + let mut s_lattice_vols = lattice_volumes.readwrite(); + let lat_vols = s_lattice_vols.as_slice_mut()?; + + let s_elem_vols = elem_vols.readonly(); + let evs = s_elem_vols.as_slice()?; + + let s_sample_dir = sample_dir.readonly(); + let s_dir = s_sample_dir.as_slice()?; + + let project_vec = [s_dir[0] * s_dir[0], s_dir[1] * s_dir[1] , s_dir[2] * s_dir[2] , 2.0_f64 * s_dir[1] * s_dir[2] , 2.0_f64 * s_dir[0] * s_dir[2] , 2.0_f64 * s_dir[0] * s_dir[1]]; + + let lat_strain_in_fiber_iter = lat_strains.chunks_exact_mut(num_steps).zip(in_fiber_hkls.chunks_exact(num_steps * num_elems)); + + let lat_vol_strain_in_fiber_iter = lat_vols.chunks_exact_mut(num_steps).zip(lat_strain_in_fiber_iter); + + for (lat_vols_hkl, (lat_strains_hkl, in_fiber_hkl)) 
in lat_vol_strain_in_fiber_iter.into_iter() { + let ev_strains_iter = evs.chunks_exact(num_elems).zip(strains.chunks_exact(num_elems * 6)); + let ev_strains_in_fiber_iter = in_fiber_hkl.chunks_exact(num_elems).zip(ev_strains_iter); + + for (istep, (in_fiber_hkl_step, (evs_step, strains_step))) in ev_strains_in_fiber_iter.enumerate() { + let (total_lat_vol, inv_total_lat_vol) = calc_volume_terms_fiber(evs_step, in_fiber_hkl_step, per_rank_update); + + lat_vols_hkl[istep] += total_lat_vol; + + lat_strains_hkl[istep] += strains_step.chunks_exact(6) + .zip(evs_step) + .zip(in_fiber_hkl_step) + .filter(|ev_strain_in_fiber| *ev_strain_in_fiber.1) + .map(|((strain, ev), _)| { + ev * inv_total_lat_vol * dot_prod::<6,f64>(&project_vec, strain) + }) + .reduce(|lat_strain, lat_strain_elem| + lat_strain + lat_strain_elem + ).unwrap_or(0.0_f64); + } + } + Ok(()) + } + + #[pyfn(m)] + #[pyo3(name = "calc_taylor_factors_lattice_fiber")] + fn calc_taylor_factors_lattice_fiber<'py>( + shear_rates: &Bound<'_, PyArray3>, + taylor_factors: &Bound<'_, PyArray2>, + eff_plastic_def_rate: &Bound<'_, PyArray2>, + elem_vols: &Bound<'_, PyArray2>, + in_fibers: &Bound<'_, PyArray3>, + per_rank_update: bool + ) -> anyhow::Result<()> { + let num_elems: usize; + let num_steps: usize; + + let s_in_fibers = in_fibers.readonly(); + { + let in_fiber_av = s_in_fibers.as_array(); + (_, num_steps, num_elems) = in_fiber_av.dim(); + } + let in_fiber_hkls = s_in_fibers.as_slice()?; + let s_elem_vols = elem_vols.readonly(); + let evs = s_elem_vols.as_slice()?; + + let gamma_dots = shear_rates.readonly(); + let gammadots = gamma_dots.as_slice()?; + let mut tay_factors = taylor_factors.readwrite(); + let tayfacs = tay_factors.as_slice_mut()?; + let mut eff_pl_def_rate = eff_plastic_def_rate.readwrite(); + let eps_rate = eff_pl_def_rate.as_slice_mut()?; + + let lat_eps_in_fiber_iter = eps_rate.chunks_exact_mut(num_steps).zip(in_fiber_hkls.chunks_exact(num_steps * num_elems)); + let ltf_leps_in_fiber_iter = tayfacs.chunks_exact_mut(num_steps).zip(lat_eps_in_fiber_iter); + + for (tay_fac_hkl, (eps_rate_hkl, in_fiber_hkl)) in ltf_leps_in_fiber_iter.into_iter() { + let ev_gdot_iter = evs.chunks_exact(num_elems).zip(gammadots.chunks_exact(num_elems * 12)); + let ev_gdot_in_fiber_iter = in_fiber_hkl.chunks_exact(num_elems).zip(ev_gdot_iter); + for (istep, (in_fiber_hkl_step, (evs_step, gdots_step))) in ev_gdot_in_fiber_iter.enumerate() { + let (_, inv_total_lat_vol) = calc_volume_terms_fiber(evs_step, in_fiber_hkl_step, per_rank_update); + + let (tay_fac_hkl_step, eps_rate_hkl_step) = gdots_step.chunks_exact(12) + .zip(evs_step) + .zip(in_fiber_hkl_step) + .filter(|ev_gdot_in_fiber| *ev_gdot_in_fiber.1) + .map(|((gdot, ev), _)| { + let (tf, deps) = calc_taylor_factor(gdot); + (ev * inv_total_lat_vol * tf, ev * inv_total_lat_vol * deps) + }) + .reduce(|tfdeps, tfdeps_elem| + (tfdeps.0 + tfdeps_elem.0, tfdeps.1 + tfdeps_elem.1) + ).unwrap_or((0.0_f64, 0.0_f64)); + tay_fac_hkl[istep] += tay_fac_hkl_step; + eps_rate_hkl[istep] += eps_rate_hkl_step; + } + } + Ok(()) + } + + #[pyfn(m)] + #[pyo3(name = "calc_directional_stiffness_lattice_fiber")] + fn calc_directional_stiffness_lattice_fiber<'py>( + cauchy_stress: &Bound<'_, PyArray3>, + sample_strain: &Bound<'_, PyArray3>, + directional_stiffness: &Bound<'_, PyArray2>, + elem_vols: &Bound<'_, PyArray2>, + in_fibers: &Bound<'_, PyArray3>, + per_rank_update: bool + ) -> anyhow::Result<()> { + let num_elems: usize; + let num_steps: usize; + + let s_in_fibers = in_fibers.readonly(); + { + let 
in_fiber_av = s_in_fibers.as_array(); + (_, num_steps, num_elems) = in_fiber_av.dim(); + } + let in_fiber_hkls = s_in_fibers.as_slice()?; + let s_elem_vols = elem_vols.readonly(); + let evs = s_elem_vols.as_slice()?; + + let cauchy = cauchy_stress.readonly(); + let stress = cauchy.as_slice()?; + let sstrain = sample_strain.readonly(); + let strain = sstrain.as_slice()?; + + let mut dir_stiffness = directional_stiffness.readwrite(); + let dir_stiff = dir_stiffness.as_slice_mut()?; + + let lat_ds_in_fiber_iter = dir_stiff.chunks_exact_mut(num_steps).zip(in_fiber_hkls.chunks_exact(num_steps * num_elems)); + + for (dir_stiff_hkl, in_fiber_hkl) in lat_ds_in_fiber_iter.into_iter() { + let stress_strains = stress.chunks_exact(num_elems * 6).zip(strain.chunks_exact(num_elems * 6)); + let ev_sst_iter = evs.chunks_exact(num_elems).zip(stress_strains); + let ev_sst_in_fiber_iter = in_fiber_hkl.chunks_exact(num_elems).zip(ev_sst_iter); + for (istep, (in_fiber_hkl_step, (evs_step, (stress_step, strain_step)))) in ev_sst_in_fiber_iter.enumerate() { + let (_, inv_total_lat_vol) = calc_volume_terms_fiber(evs_step, in_fiber_hkl_step, per_rank_update); + + dir_stiff_hkl[istep] += stress_step.chunks_exact(6) + .zip(strain_step.chunks_exact(6)) + .zip(evs_step) + .zip(in_fiber_hkl_step) + .filter(|ev_sst_in_fiber| *ev_sst_in_fiber.1) + .map(|((sst, ev), _)| { + if f64::abs(sst.1[2]) < f64::EPSILON { + 0.0_f64 + } + else { + (sst.0[2] / sst.1[2]) * ev * inv_total_lat_vol + } + }) + .reduce(|dir_modulus, dir_modulus_elem| + dir_modulus + dir_modulus_elem + ).unwrap_or(0.0_f64); + } + } + + Ok(()) + } + + #[pyfn(m)] + #[pyo3(name = "calc_within_fibers")] + fn calc_within_fibers<'py>( + lattice_orientations: &Bound<'_, PyArray3>, + sample_dir: &Bound<'_, PyArray1>, + hkls: &Bound<'_, PyArray2>, + lattice_param_a: f64, + distance_tolerance_rad: f64, + in_fibers: &Bound<'_, PyArray3>, + ) -> anyhow::Result<()> { + + let num_elems: usize; + let num_steps: usize; + + let mut s_in_fibers = in_fibers.readwrite(); + { + let in_fiber_av = s_in_fibers.as_array(); + (_, num_steps, num_elems) = in_fiber_av.dim(); + } + + let in_fiber_hkls = s_in_fibers.as_slice_mut()?; + + let xtal_o = lattice_orientations.readonly(); + let xtal_orientations = xtal_o.as_slice()?; + + let s_hkls = hkls.readonly(); + let slice_hkls = s_hkls.as_slice()?; + + let s_sample_dir = sample_dir.readonly(); + let s_dir = s_sample_dir.as_slice()?; + + in_fiber_hkls.chunks_exact_mut(num_steps * num_elems) + .zip(slice_hkls.chunks_exact(3)) + .for_each(|(in_fibers, hkl)| { + calculate_in_fibers(lattice_param_a, hkl, s_dir, xtal_orientations, distance_tolerance_rad, in_fibers); + }); + + Ok(()) + } + Ok(()) +} + +#[inline(always)] +fn calc_volume_terms_fiber(evs_step: &[f64], + in_fiber_hkl_step: &[bool], + per_rank_update: bool + ) -> (f64, f64) { + let total_lat_vol = evs_step.into_iter() + .zip(in_fiber_hkl_step) + .filter(|ev_in_fiber| *ev_in_fiber.1) + .map(|(ev, _)| *ev) + .reduce(|ev_total, ev| { + ev_total + ev + }).unwrap_or(0.0_f64); + + let inv_total_lat_vol = + if per_rank_update { + 1.0_f64 + } else { + if total_lat_vol > f64::EPSILON { + 1.0_f64 / total_lat_vol + } else { + 0.0_f64 + } + }; + (total_lat_vol, inv_total_lat_vol) +} + +#[inline(always)] +fn calculate_in_fibers(lparam_a: f64, + hkl: &[f64], + s_dir: &[f64], + quats: &[f64], + distance_tolerance: f64, + in_fiber: &mut [bool] +) { + // Computes reciprocal lattice B but different from HEXRD we return as row matrix as that's the easiest way of doing things + let lat_vec_ops_b = 
compute_lattice_b_param_cubic(lparam_a); + + // compute crystal direction from planeData + let c_dir = { + let mut tmp_cdir = [0.0_f64; 3]; + mat_t_vec_mult::<3, 3, f64>(&lat_vec_ops_b, &hkl, &mut tmp_cdir); + tmp_cdir + }; + + let symm_quat = symmetric_cubic_quaternions(); + + within_fiber::<24>(&c_dir, s_dir, quats, &symm_quat, distance_tolerance, in_fiber); +} + +#[inline(always)] +fn quat2rmat(quat: &[f64]) -> [[f64; 3]; 3] { + assert!(quat.len() >= 4); + let qbar = quat[0] * quat[0] - (quat[1] * quat[1] + quat[2] * quat[2] + quat[3] * quat[3]); + + let mut rmat = [[0.0_f64; 3]; 3]; + + rmat[0][0] = qbar + 2.0_f64 * quat[1] * quat[1]; + rmat[1][0] = 2.0_f64 * (quat[1] * quat[2] + quat[0] * quat[3]); + rmat[2][0] = 2.0_f64 * (quat[1] * quat[3] - quat[0] * quat[2]); + + rmat[0][1] = 2.0_f64 * (quat[1] * quat[2] - quat[0] * quat[3]); + rmat[1][1] = qbar + 2.0_f64 * quat[2] * quat[2]; + rmat[2][1] = 2.0_f64 * (quat[2] * quat[3] + quat[0] * quat[1]); + + rmat[0][2] = 2.0_f64 * (quat[1] * quat[3] + quat[0] * quat[2]); + rmat[1][2] = 2.0_f64 * (quat[2] * quat[3] - quat[0] * quat[1]); + rmat[2][2] = qbar + 2.0_f64 * quat[3] * quat[3]; + + rmat +} + +#[inline(always)] +fn rotate_strain_to_sample(quat: &[f64], strain_vec: &mut[f64]) { + assert!(quat.len() >= 4); + assert!(strain_vec.len() >= 6); + let rmat = quat2rmat(quat); + let strain = + { + let mut strain = [[0.0_f64; 3]; 3]; + + strain[0][0] = strain_vec[0]; + strain[1][1] = strain_vec[1]; + strain[2][2] = strain_vec[2]; + strain[1][2] = strain_vec[3]; strain[2][1] = strain[1][2]; + strain[0][2] = strain_vec[4]; strain[2][0] = strain[0][2]; + strain[0][1] = strain_vec[5]; strain[1][0] = strain[0][1]; + + strain + }; + + { + let mut strain_samp = [[0.0_f64; 3]; 3]; + rotate_matrix::<3, false, f64>(&rmat, &strain, &mut strain_samp); + + strain_vec[0] = strain_samp[0][0]; + strain_vec[1] = strain_samp[1][1]; + strain_vec[2] = strain_samp[2][2]; + strain_vec[3] = strain_samp[1][2]; + strain_vec[4] = strain_samp[0][2]; + strain_vec[5] = strain_samp[0][1]; + } +} + +#[inline(always)] +fn calc_taylor_factor(gdots: &[f64]) -> (f64, f64) { + assert!(gdots.len() >= 12); + let symm_schmid = calculate_fcc_symm_schmid_tensor(); + let mut plastic_def_rate_vec = [0.0_f64; 6]; + mat_t_vec_mult::<12, 6, f64>(&symm_schmid, gdots, &mut plastic_def_rate_vec); + + let eff_plastic_def_rate = { + let norm_vec = norm::<6, f64>(&plastic_def_rate_vec); + norm_vec * f64::sqrt(2.0_f64 / 3.0_f64) + }; + + if eff_plastic_def_rate <= f64::EPSILON { + return (0.0_f64, 0.0) + } + + let abs_sum_shear_rate = { + let mut sum = 0.0_f64; + for gdot in gdots.iter() { + sum += f64::abs(*gdot); + } + sum + }; + + (abs_sum_shear_rate / eff_plastic_def_rate, eff_plastic_def_rate) +} + +#[inline(always)] +fn calculate_fcc_symm_schmid_tensor() -> [[f64; 6]; 12] { + let two = 2.0_f64; + let three = 3.0_f64; + let sqrt_3i = 1.0_f64 / f64::sqrt(three); + let sqrt_2i = 1.0 / f64::sqrt(two); + + let slip_direction = [ + [sqrt_3i, sqrt_3i, sqrt_3i], + [sqrt_3i, sqrt_3i, sqrt_3i], + [sqrt_3i, sqrt_3i, sqrt_3i], + [-sqrt_3i, sqrt_3i, sqrt_3i], + [-sqrt_3i, sqrt_3i, sqrt_3i], + [-sqrt_3i, sqrt_3i, sqrt_3i], + [-sqrt_3i, -sqrt_3i, sqrt_3i], + [-sqrt_3i, -sqrt_3i, sqrt_3i], + [-sqrt_3i, -sqrt_3i, sqrt_3i], + [sqrt_3i, -sqrt_3i, sqrt_3i], + [sqrt_3i, -sqrt_3i, sqrt_3i], + [sqrt_3i, -sqrt_3i, sqrt_3i], + ]; + + let slip_plane_normal = [ + [0.0_f64, sqrt_2i, -sqrt_2i], + [-sqrt_2i, 0.0_f64, sqrt_2i], + [sqrt_2i, -sqrt_2i, 0.0_f64], + [-sqrt_2i, 0.0_f64, -sqrt_2i], + [0.0_f64, -sqrt_2i, sqrt_2i], + 
[sqrt_2i, sqrt_2i, 0.0_f64], + [0.0_f64, -sqrt_2i, -sqrt_2i], + [sqrt_2i, 0.0_f64, sqrt_2i], + [-sqrt_2i, sqrt_2i, 0.0_f64], + [sqrt_2i, 0.0_f64, -sqrt_2i], + [0.0_f64, sqrt_2i, sqrt_2i], + [-sqrt_2i, -sqrt_2i, 0.0_f64], + ]; + + let mut symm_schmid = [[0.0_f64; 6]; 12]; + let mut skw_schmid = [[0.0_f64; 3]; 12]; + + calculate_schmid_tensor::<12>( + &slip_plane_normal, + &slip_direction, + &mut symm_schmid, + &mut skw_schmid, + ); + + symm_schmid +} + +fn calculate_schmid_tensor( + slip_plane_normal: &[[f64; 3]], + slip_direction: &[[f64; 3]], + symm_schmid: &mut [[f64; 6]], + skw_schmid: &mut [[f64; 3]], +) +{ + assert!(slip_plane_normal.len() >= NSLIP); + assert!(slip_direction.len() >= NSLIP); + assert!(symm_schmid.len() >= NSLIP); + assert!(skw_schmid.len() >= NSLIP); + + let one_half = 0.5_f64; + let sqrt_2i = 1.0 / f64::sqrt(2.0_f64); + + let mut schmid = [[0.0_f64; 3]; 3]; + + for islip in 0..NSLIP { + outer_prod::<3, 3, f64>( + &slip_direction[islip], + &slip_plane_normal[islip], + &mut schmid, + ); + + // Replace with an inline set of functions + // skew(schmid)[2, 1] = 1/2 * (schmid[2, 1] - schmid[1, 2]) + skw_schmid[islip][0] = one_half * (schmid[2][1] - schmid[1][2]); + // skew(schmid)[0, 2] = 1/2 * (schmid[0, 2] - schmid[2, 0]) + skw_schmid[islip][1] = one_half * (schmid[0][2] - schmid[2][0]); + // skew(schmid)[1, 0] = 1/2 * (schmid[1, 0] - schmid[0, 1]) + skw_schmid[islip][2] = one_half * (schmid[1][0] - schmid[0][1]); + + // Replace with an inline set of functions + // sym(schmid)[0, 0] = 1/2 * (schmid[0, 0] + schmid[0, 0]) + symm_schmid[islip][0] = schmid[0][0]; + // sym(schmid)[1, 1] = 1/2 * (schmid[1, 1] + schmid[1, 1]) + symm_schmid[islip][1] = schmid[1][1]; + // skew(schmid)[2, 2] = 1/2 * (schmid[2, 2] + schmid[2, 2]) + symm_schmid[islip][2] = schmid[2][2]; + // sym(schmid)[1, 2] = 1/2 * (schmid[1, 2] + schmid[2, 1]) + // For consistent dot products replace with sqrt(2)/2 = 1/sqrt(2) + symm_schmid[islip][3] = sqrt_2i * (schmid[2][1] + schmid[1][2]); + // sym(schmid)[0, 2] = 1/2 * (schmid[0, 2] + schmid[2, 0]) + symm_schmid[islip][4] = sqrt_2i * (schmid[0][2] + schmid[2][0]); + // sym(schmid)[0, 1] = 1/2 * (schmid[0, 1] + schmid[1, 0]) + symm_schmid[islip][5] = sqrt_2i * (schmid[1][0] + schmid[0][1]); + } +} + +/// Computes reciprocal lattice B but different from HEXRD we return as row matrix as that's the easiest way of doing things +#[inline(always)] +fn compute_lattice_b_param_cubic(lparam_a: f64) -> [[f64; 3]; 3] { + let deg90 = std::f64::consts::PI / 2.0_f64; + let cellparms = [lparam_a, lparam_a, lparam_a, deg90, deg90, deg90]; + + let alfa = cellparms[3]; + let beta = cellparms[4]; + let gamma = cellparms[5]; + + let cosalfar = (f64::cos(beta) * f64::cos(gamma) - f64::cos(alfa)) / (f64::sin(beta) * f64::sin(gamma)); + let sinalfar = f64::sqrt(1.0_f64 - cosalfar * cosalfar); + + let a = [cellparms[0], 0.0_f64, 0.0_f64]; + let b = [cellparms[1] * f64::cos(gamma), cellparms[1] * f64::sin(gamma), 0.0_f64]; + let c = [cellparms[2] * f64::cos(beta), -cellparms[2] * cosalfar * f64::sin(beta), cellparms[2] * sinalfar * f64::sin(beta)]; + + // Cell volume + let inv_vol = { + let v_temp = cross_prod(&b, &c); + 1.0 / dot_prod::<3, f64>(&a, &v_temp) + }; + + // Reciprocal lattice vectors + let cross_prod_inv_v = |vec1: &[f64], vec2: &[f64], inv_vol: f64| -> [f64; 3] + { + let mut tmp = cross_prod(vec1, vec2); + tmp[0] *= inv_vol; + tmp[1] *= inv_vol; + tmp[2] *= inv_vol; + tmp + }; + + let astar = cross_prod_inv_v(&b, &c, inv_vol); + let bstar = cross_prod_inv_v(&c, &a, 
inv_vol); + let cstar = cross_prod_inv_v(&a, &b, inv_vol); + + // B takes components in the reciprocal lattice to X + [astar, bstar, cstar] +} + +#[inline(always)] +fn symmetric_cubic_quaternions() -> [[f64; 4]; 24] { + let angle_axis_symm = [ + [0.0_f64, 1.0_f64, 0.0_f64, 0.0_f64], // identity + [std::f64::consts::FRAC_PI_2, 1.0_f64, 0.0_f64, 0.0_f64], // fourfold about 1 0 0 (x1) + [std::f64::consts::PI, 1.0_f64, 0.0_f64, 0.0_f64], // + [std::f64::consts::FRAC_PI_2 * 3.0_f64, 1.0_f64, 0.0_f64, 0.0_f64], // + [std::f64::consts::FRAC_PI_2, 0.0_f64, 1.0_f64, 0.0_f64], // fourfold about 0 1 0 (x2) + [std::f64::consts::PI, 0.0_f64, 1.0_f64, 0.0_f64], // + [std::f64::consts::FRAC_PI_2 * 3.0_f64, 0.0_f64, 1.0_f64, 0.0_f64], // + [std::f64::consts::FRAC_PI_2, 0.0_f64, 0.0_f64, 1.0_f64], // fourfold about 0 0 1 (x3) + [std::f64::consts::PI, 0.0_f64, 0.0_f64, 1.0_f64], // + [std::f64::consts::FRAC_PI_2 * 3.0_f64, 0.0_f64, 0.0_f64, 1.0_f64], // + [std::f64::consts::FRAC_PI_3 * 2.0_f64, 1.0_f64, 1.0_f64, 1.0_f64], // threefold about 1 1 1 + [std::f64::consts::FRAC_PI_3 * 4.0_f64, 1.0_f64, 1.0_f64, 1.0_f64], // + [std::f64::consts::FRAC_PI_3 * 2.0_f64, -1.0_f64, 1.0_f64, 1.0_f64], // threefold about -1 1 1 + [std::f64::consts::FRAC_PI_3 * 4.0_f64, -1.0_f64, 1.0_f64, 1.0_f64], // + [std::f64::consts::FRAC_PI_3 * 2.0_f64, -1.0_f64, -1.0_f64, 1.0_f64], // threefold about -1 -1 1 + [std::f64::consts::FRAC_PI_3 * 4.0_f64, -1.0_f64, -1.0_f64, 1.0_f64], // + [std::f64::consts::FRAC_PI_3 * 2.0_f64, 1.0_f64, -1.0_f64, 1.0_f64], // threefold about 1 -1 1 + [std::f64::consts::FRAC_PI_3 * 4.0_f64, 1.0_f64, -1.0_f64, 1.0_f64], // + [std::f64::consts::PI, 1.0_f64, 1.0_f64, 0.0_f64], // twofold about 1 1 0 + [std::f64::consts::PI, -1.0_f64, 1.0_f64, 0.0_f64], // twofold about -1 1 0 + [std::f64::consts::PI, 1.0_f64, 0.0_f64, 1.0_f64], // twofold about 1 0 1 + [std::f64::consts::PI, 0.0_f64, 1.0_f64, 1.0_f64], // twofold about 0 1 1 + [std::f64::consts::PI, -1.0_f64, 0.0_f64, 1.0_f64], // twofold about -1 0 1 + [std::f64::consts::PI, 0.0_f64, -1.0_f64, 1.0_f64], // twofold about 0 -1 1 + ]; + + let inv2 = 1.0_f64 / 2.0_f64; + let mut quat_symm = [[0.0_f64; 4]; 24]; + quat_symm.iter_mut() + .zip(angle_axis_symm) + .for_each(|(quat, ang_axis)| { + let s = f64::sin(inv2 * ang_axis[0]); + quat[0] = f64::cos(inv2 * ang_axis[0]); + let mut inv_norm_axis = 1.0 / norm::<3, f64>(&ang_axis[1..4]); + quat[1] = s * ang_axis[1] * inv_norm_axis; + quat[2] = s * ang_axis[2] * inv_norm_axis; + quat[3] = s * ang_axis[3] * inv_norm_axis; + + inv_norm_axis = 1.0_f64; + if quat[0] < 0.0 { + inv_norm_axis *= -1.0_f64; + } + + quat[0] *= inv_norm_axis; + quat[1] *= inv_norm_axis; + quat[2] *= inv_norm_axis; + quat[3] *= inv_norm_axis; + }); + + quat_symm +} + +/// Returns all that aren't +// #[inline(always)] +// fn find_unique_tolerance(rmat_fr_qsym_c_dir: &[[f64; 3]], tolerance: f64) -> Vec<[f64; 3]> { +// rmat_fr_qsym_c_dir.to_vec() +// } + +#[inline(always)] +fn within_fiber(c_dir: &[f64], + s_dir: &[f64], + quats: &[f64], + symm_quat: &[[f64; 4]], + distance_tolerance: f64, + in_fibers: &mut [bool]) { + + assert!(c_dir.len() >= 3); + assert!(s_dir.len() >= 3); + assert!(quats.len() >= 4); + assert!(symm_quat.len() >= SYM_LEN); + + assert!(in_fibers.len() == (quats.len() / 4)); + + let c = { + let inv_c_norm = 1.0 / norm::<3, f64>(c_dir); + [c_dir[0] * inv_c_norm, c_dir[1] * inv_c_norm, c_dir[2] * inv_c_norm] + }; + + let s = { + let inv_s_norm = 1.0 / norm::<3, f64>(s_dir); + [s_dir[0] * inv_s_norm, s_dir[1] * inv_s_norm, s_dir[2] * 
inv_s_norm] + }; + + // Could maybe move this over to a vec if we want this to be easily generic over a ton of symmetry conditions... + let mut rmat_fr_qsym_c_dir = [[0.0_f64; 3]; SYM_LEN]; + rmat_fr_qsym_c_dir.iter_mut() + .zip(symm_quat) + .for_each(|(prod, quat)| { + let rmat = quat2rmat(quat); + // Might need to make this the transpose... + mat_t_vec_mult::<3, 3, f64>(&rmat, &c, prod); + }); + + // If we really wanted to we could lower try and calculate the elements + // that aren't unique here but that's not worth the effort at all given + // how fast things are + // let c_syms: Vec<[f64; 3]> = find_unique_tolerance::(&rmat_fr_qsym_c_dir, f64::sqrt(f64::EPSILON)); + + let s_rmat_csym_prod: Vec = quats.chunks_exact(4).map(|quat| { + let sine = rmat_fr_qsym_c_dir.iter().map(|c_sym| { + let rmat = quat2rmat(quat); + let mut prod = [0.0_f64; 3]; + // Might need to make this the transpose... + mat_vec_mult::<3, 3, f64>(&rmat, c_sym, &mut prod); + dot_prod::<3, f64>(&s, &prod) + }) + .fold(std::f64::MIN, |a, b| a.max(b)); + sine + }).collect(); + + in_fibers.iter_mut() + .zip(s_rmat_csym_prod) + .for_each(|(in_fiber, sine)| { + let sine_safe = + { + if f64::abs(sine) > 1.00000001 { + sine.signum() + } + else { + sine + } + }; + + let distance = f64::acos(sine_safe); + *in_fiber = distance <= distance_tolerance; + }) +} \ No newline at end of file diff --git a/scripts/postprocessing/xtal_light_up/xtal_light_up/__init__.py b/scripts/postprocessing/xtal_light_up/xtal_light_up/__init__.py new file mode 100644 index 0000000..10dc208 --- /dev/null +++ b/scripts/postprocessing/xtal_light_up/xtal_light_up/__init__.py @@ -0,0 +1,10 @@ +# # import the contents of the Rust library into the Python extension +# from xtal_light_up import * + +# # optional: include the documentation from the Rust module +# from xtal_light_up import __doc__ # noqa: F401 + + +# class PythonClass: +# def __init__(self, value: int) -> None: +# self.value = value \ No newline at end of file diff --git a/src/BCManager.cpp b/src/BCManager.cpp index 8d7958e..da11702 100644 --- a/src/BCManager.cpp +++ b/src/BCManager.cpp @@ -95,9 +95,10 @@ void BCManager::updateBCData(mfem::Array & ess_bdr, mfem::Array2D & BCData& bc = this->GetBCInstance(i + 1); BCData::getComponents(bc.compID, cmp_row); - component(i, 0) = cmp_row[0]; - component(i, 1) = cmp_row[1]; - component(i, 2) = cmp_row[2]; + const int bcID = ess_id[i] - 1; + component(bcID, 0) = cmp_row[0]; + component(bcID, 1) = cmp_row[1]; + component(bcID, 2) = cmp_row[2]; } } } @@ -132,11 +133,12 @@ void BCManager::updateBCData(mfem::Array & ess_bdr, mfem::Vector & vgrad, m for (std::uint32_t i = 0; i < ess_id.size(); ++i) { // set the active boundary attributes if (ess_comp[i] != 0) { - ess_bdr[ess_id[i] - 1] = 1; + const int bcID = ess_id[i] - 1; + ess_bdr[bcID] = 1; BCData::getComponents(ess_comp[i], cmp_row); - component(i, 0) = cmp_row[0]; - component(i, 1) = cmp_row[1]; - component(i, 2) = cmp_row[2]; + component(bcID, 0) = cmp_row[0]; + component(bcID, 1) = cmp_row[1]; + component(bcID, 2) = cmp_row[2]; } } } diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e2217a6..cf29b7a 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,6 +9,7 @@ set(EXACONSTIT_HEADERS mechanics_integrators.hpp mechanics_ecmech.hpp mechanics_kernels.hpp + mechanics_lightup.hpp mechanics_log.hpp mechanics_umat.hpp mechanics_operator_ext.hpp @@ -50,25 +51,37 @@ endif() set(EXACONSTIT_DEPENDS) exaconstit_fill_depends_list(LIST_NAME EXACONSTIT_DEPENDS - DEPENDS_ON mfem ecmech raja mpi 
snls) - -if(ENABLE_OPENMP) - list(APPEND EXACONSTIT_DEPENDS openmp) + DEPENDS_ON mfem ecmech snls RAJA camp mpi) + +if (${BLT_VERSION} VERSION_GREATER_EQUAL 0.6.0) + if(ENABLE_CUDA) + list(APPEND EXACONSTIT_DEPENDS blt::cuda_runtime blt::cuda) + endif() + if(ENABLE_OPENMP) + list(APPEND EXACONSTIT_DEPENDS blt::openmp) + endif() +else() + if(ENABLE_CUDA) + list(APPEND EXACONSTIT_DEPENDS cuda cuda_runtime) + endif() + if(ENABLE_OPENMP) + list(APPEND EXACONSTIT_DEPENDS openmp) + endif() endif() -if(ENABLE_CUDA) - list(APPEND EXACONSTIT_DEPENDS cuda CUDA::cublas CUDA::cusparse) +if(ENABLE_HIP) + list(APPEND EXACONSTIT_DEPENDS blt::hip blt::hip_runtime) endif() -if(ENABLE_HIP) - list(APPEND EXACONSTIT_DEPENDS blt::hip blt::hip_runtime roc::rocsparse roc::rocrand) +if (SNLS_USE_RAJA_PORT_SUITE) + list(APPEND EXACONSTIT_DEPENDS chai umpire fmt::fmt) endif() if(ENABLE_CALIPER) list(APPEND EXACONSTIT_DEPENDS caliper) endif() -#include_directories(BEFORE ${PROJECT_BINARY_DIR}) +message("-- EXACONSTIT_DEPENDS: ${EXACONSTIT_DEPENDS}") #------------------------------------------------------------------------------ # Defines @@ -109,16 +122,24 @@ blt_add_library(NAME exaconstit_static set(EXACONSTIT_DRIVER) -if(ENABLE_OPENMP) - list(APPEND EXACONSTIT_DRIVER openmp) -endif() - -if(ENABLE_CUDA) - list(APPEND EXACONSTIT_DRIVER cuda) +if (${BLT_VERSION} VERSION_GREATER_EQUAL 0.6.0) + if(ENABLE_CUDA) + list(APPEND EXACONSTIT_DRIVER blt::cuda_runtime blt::cuda CUDA::cublas) + endif() + if(ENABLE_OPENMP) + list(APPEND EXACONSTIT_DRIVER blt::openmp) + endif() +else() + if(ENABLE_CUDA) + list(APPEND EXACONSTIT_DRIVER cuda cuda_runtime CUDA::cublas) + endif() + if(ENABLE_OPENMP) + list(APPEND EXACONSTIT_DRIVER openmp) + endif() endif() if(ENABLE_HIP) - list(APPEND EXACONSTIT_DRIVER blt::hip blt::hip_runtime) + list(APPEND EXACONSTIT_DRIVER blt::hip blt::hip_runtime hipblas rocsparse rocrand) endif() blt_add_executable(NAME mechanics diff --git a/src/mechanics_driver.cpp b/src/mechanics_driver.cpp index 47e06f8..180769e 100644 --- a/src/mechanics_driver.cpp +++ b/src/mechanics_driver.cpp @@ -119,6 +119,9 @@ int main(int argc, char *argv[]) MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &num_procs); MPI_Comm_rank(MPI_COMM_WORLD, &myid); +#if (MFEM_HYPRE_VERSION >= 21900) + Hypre::Init(); +#endif // Used to scope the main program away from the main MPI Init and Finalize calls { // Here we start a timer to time everything @@ -313,7 +316,7 @@ int main(int argc, char *argv[]) for (int lev = 0; lev < toml_opt.par_ref_levels; lev++) { pmesh->UniformRefinement(); } - + pmesh->SetAttributes(); } // Mesh related calls // Called only once { @@ -348,13 +351,8 @@ int main(int argc, char *argv[]) ParFiniteElementSpace l2_fes_cen(pmesh, &l2_fec, dim, mfem::Ordering::byVDIM); ParFiniteElementSpace l2_fes_voigt(pmesh, &l2_fec, 6, mfem::Ordering::byVDIM); ParFiniteElementSpace l2_fes_tens(pmesh, &l2_fec, 9, mfem::Ordering::byVDIM); - int gdot_size = 1; - if(toml_opt.xtal_type == XtalType::FCC || toml_opt.xtal_type == XtalType::BCC) { - gdot_size = 12; - } else if (toml_opt.xtal_type == XtalType::HCP) { - gdot_size = 24; - } - ParFiniteElementSpace l2_fes_gdots(pmesh, &l2_fec, gdot_size, mfem::Ordering::byVDIM); + ParFiniteElementSpace l2_fes_hard(pmesh, &l2_fec, toml_opt.hard_size, mfem::Ordering::byVDIM); + ParFiniteElementSpace l2_fes_gdots(pmesh, &l2_fec, toml_opt.gdot_size, mfem::Ordering::byVDIM); ParGridFunction vonMises(&l2_fes); vonMises = 0.0; @@ -374,19 +372,13 @@ int main(int argc, char *argv[]) } #endif - 
ParGridFunction dpeff(&l2_fes); - ParGridFunction pleff(&l2_fes); - ParGridFunction hardness(&l2_fes); + ParGridFunction dpeff(&l2_fes_pl); + ParGridFunction pleff(&l2_fes_pl); + ParGridFunction hardness(&l2_fes_hard); ParGridFunction quats(&l2_fes_ori); - ParGridFunction gdots(&l2_fes); + ParGridFunction gdots(&l2_fes_gdots); if (toml_opt.mech_type == MechType::EXACMECH) { - dpeff.SetSpace(&l2_fes_pl); - pleff.SetSpace(&l2_fes_pl); - // Right now this is only a scalar value but that might change later... - hardness.SetSpace(&l2_fes_pl); - quats.SetSpace(&l2_fes_ori); - gdots.SetSpace(&l2_fes_gdots); if (toml_opt.light_up) { elem_centroid = new ParGridFunction(&l2_fes_cen); elastic_strain = new ParGridFunction(&l2_fes_voigt); diff --git a/src/mechanics_ecmech.cpp b/src/mechanics_ecmech.cpp index 4961df8..7d2e5e9 100644 --- a/src/mechanics_ecmech.cpp +++ b/src/mechanics_ecmech.cpp @@ -1,7 +1,8 @@ #include "mfem.hpp" #include "mfem/general/forall.hpp" #include "ECMech_cases.h" -#include "ECMech_evptnWrap.h" +#include "ECMech_const.h" + #include "mechanics_model.hpp" #include "mechanics_log.hpp" #include "mechanics_ecmech.hpp" @@ -13,8 +14,6 @@ #include "mechanics_kernels.hpp" using namespace mfem; -using namespace std; -using namespace ecmech; namespace { @@ -187,6 +186,199 @@ void kernel(const ecmech::matModelBase* mat_model_base, } // End private namespace + +ExaCMechModel::ExaCMechModel( + mfem::QuadratureFunction *_q_stress0, mfem::QuadratureFunction *_q_stress1, + mfem::QuadratureFunction *_q_matGrad, mfem::QuadratureFunction *_q_matVars0, + mfem::QuadratureFunction *_q_matVars1, + mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords, + mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k, + ecmech::ExecutionStrategy _accel, Assembly _assembly, std::string mat_model_name + ) : + ExaModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1, + _beg_coords, _end_coords, _props, _nProps, _nStateVars, _assembly), + temp_k(_temp_k), accel(_accel) +{ + setup_data_structures(); + setup_model(mat_model_name); +} + +void ExaCMechModel::setup_data_structures() { + // First find the total number of points that we're dealing with so nelems * nqpts + const int vdim = stress0->GetVDim(); + const int size = stress0->Size(); + const int npts = size / vdim; + // Now initialize all of the vectors that we'll be using with our class + vel_grad_array = new mfem::Vector(npts * ecmech::ndim * ecmech::ndim, mfem::Device::GetMemoryType()); + eng_int_array = new mfem::Vector(npts * ecmech::ne, mfem::Device::GetMemoryType()); + w_vec_array = new mfem::Vector(npts * ecmech::nwvec, mfem::Device::GetMemoryType()); + vol_ratio_array = new mfem::Vector(npts * ecmech::nvr, mfem::Device::GetMemoryType()); + stress_svec_p_array = new mfem::Vector(npts * ecmech::nsvp, mfem::Device::GetMemoryType()); + d_svec_p_array = new mfem::Vector(npts * ecmech::nsvp, mfem::Device::GetMemoryType()); + tempk_array = new mfem::Vector(npts, mfem::Device::GetMemoryType()); + sdd_array = new mfem::Vector(npts * ecmech::nsdd, mfem::Device::GetMemoryType()); + eff_def_rate = new mfem::Vector(npts, mfem::Device::GetMemoryType()); + // If we're using a Device we'll want all of these vectors on it and staying there. + // Also, note that UseDevice() only returns a boolean saying if it's on the device or not + // rather than telling the vector whether or not it needs to lie on the device. 
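+ // UseDevice(true) marks each vector for device execution, so the zero assignments that
+ // follow (and the kernels that later touch these vectors) operate on the device-resident data.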
+ vel_grad_array->UseDevice(true); *vel_grad_array = 0.0; + eng_int_array->UseDevice(true); *eng_int_array = 0.0; + w_vec_array->UseDevice(true); *w_vec_array = 0.0; + vol_ratio_array->UseDevice(true); *vol_ratio_array = 0.0; + stress_svec_p_array->UseDevice(true); *stress_svec_p_array = 0.0; + d_svec_p_array->UseDevice(true); *d_svec_p_array = 0.0; + tempk_array->UseDevice(true); *tempk_array = 0.0; + sdd_array->UseDevice(true); *sdd_array = 0.0; + eff_def_rate->UseDevice(true); *eff_def_rate = 0.0; +} + +void ExaCMechModel::setup_model(std::string mat_model_name) { + // First aspect is setting up our various map structures + index_map = ecmech::modelParamIndexMap(mat_model_name); + // additional terms we need to add + index_map["num_volumes"] = 1; + index_map["index_volume"] = index_map["index_slip_rates"] + index_map["num_slip_system"]; + index_map["num_internal_energy"] = ecmech::ne; + index_map["index_internal_energy"] = index_map["index_volume"] + index_map["num_volumes"]; + + { + std::string s_shrateEff = "shrateEff"; + std::string s_shrEff = "shrEff"; + std::string s_pl_work = "pl_work"; + std::string s_quats = "quats"; + std::string s_gdot = "gdot"; + std::string s_hard = "hardness"; + std::string s_ieng = "int_eng"; + std::string s_rvol = "rel_vol"; + std::string s_est = "elas_strain"; + + std::pair i_sre = std::make_pair(index_map["index_effective_shear_rate"], 1); + std::pair i_se = std::make_pair(index_map["index_effective_shear"], 1); + std::pair i_plw = std::make_pair(index_map["index_flow_strength"], 1); + std::pair i_q = std::make_pair(index_map["index_lattice_ori"], 4); + std::pair i_g = std::make_pair(index_map["index_slip_rates"], index_map["num_slip_system"]); + std::pair i_h = std::make_pair(index_map["index_hardness"], index_map["num_hardening"]); + std::pair i_en = std::make_pair(index_map["index_internal_energy"], ecmech::ne); + std::pair i_rv = std::make_pair(index_map["index_volume"], 1); + std::pair i_est = std::make_pair(index_map["index_dev_elas_strain"], ecmech::ntvec); + + qf_mapping[s_shrateEff] = i_sre; + qf_mapping[s_shrEff] = i_se; + qf_mapping[s_pl_work] = i_plw; + qf_mapping[s_quats] = i_q; + qf_mapping[s_gdot] = i_g; + qf_mapping[s_hard] = i_h; + qf_mapping[s_ieng] = i_en; + qf_mapping[s_rvol] = i_rv; + qf_mapping[s_est] = i_est; + } + + // Now we can create our model + mat_model_base = ecmech::makeMatModel(mat_model_name); + // and update our model strides from the default values + size_t num_state_vars = index_map["num_hist"] + ecmech::ne + 1; + std::vector strides; + // Deformation rate stride + strides.push_back(ecmech::nsvp); + // Spin rate stride + strides.push_back(ecmech::ndim); + // Volume ratio stride + strides.push_back(ecmech::nvr); + // Internal energy stride + strides.push_back(ecmech::ne); + // Stress vector stride + strides.push_back(ecmech::nsvp); + // History variable stride + strides.push_back(num_state_vars); + // Temperature stride + strides.push_back(1); + // SDD stride + strides.push_back(ecmech::nsdd); + // Update our stride values from the default as our history strides are different + mat_model_base->updateStrides(strides); + + // Now get out the parameters to instantiate our history variables + // Opts and strs are just empty vectors of int and strings + std::vector params; + std::vector opts; + std::vector strs; + + for (int i = 0; i < matProps->Size(); i++) { + params.push_back(matProps->Elem(i)); + } + + // We really shouldn't see this change over time at least for our applications. 
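+ // initFromParams() consumes the flat parameter vector assembled above, complete() finalizes
+ // the model's internal setup, and setExecutionStrategy() selects the execution back end the
+ // material kernels run on.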
+ mat_model_base->initFromParams(opts, params, strs); + mat_model_base->complete(); + mat_model_base->setExecutionStrategy(accel); + + std::vector histInit; + { + std::vector names; + std::vector plot; + std::vector state; + mat_model_base->getHistInfo(names, histInit, plot, state); + } + + init_state_vars(histInit); +} + +void ExaCMechModel::init_state_vars(std::vector hist_init) +{ + mfem::Vector histInit(index_map["num_hist"], mfem::Device::GetMemoryType()); + histInit.UseDevice(true); histInit.HostReadWrite(); + assert(hist_init.size() == index_map["num_hist"]); + + for (uint i = 0; i < hist_init.size(); i++) { + histInit(i) = hist_init.at(i); + } + + const double* histInit_vec = histInit.Read(); + double* state_vars = matVars0->ReadWrite(); + + const size_t qf_size = (matVars0->Size()) / (matVars0->GetVDim()); + + const size_t vdim = matVars0->GetVDim(); + + const size_t ind_dp_eff = index_map["index_effective_shear_rate"]; + const size_t ind_eql_pl_strain = index_map["index_effective_shear"]; + const size_t ind_pl_work = index_map["index_flow_strength"]; + const size_t ind_num_evals = index_map["index_num_func_evals"]; + const size_t ind_hardness = index_map["index_hardness"]; + const size_t ind_vols = index_map["index_volume"]; + const size_t ind_int_eng = index_map["index_internal_energy"]; + const size_t ind_dev_elas_strain = index_map["index_dev_elas_strain"]; + const size_t ind_gdot = index_map["index_slip_rates"]; + const size_t num_slip = index_map["num_slip_system"]; + const size_t num_hardness = index_map["num_hardening"]; + + mfem::MFEM_FORALL(i, qf_size, { + const size_t ind = i * vdim; + + state_vars[ind + ind_dp_eff] = histInit_vec[ind_dp_eff]; + state_vars[ind + ind_eql_pl_strain] = histInit_vec[ind_eql_pl_strain]; + state_vars[ind + ind_pl_work] = histInit_vec[ind_pl_work]; + state_vars[ind + ind_num_evals] = histInit_vec[ind_num_evals]; + state_vars[ind + ind_vols] = 1.0; + + for (size_t j = 0; j < num_hardness; j++) { + state_vars[ind + ind_hardness + j] = histInit_vec[ind_hardness + j]; + } + + for (size_t j = 0; j < ecmech::ne; j++) { + state_vars[ind + ind_int_eng + j] = 0.0; + } + + for (size_t j = 0; j < ecmech::ntvec; j++) { + state_vars[ind + ind_dev_elas_strain + j] = histInit_vec[ind_dev_elas_strain + j]; + } + + for (size_t j = 0; j < num_slip; j++) { + state_vars[ind + ind_gdot + j] = histInit_vec[ind_gdot + j]; + } + }); +} + // Our model set-up makes use of several preprocessing kernels, // the actual material model kernel, and finally a post-processing kernel. void ExaCMechModel::ModelSetup(const int nqpts, const int nelems, const int /*space_dim*/, diff --git a/src/mechanics_ecmech.hpp b/src/mechanics_ecmech.hpp index 897b3aa..8ffe24e 100644 --- a/src/mechanics_ecmech.hpp +++ b/src/mechanics_ecmech.hpp @@ -1,11 +1,8 @@ -#ifndef MECHANICS_ECMECH -#define MECHANICS_ECMECH +#pragma once #include "mfem.hpp" -#include "mfem/general/forall.hpp" -#include "ECMech_cases.h" -#include "ECMech_evptnWrap.h" #include "ECMech_const.h" +#include "ECMech_matModelBase.h" #include "mechanics_model.hpp" /// Base class for all of our ExaCMechModels. 
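The following is a minimal numpy sketch (not part of the build) of the name -> (offset, length) bookkeeping used by setup_model() above and of how init_state_vars() replicates the initial history at every quadrature point. The field names mirror qf_mapping, but every offset, count, and the num_hist value below are placeholders; the real numbers come from ecmech::modelParamIndexMap() and getHistInfo():

import numpy as np

# Placeholder layout mirroring qf_mapping: name -> (offset, length) within one point's
# state-variable block. Real offsets come from the ECMech index map.
qf_mapping = {
    "shrateEff":   (0, 1),
    "shrEff":      (1, 1),
    "pl_work":     (2, 1),
    "elas_strain": (4, 5),
    "quats":       (9, 4),
    "hardness":    (13, 1),
    "gdot":        (14, 12),
    "rel_vol":     (26, 1),
    "int_eng":     (27, 1),
}
num_hist = 26            # placeholder for index_map["num_hist"]
vdim = num_hist + 1 + 1  # history + relative volume + internal energy (ne = 1 assumed)
npts = 8                 # nelems * nqpts for a toy problem

hist_init = np.zeros(num_hist)           # stand-in for the values from getHistInfo()
hist_init[qf_mapping["quats"][0]] = 1.0  # e.g. a scalar-first identity quaternion

# Replicate the initial history at every quadrature point, as init_state_vars() does,
# then override the relative volume to 1 and zero the internal energy.
state_vars = np.zeros((npts, vdim))
state_vars[:, :num_hist] = hist_init
state_vars[:, qf_mapping["rel_vol"][0]] = 1.0
state_vars[:, qf_mapping["int_eng"][0]] = 0.0

# Recovering a named field for post-processing, e.g. the per-point orientations:
off, length = qf_mapping["quats"]
quats = state_vars[:, off:off + length]  # shape (npts, 4)

Only the slicing pattern matters here; in the C++ code the same indexing is done with raw offsets inside MFEM_FORALL.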
@@ -37,46 +34,17 @@ class ExaCMechModel : public ExaModel mfem::Vector *sdd_array; mfem::Vector *eff_def_rate; + std::map index_map; + public: ExaCMechModel(mfem::QuadratureFunction *_q_stress0, mfem::QuadratureFunction *_q_stress1, mfem::QuadratureFunction *_q_matGrad, mfem::QuadratureFunction *_q_matVars0, mfem::QuadratureFunction *_q_matVars1, mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords, mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k, - ecmech::ExecutionStrategy _accel, Assembly _assembly) : - ExaModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1, - _beg_coords, _end_coords, _props, _nProps, _nStateVars, _assembly), - temp_k(_temp_k), accel(_accel) - { - // First find the total number of points that we're dealing with so nelems * nqpts - const int vdim = _q_stress0->GetVDim(); - const int size = _q_stress0->Size(); - const int npts = size / vdim; - // Now initialize all of the vectors that we'll be using with our class - vel_grad_array = new mfem::Vector(npts * ecmech::ndim * ecmech::ndim, mfem::Device::GetMemoryType()); - eng_int_array = new mfem::Vector(npts * ecmech::ne, mfem::Device::GetMemoryType()); - w_vec_array = new mfem::Vector(npts * ecmech::nwvec, mfem::Device::GetMemoryType()); - vol_ratio_array = new mfem::Vector(npts * ecmech::nvr, mfem::Device::GetMemoryType()); - stress_svec_p_array = new mfem::Vector(npts * ecmech::nsvp, mfem::Device::GetMemoryType()); - d_svec_p_array = new mfem::Vector(npts * ecmech::nsvp, mfem::Device::GetMemoryType()); - tempk_array = new mfem::Vector(npts, mfem::Device::GetMemoryType()); - sdd_array = new mfem::Vector(npts * ecmech::nsdd, mfem::Device::GetMemoryType()); - eff_def_rate = new mfem::Vector(npts, mfem::Device::GetMemoryType()); - // If we're using a Device we'll want all of these vectors on it and staying there. - // Also, note that UseDevice() only returns a boolean saying if it's on the device or not - // rather than telling the vector whether or not it needs to lie on the device. - vel_grad_array->UseDevice(true); *vel_grad_array = 0.0; - eng_int_array->UseDevice(true); *eng_int_array = 0.0; - w_vec_array->UseDevice(true); *w_vec_array = 0.0; - vol_ratio_array->UseDevice(true); *vol_ratio_array = 0.0; - stress_svec_p_array->UseDevice(true); *stress_svec_p_array = 0.0; - d_svec_p_array->UseDevice(true); *d_svec_p_array = 0.0; - tempk_array->UseDevice(true); *tempk_array = 0.0; - sdd_array->UseDevice(true); *sdd_array = 0.0; - eff_def_rate->UseDevice(true); *eff_def_rate = 0.0; - } + ecmech::ExecutionStrategy _accel, Assembly _assembly, std::string mat_model_name); - virtual ~ExaCMechModel() + ~ExaCMechModel() { delete vel_grad_array; delete eng_int_array; @@ -87,379 +55,26 @@ class ExaCMechModel : public ExaModel delete tempk_array; delete sdd_array; delete eff_def_rate; + delete mat_model_base; } + void setup_data_structures(); + void setup_model(std::string mat_model_name); + void init_state_vars(std::vector hist_init); + /** This model takes in the velocity, det(jacobian), and local_grad/jacobian. * It then computes velocity gradient symm and skw tensors and passes * that to our material model in order to get out our Cauchy stress and * the material tangent matrix (d \sigma / d Vgrad_{sym}). It also * updates all of the state variables that live at the quadrature pts. 
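 * (Here the symmetric part is D = (L + L^T)/2 and the skew part is W = (L - L^T)/2, where L is the velocity gradient.)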
*/ - virtual void ModelSetup(const int nqpts, const int nelems, const int /*space_dim*/, - const int nnodes, const mfem::Vector &jacobian, - const mfem::Vector &loc_grad, const mfem::Vector &vel); + void ModelSetup(const int nqpts, const int nelems, const int /*space_dim*/, + const int nnodes, const mfem::Vector &jacobian, + const mfem::Vector &loc_grad, const mfem::Vector &vel) override; /// If we needed to do anything to our state variables once things are solved /// for we do that here. - virtual void UpdateModelVars(){} - - virtual void calcDpMat(mfem::QuadratureFunction &DpMat) const = 0; -}; - -/// A generic templated class that takes in a typedef of the crystal model that -/// we want to use from ExaCMech. -template -class ECMechXtalModel : public ExaCMechModel -{ - protected: - ecmechXtal *mat_model; - // Just various indices that we share during initialization - // in the future these could probably be eliminated all together - int ind_dp_eff, ind_eql_pl_strain, ind_pl_work, ind_num_evals, ind_dev_elas_strain; - int ind_quats, ind_hardness, ind_gdot, ind_vols, ind_int_eng; - int num_hardness, num_slip, num_vols, num_int_eng; - - // Note to self: we might want to in the future add support for the calculation - // of D^p_{eff} and \int D^p_{eff} dt for post processing needs - - public: - ECMechXtalModel(mfem::QuadratureFunction *_q_stress0, mfem::QuadratureFunction *_q_stress1, - mfem::QuadratureFunction *_q_matGrad, mfem::QuadratureFunction *_q_matVars0, - mfem::QuadratureFunction *_q_matVars1, - mfem::ParGridFunction* _beg_coords, mfem::ParGridFunction* _end_coords, - mfem::Vector *_props, int _nProps, int _nStateVars, double _temp_k, - ecmech::ExecutionStrategy _accel, Assembly _assembly) : - ExaCMechModel(_q_stress0, _q_stress1, _q_matGrad, _q_matVars0, _q_matVars1, - _beg_coords, _end_coords, _props, _nProps, _nStateVars, _temp_k, - _accel, _assembly) - { - // For FCC material models we have the following state variables - // and their number of components - // effective shear rate(1), effective shear(1), flow strength(1), n_evals(1), - // deviatoric elastic strain(5), quaternions(4), h(Kinetics::nH), - // gdot(SlipGeom::nslip), relative volume(1), internal energy(ecmech::ne) - int num_state_vars = ecmechXtal::numHist + ecmech::ne + 1; - - std::vector strides; - // Deformation rate stride - strides.push_back(ecmech::nsvp); - // Spin rate stride - strides.push_back(ecmech::ndim); - // Volume ratio stride - strides.push_back(ecmech::nvr); - // Internal energy stride - strides.push_back(ecmech::ne); - // Stress vector stride - strides.push_back(ecmech::nsvp); - // History variable stride - strides.push_back(num_state_vars); - // Temperature stride - strides.push_back(1); - // SDD stride - strides.push_back(ecmech::nsdd); - - mat_model = new ecmechXtal(strides.data(), strides.size()); - - mat_model_base = dynamic_cast(mat_model); - - ind_dp_eff = ecmech::evptn::iHistA_shrateEff; - ind_eql_pl_strain = ecmech::evptn::iHistA_shrEff; - ind_pl_work = ecmech::evptn::iHistA_flowStr; - ind_num_evals = ecmech::evptn::iHistA_nFEval; - ind_dev_elas_strain = ecmech::evptn::iHistLbE; - ind_quats = ecmech::evptn::iHistLbQ; - ind_hardness = ecmech::evptn::iHistLbH; - - ind_gdot = mat_model->iHistLbGdot; - // This will always be 1 for this class - num_hardness = mat_model->nH; - // This will always be 12 for this class - num_slip = mat_model->nslip; - // The number of vols -> we actually only need to save the previous time step value - // instead of all 4 values used in the evalModel. 
The rest can be calculated from - // this value. - num_vols = 1; - ind_vols = ind_gdot + num_slip; - // The number of internal energy variables -> currently 1 - num_int_eng = ecmech::ne; - ind_int_eng = ind_vols + num_vols; - { - std::string s_shrateEff = "shrateEff"; - std::string s_shrEff = "shrEff"; - std::string s_pl_work = "pl_work"; - std::string s_quats = "quats"; - std::string s_gdot = "gdot"; - std::string s_hard = "hardness"; - std::string s_ieng = "int_eng"; - std::string s_rvol = "rel_vol"; - std::string s_est = "elas_strain"; - - std::pair i_sre = std::make_pair(ind_dp_eff, 1); - std::pair i_se = std::make_pair(ind_eql_pl_strain, 1); - std::pair i_plw = std::make_pair(ind_pl_work, 1); - std::pair i_q = std::make_pair(ind_quats, 4); - std::pair i_g = std::make_pair(ind_gdot, num_slip); - std::pair i_h = std::make_pair(ind_hardness, num_hardness); - std::pair i_en = std::make_pair(ind_int_eng, ecmech::ne); - std::pair i_rv = std::make_pair(ind_vols, 1); - std::pair i_est = std::make_pair(ind_dev_elas_strain, ecmech::ntvec); - - qf_mapping[s_shrateEff] = i_sre; - qf_mapping[s_shrEff] = i_se; - qf_mapping[s_pl_work] = i_plw; - qf_mapping[s_quats] = i_q; - qf_mapping[s_gdot] = i_g; - qf_mapping[s_hard] = i_h; - qf_mapping[s_ieng] = i_en; - qf_mapping[s_rvol] = i_rv; - qf_mapping[s_est] = i_est; - } - - // Opts and strs are just empty vectors of int and strings - std::vector params; - std::vector opts; - std::vector strs; - - MFEM_ASSERT(matProps->Size() == ecmechXtal::nParams, - "Properties did not contain " << ecmechXtal::nParams << - " parameters for Voce model."); - - for (int i = 0; i < matProps->Size(); i++) { - params.push_back(matProps->Elem(i)); - } - - // We really shouldn't see this change over time at least for our applications. - mat_model_base->initFromParams(opts, params, strs); - mat_model_base->complete(); - mat_model_base->setExecutionStrategy(accel); - - std::vector histInit; - { - std::vector names; - std::vector plot; - std::vector state; - mat_model_base->getHistInfo(names, histInit, plot, state); - } - - init_state_vars(_q_matVars0, histInit); - } - - /// This really shouldn't be used. 
It's only public due to the internal - /// MFEM_FORALL requiring it to be public - void init_state_vars(mfem::QuadratureFunction *_q_matVars0, std::vector hist_init) - { - mfem::Vector histInit(ecmechXtal::numHist, mfem::Device::GetMemoryType()); - histInit.UseDevice(true); histInit.HostReadWrite(); - assert(hist_init.size() == ecmechXtal::numHist); - - for (uint i = 0; i < hist_init.size(); i++) { - histInit(i) = hist_init.at(i); - } - - const double* histInit_vec = histInit.Read(); - - double* state_vars = _q_matVars0->ReadWrite(); - - int qf_size = (_q_matVars0->Size()) / (_q_matVars0->GetVDim()); - - int vdim = _q_matVars0->GetVDim(); - - const int ind_dp_eff_ = ind_dp_eff; - const int ind_eql_pl_strain_ = ind_eql_pl_strain; - const int ind_pl_work_ = ind_pl_work; - const int ind_num_evals_ = ind_num_evals; - const int ind_hardness_ = ind_hardness; - const int ind_vols_ = ind_vols; - const int ind_int_eng_ = ind_int_eng; - const int ind_dev_elas_strain_ = ind_dev_elas_strain; - const int ind_gdot_ = ind_gdot; - const int nslip = num_slip; - - mfem::MFEM_FORALL(i, qf_size, { - const int ind = i * vdim; - - state_vars[ind + ind_dp_eff_] = histInit_vec[ind_dp_eff_]; - state_vars[ind + ind_eql_pl_strain_] = histInit_vec[ind_eql_pl_strain_]; - state_vars[ind + ind_pl_work_] = histInit_vec[ind_pl_work_]; - state_vars[ind + ind_num_evals_] = histInit_vec[ind_num_evals_]; - state_vars[ind + ind_hardness_] = histInit_vec[ind_hardness_]; - state_vars[ind + ind_vols_] = 1.0; - - for (int j = 0; j < ecmech::ne; j++) { - state_vars[ind + ind_int_eng_] = 0.0; - } - - for (int j = 0; j < 5; j++) { - state_vars[ind + ind_dev_elas_strain_ + j] = histInit_vec[ind_dev_elas_strain_ + j]; - } - - for (int j = 0; j < nslip; j++) { - state_vars[ind + ind_gdot_ + j] = histInit_vec[ind_gdot_ + j]; - } - }); - } - // We're re-using our deformation gradient quadrature function for this - // calculation which is why we use a 9 dim QF rather than a 6 dim QF - virtual void calcDpMat(mfem::QuadratureFunction &DpMat) const override { - auto slip_geom = mat_model->getSlipGeom(); - const int ind_slip = ind_gdot; - const int ind_quats_ = ind_quats; - const int npts = DpMat.GetSpace()->GetSize(); - auto gdot = mfem::Reshape(matVars1->Read(), matVars1->GetVDim(), npts); - auto d_dpmat = mfem::Reshape(DpMat.Write(), 3, 3, npts); - - static constexpr const int nslip = ecmechXtal::nslip; - - MFEM_ASSERT(DpMat.GetVDim() == 9, "DpMat needs to have a vdim of 9"); - - mfem::MFEM_FORALL(ipts, npts, { - // Initialize dphat to be 0.0 initially - double dphat[ecmech::ntvec]; - for (int idvec = 0; idvec < ecmech::ntvec; idvec++) { - dphat[idvec] = 0.0; - } - // Compute dphat in the crystal frame - ecmech::vecsVMa(dphat, slip_geom.getP(), &gdot(ind_slip, ipts)); - - // Calculated D^p in the crystal frame so we need to rotate things - // back to the sample frame now - double rot_mat[ecmech::ndim * ecmech::ndim]; - - // double quat[ecmech::qdim]; - // quat[0] = gdot(ind_quats, ipts); - // quat[1] = gdot(ind_quats + 1, ipts); - // quat[2] = gdot(ind_quats + 2, ipts); - // quat[3] = gdot(ind_quats + 3, ipts); - ecmech::quat_to_tensor(rot_mat, &gdot(ind_quats_, ipts)); - // - double qr5x5_ls[ecmech::ntvec * ecmech::ntvec]; - ecmech::get_rot_mat_vecd(qr5x5_ls, rot_mat); - - double dphat_sm[ecmech::ntvec]; - ecmech::vecsVMa(dphat_sm, qr5x5_ls, dphat); - - // Need to now convert from the deviatoric vector representation of - // things back to the full symmetric tensor format - double t1 = ecmech::sqr2i * dphat_sm[0]; - double t2 = 
ecmech::sqr6i * dphat_sm[1]; - - d_dpmat(0, 0, ipts) = t1 - t2; - d_dpmat(1, 1, ipts) = -t1 - t2; - d_dpmat(2, 2, ipts) = ecmech::sqr2b3 * dphat_sm[1]; - d_dpmat(2, 1, ipts) = ecmech::sqr2i * dphat_sm[4]; - d_dpmat(1, 2, ipts) = d_dpmat(2, 1, ipts); - d_dpmat(0, 2, ipts) = ecmech::sqr2i * dphat_sm[3]; - d_dpmat(2, 0, ipts) = d_dpmat(0, 2, ipts); - d_dpmat(0, 1, ipts) = ecmech::sqr2i * dphat_sm[2]; - d_dpmat(1, 0, ipts) = d_dpmat(0, 1, ipts); - - }); - } - - virtual ~ECMechXtalModel() - { - delete mat_model; - } + virtual void UpdateModelVars() override {} + void calcDpMat(mfem::QuadratureFunction &/* DpMat */) const override {} }; -/** @brief A linear isotropic Voce hardening model with a power law formulation - * for the slip kinetics. - * - * This model generally can do a decent job of capturing the material behavior in strain rates - * that are a bit lower where thermally activated slip is a more appropriate approximation. - * Generally, you'll find that if fitted to capture the elastic plastic transition - * it will miss the later plastic behavior of the material. However if it is fitted - * to the general macroscopic stress-strain and d\sigma / d \epsilon_e vs epsilon - * curve, it will miss the elastic-plastic regime. Based on far-field high energy - * x-ray diffraction (ff-HEXD) data, this model is capable of capture 1st order - * behaviors of the distribution of elastic intragrain heterogeneity. However, - * it fails to capture transient behaviors of these distributions as seen in - * http://doi.org/10.7298/X4JM27SD and http://doi.org/10.1088/1361-651x/aa6dc5 - * for fatigue applications. - * - * A good reference for the Voce implementation can be found in: - * section 2.1 https://doi.org/10.1016/S0045-7825(98)00034-6 - * section 2.1 https://doi.org/10.1016/j.ijplas.2007.03.004 - * Basics for how to fit such a model can be found here: - * https://doi.org/10.1016/S0921-5093(01)01174-1 . Although, it should be noted - * that this is more for the MTS model it can be adapted to the Voce model by taking into - * account that the m parameter determines the rate sensitivity. So, the more rate insensitive - * the material is the closer this will be to 0. The exponent to the Voce - * hardening law can be determined by what ordered function best fits the - * $\frac{d\sigma}{d\epsilon_e} \text{vs} \epsilon$ curve. - * The initial CRSS term best determines when the material starts to plastically deform. - * The saturation CRSS term determines pretty much how much the material is able - * to harden. The hardening coeff. for CRSS best determines the rate at which the - * material hardens so larger values lead to a quicker hardening of the material. - * - * Params start off with: - * initial density, heat capacity at constant volume, and a tolerance param - * Params then include Elastic constants: - * c11, c12, c44 for Cubic crystals - * Params then include the following: - * shear modulus, m parameter seen in slip kinetics, gdot_0 term found in slip kinetic eqn, - * hardening coeff. 
defined for g_crss evolution eqn, initial CRSS value, - * initial CRSS saturation strength, CRSS saturation strength scaling exponent, - * CRSS saturation strength rate scaling coeff, tausi -> hdn_init (not used) - * Params then include the following: - * the Grüneisen parameter, reference internal energy - */ -typedef ECMechXtalModel VoceFCCModel; -typedef ECMechXtalModel VoceNLFCCModel; -typedef ECMechXtalModel>> - VoceBCCModel; -typedef ECMechXtalModel>> - VoceNLBCCModel; - -/** @brief A class with slip and hardening kinetics based on a single Kocks-Mecking dislocation density - * balanced thermally activated MTS-like slip kinetics with phonon drag effects. - * - * See papers https://doi.org/10.1088/0965-0393/17/3/035003 (Section 2 - 2.3) - * and https://doi.org/10.1063/1.4792227 (Section 3 up to the intro of the twinning kinetics ~ eq 8) - * for more info on this particular style of models. - * This model includes a combination of the above two see the actual implementation of - * ExaCMech ECMech_kinetics_KMBalD.h file for the actual specifics. - * - * This model is much more complicated than the simple Voce hardening model and power law slip kinetics - * seen above. However, it is capable of capturing the behavior of the material over a wide range of - * not only strain rates but also temperature ranges. The thermal activated slip kinetics is more or less - * what the slip kinetic power law used with the Voce hardening model approximates as seen in: - * https://doi.org/10.1016/0079-6425(75)90007-9 and more specifically the Emperical Law section eqns: - * 34h - 34s. It should be noted though that this was based on work for FCC materials. - * The classical MTS model can be seen here towards its application towards copper for historical context: - * https://doi.org/10.1016/0001-6160(88)90030-2 - * - * An incredibly detailed overview of the thermally activated slip mechanisms can - * be found in https://doi.org/10.1016/S0079-6425(02)00003-8 . The conclusions provide a nice - * overview for how several of the parameters can be fitted for this model. Sections 2.3 - 3.4 - * also go a bit more in-depth into the basis for why the fits are done the way they are - * conducted. - * The phonon drag contribution has shown to really start to play a role at strain rates - * 10^3 and above. A bit of a review on this topic can be found in https://doi.org/10.1016/0001-6160(87)90285-9 . - * It should be noted that the model implemented here does not follow the same formulation - * provided in that paper. This model can be thought of as a simplified version. 
- * - * Params start off with: - * initial density, heat capacity at constant volume, and a tolerance param - * Params then include Elastic constants: - * (c11, c12, c44 for Cubic crystals) or (c11, c12, c13, c33, and c44 for Hexagonal Crystals) - * Params then include the following: - * reference shear modulus, reference temperature, g_0 * b^3 / \kappa where b is the - * magnitude of the burger's vector and \kappa is Boltzmann's constant**, - * Peierls barrier, MTS curve shape parameter (p), MTS curve shape parameter (q), - * reference thermally activated slip rate, reference drag limited slip rate, - * drag reference stress, slip resistance const (g_0)**, slip resistance const (s)**, - * dislocation density production constant (k_1), dislocation density production - * constant (k_{2_0}), dislocation density exponential constant, - * reference net slip rate constant, reference relative dislocation density - * Params then include the following: - * the Grüneisen parameter, reference internal energy - */ -typedef ECMechXtalModel KinKMBalDDFCCModel; -/// See documentation related to KinKMBalDDFCCModel -typedef ECMechXtalModel KinKMBalDDHCPModel; -typedef ECMechXtalModel KinKMbalDDBCCModel; - -#endif diff --git a/src/mechanics_integrators.cpp b/src/mechanics_integrators.cpp index 8ac4b71..382c588 100644 --- a/src/mechanics_integrators.cpp +++ b/src/mechanics_integrators.cpp @@ -106,7 +106,7 @@ void ExaNLFIntegrator::AssembleElementGrad( DenseMatrix grad_trans, temp; DenseMatrix tan_stiff; - int ngrad_dim2 = 36; + constexpr int ngrad_dim2 = 36; double matGrad[ngrad_dim2]; // Delta in our timestep double dt = model->GetModelDt(); @@ -804,7 +804,7 @@ void ExaNLFIntegrator::AssembleEA(const FiniteElementSpace &fes, Vector &emat) RAJA::Layout layout_geom = RAJA::make_permuted_layout({{ nqpts, dim, dim, nelems } }, perm4); RAJA::View > geom_j_view(geom->J.Read(), layout_geom); const int nqpts_ = nqpts; - const int dim_ = dim; + const int dim_ = dim; MFEM_FORALL(i, nelems, { for (int j = 0; j < nqpts_; j++) { for (int k = 0; k < dim_; k++) { @@ -1114,7 +1114,7 @@ void ICExaNLFIntegrator::AssembleElementGrad( DenseMatrix grad_trans, temp; DenseMatrix tan_stiff; - int ngrad_dim2 = 36; + constexpr int ngrad_dim2 = 36; double matGrad[ngrad_dim2]; // Delta in our timestep double dt = model->GetModelDt(); diff --git a/src/mechanics_integrators.hpp b/src/mechanics_integrators.hpp index d236fd1..d780746 100644 --- a/src/mechanics_integrators.hpp +++ b/src/mechanics_integrators.hpp @@ -31,20 +31,21 @@ class ExaNLFIntegrator : public mfem::NonlinearFormIntegrator /// This doesn't do anything at this point. We can add the functionality /// later on if a use case arises. + using mfem::NonlinearFormIntegrator::GetElementEnergy; virtual double GetElementEnergy(const mfem::FiniteElement &el, mfem::ElementTransformation &Ttr, - const mfem::Vector &elfun); + const mfem::Vector &elfun) override; using mfem::NonlinearFormIntegrator::AssembleElementVector; /// Assembles the Div(sigma) term / RHS terms of our linearized system of equations. 
virtual void AssembleElementVector(const mfem::FiniteElement &el, mfem::ElementTransformation &Ttr, - const mfem::Vector &elfun, mfem::Vector &elvect); + const mfem::Vector &elfun, mfem::Vector &elvect) override; /// Assembles our gradient matrix (K matrix as seen in typical mechanics FEM formulations) virtual void AssembleElementGrad(const mfem::FiniteElement &el, mfem::ElementTransformation &Ttr, - const mfem::Vector & /*elfun*/, mfem::DenseMatrix &elmat); + const mfem::Vector & /*elfun*/, mfem::DenseMatrix &elmat) override; // We currently don't have the AssemblePADiagonal still need to work out what this // would look like for the 4D tensor contraction operation diff --git a/src/mechanics_kernels.hpp b/src/mechanics_kernels.hpp index e5d30f1..c4ae9c3 100644 --- a/src/mechanics_kernels.hpp +++ b/src/mechanics_kernels.hpp @@ -36,7 +36,7 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes, double el_vol = 0.0; int my_id; MPI_Comm_rank(MPI_COMM_WORLD, &my_id); - double data[size]; + mfem::Vector data(size); const int DIM2 = 2; std::array perm2 {{ 1, 0 } }; @@ -61,7 +61,7 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes, for (int j = 0; j < size; j++) { RAJA::ReduceSum seq_sum(0.0); RAJA::ReduceSum vol_sum(0.0); - RAJA::forall(default_range, [ = ] (int i_npts){ + RAJA::forall(default_range, [ = ] (int i_npts){ const double* val = &(qf_data[i_npts * size]); seq_sum += wts_data[i_npts] * val[j]; vol_sum += wts_data[i_npts]; @@ -116,7 +116,7 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes, tensor[i] = data[i]; } - MPI_Allreduce(&data, tensor.HostReadWrite(), size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + MPI_Allreduce(data.HostRead(), tensor.HostReadWrite(), size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); if (vol_avg) { double temp = el_vol; @@ -132,6 +132,136 @@ void ComputeVolAvgTensor(const mfem::ParFiniteElementSpace* fes, } } } + +//Computes the volume average values of values that lie at the quadrature points +//but only computes the values that aren't filtered out +// aka It only includes values that are set to true in filter +// It also returns the volume that corresponds to the values that were filtered +template +double ComputeVolAvgTensorFilter(const mfem::ParFiniteElementSpace* fes, + const mfem::QuadratureFunction* qf, + const mfem::Array* filter, + mfem::Vector& tensor, int size, + const RTModel &class_device) +{ + mfem::Mesh *mesh = fes->GetMesh(); + const mfem::FiniteElement &el = *fes->GetFE(0); + const mfem::IntegrationRule *ir = &(mfem::IntRules.Get(el.GetGeomType(), 2 * el.GetOrder() + 1));; + + const int nqpts = ir->GetNPoints(); + const int nelems = fes->GetNE(); + const int npts = nqpts * nelems; + + const double* W = ir->GetWeights().Read(); + const mfem::GeometricFactors *geom = mesh->GetGeometricFactors(*ir, mfem::GeometricFactors::DETERMINANTS); + + double el_vol = 0.0; + int my_id; + MPI_Comm_rank(MPI_COMM_WORLD, &my_id); + mfem::Vector data(size); + + const int DIM2 = 2; + std::array perm2 {{ 1, 0 } }; + RAJA::Layout layout_geom = RAJA::make_permuted_layout({{ nqpts, nelems } }, perm2); + + mfem::Vector wts(geom->detJ); + RAJA::View > wts_view(wts.ReadWrite(), layout_geom); + RAJA::View > j_view(geom->detJ.Read(), layout_geom); + + RAJA::RangeSegment default_range(0, npts); + + mfem::MFEM_FORALL(i, nelems, { + const int nqpts_ = nqpts; + for (int j = 0; j < nqpts_; j++) { + wts_view(j, i) = j_view(j, i) * W[j]; + } + }); + + if (class_device == RTModel::CPU) { + const double* qf_data = qf->HostRead(); + const bool* 
filter_data = filter->HostRead(); + const double* wts_data = wts.HostRead(); + for (int j = 0; j < size; j++) { + RAJA::ReduceSum seq_sum(0.0); + RAJA::ReduceSum vol_sum(0.0); + RAJA::forall(default_range, [ = ] (int i_npts){ + if (!filter_data[i_npts]) return; + const double* val = &(qf_data[i_npts * size]); + seq_sum += wts_data[i_npts] * val[j]; + vol_sum += wts_data[i_npts]; + }); + data[j] = seq_sum.get(); + el_vol = vol_sum.get(); + } + } +#if defined(RAJA_ENABLE_OPENMP) + if (class_device == RTModel::OPENMP) { + const double* qf_data = qf->HostRead(); + const bool* filter_data = filter->HostRead(); + const double* wts_data = wts.HostRead(); + for (int j = 0; j < size; j++) { + RAJA::ReduceSum omp_sum(0.0); + RAJA::ReduceSum vol_sum(0.0); + RAJA::forall(default_range, [ = ] (int i_npts){ + if (!filter_data[i_npts]) return; + const double* val = &(qf_data[i_npts * size]); + omp_sum += wts_data[i_npts] * val[j]; + vol_sum += wts_data[i_npts]; + }); + data[j] = omp_sum.get(); + el_vol = vol_sum.get(); + } + } +#endif +#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP) + if (class_device == RTModel::GPU) { + const double* qf_data = qf->Read(); + const bool* filter_data = filter->Read(); + const double* wts_data = wts.Read(); +#if defined(RAJA_ENABLE_CUDA) + using gpu_reduce = RAJA::cuda_reduce; + using gpu_policy = RAJA::cuda_exec<1024>; +#else + using gpu_reduce = RAJA::hip_reduce; + using gpu_policy = RAJA::hip_exec<1024>; +#endif + for (int j = 0; j < size; j++) { + RAJA::ReduceSum gpu_sum(0.0); + RAJA::ReduceSum vol_sum(0.0); + RAJA::forall(default_range, [ = ] RAJA_DEVICE(int i_npts){ + if (!filter_data[i_npts]) return; + const double* val = &(qf_data[i_npts * size]); + gpu_sum += wts_data[i_npts] * val[j]; + vol_sum += wts_data[i_npts]; + }); + data[j] = gpu_sum.get(); + el_vol = vol_sum.get(); + } + } +#endif + + for (int i = 0; i < size; i++) { + tensor[i] = data[i]; + } + + MPI_Allreduce(data.HostRead(), tensor.HostReadWrite(), size, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + double temp = el_vol; + // Here we find what el_vol should be equal to + MPI_Allreduce(&temp, &el_vol, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + + if (vol_avg) { + // We meed to multiple by 1/V by our tensor values to get the appropriate + // average value for the tensor in the end. + double inv_vol = (fabs(el_vol) > 1e-14) ? 
1.0 / el_vol : 0.0; + + for (int m = 0; m < size; m++) { + tensor[m] *= inv_vol; + } + } + return el_vol; +} + } } #endif diff --git a/src/mechanics_lightup.hpp b/src/mechanics_lightup.hpp new file mode 100644 index 0000000..919c5b9 --- /dev/null +++ b/src/mechanics_lightup.hpp @@ -0,0 +1,662 @@ +#pragma once + +#include "option_types.hpp" +#include "mechanics_kernels.hpp" + +#include "mfem.hpp" +#include "mfem/general/forall.hpp" + +#include "SNLS_linalg.h" +#include "ECMech_const.h" +#include "ECMech_gpu_portability.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +template +class LightUp { +public: + +LightUp(const std::vector> &hkls, + const double distance_tolerance, + const double s_dir[3], + const mfem::ParFiniteElementSpace* pfes, + mfem::QuadratureSpaceBase* qspace, + const std::unordered_map > &qf_mapping, + const RTModel &rtmodel, + const std::string &lattice_basename, + const double lattice_params[3]); + +~LightUp() = default; + +void calculate_lightup_data(const mfem::QuadratureFunction& history, + const mfem::QuadratureFunction& stress); + +void calculate_in_fibers(const mfem::QuadratureFunction& history, + const size_t quats_offset, + const size_t hkl_index); + + +void calc_lattice_strains(const mfem::QuadratureFunction& history, + const size_t strain_offset, + const size_t quats_offset, + const size_t rel_vol_offset, + std::vector& lattice_strains_output, + std::vector& lattice_volumes_output); + +void calc_lattice_taylor_factor_dpeff(const mfem::QuadratureFunction& history, + const size_t dpeff_offset, + const size_t gdot_offset, + const size_t gdot_length, + std::vector &lattice_tay_facs, + std::vector &lattice_dpeff); + +void calc_lattice_directional_stiffness(const mfem::QuadratureFunction& history, + const mfem::QuadratureFunction& stress, + const size_t strain_offset, + const size_t quats_offset, + const size_t rel_vol_offset, + std::vector> &lattice_dir_stiff); + +private: + std::vector> m_hkls; + const double m_distance_tolerance; + double m_s_dir[3]; + const mfem::ParFiniteElementSpace* m_pfes; + const size_t m_npts; + const RTModel m_class_device; + const std::unordered_map > m_qf_mapping; + const std::string m_lattice_basename; + const LatticeType m_lattice; + mfem::QuadratureFunction m_workspace; + std::vector> m_in_fibers; + std::vector m_rmat_fr_qsym_c_dir; +}; + +class LatticeTypeCubic { +public: +static constexpr size_t NSYM = 24; + +LatticeTypeCubic(const double lattice_param_a[3]) +{ + symmetric_cubic_quaternions(); + compute_lattice_b_param(lattice_param_a); +} + +~LatticeTypeCubic() = default; + +void +compute_lattice_b_param(const double lparam_a[3]) +{ + constexpr double FRAC_PI_2 = 1.57079632679489661923132169163975144; + const double cellparms[6] = {lparam_a[0], lparam_a[1], lparam_a[2], FRAC_PI_2, FRAC_PI_2, FRAC_PI_2}; + + const double alfa = cellparms[3]; + const double beta = cellparms[4]; + const double gamma = cellparms[5]; + + const double cosalfar = (cos(beta) * cos(gamma) - cos(alfa)) / (sin(beta) * sin(gamma)); + const double sinalfar = sqrtf(1.0 - cosalfar * cosalfar); + + const double a[3] = {cellparms[0], 0.0, 0.0}; + const double b[3] = {cellparms[1] * cos(gamma), cellparms[1] * sin(gamma), 0.0}; + const double c[3] = {cellparms[2] * cos(beta), -cellparms[2] * cosalfar * sin(beta), cellparms[2] * sinalfar * sin(beta)}; + + // Cell volume + double vol[3] = {}; + auto cross_prod = [&](const double* const vec1, + const double* const vec2, + double* const prod) { + prod[0] = 
vec1[1] * vec2[2] - vec1[2] * vec2[1]; + prod[1] = vec1[2] * vec2[0] - vec1[0] * vec2[2]; + prod[2] = vec1[0] * vec2[1] - vec1[1] * vec2[0]; + }; + + cross_prod(b, c, vol); + const double inv_vol = 1.0 / snls::linalg::dotProd<3>(a, vol); + + // Reciprocal lattice vectors + auto cross_prod_inv_v = [&](const double* const vec1, const double* const vec2, double* cross_prod_v) { + cross_prod(vec1, vec2, cross_prod_v); + cross_prod_v[0] *= inv_vol; + cross_prod_v[1] *= inv_vol; + cross_prod_v[2] *= inv_vol; + }; + + double * latb[3] = {&lattice_b[0], &lattice_b[3], &lattice_b[6]}; + // B takes components in the reciprocal lattice to X + cross_prod_inv_v(b, c, latb[0]); + cross_prod_inv_v(c, a, latb[1]); + cross_prod_inv_v(a, b, latb[2]); +} + +void +symmetric_cubic_quaternions() +{ + constexpr double PI = 3.14159265358979323846264338327950288; + constexpr double FRAC_PI_2 = 1.57079632679489661923132169163975144; + constexpr double FRAC_PI_3 = 1.04719755119659774615421446109316763; + + constexpr double angle_axis_symm [NSYM][4] = { + {0.0, 1.0, 0.0, 0.0}, // identity + {FRAC_PI_2, 1.0, 0.0, 0.0}, // fourfold about 1 0 0 (x1) + {PI, 1.0, 0.0, 0.0}, // + {FRAC_PI_2 * 3.0, 1.0, 0.0, 0.0}, // + {FRAC_PI_2, 0.0, 1.0, 0.0}, // fourfold about 0 1 0 (x2) + {PI, 0.0, 1.0, 0.0}, // + {FRAC_PI_2 * 3.0, 0.0, 1.0, 0.0}, // + {FRAC_PI_2, 0.0, 0.0, 1.0}, // fourfold about 0 0 1 (x3) + {PI, 0.0, 0.0, 1.0}, // + {FRAC_PI_2 * 3.0, 0.0, 0.0, 1.0}, // + {FRAC_PI_3 * 2.0, 1.0, 1.0, 1.0}, // threefold about 1 1 1 + {FRAC_PI_3 * 4.0, 1.0, 1.0, 1.0}, // + {FRAC_PI_3 * 2.0, -1.0, 1.0, 1.0}, // threefold about -1 1 1 + {FRAC_PI_3 * 4.0, -1.0, 1.0, 1.0}, // + {FRAC_PI_3 * 2.0, -1.0, -1.0, 1.0}, // threefold about -1 -1 1 + {FRAC_PI_3 * 4.0, -1.0, -1.0, 1.0}, // + {FRAC_PI_3 * 2.0, 1.0, -1.0, 1.0}, // threefold about 1 -1 1 + {FRAC_PI_3 * 4.0, 1.0, -1.0, 1.0}, // + {PI, 1.0, 1.0, 0.0}, // twofold about 1 1 0 + {PI, -1.0, 1.0, 0.0}, // twofold about -1 1 0 + {PI, 1.0, 0.0, 1.0}, // twofold about 1 0 1 + {PI, 0.0, 1.0, 1.0}, // twofold about 0 1 1 + {PI, -1.0, 0.0, 1.0}, // twofold about -1 0 1 + {PI, 0.0, -1.0, 1.0}, // twofold about 0 -1 1 + }; + + constexpr double inv2 = 1.0 / 2.0; + + for (size_t isym = 0; isym < NSYM; isym++) { + double *symm_quat = &quat_symm[isym * 4]; + const double s = sin(inv2 * angle_axis_symm[isym][0]); + symm_quat[0] = cos(inv2 * angle_axis_symm[isym][0]); + double inv_norm_axis = 1.0 / snls::linalg::norm<3>(&angle_axis_symm[isym][1]); + symm_quat[1] = s * angle_axis_symm[isym][1] * inv_norm_axis; + symm_quat[2] = s * angle_axis_symm[isym][2] * inv_norm_axis; + symm_quat[3] = s * angle_axis_symm[isym][3] * inv_norm_axis; + + inv_norm_axis = 1.0; + if (symm_quat[0] < 0.0) { + inv_norm_axis *= -1.0; + } + + symm_quat[0] *= inv_norm_axis; + symm_quat[1] *= inv_norm_axis; + symm_quat[2] *= inv_norm_axis; + symm_quat[3] *= inv_norm_axis; + } +} + +public: + double lattice_b[3 * 3]; + double quat_symm[24 * 4]; +}; + +namespace no_std { +template +struct IsStdArray : std::false_type {}; +template +struct IsStdArray> : std::true_type {}; +} + +template +void printArray(std::ostream &stream, std::array &array) { + stream << "\"[ "; + for (size_t i = 0; i < N - 1; i++) { + stream << std::scientific << std::setprecision(6) << array[i] << ","; + } + stream << array[N - 1] << " ]\"\t"; +} + +template +void printValues(std::ostream &stream, T& t) { + if constexpr (no_std::IsStdArray::value) { + printArray(stream, t); + } + else { + stream << std::scientific << std::setprecision(6) << t << "\t"; + } +} + 
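+// Editorial note (illustrative usage sketch, not part of the original patch): printValues()
+// dispatches on the no_std::IsStdArray trait above, so scalar quantities and std::array
+// entries share one code path when the tab-separated lattice output files are written, e.g.
+//    double scalar = 0.5;                          // printValues(file, scalar) writes 5.000000e-01 plus a tab
+//    std::array<double, 3> hkl = {1.0, 1.0, 1.0};  // printValues(file, hkl) writes "[ 1.000000e+00,1.000000e+00,1.000000e+00 ]" plus a tab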
+__ecmech_hdev__ +inline +void +quat2rmat(const double* const quat, + double* const rmats) +{ + double qbar = quat[0] * quat[0] - (quat[1] * quat[1] + quat[2] * quat[2] + quat[3] * quat[3]); + + double* rmat[3] = {&rmats[0], &rmats[3], &rmats[6]}; + + rmat[0][0] = qbar + 2.0 * quat[1] * quat[1]; + rmat[1][0] = 2.0 * (quat[1] * quat[2] + quat[0] * quat[3]); + rmat[2][0] = 2.0 * (quat[1] * quat[3] - quat[0] * quat[2]); + + rmat[0][1] = 2.0 * (quat[1] * quat[2] - quat[0] * quat[3]); + rmat[1][1] = qbar + 2.0 * quat[2] * quat[2]; + rmat[2][1] = 2.0 * (quat[2] * quat[3] + quat[0] * quat[1]); + + rmat[0][2] = 2.0 * (quat[1] * quat[3] + quat[0] * quat[2]); + rmat[1][2] = 2.0 * (quat[2] * quat[3] - quat[0] * quat[1]); + rmat[2][2] = qbar + 2.0 * quat[3] * quat[3]; +} + +template +LightUp::LightUp(const std::vector> &hkls, + const double distance_tolerance, + const double s_dir[3], + const mfem::ParFiniteElementSpace* pfes, + mfem::QuadratureSpaceBase* qspace, + const std::unordered_map > &qf_mapping, + const RTModel &rtmodel, + const std::string &lattice_basename, + const double lattice_params[3]) : + m_hkls(hkls), + m_distance_tolerance(distance_tolerance), + m_pfes(pfes), + m_npts(qspace->GetSize()), + m_class_device(rtmodel), + m_qf_mapping(qf_mapping), + m_lattice_basename(lattice_basename), + m_lattice(lattice_params) +{ + m_s_dir[0] = s_dir[0]; + m_s_dir[1] = s_dir[1]; + m_s_dir[2] = s_dir[2]; + + m_workspace.SetSpace(qspace, 3); + + const double inv_s_norm = 1.0 / snls::linalg::norm<3>(s_dir); + m_s_dir[0] *= inv_s_norm; + m_s_dir[1] *= inv_s_norm; + m_s_dir[2] *= inv_s_norm; + + auto lat_vec_ops_b = m_lattice.lattice_b; + // First one we'll always set to be all the values + m_in_fibers.push_back(mfem::Array(m_npts)); + for (auto &hkl: hkls) { + m_in_fibers.push_back(mfem::Array(m_npts)); + // Computes reciprocal lattice B but different from HEXRD we return as row matrix as that's the easiest way of doing things + double c_dir[3]; + // compute crystal direction from planeData + snls::linalg::matTVecMult<3,3>(lat_vec_ops_b, hkl.data(), c_dir); + + const double inv_c_norm = 1.0 / snls::linalg::norm<3>(c_dir); + c_dir[0] *= inv_c_norm; + c_dir[1] *= inv_c_norm; + c_dir[2] *= inv_c_norm; + + // Could maybe move this over to a vec if we want this to be easily generic over a ton of symmetry conditions... 
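+      // Editorial note: the loop below precomputes, for every cubic symmetry operator,
+      // the symmetry-equivalent crystal direction associated with this hkl and caches it
+      // in m_rmat_fr_qsym_c_dir. calculate_in_fibers() then only has to apply each point's
+      // own orientation at run time and take the maximum projection onto s_dir over the
+      // cached directions when testing whether a point lies within the fiber tolerance.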
+ double rmat_fr_qsym_c_dir[LatticeType::NSYM][3] = {}; + mfem::Vector tmp(LatticeType::NSYM * 3); + for (size_t isym=0; isym < LatticeType::NSYM; isym++) { + double rmat[3 * 3] = {}; + quat2rmat(&m_lattice.quat_symm[isym * 4], rmat); + snls::linalg::matTVecMult<3,3>(rmat, c_dir, rmat_fr_qsym_c_dir[isym]); + tmp(isym * 3 + 0) = rmat_fr_qsym_c_dir[isym][0]; + tmp(isym * 3 + 1) = rmat_fr_qsym_c_dir[isym][1]; + tmp(isym * 3 + 2) = rmat_fr_qsym_c_dir[isym][2]; + } + tmp.UseDevice(true); + m_rmat_fr_qsym_c_dir.push_back(tmp); + } + + m_hkls.insert(m_hkls.begin(), {0.0, 0.0, 0.0}); + int my_id; + MPI_Comm_rank(MPI_COMM_WORLD, &my_id); + // Now we're going to save off the lattice values to a file + if (my_id == 0) { + + auto file_line_print = [&](auto& basename, auto& name, auto &m_hkls) { + std::string filename = basename + name; + std::ofstream file; + file.open(filename, std::ios_base::out); + + file << "#" << "\t"; + + for (auto& item : m_hkls) { + file << std::setprecision(1) << "\"[ " < = find_unique_tolerance::(&rmat_fr_qsym_c_dir, f64::sqrt(f64::EPSILON)); + + // Move all of the above to the object constructor + // rmat_fr_qsym_c_dir move to an mfem vector and then use it's data down here + // same with s_dir and c_dir + // Here iterate on which HKL we're using maybe have a map for these rmat_fr_qsym_c_dir and c_dir +} + +template +void +LightUp::calculate_lightup_data(const mfem::QuadratureFunction& history, + const mfem::QuadratureFunction& stress) +{ + std::string s_estrain = "elas_strain"; + std::string s_rvol = "rel_vol"; + std::string s_quats = "quats"; + std::string s_gdot = "gdot"; + std::string s_shrateEff = "shrateEff"; + + const size_t quats_offset = m_qf_mapping.find(s_quats)->second.first; + const size_t strain_offset = m_qf_mapping.find(s_estrain)->second.first; + const size_t rel_vol_offset = m_qf_mapping.find(s_rvol)->second.first; + const size_t dpeff_offset = m_qf_mapping.find(s_shrateEff)->second.first; + const size_t gdot_offset = m_qf_mapping.find(s_gdot)->second.first; + const size_t gdot_length = m_qf_mapping.find(s_gdot)->second.second; + + m_in_fibers[0] = true; + for (size_t ihkl = 0; ihkl < m_rmat_fr_qsym_c_dir.size(); ihkl++) { + calculate_in_fibers(history, quats_offset, ihkl); + } + + std::vector lattice_strains_output; + std::vector lattice_volumes_output; + + calc_lattice_strains(history, strain_offset, quats_offset, rel_vol_offset, lattice_strains_output, lattice_volumes_output); + + std::vector lattice_dpeff_output; + std::vector lattice_tayfac_output; + + calc_lattice_taylor_factor_dpeff(history, dpeff_offset, gdot_offset, gdot_length, lattice_tayfac_output, lattice_dpeff_output); + + std::vector> lattice_dir_stiff_output; + + calc_lattice_directional_stiffness(history, stress, strain_offset, quats_offset, rel_vol_offset, lattice_dir_stiff_output); + + int my_id; + MPI_Comm_rank(MPI_COMM_WORLD, &my_id); + // Now we're going to save off the lattice values to a file + if (my_id == 0) { + + auto file_line_print = [&](auto& basename, auto& name, auto &vec) { + std::string filename = basename + name; + std::ofstream file; + file.open(filename, std::ios_base::app); + + for (auto& item : vec) { + printValues(file, item); + } + file << std::endl; + + file.close(); + }; + + file_line_print(m_lattice_basename, "strains.txt", lattice_strains_output); + file_line_print(m_lattice_basename, "volumes.txt", lattice_volumes_output); + file_line_print(m_lattice_basename, "dpeff.txt", lattice_dpeff_output); + file_line_print(m_lattice_basename, "taylor_factor.txt", 
lattice_tayfac_output); + file_line_print(m_lattice_basename, "directional_stiffness.txt", lattice_dir_stiff_output); + } + +} + +template +void +LightUp::calculate_in_fibers(const mfem::QuadratureFunction& history, + const size_t quats_offset, + const size_t hkl_index) +{ + // Same could be said for in_fiber down here + // that way we just need to know which hkl and quats we're running with + const size_t vdim = history.GetVDim(); + const auto history_data = history.Read(); + + // First hkl_index is always completely true so we can easily + // compute the total volume average values + auto in_fiber_view = m_in_fibers[hkl_index + 1].Write(); + auto rmat_fr_qsym_c_dir = m_rmat_fr_qsym_c_dir[hkl_index].Read(); + + mfem::Vector s_dir(3); + s_dir[0] = m_s_dir[0]; s_dir[1] = m_s_dir[1]; s_dir[2] = m_s_dir[2]; + auto s_dir_data = s_dir.Read(); + auto distance_tolerance = m_distance_tolerance; + + mfem::MFEM_FORALL(iquats, m_npts, { + // for(size_t iquats = 0; iquats < m_npts; iquats++) { + + const auto quats = &history_data[iquats * vdim + quats_offset]; + double rmat[3 * 3] = {}; + quat2rmat(quats, rmat); + + double sine = -10; + for (size_t isym = 0; isym < LatticeType::NSYM; isym++) { + double prod[3] = {}; + snls::linalg::matVecMult<3,3>(rmat, &rmat_fr_qsym_c_dir[isym * 3], prod); + double tmp = snls::linalg::dotProd<3>(s_dir_data, prod); + sine = (tmp > sine) ? tmp : sine; + } + if (fabs(sine) > 1.00000001) { + sine = (sine >= 0) ? 1.0 : -1.0; + } + in_fiber_view[iquats] = acos(sine) <= distance_tolerance; + }); +} + +template +void +LightUp::calc_lattice_strains(const mfem::QuadratureFunction& history, + const size_t strain_offset, + const size_t quats_offset, + const size_t rel_vol_offset, + std::vector& lattice_strains_output, + std::vector& lattice_volumes_output) +{ + const double project_vec[6] = {m_s_dir[0] * m_s_dir[0], + m_s_dir[1] * m_s_dir[1], + m_s_dir[2] * m_s_dir[2], + 2.0 * m_s_dir[1] * m_s_dir[2], + 2.0 * m_s_dir[0] * m_s_dir[2], + 2.0 * m_s_dir[0] * m_s_dir[1]}; + + const size_t vdim = history.GetVDim(); + const auto history_data = history.Read(); + m_workspace = 0.0; + auto lattice_strains = m_workspace.Write(); + + // Only need to compute this once + mfem::MFEM_FORALL(iqpts, m_npts, { + // for(size_t iqpts = 0; iqpts < m_npts; iqpts++) { + const auto strain_lat = &history_data[iqpts * vdim + strain_offset]; + const auto quats = &history_data[iqpts * vdim + quats_offset]; + const auto rel_vol = history_data[iqpts * vdim + rel_vol_offset]; + + double strain[6] = {}; + { + double strainm[3 * 3] = {}; + double* strain_m[3] = {&strainm[0], &strainm[3], &strainm[6]}; + const double t1 = ecmech::sqr2i * strain_lat[0]; + const double t2 = ecmech::sqr6i * strain_lat[1]; + // + // Volume strain is ln(V^e_mean) term aka ln(relative volume) + // Our plastic deformation has a det(1) aka no change in volume change + const double elas_vol_strain = log(rel_vol); + // We output elastic strain formulation such that the relationship + // between V^e and \varepsilon is just V^e = I + \varepsilon + strain_m[0][0] = (t1 - t2) + elas_vol_strain; // 11 + strain_m[1][1] = (-t1 - t2) + elas_vol_strain ; // 22 + strain_m[2][2] = ecmech::sqr2b3 * strain_lat[1] + elas_vol_strain; // 33 + strain_m[1][2] = ecmech::sqr2i * strain_lat[4]; // 23 + strain_m[2][0] = ecmech::sqr2i * strain_lat[3]; // 31 + strain_m[0][1] = ecmech::sqr2i * strain_lat[2]; // 12 + + strain_m[2][1] = strain_m[1][2]; + strain_m[0][2] = strain_m[2][0]; + strain_m[1][0] = strain_m[0][1]; + + double rmat[3 * 3] = {}; + double 
strain_samp[3 * 3] = {}; + + quat2rmat(quats, rmat); + snls::linalg::rotMatrix<3, false>(strainm, rmat, strain_samp); + + strain_m[0] = &strain_samp[0]; + strain_m[1] = &strain_samp[3]; + strain_m[2] = &strain_samp[6]; + strain[0] = strain_m[0][0]; + strain[1] = strain_m[1][1]; + strain[2] = strain_m[2][2]; + strain[3] = strain_m[1][2]; + strain[4] = strain_m[0][2]; + strain[5] = strain_m[0][1]; + + } + const double proj_strain = snls::linalg::dotProd<6>(project_vec, strain); + lattice_strains[iqpts] = proj_strain; + + }); + + for (const auto& in_fiber_hkl : m_in_fibers){ + mfem::Vector lattice_strain_hkl(1); + const double lat_vol = exaconstit::kernel::ComputeVolAvgTensorFilter(m_pfes, &m_workspace, &in_fiber_hkl, lattice_strain_hkl, 1, m_class_device); + + lattice_volumes_output.push_back(lat_vol); + lattice_strains_output.push_back(lattice_strain_hkl(0)); + } +} + +template +void +LightUp::calc_lattice_taylor_factor_dpeff(const mfem::QuadratureFunction& history, + const size_t dpeff_offset, + const size_t gdot_offset, + const size_t gdot_length, + std::vector &lattice_tay_facs, + std::vector &lattice_dpeff) +{ + + const size_t vdim = history.GetVDim(); + const auto history_data = history.Read(); + m_workspace = 0.0; + auto lattice_tayfac_dpeffs = m_workspace.Write(); + + // Only need to compute this once + mfem::MFEM_FORALL(iqpts, m_npts, { + // for(size_t iqpts = 0; iqpts < m_npts; iqpts++) { + const auto dpeff = &history_data[iqpts * vdim + dpeff_offset]; + const auto gdots = &history_data[iqpts * vdim + gdot_offset]; + auto lattice_tayfac_dpeff = &lattice_tayfac_dpeffs[iqpts * 2]; + double abs_gdot = 0.0; + for (size_t islip = 0; islip < gdot_length; islip++) { + abs_gdot += fabs(gdots[islip]); + } + lattice_tayfac_dpeff[0] = (fabs(*dpeff) <= 1.0e-14) ? 
0.0 : (abs_gdot / *dpeff); + lattice_tayfac_dpeff[1] = *dpeff; + }); + + for (const auto& in_fiber_hkl : m_in_fibers){ + mfem::Vector lattice_tayfac_dpeff_hkl(2); + [[maybe_unused]] double _ = exaconstit::kernel::ComputeVolAvgTensorFilter(m_pfes, &m_workspace, &in_fiber_hkl, lattice_tayfac_dpeff_hkl, 2, m_class_device); + lattice_tay_facs.push_back(lattice_tayfac_dpeff_hkl(0)); + lattice_dpeff.push_back(lattice_tayfac_dpeff_hkl(1)); + } +} + + +template +void +LightUp::calc_lattice_directional_stiffness(const mfem::QuadratureFunction& history, + const mfem::QuadratureFunction& stress, + const size_t strain_offset, + const size_t quats_offset, + const size_t rel_vol_offset, + std::vector> &lattice_dir_stiff) +{ + + const size_t vdim = history.GetVDim(); + const auto history_data = history.Read(); + const auto stress_data = stress.Read(); + m_workspace = 0.0; + auto lattice_directional_stiffness = m_workspace.Write(); + + // Only need to compute this once + mfem::MFEM_FORALL(iqpts, m_npts, { + // for(size_t iqpts = 0; iqpts < m_npts; iqpts++) { + const auto strain_lat = &history_data[iqpts * vdim + strain_offset]; + const auto quats = &history_data[iqpts * vdim + quats_offset]; + const auto rel_vol = history_data[iqpts * vdim + rel_vol_offset]; + const auto stress_l = &stress_data[iqpts * 6]; + auto lds = &lattice_directional_stiffness[iqpts * 3]; + + double strain[6] = {}; + { + double strainm[3 * 3] = {}; + double* strain_m[3] = {&strainm[0], &strainm[3], &strainm[6]}; + const double t1 = ecmech::sqr2i * strain_lat[0]; + const double t2 = ecmech::sqr6i * strain_lat[1]; + // + // Volume strain is ln(V^e_mean) term aka ln(relative volume) + // Our plastic deformation has a det(1) aka no change in volume change + const double elas_vol_strain = log(rel_vol); + // We output elastic strain formulation such that the relationship + // between V^e and \varepsilon is just V^e = I + \varepsilon + strain_m[0][0] = (t1 - t2) + elas_vol_strain; // 11 + strain_m[1][1] = (-t1 - t2) + elas_vol_strain ; // 22 + strain_m[2][2] = ecmech::sqr2b3 * strain_lat[1] + elas_vol_strain; // 33 + strain_m[1][2] = ecmech::sqr2i * strain_lat[4]; // 23 + strain_m[2][0] = ecmech::sqr2i * strain_lat[3]; // 31 + strain_m[0][1] = ecmech::sqr2i * strain_lat[2]; // 12 + + strain_m[2][1] = strain_m[1][2]; + strain_m[0][2] = strain_m[2][0]; + strain_m[1][0] = strain_m[0][1]; + + double rmat[3 * 3] = {}; + double strain_samp[3 * 3] = {}; + + quat2rmat(quats, rmat); + + snls::linalg::rotMatrix<3, false>(strainm, rmat, strain_samp); + + strain_m[0] = &strain_samp[0]; + strain_m[1] = &strain_samp[3]; + strain_m[2] = &strain_samp[6]; + + strain[0] = strain_m[0][0]; + strain[1] = strain_m[1][1]; + strain[2] = strain_m[2][2]; + strain[3] = strain_m[1][2]; + strain[4] = strain_m[0][2]; + strain[5] = strain_m[0][1]; + } + + for (size_t ipt = 0; ipt < 3; ipt++) { + lds[ipt] = (fabs(strain[ipt]) < 1e-12) ? 
0.0 : (stress_l[ipt] / strain[ipt]); + } + }); + + for (const auto& in_fiber_hkl : m_in_fibers){ + mfem::Vector lattice_direct_stiff(3); + [[maybe_unused]] double _ = exaconstit::kernel::ComputeVolAvgTensorFilter(m_pfes, &m_workspace, &in_fiber_hkl, lattice_direct_stiff, 3, m_class_device); + std::array stiff_tmp; + for (size_t ipt = 0; ipt < 3; ipt++) { + stiff_tmp[ipt] = lattice_direct_stiff(ipt); + } + lattice_dir_stiff.push_back(stiff_tmp); + } +} + +using LightUpCubic = LightUp; \ No newline at end of file diff --git a/src/mechanics_model.cpp b/src/mechanics_model.cpp index 07b9723..bd481c4 100644 --- a/src/mechanics_model.cpp +++ b/src/mechanics_model.cpp @@ -736,7 +736,7 @@ void ExaModel::CalcLogStrain(DenseMatrix& E, const DenseMatrix &F) DenseMatrix B; - int dim = 3; + constexpr int dim = 3; B.SetSize(dim); // F.SetSize(dim); diff --git a/src/mechanics_operator.cpp b/src/mechanics_operator.cpp index 46ea230..0c8ff05 100644 --- a/src/mechanics_operator.cpp +++ b/src/mechanics_operator.cpp @@ -7,9 +7,96 @@ #include "RAJA/RAJA.hpp" #include "ECMech_const.h" #include +#include +#include using namespace mfem; +namespace { + +struct ModelOptions { + mfem::QuadratureFunction *q_stress0; + mfem::QuadratureFunction *q_stress1; + mfem::QuadratureFunction *q_matGrad; + mfem::QuadratureFunction *q_matVars0; + mfem::QuadratureFunction *q_matVars1; + mfem::QuadratureFunction *q_defGrad0; + mfem::ParGridFunction* beg_coords; + mfem::ParGridFunction* end_coords; + mfem::Vector *props; + int nProps; + int nStateVars; + mfem::ParFiniteElementSpace* fes; + double temp_k; + ecmech::ExecutionStrategy accel; + std::string mat_model_name; + Assembly assembly; +}; + +ExaModel* makeMatModelUMAT(const ModelOptions & mod_options) { + ExaModel* matModel = nullptr; + + auto umat = new AbaqusUmatModel( + mod_options.q_stress0, + mod_options.q_stress1, + mod_options.q_matGrad, + mod_options.q_matVars0, + mod_options.q_matVars1, + mod_options.q_defGrad0, + mod_options.beg_coords, + mod_options.end_coords, + mod_options.props, + mod_options.nProps, + mod_options.nStateVars, + mod_options.fes, + mod_options.assembly + ); + matModel = dynamic_cast(umat); + + return matModel; +} + +ExaModel* makeMatModelExaCMech(const ModelOptions & mod_options) { + ExaModel* matModel = nullptr; + + auto ecmech = new ExaCMechModel( + mod_options.q_stress0, + mod_options.q_stress1, + mod_options.q_matGrad, + mod_options.q_matVars0, + mod_options.q_matVars1, + mod_options.beg_coords, + mod_options.end_coords, + mod_options.props, + mod_options.nProps, + mod_options.nStateVars, + mod_options.temp_k, + mod_options.accel, + mod_options.assembly, + mod_options.mat_model_name + ); + matModel = dynamic_cast(ecmech); + return matModel; +} + +ExaModel* makeMatModel(const ExaOptions &sim_options, const ModelOptions & mod_options) { + ExaModel* matModel = nullptr; + + if (sim_options.mech_type == MechType::UMAT) { + matModel = makeMatModelUMAT(mod_options); + } + else if (sim_options.mech_type == MechType::EXACMECH) { + matModel = makeMatModelExaCMech(mod_options); + } + + if (matModel == nullptr) { + MFEM_ABORT("Somehow you managed to ask for a material model that can't be created..."); + } + + return matModel; +} +} + NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes, Array &ess_bdr, @@ -46,167 +133,45 @@ NonlinearMechOperator::NonlinearMechOperator(ParFiniteElementSpace &fes, assembly = options.assembly; - if (options.mech_type == MechType::UMAT) { - // Our class will initialize our deformation gradients and - // our 
local shape function gradients which are taken with respect - // to our initial mesh when 1st created. - model = new AbaqusUmatModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &q_kinVars0, &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, &fes, assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } + auto mod_options = ModelOptions{}; + mod_options.q_stress0 = &q_sigma0; + mod_options.q_stress1 = &q_sigma1; + mod_options.q_matGrad = &q_matGrad; + mod_options.q_matVars0 = &q_matVars0; + mod_options.q_matVars1 = &q_matVars1; + mod_options.q_defGrad0 = &q_kinVars0; + mod_options.beg_coords = &beg_crds; + mod_options.end_coords = &end_crds; + mod_options.props = &matProps; + mod_options.nProps = options.nProps; + mod_options.nStateVars = nStateVars; + mod_options.fes = &fes; + mod_options.temp_k = options.temp_k; + mod_options.assembly = assembly; + mod_options.mat_model_name = options.shortcut; - } - else if (options.mech_type == MechType::EXACMECH) { - // Time to go through a nice switch field to pick out the correct model to be run... - // Should probably figure a better way to do this in the future so this doesn't become - // one giant switch yard. Multiphase materials will probably require a complete revamp of things... - // First we check the xtal symmetry type - ecmech::ExecutionStrategy accel = ecmech::ExecutionStrategy::CPU; + { + mod_options.accel = ecmech::ExecutionStrategy::CPU; if (options.rtmodel == RTModel::CPU) { - accel = ecmech::ExecutionStrategy::CPU; + mod_options.accel = ecmech::ExecutionStrategy::CPU; } else if (options.rtmodel == RTModel::OPENMP) { - accel = ecmech::ExecutionStrategy::OPENMP; + mod_options.accel = ecmech::ExecutionStrategy::OPENMP; } else if (options.rtmodel == RTModel::GPU) { - accel = ecmech::ExecutionStrategy::GPU; + mod_options.accel = ecmech::ExecutionStrategy::GPU; } + } - if (options.xtal_type == XtalType::FCC) { - // Now we find out what slip kinetics and hardening law were chosen. - if (options.slip_type == SlipType::POWERVOCE) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. - model = new VoceFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - else if (options.slip_type == SlipType::POWERVOCENL) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. 
- model = new VoceNLFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - else if (options.slip_type == SlipType::MTSDD) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. - model = new KinKMBalDDFCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - } - else if (options.xtal_type == XtalType::HCP) { - if (options.slip_type == SlipType::MTSDD) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. - model = new KinKMBalDDHCPModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - } - else if (options.xtal_type == XtalType::BCC) { - // Now we find out what slip kinetics and hardening law were chosen. - if (options.slip_type == SlipType::POWERVOCE) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. - model = new VoceBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - else if (options.slip_type == SlipType::POWERVOCENL) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. 
- model = new VoceNLBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - else if (options.slip_type == SlipType::MTSDD) { - // Our class will initialize our deformation gradients and - // our local shape function gradients which are taken with respect - // to our initial mesh when 1st created. - model = new KinKMbalDDBCCModel(&q_sigma0, &q_sigma1, &q_matGrad, &q_matVars0, &q_matVars1, - &beg_crds, &end_crds, - &matProps, options.nProps, nStateVars, options.temp_k, accel, - assembly); - - // Add the user defined integrator - if (options.integ_type == IntegrationType::FULL) { - Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); - } - else if (options.integ_type == IntegrationType::BBAR) { - Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); - } - } - } + model = makeMatModel(options, mod_options); + + // Add the user defined integrator + if (options.integ_type == IntegrationType::FULL) { + Hform->AddDomainIntegrator(new ExaNLFIntegrator(dynamic_cast(model))); + } + else if (options.integ_type == IntegrationType::BBAR) { + Hform->AddDomainIntegrator(new ICExaNLFIntegrator(dynamic_cast(model))); } if (assembly == Assembly::PA) { @@ -276,12 +241,19 @@ ExaModel *NonlinearMechOperator::GetModel() const return model; } -void NonlinearMechOperator::UpdateEssTDofs(const Array &ess_bdr) +void NonlinearMechOperator::UpdateEssTDofs(const Array &ess_bdr, bool mono_def_flag) { - // Set the essential boundary conditions - Hform->SetEssentialBC(ess_bdr, ess_bdr_comps, nullptr); - // Set the essential boundary conditions that we can store on our class - SetEssentialBC(ess_bdr, ess_bdr_comps, nullptr); + if (mono_def_flag) { + Hform->SetEssentialTrueDofs(ess_bdr); + ess_tdof_list = ess_bdr; + } + else { + // Set the essential boundary conditions + Hform->SetEssentialBC(ess_bdr, ess_bdr_comps, nullptr); + auto tmp = Hform->GetEssentialTrueDofs(); + // Set the essential boundary conditions that we can store on our class + SetEssentialBC(ess_bdr, ess_bdr_comps, nullptr); + } } // compute: y = H(x,p) @@ -336,14 +308,31 @@ void NonlinearMechOperator::Setup(const Vector &k) const // Everything else that we need should live on the class. 
// Within this function the model just needs to produce the Cauchy stress // and the material tangent matrix (d \sigma / d Vgrad_{sym}) - if (mech_type == MechType::UMAT) { - model->ModelSetup(nqpts, nelems, space_dims, ndofs, el_jac, qpts_dshape, k); + bool succeed_t = false; + bool succeed = false; + try{ + if (mech_type == MechType::UMAT) { + model->ModelSetup(nqpts, nelems, space_dims, ndofs, el_jac, qpts_dshape, k); + } + else { + // Takes in k vector and transforms into into our E-vector array + P->Mult(k, px); + elem_restrict_lex->Mult(px, el_x); + model->ModelSetup(nqpts, nelems, space_dims, ndofs, el_jac, qpts_dshape, el_x); + } + succeed_t = true; } - else { - // Takes in k vector and transforms into into our E-vector array - P->Mult(k, px); - elem_restrict_lex->Mult(px, el_x); - model->ModelSetup(nqpts, nelems, space_dims, ndofs, el_jac, qpts_dshape, el_x); + catch(const std::exception &exc) { + // catch anything thrown within try block that derives from std::exception + MFEM_WARNING(exc.what()); + succeed_t = false; + } + catch(...) { + succeed_t = false; + } + MPI_Allreduce(&succeed_t, &succeed, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD); + if (!succeed) { + throw std::runtime_error(std::string("Material model setup portion of code failed for at least one integration point.")); } } // End of model setup diff --git a/src/mechanics_operator.hpp b/src/mechanics_operator.hpp index 2221a47..2d027a7 100644 --- a/src/mechanics_operator.hpp +++ b/src/mechanics_operator.hpp @@ -86,7 +86,7 @@ class NonlinearMechOperator : public mfem::NonlinearForm void UpdateEndCoords(const mfem::Vector& vel) const; // Update the essential boundary conditions - void UpdateEssTDofs(const mfem::Array &ess_bdr); + void UpdateEssTDofs(const mfem::Array &ess_bdr, bool mono_def_flag); /// Get essential true dof list, if required const mfem::Array &GetEssTDofList(); diff --git a/src/mechanics_operator_ext.cpp b/src/mechanics_operator_ext.cpp index d82cabf..b9d6d82 100644 --- a/src/mechanics_operator_ext.cpp +++ b/src/mechanics_operator_ext.cpp @@ -92,7 +92,7 @@ void PANonlinearMechOperatorGradExt::Assemble() } } -void PANonlinearMechOperatorGradExt::AssembleDiagonal(Vector &diag) +void PANonlinearMechOperatorGradExt::AssembleDiagonal(Vector &diag) const { CALI_CXX_MARK_SCOPE("AssembleDiagonal"); Array &integrators = *oper_mech->GetDNFI(); @@ -225,7 +225,7 @@ void EANonlinearMechOperatorGradExt::Assemble() } } -void EANonlinearMechOperatorGradExt::AssembleDiagonal(Vector &diag) +void EANonlinearMechOperatorGradExt::AssembleDiagonal(Vector &diag) const { CALI_CXX_MARK_SCOPE("eaAssembleDiagonal"); diff --git a/src/mechanics_operator_ext.hpp b/src/mechanics_operator_ext.hpp index 3d3dedf..a4bcae3 100644 --- a/src/mechanics_operator_ext.hpp +++ b/src/mechanics_operator_ext.hpp @@ -23,7 +23,7 @@ class NonlinearMechOperatorExt : public mfem::Operator // Here we would assemble the diagonal of any matrix-like operation we might be // performing. 
- virtual void AssembleDiagonal(mfem::Vector &diag) = 0; + virtual void AssembleDiagonal(mfem::Vector &diag) const = 0; }; // We'll pass this on through the GetGradient method which can be used @@ -40,11 +40,11 @@ class PANonlinearMechOperatorGradExt : public NonlinearMechOperatorExt PANonlinearMechOperatorGradExt(mfem::NonlinearForm *_mech_operator, const mfem::Array &ess_tdofs); - virtual void Assemble(); - virtual void AssembleDiagonal(mfem::Vector &diag); + virtual void Assemble() override; + virtual void AssembleDiagonal(mfem::Vector &diag) const override; template void TMult(const mfem::Vector &x, mfem::Vector &y) const; - virtual void Mult(const mfem::Vector &x, mfem::Vector &y) const; + virtual void Mult(const mfem::Vector &x, mfem::Vector &y) const override; virtual void LocalMult(const mfem::Vector &x, mfem::Vector &y) const; virtual void MultVec(const mfem::Vector &x, mfem::Vector &y) const; }; @@ -63,9 +63,9 @@ class EANonlinearMechOperatorGradExt : public PANonlinearMechOperatorGradExt EANonlinearMechOperatorGradExt(mfem::NonlinearForm *_mech_operator, const mfem::Array &ess_tdofs); - void Assemble(); + void Assemble() override; - void AssembleDiagonal(mfem::Vector &diag); + virtual void AssembleDiagonal(mfem::Vector &diag) const override; // using PANonlinearMechOperatorGradExt::AssembleDiagonal; template void TMult(const mfem::Vector &x, mfem::Vector &y) const; diff --git a/src/mechanics_umat.cpp b/src/mechanics_umat.cpp index 5bfffec..57f2149 100644 --- a/src/mechanics_umat.cpp +++ b/src/mechanics_umat.cpp @@ -222,7 +222,7 @@ void AbaqusUmatModel::CalcLogStrainIncrement(DenseMatrix& dE, const DenseMatrix DenseMatrix F_hat, B_hat; - int dim = 3; + constexpr int dim = 3; F_hat.SetSize(dim); B_hat.SetSize(dim); @@ -255,7 +255,7 @@ void AbaqusUmatModel::CalcLogStrainIncrement(DenseMatrix& dE, const DenseMatrix // e = 1/2 (I - B^(-1)) = 1/2 (I - F(^-T)F^(-1)) void AbaqusUmatModel::CalcEulerianStrainIncr(DenseMatrix& dE, const DenseMatrix &Jpt) { - int dim = 3; + constexpr int dim = 3; DenseMatrix Fincr(Jpt, dim); DenseMatrix Finv(dim), Binv(dim); @@ -282,7 +282,7 @@ void AbaqusUmatModel::CalcLagrangianStrainIncr(DenseMatrix& dE, const DenseMatri { DenseMatrix C; - int dim = 3; + constexpr int dim = 3; double half = 1.0 / 2.0; @@ -342,8 +342,8 @@ void AbaqusUmatModel::ModelSetup(const int nqpts, const int nelems, const int sp int nstatv = numStateVars; double pnewdt = 10.0; // revisit this - double props[nprops]; // populate from the mat props vector wrapped by matProps on the base class - double statev[nstatv]; // populate from the state variables associated with this element/ip + mfem::Vector props(nprops); // populate from the mat props vector wrapped by matProps on the base class + mfem::Vector statev(nstatv); // populate from the state variables associated with this element/ip double rpl = 0.0; // volumetric heat generation per unit time, not considered double drpldt = 0.0; // variation of rpl wrt temperature set to 0.0 @@ -466,8 +466,8 @@ void AbaqusUmatModel::ModelSetup(const int nqpts, const int nelems, const int sp } // get state variables and material properties - GetElementStateVars(elemID, ipID, true, statev, nstatv); - GetMatProps(props); + GetElementStateVars(elemID, ipID, true, statev.HostReadWrite(), nstatv); + GetMatProps(props.HostReadWrite()); // get element stress and make sure ordering is ok double stressTemp[6]; @@ -532,10 +532,10 @@ void AbaqusUmatModel::ModelSetup(const int nqpts, const int nelems, const int sp // call c++ wrapper of umat routine - 
umat_call(&stress[0], &statev[0], &ddsdde[0], &sse, &spd, &scd, &rpl, + umat_call(&stress[0], statev.HostReadWrite(), &ddsdde[0], &sse, &spd, &scd, &rpl, ddsdt, drplde, &drpldt, &stran[0], &dstran[0], time, &deltaTime, &tempk, &dtemp, &predef, &dpred, &cmname, - &ndi, &nshr, &ntens, &nstatv, &props[0], &nprops, &coords[0], + &ndi, &nshr, &ntens, &nstatv, props.HostReadWrite(), &nprops, &coords[0], drot, &pnewdt, &celent, &dfgrd0[0], &dfgrd1[0], &noel, &npt, &layer, &kspt, &kstep, &kinc); @@ -572,7 +572,7 @@ void AbaqusUmatModel::ModelSetup(const int nqpts, const int nelems, const int sp SetElementStress(elemID, ipID, false, stressTemp2, ntens); // set the updated statevars - SetElementStateVars(elemID, ipID, false, statev, nstatv); + SetElementStateVars(elemID, ipID, false, statev.HostReadWrite(), nstatv); } } } diff --git a/src/option_parser.cpp b/src/option_parser.cpp index 084bfac..6574f7b 100644 --- a/src/option_parser.cpp +++ b/src/option_parser.cpp @@ -4,24 +4,15 @@ #include "TOML_Reader/toml.hpp" #include "mfem.hpp" #include "ECMech_cases.h" -#include "ECMech_evptnWrap.h" -#include "ECMech_const.h" #include #include +#include inline bool if_file_exists (const std::string& name) { std::ifstream f(name.c_str()); return f.good(); } -namespace { - typedef ecmech::evptn::matModel> - VoceBCCModel; - typedef ecmech::evptn::matModel> - VoceNLBCCModel; -} // my_id corresponds to the processor id. void ExaOptions::parse_options(int my_id) { @@ -113,7 +104,7 @@ void ExaOptions::get_properties() } if (grain_table.contains("grain_floc")) { - if (!if_file_exists(grain_map) and (mesh_type == MeshType::AUTO)) + if (grain_map.size() > 0 && !if_file_exists(grain_map) and (mesh_type == MeshType::AUTO)) { MFEM_ABORT("Grain file does not exist"); } @@ -143,6 +134,7 @@ void ExaOptions::get_bcs() const auto& table = toml::find(data, "BCs"); changing_bcs = toml::find_or(table, "changing_ess_bcs", false); + mono_def_flag = toml::find_or(table, "expt_mono_def_flag", false); vgrad_origin = toml::find_or>(table, "vgrad_origin", {}); vgrad_origin_flag = !vgrad_origin.empty(); @@ -366,122 +358,79 @@ void ExaOptions::get_model() if (ori_type != OriType::QUAT) { MFEM_ABORT("Properties.Grain.ori_type is not set to quaternion for use with an ExaCMech model."); - xtal_type = XtalType::NOTYPE; } grain_statevar_offset = ecmech::evptn::iHistLbQ; if(table.contains("ExaCMech")) { const auto& exacmech_table = toml::find(table, "ExaCMech"); - std::string _xtal_type = toml::find_or(exacmech_table, "xtal_type", ""); - std::string _slip_type = toml::find_or(exacmech_table, "slip_type", ""); - if ((_xtal_type == "fcc") || (_xtal_type == "FCC")) { - xtal_type = XtalType::FCC; - } - else if ((_xtal_type == "bcc") || (_xtal_type == "BCC")) { - xtal_type = XtalType::BCC; - } - else if ((_xtal_type == "hcp") || (_xtal_type == "HCP")) { - xtal_type = XtalType::HCP; - } - else { - MFEM_ABORT("Model.ExaCMech.xtal_type was not provided a valid type."); - xtal_type = XtalType::NOTYPE; - } - - if ((_slip_type == "mts") || (_slip_type == "MTS") || (_slip_type == "mtsdd") || (_slip_type == "MTSDD")) { - slip_type = SlipType::MTSDD; - if (xtal_type == XtalType::FCC) { - if (nProps != ecmech::matModelEvptn_FCC_B::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << ecmech::matModelEvptn_FCC_B::nParams << - " values for the MTSDD option and FCC option"); - } + shortcut = toml::find_or(exacmech_table, "shortcut", ""); + if (shortcut.size() == 0) { + std::string xtal_type = toml::find_or(exacmech_table, "xtal_type", ""); + 
std::string slip_type = toml::find_or(exacmech_table, "slip_type", ""); + shortcut = "evptn_"; + if ((xtal_type == "fcc") || (xtal_type == "FCC")) { + shortcut += "FCC_"; } - else if (xtal_type == XtalType::BCC) { - if (nProps != ecmech::matModelEvptn_BCC_A::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << ecmech::matModelEvptn_BCC_A::nParams << - " values for the MTSDD option and BCC option"); - } + else if ((xtal_type == "bcc") || (xtal_type == "BCC")) { + shortcut += "BCC_"; } - else if (xtal_type == XtalType::HCP) { - if (nProps != ecmech::matModelEvptn_HCP_A::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << ecmech::matModelEvptn_HCP_A::nParams << - " values for the MTSDD option and HCP option"); - } + else if ((xtal_type == "hcp") || (xtal_type == "HCP")) { + shortcut += "HCP_"; } else { - MFEM_ABORT("Model.ExaCMech.slip_type can not be MTS for BCC materials.") + MFEM_ABORT("Model.ExaCMech.xtal_type was not provided a valid type."); } - } - else if ((_slip_type == "powervoce") || (_slip_type == "PowerVoce") || (_slip_type == "POWERVOCE")) { - slip_type = SlipType::POWERVOCE; - if (xtal_type == XtalType::FCC) { - if (nProps != ecmech::matModelEvptn_FCC_A::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << ecmech::matModelEvptn_FCC_A::nParams << - " values for the PowerVoce option and FCC option"); + if ((slip_type == "mts") || (slip_type == "MTS") || (slip_type == "mtsdd") || (slip_type == "MTSDD")) { + if ((xtal_type == "hcp") || (xtal_type == "HCP")) { + shortcut += "A"; } - } - else if (xtal_type == XtalType::BCC) { - if (nProps != VoceBCCModel::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << VoceBCCModel::nParams << - " values for the PowerVoce option and BCC option"); + else { + shortcut += "B"; } - } - else { - MFEM_ABORT("Model.ExaCMech.slip_type can not be PowerVoce for HCP or BCC_112 materials.") } - } - else if ((_slip_type == "powervocenl") || (_slip_type == "PowerVoceNL") || (_slip_type == "POWERVOCENL")) { - slip_type = SlipType::POWERVOCENL; - if (xtal_type == XtalType::FCC) { - if (nProps != ecmech::matModelEvptn_FCC_AH::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << ecmech::matModelEvptn_FCC_AH::nParams << - " values for the PowerVoceNL option and FCC option"); + else if ((slip_type == "powervoce") || (slip_type == "PowerVoce") || (slip_type == "POWERVOCE")) { + if ((xtal_type == "hcp") || (xtal_type == "HCP")) { + MFEM_ABORT("Model.ExaCMech.slip_type cannot be PowerVoce for HCP materials.") } + shortcut += "A"; } - else if (xtal_type == XtalType::BCC) { - if (nProps != VoceNLBCCModel::nParams) { - MFEM_ABORT("Properties.Matl_Props.num_props needs " << VoceNLBCCModel::nParams << - " values for the PowerVoceNL option and BCC option"); + else if ((slip_type == "powervocenl") || (slip_type == "PowerVoceNL") || (slip_type == "POWERVOCENL")) { + if ((xtal_type == "hcp") || (xtal_type == "HCP")) { + MFEM_ABORT("Model.ExaCMech.slip_type cannot be PowerVoceNL for HCP materials.") } + shortcut += "AH"; } else { - MFEM_ABORT("Model.ExaCMech.slip_type can not be PowerVoceNL for HCP or BCC_112 materials.") + MFEM_ABORT("Model.ExaCMech.slip_type was not provided a valid type."); } } - else { - MFEM_ABORT("Model.ExaCMech.slip_type was not provided a valid type."); - slip_type = SlipType::NOTYPE; + + try { + [[maybe_unused]] auto unused = ecmech::makeMatModel(shortcut); + } catch(...)
{ + MFEM_ABORT("Model.ExaCMech.shortcut was not provided a valid name."); } - if (slip_type != SlipType::NOTYPE) { - if (xtal_type == XtalType::FCC) { - int num_state_vars_check = ecmech::matModelEvptn_FCC_A::numHist + ecmech::ne + 1 - 4; - if (numStateVars != num_state_vars_check) { - MFEM_ABORT("Properties.State_Vars.num_vars needs " << num_state_vars_check << " values for a " - "face cubic material when using an ExaCMech model. Note: the number of values for a quaternion " - "are not included in this count."); - } - } - else if (xtal_type == XtalType::BCC) { - // We'll probably need to modify this whenever we add support for the other BCC variations in - // here due to the change in number of slip systems. - int num_state_vars_check = ecmech::matModelEvptn_BCC_A::numHist + ecmech::ne + 1 - 4; - if (numStateVars != num_state_vars_check) { - MFEM_ABORT("Properties.State_Vars.num_vars needs " << num_state_vars_check << " values for a " - "body center cubic material when using an ExaCMech model. Note: the number of values for a quaternion " - "are not included in this count."); - } - } - else if (xtal_type == XtalType::HCP) { - int num_state_vars_check = ecmech::matModelEvptn_HCP_A::numHist + ecmech::ne + 1 - 4; - if (numStateVars != num_state_vars_check) { - MFEM_ABORT("Properties.State_Vars.num_vars needs " << num_state_vars_check << " values for a " - "hexagonal material when using an ExaCMech model. Note: the number of values for a quaternion " - "are not included in this count."); - } - } + auto index_map = ecmech::modelParamIndexMap(shortcut); + auto num_props_check = index_map["num_params"]; + auto num_state_vars_check = index_map["num_hist"] + ecmech::ne + 1 - 4; + + gdot_size = index_map["num_slip_system"]; + hard_size = index_map["num_hardening"]; + + + if (numStateVars != (int) num_state_vars_check) { + MFEM_ABORT("Properties.State_Vars.num_vars needs " << num_state_vars_check << " values for the given material choice" + "Note: the number of values for a quaternion " + "are not included in this count."); + } + + if (nProps != (int) num_props_check) { + MFEM_ABORT("Properties.Matl_Props.num_props needs " << num_props_check << " values for the given material choice" + "Note: the number of values for a quaternion " + "are not included in this count."); } } else { @@ -518,6 +467,7 @@ void ExaOptions::get_time_steps() MFEM_ABORT("dt_scale for auto time stepping needs to be between 0 and 1."); } dt_min = toml::find_or(auto_table, "dt_min", 1.0); + dt_max = toml::find_or(auto_table, "dt_max", std::numeric_limits::max()); t_final = toml::find_or(auto_table, "t_final", 1.0); dt_file = toml::find_or(auto_table, "auto_dt_file", "auto_dt_out.txt"); } @@ -563,11 +513,42 @@ void ExaOptions::get_visualizations() additional_avgs = _additional_avgs; std::string _avg_def_grad_fname = toml::find_or(table, "avg_def_grad_fname", "avg_def_grad.txt"); avg_def_grad_fname = _avg_def_grad_fname; + std::string _avg_euler_strain_fname = toml::find_or(table, "avg_euler_strain_fname", "avg_euler_strain.txt"); + avg_euler_strain_fname = _avg_euler_strain_fname; std::string _avg_pl_work_fname = toml::find_or(table, "avg_pl_work_fname", "avg_pl_work.txt"); avg_pl_work_fname = _avg_pl_work_fname; - std::string _avg_dp_tensor_fname = toml::find_or(table, "avg_dp_tensor_fname", "avg_dp_tensor.txt"); - avg_dp_tensor_fname = _avg_dp_tensor_fname; light_up = toml::find_or(table, "light_up", false); + if (light_up) { + + auto hkls = toml::find_or< std::vector> >(table, "light_up_hkl", {{}}); + + for (auto& hkl : hkls) 
{ + std::array hkl_tmp = {hkl[0], hkl[1], hkl[2]}; + std::cout << "light-up hkls " << hkl_tmp[0] << " " << hkl_tmp[1] << " " << hkl_tmp[2] << std::endl; + light_hkls.push_back(hkl_tmp); + } + + light_dist_tol = toml::find_or(table, "light_dist_tol", {0.07}); + std::cout << "light-up distance tolerance " << light_dist_tol << std::endl; + auto s_dirs = toml::find_or>(table, "light_s_dir", {}); + + light_s_dir[0] = s_dirs[0]; + light_s_dir[1] = s_dirs[1]; + light_s_dir[2] = s_dirs[2]; + + std::cout << "light-up s direction " << light_s_dir[0] << " " << light_s_dir[1] << " " << light_s_dir[2] << std::endl; + + auto lparams = toml::find_or>(table, "lattice_params", {}); + + lattice_params[0] = lparams[0]; + lattice_params[1] = lparams[1]; + lattice_params[2] = lparams[2]; + + std::cout << "light-up lattice params " << lattice_params[0] << " " << lattice_params[1] << " " << lattice_params[2] << std::endl; + + lattice_basename = toml::find_or(table, "lattice_basename", "lattice_avg_"); + + } } // end of visualization parsing // From the toml file it finds all the values related to the Solvers @@ -780,7 +761,6 @@ void ExaOptions::print_options() std::cout << "Additional averages being computed" << std::endl; std::cout << "Average deformation gradient filename: " << avg_def_grad_fname << std::endl; std::cout << "Average plastic work filename: " << avg_pl_work_fname << std::endl; - std::cout << "Average plastic strain rate tensor filename: " << avg_dp_tensor_fname << std::endl; } else { @@ -789,6 +769,17 @@ void ExaOptions::print_options() std::cout << "Average stress filename: " << avg_stress_fname << std::endl; std::cout << "Light-up flag: " << light_up << std::endl; + if (light_up) { + for (auto& hkl : light_hkls) { + std::array hkl_tmp = {hkl[0], hkl[1], hkl[2]}; + std::cout << "light-up: hkls " << hkl_tmp[0] << " " << hkl_tmp[1] << " " << hkl_tmp[2] << std::endl; + } + std::cout << "light-up: distance tolerance " << light_dist_tol << std::endl; + std::cout << "light-up: s direction " << light_s_dir[0] << " " << light_s_dir[1] << " " << light_s_dir[2] << std::endl; + std::cout << "light-up: lattice params " << lattice_params[0] << " " << lattice_params[1] << " " << lattice_params[2] << std::endl; + std::cout << "light-up: lattice basename: " << lattice_basename << std::endl; + } + if (nl_solver == NLSolver::NR) { std::cout << "Nonlinear Solver is Newton Raphson" << std::endl; } @@ -852,29 +843,26 @@ void ExaOptions::print_options() std::cout << "UMAT" << std::endl; } else if (mech_type == MechType::EXACMECH) { - std::cout << "ExaCMech" << std::endl; - std::cout << "Crystal symmetry group is "; - if (xtal_type == XtalType::FCC) { - std::cout << "FCC" << std::endl; - } - else if (xtal_type == XtalType::BCC) { - std::cout << "BCC" << std::endl; - } - else if (xtal_type == XtalType::HCP) { - std::cout << "HCP" << std::endl; - } - std::cout << "Slip system and hardening model being used is "; + auto shortcut_delim = [](std::string & str, std::string delim) -> std::vector { + auto start = 0U; + auto end = str.find(delim); + std::vector sdelim; + while (end != std::string::npos) + { + sdelim.push_back(str.substr(start, end - start)); + start = end + delim.length(); + end = str.find(delim, start); + } + sdelim.push_back(str.substr(start, end - start)); + return sdelim; + }; - if (slip_type == SlipType::MTSDD) { - std::cout << "MTS slip like kinetics with dislocation density based hardening" << std::endl; - } - else if (slip_type == SlipType::POWERVOCE) { - std::cout << "Power law slip kinetics with a 
linear Voce hardening law" << std::endl; - } - else if (slip_type == SlipType::POWERVOCENL) { - std::cout << "Power law slip kinetics with a nonlinear Voce hardening law" << std::endl; - } + auto sdelim = shortcut_delim(shortcut, "_"); + + std::cout << "ExaCMech" << std::endl; + std::cout << "ExaCMech shortcut name: " << shortcut << std::endl; + std::cout << "Crystal symmetry group is " << sdelim[1] << std::endl; } std::cout << "Xtal Plasticity being used: " << cp << std::endl; @@ -904,6 +892,10 @@ void ExaOptions::print_options() std::cout << "Number of state variables: " << numStateVars << std::endl; std::cout << "State variable file location: " << state_file << std::endl; + if (mono_def_flag) { + std::cout << "Making use of experimental monotonic deformation BCs option" << std::endl; + } + for (const auto key: updateStep) { std::cout << "Starting on step " << key << " essential BCs values are:" << std::endl; diff --git a/src/option_parser.hpp b/src/option_parser.hpp index ee8c861..3a416ea 100644 --- a/src/option_parser.hpp +++ b/src/option_parser.hpp @@ -2,6 +2,7 @@ #ifndef option_parser_hpp #define option_parser_hpp +#include #include #include #include // for std::unordered_map @@ -33,6 +34,7 @@ class ExaOptions { double t_final; double dt; double dt_min; + double dt_max; double dt_scale; // We have a custom dt flag bool dt_cust; @@ -56,11 +58,16 @@ class ExaOptions { // average stress file name std::string avg_stress_fname; std::string avg_pl_work_fname; - std::string avg_dp_tensor_fname; std::string avg_def_grad_fname; + std::string avg_euler_strain_fname; bool additional_avgs; // light up values bool light_up = false; + std::vector> light_hkls = {}; + double light_dist_tol = 0.0; + double light_s_dir[3] = {}; + double lattice_params[3] = {}; + std::string lattice_basename = "lattice_avg_"; // newton input args double newton_rel_tol; @@ -86,10 +93,11 @@ class ExaOptions { // The type of mechanical interface that we'll be using MechType mech_type; - // The slip and hardening laws being used for ExaCMech - SlipType slip_type; - // Specify the xtal type we'll be using - used if ExaCMech is being used - XtalType xtal_type; + // shortcut name for the material we're using + std::string shortcut; + // gdot size is known now from option size + size_t gdot_size = 1; + size_t hard_size = 1; // Specify the temperature of the material double temp_k; @@ -133,6 +141,9 @@ class ExaOptions { bool vgrad_origin_flag = false; std::vector vgrad_origin; + // experimental flag option + bool mono_def_flag = false; + // Parse the TOML file for all of the various variables. // In other words this is our driver to get all of the values. void parse_options(int my_id); @@ -162,10 +173,6 @@ class ExaOptions { // Want all of these to be not set. If they aren't specified // then we want other things to fail in our driver file. 
      mech_type = MechType::NOTYPE;
-      // The slip and hardening laws being used for ExaCMech
-      slip_type = SlipType::NOTYPE;
-      // Specify the xtal type we'll be using - used if ExaCMech is being used
-      xtal_type = XtalType::NOTYPE;
      // Specify the temperature of the material
      temp_k = 298.;
@@ -198,13 +205,14 @@ class ExaOptions {
      avg_stress_fname = "avg_stress.txt";
      avg_pl_work_fname = "avg_pl_work.txt";
      avg_def_grad_fname = "avg_def_grad.txt";
-      avg_dp_tensor_fname = "avg_dp_tensor.txt";
+      avg_euler_strain_fname = "avg_euler_strain.txt";
      additional_avgs = false;
      // Time step related parameters
      t_final = 1.0;
      dt = 1.0;
      dt_min = dt;
+      dt_max = dt;
      dt_cust = false;
      dt_auto = false;
      nsteps = 1;
diff --git a/src/options.toml b/src/options.toml
index e4771dd..b71f270 100644
--- a/src/options.toml
+++ b/src/options.toml
@@ -235,8 +235,22 @@ Version = "0.7.0"
    avg_def_grad_fname = "avg_def_grad.txt"
    # Optional - the file name for our plastic work file
    avg_pl_work_fname = "avg_pl_work.txt"
-    # Optional - the file name for our average plastic deformation rate file
-    avg_dp_tensor_fname = "avg_dp_tensor.txt"
+    # Optional - the file name for our average eulerian strain file
+    avg_euler_strain_fname = "avg_euler_strain.txt"
+    # Options to drive light_up type calculations in-situ in the code
+    light_up = false
+    # What HKL planes we want to do the light_up measurements on
+    light_up_hkl = [[1.0, 1.0, 1.0],
+                    [2.0, 0.0, 0.0],
+                    [2.0, 2.0, 0.0],
+                    [3.0, 1.0, 1.0]]
+    # What tolerance in radians we want our measurements to be within for a given fiber
+    light_dist_tol = 0.0873
+    # What our sample direction is for the light-up measurements
+    light_s_dir = [0.0, 0.0, 1.0]
+    # What our lattice spacing parameters are
+    lattice_params = [3.60, 3.60, 3.60]
+    lattice_basename = "lattice_avg_"
 [Solvers]
 # Option for how our assembly operation is conducted. Possible choices are
 # FULL, PA, EA
diff --git a/src/system_driver.cpp b/src/system_driver.cpp
index 4c5e244..a27b67e 100644
--- a/src/system_driver.cpp
+++ b/src/system_driver.cpp
@@ -6,6 +6,7 @@
 #include "mechanics_kernels.hpp"
 #include "BCData.hpp"
 #include "BCManager.hpp"
+#include "mechanics_lightup.hpp"
 #include
 #include
@@ -21,6 +22,80 @@ void DirBdrFunc(int attr_id, Vector &y)
    bc.setDirBCs(y);
 }
+namespace {
+   // Once again NVCC is the bane of my existence for not allowing
+   // valid code to run...
+   template <typename T>
+   void min_max_helper(const int space_dim,
+                       const size_t nnodes,
+                       const T& class_device,
+                       mfem::Vector* const nodes,
+                       mfem::Vector& origin)
+   {
+      // Our nodes are by default saved in xxx..., yyy..., zzz... ordering rather
+      // than xyz, xyz, ...
+      // So, the below should get us a device reference that can be used.
+      const auto X = mfem::Reshape(nodes->Read(), nnodes, space_dim);
+      mfem::Vector min_origin(space_dim); min_origin = std::numeric_limits<double>::max();
+      mfem::Vector max_origin(space_dim); max_origin = -std::numeric_limits<double>::max();
+
+      min_origin.HostReadWrite();
+      max_origin.HostReadWrite();
+      // We need to calculate the minimum point in the mesh to get the correct velocity gradient across
+      // the part.
+      RAJA::RangeSegment default_range(0, nnodes);
+      if (class_device == RTModel::CPU) {
+         for (int j = 0; j < space_dim; j++) {
+            RAJA::ReduceMin<RAJA::seq_reduce, double> seq_min(std::numeric_limits<double>::max());
+            RAJA::ReduceMax<RAJA::seq_reduce, double> seq_max(-std::numeric_limits<double>::max());
+            RAJA::forall<RAJA::seq_exec>(default_range, [ = ] (int i){
+               seq_min.min(X(i, j));
+               seq_max.max(X(i, j));
+            });
+            min_origin(j) = seq_min.get();
+            max_origin(j) = seq_max.get();
+         }
+      }
+#if defined(RAJA_ENABLE_OPENMP)
+      if (class_device == RTModel::OPENMP) {
+         for (int j = 0; j < space_dim; j++) {
+            RAJA::ReduceMin<RAJA::omp_reduce, double> omp_min(std::numeric_limits<double>::max());
+            RAJA::ReduceMax<RAJA::omp_reduce, double> omp_max(-std::numeric_limits<double>::max());
+            RAJA::forall<RAJA::omp_parallel_for_exec>(default_range, [ = ] (int i){
+               omp_min.min(X(i, j));
+               omp_max.max(X(i, j));
+            });
+            min_origin(j) = omp_min.get();
+            max_origin(j) = omp_max.get();
+         }
+      }
+#endif
+#if defined(RAJA_ENABLE_CUDA) || defined(RAJA_ENABLE_HIP)
+      if (class_device == RTModel::GPU) {
+#if defined(RAJA_ENABLE_CUDA)
+         using gpu_reduce = RAJA::cuda_reduce;
+         using gpu_policy = RAJA::cuda_exec<1024>;
+#else
+         using gpu_reduce = RAJA::hip_reduce;
+         using gpu_policy = RAJA::hip_exec<1024>;
+#endif
+         for (int j = 0; j < space_dim; j++) {
+            RAJA::ReduceMin<gpu_reduce, double> gpu_min(std::numeric_limits<double>::max());
+            RAJA::ReduceMax<gpu_reduce, double> gpu_max(-std::numeric_limits<double>::max());
+            RAJA::forall<gpu_policy>(default_range, [ = ] RAJA_DEVICE(int i){
+               gpu_min.min(X(i, j));
+               gpu_max.max(X(i, j));
+            });
+            min_origin(j) = gpu_min.get();
+            max_origin(j) = gpu_max.get();
+         }
+      }
+#endif
+      MPI_Allreduce(min_origin.HostRead(), origin.HostReadWrite(), space_dim, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD);
+      MPI_Allreduce(max_origin.HostRead(), &origin.HostReadWrite()[space_dim], space_dim, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
+   }// End of finding max and min locations
+}
+
 SystemDriver::SystemDriver(ParFiniteElementSpace &fes,
                            ExaOptions &options,
                            QuadratureFunction &q_matVars0,
@@ -36,26 +111,24 @@ SystemDriver::SystemDriver(ParFiniteElementSpace &fes,
                            ParGridFunction &end_crds,
                            Vector &matProps,
                            int nStateVars)
-   : fe_space(fes), def_grad(q_kinVars0), evec(q_evec), vgrad_origin_flag(options.vgrad_origin_flag)
+   : fe_space(fes), mech_type(options.mech_type), class_device(options.rtmodel),
+     additional_avgs(options.additional_avgs), auto_time(options.dt_auto),
+     avg_stress_fname(options.avg_stress_fname), avg_pl_work_fname(options.avg_pl_work_fname),
+     avg_def_grad_fname(options.avg_def_grad_fname),
+     avg_euler_strain_fname(options.avg_euler_strain_fname),
+     vgrad_origin_flag(options.vgrad_origin_flag), mono_def_flag(options.mono_def_flag),
+     def_grad(q_kinVars0), evec(q_evec)
 {
    CALI_CXX_MARK_SCOPE("system_driver_init");
-   auto_time = options.dt_auto;
   if (auto_time) {
      dt_min = options.dt_min;
+      dt_max = options.dt_max;
      dt_class = options.dt;
      dt_scale = options.dt_scale;
      auto_dt_fname = options.dt_file;
   }
-   mech_type = options.mech_type;
-   class_device = options.rtmodel;
-   avg_stress_fname = options.avg_stress_fname;
-   avg_pl_work_fname = options.avg_pl_work_fname;
-   avg_def_grad_fname = options.avg_def_grad_fname;
-   avg_dp_tensor_fname = options.avg_dp_tensor_fname;
-   additional_avgs = options.additional_avgs;
-
   const int space_dim = fe_space.GetParMesh()->SpaceDimension();
   // set the size of the essential boundary conditions attribute array
   ess_bdr["total"] = mfem::Array<int>();
@@ -102,6 +175,70 @@ SystemDriver::SystemDriver(ParFiniteElementSpace &fes,
                                            nStateVars);
   model = mech_operator->GetModel();
 
+   if (options.light_up) {
+      light_up = new LightUpCubic(options.light_hkls,
+                                  options.light_dist_tol,
+                                  options.light_s_dir,
+                                  &fe_space,
+                                  def_grad.GetSpace(),
+                                  *model->GetQFMapping(),
+                                  options.rtmodel,
+                                  options.lattice_basename,
+                                  options.lattice_params);
+   }
+
+   if (mono_def_flag)
+   {
+      const auto nodes = fe_space.GetParMesh()->GetNodes();
+      const int space_dim = fe_space.GetParMesh()->SpaceDimension();
+      const int nnodes = nodes->Size() / space_dim;
+      Vector origin(space_dim * 2, mfem::Device::GetMemoryType()); origin.UseDevice(true); origin = 0.0;
+      // Just scoping variable usage so we can reuse variables if we'd want to
+      // CUDA once again is limiting us from writing normal C++
+      // code so had to move to a helper function for this part...
+      min_max_helper(space_dim, nnodes, class_device, nodes, origin);
+
+      mfem::Array<int> ess_vdofs, ess_tdofs, ess_true_dofs;
+      ess_vdofs.SetSize(fe_space.GetVSize());
+      ess_vdofs = 0;
+      // We need to set the ess_vdofs doing something like ess_vdofs[i] = -1;
+      // However, the compiler thinks ess_vdofs is const when trying to do this in
+      // the later loop, so we turn to lambda fcns to do this so the compiler picks
+      // the right mfem::Array<int>::operator[](int i) fcn.
+      auto f = [&ess_vdofs](int i) { ess_vdofs[i] = -1; };
+      const auto X = mfem::Reshape(nodes->HostRead(), nnodes, space_dim);
+      // For this we would need to set up the true dofs at start of simulation
+      // before anything actually moves
+      // X's dofs would be at global min(x, z)
+      // Y's dofs would be at global min(x, y, z)
+      // Z's dofs would be at global min(z) | global max(z)
+      RAJA::RangeSegment default_range(0, nnodes);
+      RAJA::forall<RAJA::seq_exec>(default_range, [ = ] (int i) {
+         const double x_diff_min = std::abs(X(i, 0) - origin(0));
+         const double y_diff_min = std::abs(X(i, 1) - origin(1));
+         const double z_diff_min = std::abs(X(i, 2) - origin(2));
+         const double z_diff_max = std::abs(X(i, 2) - origin(5));
+         if (x_diff_min < 1e-12 && z_diff_min < 1e-12) {
+            auto dof = fe_space.DofToVDof(i, 0);
+            f(dof);
+         }
+         if (x_diff_min < 1e-12 && y_diff_min < 1e-12 && z_diff_min < 1e-12) {
+            auto dof = fe_space.DofToVDof(i, 1);
+            f(dof);
+         }
+         if (z_diff_min < 1e-12 || z_diff_max < 1e-12) {
+            auto dof = fe_space.DofToVDof(i, 2);
+            f(dof);
+         }
+      });//end loop over nodes
+      // Taken from mfem::FiniteElementSpace::GetEssentialTrueDofs(...)
+ fe_space.Synchronize(ess_vdofs); + fe_space.GetRestrictionMatrix()->BooleanMult(ess_vdofs, ess_tdofs); + fe_space.MarkerToList(ess_tdofs, ess_true_dofs); + mech_operator->UpdateEssTDofs(ess_true_dofs, mono_def_flag); + } + + MPI_Comm_rank(MPI_COMM_WORLD, &myid); ess_bdr_func = new mfem::VectorFunctionRestrictedCoefficient(space_dim, DirBdrFunc, ess_bdr["ess_vel"], ess_bdr_scale); @@ -119,7 +256,7 @@ SystemDriver::SystemDriver(ParFiniteElementSpace &fes, HYPRE_Real rt_val = -10.0; // HYPRE_Real om_val = 1.0; // - int ml = HYPRE_BoomerAMGSetMaxLevels(h_amg, 30); + [[maybe_unused]] int ml = HYPRE_BoomerAMGSetMaxLevels(h_amg, 30); ml = HYPRE_BoomerAMGSetCoarsenType(h_amg, 0); ml = HYPRE_BoomerAMGSetMeasureType(h_amg, 0); ml = HYPRE_BoomerAMGSetStrongThreshold(h_amg, st_val); @@ -129,15 +266,13 @@ SystemDriver::SystemDriver(ParFiniteElementSpace &fes, // int ro = HYPRE_BoomerAMGSetOuterWt(h_amg, om_val); // Dimensionality of our problem ml = HYPRE_BoomerAMGSetNumFunctions(h_amg, 3); - ml = HYPRE_BoomerAMGSetSmoothType(h_amg, 3); + ml = HYPRE_BoomerAMGSetSmoothType(h_amg, 6); ml = HYPRE_BoomerAMGSetSmoothNumLevels(h_amg, 3); ml = HYPRE_BoomerAMGSetSmoothNumSweeps(h_amg, 3); ml = HYPRE_BoomerAMGSetVariant(h_amg, 0); ml = HYPRE_BoomerAMGSetOverlap(h_amg, 0); ml = HYPRE_BoomerAMGSetDomainType(h_amg, 1); ml = HYPRE_BoomerAMGSetSchwarzRlxWeight(h_amg, rt_val); - // Just to quite the compiler warnings... - ml++; prec_amg->SetPrintLevel(0); J_prec = prec_amg; @@ -232,11 +367,27 @@ void SystemDriver::Solve(Vector &x) // We provide an initial guess for what our current coordinates will look like // based on what our last time steps solution was for our velocity field. // The end nodes are updated before the 1st step of the solution here so we're good. - newton_solver->Mult(zero, x); - if (!newton_solver->GetConverged()) + bool succeed_t = false; + bool succeed = false; + try{ + newton_solver->Mult(zero, x); + succeed_t = newton_solver->GetConverged(); + } + catch(const std::exception &exc) { + // catch anything thrown within try block that derives from std::exception + MFEM_WARNING(exc.what()); + succeed_t = false; + } + catch(...) { + MFEM_WARNING("An unknown exception was thrown in Krylov solver step"); + succeed_t = false; + } + MPI_Allreduce(&succeed_t, &succeed, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD); + + if (!succeed) { int iter = 0; - while (!newton_solver->GetConverged() && (iter < 2)) { + while (!succeed && (iter < 4)) { if (myid == 0) { MFEM_WARNING("Solution did not converge decreasing dt by input scale factor"); } @@ -245,7 +396,15 @@ void SystemDriver::Solve(Vector &x) dt_class *= dt_scale; if (dt_class < dt_min) { dt_class = dt_min; } SetDt(dt_class); - newton_solver->Mult(zero, x); + try{ + newton_solver->Mult(zero, x); + succeed_t = newton_solver->GetConverged(); + } + catch (...) 
{
+            succeed_t = false;
+         }
+         MPI_Allreduce(&succeed_t, &succeed, 1, MPI_C_BOOL, MPI_LAND, MPI_COMM_WORLD);
+         iter += 1;
      }
      // Do final converge check outside of this while loop
      const double old_time = solVars.GetTime();
@@ -269,6 +428,7 @@ void SystemDriver::Solve(Vector &x)
      const double factor = niter_scale / nr_iter;
      dt_class *= factor;
      if (dt_class < dt_min) { dt_class = dt_min; }
+      if (dt_class > dt_max) { dt_class = dt_max; }
      if (myid == 0 && newton_solver->GetConverged()) {
         std::cout << "Time "<< solVars.GetTime() << " dt old was " << solVars.GetDTime() << " dt has been updated to " << dt_class << " and changed by a factor of " << factor << std::endl;
      }
@@ -319,8 +479,10 @@ void SystemDriver::SolveInit(const Vector &xprev, Vector &x) const
 }
 
 void SystemDriver::UpdateEssBdr()
 {
-   BCManager::getInstance().updateBCData(ess_bdr, ess_bdr_scale, ess_velocity_gradient, ess_bdr_component);
-   mech_operator->UpdateEssTDofs(ess_bdr["total"]);
+   if (!mono_def_flag) {
+      BCManager::getInstance().updateBCData(ess_bdr, ess_bdr_scale, ess_velocity_gradient, ess_bdr_component);
+      mech_operator->UpdateEssTDofs(ess_bdr["total"], mono_def_flag);
+   }
 }
 
 // In the current form, we could honestly probably make use of velocity as our working array
@@ -337,7 +499,7 @@ void SystemDriver::UpdateVelocity(mfem::ParGridFunction &velocity, mfem::Vector
 
   if (ess_bdr["ess_vgrad"].Sum() > 0)
   {
-      // Just scoping variable useage so we can reuse variables if we'd want to
+      // Just scoping variable usage so we can reuse variables if we'd want to
      {
         const auto nodes = fe_space.GetParMesh()->GetNodes();
         const int space_dim = fe_space.GetParMesh()->SpaceDimension();
@@ -350,7 +512,7 @@ void SystemDriver::UpdateVelocity(mfem::ParGridFunction &velocity, mfem::Vector
         const auto VGRAD = mfem::Reshape(ess_velocity_gradient.Read(), space_dim, space_dim);
         velocity = 0.0;
         auto VT = mfem::Reshape(velocity.ReadWrite(), nnodes, space_dim);
-         
+
         if (!vgrad_origin_flag) {
            vgrad_origin.HostReadWrite();
            // We need to calculate the minimum point in the mesh to get the correct velocity gradient across
@@ -359,7 +521,7 @@ void SystemDriver::UpdateVelocity(mfem::ParGridFunction &velocity, mfem::Vector
            if (class_device == RTModel::CPU) {
               for (int j = 0; j < space_dim; j++) {
                  RAJA::ReduceMin<RAJA::seq_reduce, double> seq_min(std::numeric_limits<double>::max());
-                  RAJA::forall(default_range, [ = ] (int i){
+                  RAJA::forall(default_range, [ = ] (int i){
                     seq_min.min(X(i, j));
                  });
                  vgrad_origin(j) = seq_min.get();
@@ -414,8 +576,9 @@ void SystemDriver::UpdateVelocity(mfem::ParGridFunction &velocity, mfem::Vector
 
      velocity.GetTrueDofs(vel_tdof_tmp);
      mfem::Array<int> ess_tdofs(mech_operator->GetEssentialTrueDofs());
-      fe_space.GetEssentialTrueDofs(ess_bdr["ess_vgrad"], ess_tdofs, ess_bdr_component["ess_vgrad"]);
-
+      if (!mono_def_flag) {
+         fe_space.GetEssentialTrueDofs(ess_bdr["ess_vgrad"], ess_tdofs, ess_bdr_component["ess_vgrad"]);
+      }
      auto I = ess_tdofs.Read();
      auto size = ess_tdofs.Size();
      auto Y = vel_tdofs.ReadWrite();
@@ -517,44 +680,54 @@ void SystemDriver::UpdateModel()
         file.open(avg_def_grad_fname, std::ios_base::app);
         dgrad.Print(file, dgrad.Size());
      }
-   }
+      // Eulerian strain calculation
+      mfem::DenseMatrix estrain(3, 3);
+      {
+         mfem::DenseMatrix def_grad(dgrad.HostReadWrite(), 3, 3);
+         // Would be nice if we could just do this but maybe we should create more kernels for users...
+         // ExaModel::CalcEulerianStrain(estrain, def_grad);
-
-   if (mech_type == MechType::EXACMECH && additional_avgs) {
-      CALI_CXX_MARK_SCOPE("extra_avgs_dp_tensor_computation");
+         /// Eulerian is simply e = 1/2(I - F^(-t)F^(-1))
+         const int dim = 3;
+         mfem::DenseMatrix Finv(dim), Binv(dim);
+         double half = 1.0 / 2.0;
-      model->calcDpMat(def_grad);
-      const QuadratureFunction *qstate_var = &def_grad;
-      // Here we're getting the average stress value
-      Vector dgrad(qstate_var->GetVDim());
-      dgrad = 0.0;
+         CalcInverse(def_grad, Finv);
+         MultAtB(Finv, Finv, Binv);
-      exaconstit::kernel::ComputeVolAvgTensor(fes, qstate_var, dgrad, dgrad.Size(), class_device);
+         estrain = 0.0;
-      std::cout.setf(std::ios::fixed);
-      std::cout.setf(std::ios::showpoint);
-      std::cout.precision(8);
+         for (int j = 0; j < dim; j++) {
+            for (int i = 0; i < dim; i++) {
+               estrain(i, j) -= half * Binv(i, j);
+            }
+            estrain(j, j) += half;
+         }
+      }
-
-      Vector dpgrad(6);
-      dpgrad(0) = dgrad(0);
-      dpgrad(1) = dgrad(4);
-      dpgrad(2) = dgrad(8);
-      dpgrad(3) = dgrad(5);
-      dpgrad(4) = dgrad(2);
-      dpgrad(5) = dgrad(1);
+      mfem::Vector euler_strain(6);
+      euler_strain(0) = estrain(0, 0);
+      euler_strain(1) = estrain(1, 1);
+      euler_strain(2) = estrain(2, 2);
+      euler_strain(3) = estrain(1, 2);
+      euler_strain(4) = estrain(0, 2);
+      euler_strain(5) = estrain(0, 1);
-      int my_id;
-      MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
-      // Now we're going to save off the average dp tensor to a file
+      // Now we're going to save off the average eulerian strain tensor to a file
      if (my_id == 0) {
         std::ofstream file;
-         file.open(avg_dp_tensor_fname, std::ios_base::app);
-         dpgrad.Print(file, dpgrad.Size());
+         file.open(avg_euler_strain_fname, std::ios_base::app);
+         euler_strain.Print(file, euler_strain.Size());
      }
   }
 
   if(postprocessing) {
      CalcElementAvg(evec, model->GetMatVars0());
   }
+
+   if(light_up && (mech_type == MechType::EXACMECH)) {
+      light_up->calculate_lightup_data(*(model->GetMatVars0()), *(model->GetStress0()));
+   }
 }
 
 void SystemDriver::CalcElementAvg(mfem::Vector *elemVal, const mfem::QuadratureFunction *qf)
@@ -895,9 +1068,12 @@ SystemDriver::~SystemDriver()
 {
   delete ess_bdr_func;
   delete J_solver;
-   if (J_prec != NULL) {
+   if (J_prec != nullptr) {
      delete J_prec;
   }
+   if (light_up != nullptr) {
+      delete light_up;
+   }
   delete newton_solver;
   delete mech_operator;
 }
\ No newline at end of file
diff --git a/src/system_driver.hpp b/src/system_driver.hpp
index 0bf7bb5..ba19f47 100644
--- a/src/system_driver.hpp
+++ b/src/system_driver.hpp
@@ -24,6 +24,11 @@ class SimVars
      void SetLastStep(bool last) { last_step = last; }
 };
 
+class LatticeTypeCubic;
+template <class LatticeType>
+class LightUp;
+using LightUpCubic = LightUp<LatticeTypeCubic>;
+
 // The NonlinearMechOperator class is what really drives the entire system.
 // It's responsible for calling the Newton Raphson solver along with several of
 // our post-processing steps. It also contains all of the relevant information
@@ -54,16 +59,15 @@ class SystemDriver
      bool auto_time = false;
      double dt_class = 0.0;
      double dt_min = 0.0;
+      double dt_max = 0.0;
      double dt_scale = 1.0;
-      mfem::QuadratureFunction &def_grad;
+
      std::string avg_stress_fname;
      std::string avg_pl_work_fname;
      std::string avg_def_grad_fname;
-      std::string avg_dp_tensor_fname;
+      std::string avg_euler_strain_fname;
      std::string auto_dt_fname;
-      mfem::QuadratureFunction *evec;
-
      // define a boundary attribute array and initialize to 0
      std::unordered_map<std::string, mfem::Array<int> > ess_bdr;
      mfem::Array2D<double> ess_bdr_scale;
@@ -75,6 +79,12 @@ class SystemDriver
      const bool vgrad_origin_flag = false;
      mfem::Vector vgrad_origin;
 
+      const bool mono_def_flag = false;
+
+      LightUpCubic* light_up = nullptr;
+
+      mfem::QuadratureFunction &def_grad;
+      mfem::QuadratureFunction *evec;
   public:
      SystemDriver(mfem::ParFiniteElementSpace &fes,
diff --git a/src/umat_tests/umat.cxx b/src/umat_tests/umat.cxx
index 466067a..920c822 100644
--- a/src/umat_tests/umat.cxx
+++ b/src/umat_tests/umat.cxx
@@ -20,16 +20,16 @@
 #endif
 
 UMAT_API
-void UMAT(real8 *stress, real8 *statev, real8 *ddsdde,
+void UMAT(real8 * /* stress */, real8 * /* statev */, real8 *ddsdde,
           real8 *sse, real8 *spd, real8 *scd, real8 *rpl,
-          real8 *ddsdt, real8 *drplde, real8 *drpldt,
-          real8 *stran, real8 *dstran, real8 *time,
-          real8 *deltaTime, real8 *tempk, real8 *dtemp, real8 *predef,
-          real8 *dpred, real8 *cmname, int *ndi, int *nshr, int *ntens,
-          int *nstatv, real8 *props, int *nprops, real8 *coords,
-          real8 *drot, real8 *pnewdt, real8 *celent,
-          real8 *dfgrd0, real8 *dfgrd1, int *noel, int *npt,
-          int *layer, int *kspt, int *kstep, int *kinc)
+          real8 * /* ddsdt */, real8 *drplde, real8 *drpldt,
+          real8 * /* stran */, real8 * /* dstran */, real8 * /* time */,
+          real8 * /* deltaTime */, real8 * /* tempk */, real8 * /* dtemp */, real8 * /* predef */,
+          real8 * /* dpred */, real8 * /* cmname */, int * /* ndi */, int * /* nshr */, int * ntens,
+          int * /* nstatv */, real8 * /* props */, int * /* nprops */, real8 * /* coords */,
+          real8 * /* drot */, real8 * /* pnewdt */, real8 * /* celent */,
+          real8 * /* dfgrd0 */, real8 * /* dfgrd1 */, int * /* noel */, int * /* npt */,
+          int * /* layer */, int * /* kspt */, int * /* kstep */, int * /* kinc */)
 {
    sse[0] += 1.0;
    spd[0] += 1.0;
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 66f86a4..917ad10 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -1,25 +1,44 @@
 #------------------------------------------------------------------------------
 # Dependencies
 #------------------------------------------------------------------------------
+
 set(EXACONSTIT_TEST_DEPENDS)
 exaconstit_fill_depends_list(LIST_NAME EXACONSTIT_TEST_DEPENDS
-                             DEPENDS_ON mfem ecmech raja mpi snls)
+                             DEPENDS_ON mfem ecmech RAJA mpi snls)
 
-if(ENABLE_OPENMP)
-    list(APPEND EXACONSTIT_TEST_DEPENDS openmp)
+if (${BLT_VERSION} VERSION_GREATER_EQUAL 0.6.0)
+    if(ENABLE_CUDA)
+        list(APPEND EXACONSTIT_TEST_DEPENDS blt::cuda_runtime blt::cuda CUDA::cublas)
+    endif()
+    if(ENABLE_OPENMP)
+        list(APPEND EXACONSTIT_TEST_DEPENDS blt::openmp)
+    endif()
+else()
+    if(ENABLE_CUDA)
+        list(APPEND EXACONSTIT_TEST_DEPENDS cuda cuda_runtime CUDA::cublas)
+    endif()
+    if(ENABLE_OPENMP)
+        list(APPEND EXACONSTIT_TEST_DEPENDS openmp)
+    endif()
 endif()
 
-if(ENABLE_CUDA)
-    list(APPEND EXACONSTIT_TEST_DEPENDS cuda)
+if(ENABLE_HIP)
+    list(APPEND EXACONSTIT_TEST_DEPENDS blt::hip blt::hip_runtime hipblas rocsparse rocrand)
 endif()
 
-if(ENABLE_HIP)
-    list(APPEND EXACONSTIT_TEST_DEPENDS blt::hip 
blt::hip_runtime)
+if (SNLS_USE_RAJA_PORT_SUITE)
+    list(APPEND EXACONSTIT_TEST_DEPENDS chai umpire camp fmt::fmt)
+endif()
+
+if(ENABLE_CALIPER)
+    list(APPEND EXACONSTIT_TEST_DEPENDS caliper)
 endif()
 
 list(APPEND EXACONSTIT_TEST_DEPENDS exaconstit_static)
 
+message("-- EXACONSTIT_TEST_DEPENDS: ${EXACONSTIT_TEST_DEPENDS}")
+
 blt_add_executable(NAME test_pa
                    SOURCES mechanics_test.cpp
                    OUTPUT_DIR ${TEST_OUTPUT_DIR}
@@ -35,6 +54,11 @@ blt_add_executable(NAME test_grad_oper
 blt_add_test(NAME test_gradient_operation
              COMMAND test_grad_oper)
+
+if (NOT DEFINED PYTHON_EXECUTABLE)
+    message(FATAL_ERROR "cmake: PYTHON_EXECUTABLE must be defined when running the test suite; current value: ${PYTHON_EXECUTABLE}")
+endif()
+
 ## Borrowed from Conduit https://github.com/LLNL/conduit
 ## The license file can be found under
 ##------------------------------------------------------------------------------
diff --git a/test/data/voce_ea.toml b/test/data/voce_ea.toml
index 0dd5866..de649bc 100644
--- a/test/data/voce_ea.toml
+++ b/test/data/voce_ea.toml
@@ -96,8 +96,8 @@ Version = "0.6.0"
    avg_def_grad_fname = "test_voce_ea_def_grad.txt"
    # Optional - the file name for our plastic work file
    avg_pl_work_fname = "test_voce_ea_pl_work.txt"
-    # Optional - the file name for our average plastic deformation rate file
-    avg_dp_tensor_fname = "test_voce_ea_dp_tensor.txt"
+    # Optional - the file name for our average eulerian strain file
+    avg_euler_strain_fname = "test_voce_ea_euler_strain.txt"
 [Solvers]
 # Option for how our assembly operation is conducted. Possible choices are
 # FULL, PA, EA
diff --git a/test/data/voce_ea_cs.toml b/test/data/voce_ea_cs.toml
index 4769a8a..621868f 100644
--- a/test/data/voce_ea_cs.toml
+++ b/test/data/voce_ea_cs.toml
@@ -98,7 +98,7 @@ Version = "0.6.0"
    additional_avgs = true
    avg_def_grad_fname = "test_voce_ea_cs_def_grad.txt"
    avg_pl_work_fname = "test_voce_ea_cs_pl_work.txt"
-    avg_dp_tensor_fname = "test_voce_ea_cs_dp_tensor.txt"
+    avg_euler_strain_fname = "test_voce_ea_cs_euler_strain.txt"
 [Solvers]
 # Option for how our assembly operation is conducted. 
Possible choices are # FULL, PA, EA diff --git a/test/data/voce_ea_cs_dp_tensor.txt b/test/data/voce_ea_cs_dp_tensor.txt deleted file mode 100644 index 3c747d2..0000000 --- a/test/data/voce_ea_cs_dp_tensor.txt +++ /dev/null @@ -1,40 +0,0 @@ -0 0 0 0 0 0 -0 0 0 0 0 0 --5.38581e-07 -8.47911e-07 1.38649e-06 8.73156e-08 2.60148e-08 1.75994e-07 --0.000136341 -0.000175994 0.000312335 -5.70658e-06 -6.2024e-06 1.75496e-06 --0.000338795 -0.000436589 0.000775383 -9.61354e-06 -1.7594e-05 -8.99596e-06 --0.000394006 -0.000514964 0.00090897 -6.80695e-06 -1.57556e-05 -1.65202e-05 --0.000406899 -0.000539936 0.000946835 -4.3804e-06 -1.34849e-05 -1.79593e-05 --0.000412107 -0.000549185 0.000961292 -2.8562e-06 -1.2247e-05 -1.71522e-05 --0.000415252 -0.000552968 0.00096822 -1.58205e-06 -1.16278e-05 -1.49155e-05 --0.000416864 -0.000555209 0.000972073 -5.58795e-07 -1.08824e-05 -1.24517e-05 --0.000417578 -0.000556923 0.000974501 1.7424e-07 -1.01776e-05 -1.05757e-05 --0.000418044 -0.000558083 0.000976127 7.15277e-07 -9.57615e-06 -8.90661e-06 --0.000418492 -0.000558757 0.00097725 1.01558e-06 -9.17412e-06 -7.66602e-06 --0.000418818 -0.000559223 0.00097804 1.14277e-06 -8.85417e-06 -7.16695e-06 --0.00041904 -0.000559543 0.000978583 1.21501e-06 -8.59461e-06 -7.09442e-06 --0.00041915 -0.000559816 0.000978967 1.25482e-06 -8.37673e-06 -7.10153e-06 --0.000419212 -0.000560036 0.000979248 1.26211e-06 -8.20321e-06 -7.11358e-06 --0.000419262 -0.000560187 0.000979449 1.26209e-06 -8.09109e-06 -7.19843e-06 --0.000419395 -0.000560205 0.0009796 1.27336e-06 -7.99619e-06 -7.29628e-06 --0.000419549 -0.000560177 0.000979726 1.31111e-06 -7.89199e-06 -7.46051e-06 --0.0004196 -0.000560233 0.000979833 1.37035e-06 -7.79122e-06 -7.73285e-06 --0.000419537 -0.000560385 0.000979922 1.43551e-06 -7.69793e-06 -8.02336e-06 --0.000419283 -0.000560677 0.00097996 1.39966e-06 -7.50281e-06 -8.46621e-06 --0.000419174 -0.000560895 0.000980069 1.1312e-06 -7.37094e-06 -8.76174e-06 --0.000419059 -0.000561097 0.000980155 8.6436e-07 -7.34611e-06 -8.87614e-06 --0.000418922 -0.000561311 0.000980232 8.09362e-07 -7.35151e-06 -8.91366e-06 --0.000418715 -0.000561596 0.000980311 9.15126e-07 -7.34345e-06 -9.04732e-06 --0.000418455 -0.000561933 0.000980388 1.08944e-06 -7.30285e-06 -9.216e-06 --0.000417752 -0.000562561 0.000980313 1.49948e-06 -7.23261e-06 -9.32057e-06 --0.00041704 -0.000563372 0.000980413 1.98557e-06 -7.18139e-06 -9.16016e-06 --0.000416384 -0.000564124 0.000980507 2.48142e-06 -7.23407e-06 -8.86977e-06 --0.000415861 -0.000564733 0.000980593 2.86196e-06 -7.38134e-06 -8.54578e-06 --0.000415709 -0.000565103 0.000980812 3.00723e-06 -7.42995e-06 -8.40691e-06 --0.000414958 -0.000565572 0.000980531 3.32663e-06 -7.2694e-06 -8.17583e-06 --0.000414553 -0.000566156 0.000980709 3.50609e-06 -7.03281e-06 -8.10256e-06 --0.000414124 -0.000566673 0.000980797 3.6708e-06 -6.80649e-06 -8.02682e-06 --0.000413504 -0.000567186 0.00098069 3.84579e-06 -6.47017e-06 -7.93867e-06 --0.000413099 -0.000567721 0.00098082 4.01546e-06 -6.25648e-06 -7.85517e-06 --0.000412971 -0.000567976 0.000980947 4.18625e-06 -6.11806e-06 -7.73856e-06 --0.000412987 -0.000568086 0.000981073 4.31263e-06 -5.97774e-06 -7.60589e-06 diff --git a/test/data/voce_ea_cs_euler_strain.txt b/test/data/voce_ea_cs_euler_strain.txt new file mode 100644 index 0000000..f871595 --- /dev/null +++ b/test/data/voce_ea_cs_euler_strain.txt @@ -0,0 +1,40 @@ +-1.63349e-06 -1.77848e-06 4.99996e-06 1.10839e-09 -9.90119e-08 -7.40345e-09 +-6.53528e-05 -7.12165e-05 0.000199941 4.13525e-09 -3.98395e-06 -2.68166e-07 +-0.000101836 
-0.000113555 0.000299886 -6.06486e-07 -6.13847e-06 -4.72083e-07 +-0.000143207 -0.000165133 0.000399812 -2.06942e-06 -8.55416e-06 -1.21935e-06 +-0.000185479 -0.000219657 0.000499717 -3.16458e-06 -1.02297e-05 -2.25114e-06 +-0.000227822 -0.000275286 0.000599602 -3.7753e-06 -1.14209e-05 -3.43117e-06 +-0.000270278 -0.00033131 0.000699467 -4.14131e-06 -1.2439e-05 -4.63733e-06 +-0.000312858 -0.000387489 0.000799313 -4.35842e-06 -1.34151e-05 -5.74948e-06 +-0.000355487 -0.000443792 0.000899138 -4.48453e-06 -1.43578e-05 -6.72576e-06 +-0.000398121 -0.000500207 0.000998943 -4.54305e-06 -1.52599e-05 -7.58795e-06 +-0.000440758 -0.0005567 0.00109873 -4.54556e-06 -1.61193e-05 -8.34536e-06 +-0.000483406 -0.000613242 0.00119849 -4.51555e-06 -1.69475e-05 -9.0236e-06 +-0.000526064 -0.000669821 0.00129824 -4.46844e-06 -1.77475e-05 -9.67236e-06 +-0.000568729 -0.000726429 0.00139797 -4.40785e-06 -1.85246e-05 -1.03246e-05 +-0.000611397 -0.000783066 0.00149767 -4.33569e-06 -1.9286e-05 -1.09868e-05 +-0.000654063 -0.00083973 0.00159736 -4.2525e-06 -2.00371e-05 -1.16563e-05 +-0.00069673 -0.000896416 0.00169702 -4.15924e-06 -2.07819e-05 -1.2337e-05 +-0.000739406 -0.000953114 0.00179667 -4.05768e-06 -2.15206e-05 -1.30305e-05 +-0.000782093 -0.00100982 0.00189629 -3.94741e-06 -2.2252e-05 -1.37428e-05 +-0.000824783 -0.00106654 0.0019959 -3.82785e-06 -2.29779e-05 -1.44838e-05 +-0.000867467 -0.00112328 0.00209549 -3.69934e-06 -2.37004e-05 -1.52549e-05 +-0.000952778 -0.00123685 0.00229459 -3.44242e-06 -2.5127e-05 -1.68926e-05 +-0.00103807 -0.00135049 0.00249361 -3.2348e-06 -2.65396e-05 -1.86044e-05 +-0.00112334 -0.00146421 0.00269255 -3.07837e-06 -2.79545e-05 -2.03537e-05 +-0.00120859 -0.001578 0.00289142 -2.93089e-06 -2.93781e-05 -2.21209e-05 +-0.0012938 -0.00169187 0.0030902 -2.76157e-06 -3.08081e-05 -2.39206e-05 +-0.00137898 -0.00180584 0.00328891 -2.55616e-06 -3.22364e-05 -2.57574e-05 +-0.00154906 -0.00203409 0.00368604 -1.97619e-06 -3.50698e-05 -2.94873e-05 +-0.00171889 -0.00226276 0.00408285 -1.19448e-06 -3.78947e-05 -3.31704e-05 +-0.00188851 -0.00249184 0.00447935 -2.04135e-07 -4.07583e-05 -3.67515e-05 +-0.00205795 -0.00272126 0.00487553 9.51093e-07 -4.36892e-05 -4.0219e-05 +-0.00214267 -0.00283609 0.00507352 1.56115e-06 -4.51659e-05 -4.19289e-05 +-0.00239639 -0.00318099 0.0056669 3.59772e-06 -4.95122e-05 -4.69312e-05 +-0.00260769 -0.00346888 0.00616086 5.389e-06 -5.30259e-05 -5.10781e-05 +-0.00281885 -0.00375719 0.00665433 7.26917e-06 -5.64337e-05 -5.51977e-05 +-0.00313523 -0.00419028 0.00739352 1.02299e-05 -6.13082e-05 -6.13181e-05 +-0.00345145 -0.00462413 0.00813159 1.33271e-05 -6.6035e-05 -6.7382e-05 +-0.00376775 -0.0050585 0.00886857 1.65608e-05 -7.06577e-05 -7.33603e-05 +-0.00408421 -0.0054933 0.00960444 1.9897e-05 -7.51748e-05 -7.92467e-05 +-0.00450628 -0.00607353 0.0105838 2.44486e-05 -8.10563e-05 -8.69482e-05 diff --git a/test/data/voce_ea_dp_tensor.txt b/test/data/voce_ea_dp_tensor.txt deleted file mode 100644 index 88c5ad4..0000000 --- a/test/data/voce_ea_dp_tensor.txt +++ /dev/null @@ -1,40 +0,0 @@ -0 0 0 0 0 0 -0 0 0 0 0 0 --5.38528e-07 -8.4783e-07 1.38636e-06 8.73124e-08 2.60134e-08 1.75986e-07 --0.000136284 -0.00017592 0.000312204 -5.70459e-06 -6.19914e-06 1.75542e-06 --0.000338661 -0.000436416 0.000775078 -9.61124e-06 -1.75887e-05 -8.98925e-06 --0.000393835 -0.000514735 0.000908571 -6.80655e-06 -1.57515e-05 -1.65118e-05 --0.00040669 -0.000539653 0.000946344 -4.38041e-06 -1.34801e-05 -1.79505e-05 --0.000411855 -0.00054885 0.000960705 -2.85698e-06 -1.22407e-05 -1.7145e-05 --0.000414959 -0.000552577 
0.000967536 -1.58369e-06 -1.16212e-05 -1.49112e-05 --0.00041653 -0.000554762 0.000971291 -5.61052e-07 -1.08759e-05 -1.24483e-05 --0.000417203 -0.000556419 0.000973622 1.71594e-07 -1.01711e-05 -1.05726e-05 --0.000417626 -0.000557524 0.00097515 7.12506e-07 -9.56882e-06 -8.90516e-06 --0.000418032 -0.000558143 0.000976176 1.01343e-06 -9.16595e-06 -7.66227e-06 --0.000418316 -0.000558552 0.000976868 1.14083e-06 -8.84546e-06 -7.15942e-06 --0.000418497 -0.000558817 0.000977314 1.21298e-06 -8.58527e-06 -7.08483e-06 --0.000418566 -0.000559034 0.0009776 1.25286e-06 -8.36681e-06 -7.09145e-06 --0.000418586 -0.000559198 0.000977784 1.2602e-06 -8.1923e-06 -7.10215e-06 --0.000418594 -0.000559294 0.000977887 1.26004e-06 -8.07921e-06 -7.18557e-06 --0.000418684 -0.000559258 0.000977941 1.27089e-06 -7.98391e-06 -7.28213e-06 --0.000418795 -0.000559174 0.000977969 1.30795e-06 -7.87941e-06 -7.44331e-06 --0.000418807 -0.000559171 0.000977979 1.36661e-06 -7.77799e-06 -7.71269e-06 --0.000418704 -0.000559266 0.00097797 1.43162e-06 -7.68436e-06 -8.00186e-06 --0.000418408 -0.0005595 0.000977908 1.39885e-06 -7.48942e-06 -8.44379e-06 --0.000418216 -0.000559607 0.000977823 1.1331e-06 -7.35472e-06 -8.73863e-06 --0.000418017 -0.000559697 0.000977714 8.65028e-07 -7.32767e-06 -8.8533e-06 --0.000417799 -0.000559797 0.000977596 8.06226e-07 -7.33137e-06 -8.88792e-06 --0.000417512 -0.000559968 0.00097748 9.0927e-07 -7.32272e-06 -9.01725e-06 --0.000417171 -0.00056019 0.000977361 1.08132e-06 -7.28126e-06 -9.18369e-06 --0.000416389 -0.0005607 0.00097709 1.48713e-06 -7.20964e-06 -9.29059e-06 --0.000415518 -0.000561282 0.0009768 1.969e-06 -7.15506e-06 -9.13103e-06 --0.000414698 -0.000561807 0.000976505 2.46085e-06 -7.20147e-06 -8.84051e-06 --0.000414012 -0.00056219 0.000976202 2.84113e-06 -7.34511e-06 -8.51469e-06 --0.0004137 -0.000562338 0.000976038 2.98538e-06 -7.39286e-06 -8.37152e-06 --0.000412868 -0.000562689 0.000975557 3.3034e-06 -7.23922e-06 -8.13771e-06 --0.000412221 -0.00056293 0.000975151 3.48046e-06 -7.00132e-06 -8.05831e-06 --0.00041159 -0.000563164 0.000974754 3.64213e-06 -6.77339e-06 -7.98012e-06 --0.00041077 -0.000563392 0.000974162 3.81493e-06 -6.43741e-06 -7.88888e-06 --0.000410059 -0.000563507 0.000973565 3.97826e-06 -6.21608e-06 -7.79976e-06 --0.000409614 -0.000563353 0.000972968 4.14527e-06 -6.07575e-06 -7.6826e-06 --0.000409328 -0.000563041 0.000972369 4.26865e-06 -5.9319e-06 -7.54415e-06 diff --git a/test/data/voce_ea_euler_strain.txt b/test/data/voce_ea_euler_strain.txt new file mode 100644 index 0000000..5f5abec --- /dev/null +++ b/test/data/voce_ea_euler_strain.txt @@ -0,0 +1,40 @@ +-1.63349e-06 -1.77848e-06 4.99996e-06 1.10839e-09 -9.90119e-08 -7.40345e-09 +-6.53525e-05 -7.12162e-05 0.00019994 4.13742e-09 -3.98393e-06 -2.68166e-07 +-0.000101827 -0.000113544 0.000299865 -6.0625e-07 -6.13796e-06 -4.71953e-07 +-0.000143186 -0.000165106 0.000399761 -2.06871e-06 -8.55311e-06 -1.21891e-06 +-0.00018544 -0.000219607 0.000499626 -3.16389e-06 -1.02285e-05 -2.25016e-06 +-0.000227763 -0.000275207 0.000599461 -3.77469e-06 -1.14194e-05 -3.42951e-06 +-0.000270192 -0.000331198 0.000699267 -4.1408e-06 -1.24369e-05 -4.63504e-06 +-0.000312742 -0.000387338 0.000799042 -4.35803e-06 -1.34125e-05 -5.74673e-06 +-0.000355337 -0.000443595 0.000898788 -4.48426e-06 -1.43545e-05 -6.72262e-06 +-0.000397933 -0.000499959 0.000998504 -4.54297e-06 -1.52559e-05 -7.58444e-06 +-0.000440528 -0.000556395 0.00109819 -4.5457e-06 -1.61146e-05 -8.34156e-06 +-0.000483129 -0.000612875 0.00119785 -4.51585e-06 -1.69421e-05 -9.01935e-06 +-0.000525736 
-0.000669386 0.00129747 -4.46889e-06 -1.77412e-05 -9.66736e-06 +-0.000568346 -0.000725921 0.00139707 -4.40847e-06 -1.85175e-05 -1.03186e-05 +-0.000610954 -0.000782478 0.00149663 -4.33652e-06 -1.92779e-05 -1.09798e-05 +-0.000653556 -0.000839057 0.00159617 -4.25357e-06 -2.0028e-05 -1.16482e-05 +-0.000696155 -0.000895653 0.00169568 -4.16057e-06 -2.07716e-05 -1.23276e-05 +-0.000738758 -0.000952254 0.00179515 -4.0593e-06 -2.15091e-05 -1.30197e-05 +-0.000781369 -0.00100886 0.0018946 -3.94936e-06 -2.22394e-05 -1.37304e-05 +-0.000823978 -0.00106547 0.00199402 -3.83019e-06 -2.29639e-05 -1.44694e-05 +-0.000866576 -0.0011221 0.00209341 -3.70207e-06 -2.3685e-05 -1.52384e-05 +-0.00095171 -0.00123542 0.00229209 -3.44535e-06 -2.51089e-05 -1.68716e-05 +-0.00103681 -0.00134881 0.00249066 -3.23733e-06 -2.65182e-05 -1.85787e-05 +-0.00112186 -0.00146224 0.00268911 -3.08076e-06 -2.79295e-05 -2.03232e-05 +-0.00120688 -0.00157572 0.00288744 -2.93388e-06 -2.9349e-05 -2.20853e-05 +-0.00129185 -0.00168927 0.00308565 -2.7657e-06 -3.07748e-05 -2.38788e-05 +-0.00137676 -0.00180288 0.00328374 -2.56191e-06 -3.21988e-05 -2.57092e-05 +-0.00154629 -0.00203037 0.00367957 -1.9869e-06 -3.50231e-05 -2.94268e-05 +-0.0017155 -0.0022582 0.00407493 -1.21194e-06 -3.78373e-05 -3.30979e-05 +-0.00188443 -0.00248633 0.00446981 -2.30064e-07 -4.06876e-05 -3.66668e-05 +-0.00205312 -0.00271471 0.00486423 9.16467e-07 -4.3604e-05 -4.01213e-05 +-0.00213742 -0.00282898 0.00506126 1.52197e-06 -4.50733e-05 -4.18239e-05 +-0.00238986 -0.00317211 0.00565164 3.54417e-06 -4.94011e-05 -4.68024e-05 +-0.00259998 -0.00345836 0.00614282 5.32225e-06 -5.28988e-05 -5.09262e-05 +-0.00280985 -0.00374487 0.00663327 7.1875e-06 -5.62898e-05 -5.50216e-05 +-0.00312412 -0.00417507 0.00736757 1.01245e-05 -6.11391e-05 -6.11032e-05 +-0.00343802 -0.00460568 0.00810024 1.31927e-05 -6.5835e-05 -6.71242e-05 +-0.00375173 -0.0050365 0.00883127 1.63945e-05 -7.04259e-05 -7.3059e-05 +-0.00406538 -0.00546743 0.00956068 1.96964e-05 -7.49082e-05 -7.88969e-05 +-0.00448339 -0.00604204 0.0105307 2.42006e-05 -8.07412e-05 -8.65325e-05 diff --git a/test/test_mechanics.py b/test/test_mechanics.py index 14bae0a..dab0fac 100644 --- a/test/test_mechanics.py +++ b/test/test_mechanics.py @@ -8,6 +8,21 @@ import unittest from sys import platform +# Taken from https://github.com/orgs/community/discussions/49224 +# but modified slightly as we don't need as strict of a req as the OP in that thread +# import requests +# +def is_on_github_actions(): + if "CI" not in os.environ or not os.environ["CI"] or "GITHUB_RUN_ID" not in os.environ: + return False + + # headers = {"Authorization": f"Bearer {os.environ['GITHUB_TOKEN']}"} + # url = f"https://api.github.com/repos/{os.environ['GITHUB_REPOSITORY']}/actions/runs/{os.environ['GITHUB_RUN_ID']}" + # response = requests.get(url, headers=headers) + + # return response.status_code == 200 and "workflow_runs" in response.json() + return True + def check_stress(ans_pwd, test_pwd, test_case): answers = [] tests = [] @@ -27,7 +42,7 @@ def check_stress(ans_pwd, test_pwd, test_case): err += abs(float(a) - float(t)) err = err / i if (err > 1.0e-10): - raise ValueError("The following test case failed: ", test_case) + raise ValueError("The following test case failed: ", test_case, " error ", err) return True def runSystemCommands(params): @@ -35,7 +50,10 @@ def runSystemCommands(params): print("Now running test case: " + test) result = subprocess.run('pwd', stdout=subprocess.PIPE) pwd = result.stdout.decode('utf-8') - cmd = 'mpirun -np 2 ' + pwd.rstrip() + 
'/../bin/mechanics -opt ' + test + if not is_on_github_actions(): + cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + else: + cmd = 'mpirun -np 1 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans tresult = test.split(".")[0] @@ -85,18 +103,23 @@ def runExtraSystemCommands(params): print("Now running test case: " + test) result = subprocess.run('pwd', stdout=subprocess.PIPE) pwd = result.stdout.decode('utf-8') - cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + if not is_on_github_actions(): + cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + else: + cmd = 'mpirun -np 1 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[0] tresult = test.split(".")[0] test_pwd = pwd.rstrip() + '/test_'+tresult+'_stress.txt' check_stress(ans_pwd, test_pwd, test) cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_stress.txt' + subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[1] tresult = test.split(".")[0] test_pwd = pwd.rstrip() + '/test_'+tresult+'_def_grad.txt' check_stress(ans_pwd, test_pwd, test) cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_def_grad.txt' + subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[2] tresult = test.split(".")[0] test_pwd = pwd.rstrip() + '/test_'+tresult+'_pl_work.txt' @@ -105,16 +128,16 @@ def runExtraSystemCommands(params): subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[3] tresult = test.split(".")[0] - test_pwd = pwd.rstrip() + '/test_'+tresult+'_dp_tensor.txt' + test_pwd = pwd.rstrip() + '/test_'+tresult+'_euler_strain.txt' check_stress(ans_pwd, test_pwd, test) - cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_dp_tensor.txt' + cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_euler_strain.txt' subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) return True def runExtra(): test_cases = ["voce_ea.toml"] - test_results = [("voce_ea_stress.txt", "voce_ea_def_grad.txt", "voce_ea_pl_work.txt", "voce_ea_dp_tensor.txt")] + test_results = [("voce_ea_stress.txt", "voce_ea_def_grad.txt", "voce_ea_pl_work.txt", "voce_ea_euler_strain.txt")] result = subprocess.run('pwd', stdout=subprocess.PIPE) @@ -126,7 +149,7 @@ def runExtra(): cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_stress.txt ' + pwd.rstrip() \ + '/test_'+tresult+'_pl_work.txt ' + pwd.rstrip() \ + '/test_'+tresult+'_def_grad.txt' + pwd.rstrip() \ - + '/test_'+tresult+'_dp_tensor.txt' + + '/test_'+tresult+'_euler_strain.txt' + pwd.rstrip() result = subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) params = zip(test_cases, test_results) diff --git a/test/test_mechanics_const_strain_rate.py b/test/test_mechanics_const_strain_rate.py index 5d7919c..8385dd7 100644 --- a/test/test_mechanics_const_strain_rate.py +++ b/test/test_mechanics_const_strain_rate.py @@ -42,7 +42,7 @@ def check_stress(ans_pwd, test_pwd, test_case): err += abs(float(a) - float(t)) err = err / i if (err > 1.0e-10): - raise ValueError("The following test case failed: ", test_case) + raise ValueError("The following test case failed: ", test_case, " error ", err) return True def runSystemCommands(params): @@ -50,7 +50,10 @@ def runSystemCommands(params): print("Now running test case: " + test) result = 
subprocess.run('pwd', stdout=subprocess.PIPE) pwd = result.stdout.decode('utf-8') - cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + if not is_on_github_actions(): + cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + else: + cmd = 'mpirun -np 1 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans tresult = test.split(".")[0] @@ -98,18 +101,23 @@ def runExtraSystemCommands(params): print("Now running test case: " + test) result = subprocess.run('pwd', stdout=subprocess.PIPE) pwd = result.stdout.decode('utf-8') - cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + if not is_on_github_actions(): + cmd = 'mpirun -np 2 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test + else: + cmd = 'mpirun -np 1 ' + pwd.rstrip() + '/../bin/mechanics -opt ' + test subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[0] tresult = test.split(".")[0] test_pwd = pwd.rstrip() + '/test_'+tresult+'_stress.txt' check_stress(ans_pwd, test_pwd, test) cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_stress.txt' + subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[1] tresult = test.split(".")[0] test_pwd = pwd.rstrip() + '/test_'+tresult+'_def_grad.txt' check_stress(ans_pwd, test_pwd, test) cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_def_grad.txt' + subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[2] tresult = test.split(".")[0] test_pwd = pwd.rstrip() + '/test_'+tresult+'_pl_work.txt' @@ -118,16 +126,16 @@ def runExtraSystemCommands(params): subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) ans_pwd = pwd.rstrip() + '/' + ans[3] tresult = test.split(".")[0] - test_pwd = pwd.rstrip() + '/test_'+tresult+'_dp_tensor.txt' + test_pwd = pwd.rstrip() + '/test_'+tresult+'_euler_strain.txt' check_stress(ans_pwd, test_pwd, test) - cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_dp_tensor.txt' + cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_euler_strain.txt' subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) return True def runExtra(): test_cases = ["voce_ea_cs.toml"] - test_results = [("voce_ea_cs_stress.txt", "voce_ea_cs_def_grad.txt", "voce_ea_cs_pl_work.txt", "voce_ea_cs_dp_tensor.txt")] + test_results = [("voce_ea_cs_stress.txt", "voce_ea_cs_def_grad.txt", "voce_ea_cs_pl_work.txt", "voce_ea_cs_euler_strain.txt")] result = subprocess.run('pwd', stdout=subprocess.PIPE) @@ -139,7 +147,7 @@ def runExtra(): cmd = 'rm ' + pwd.rstrip() + '/test_'+tresult+'_stress.txt ' + pwd.rstrip() \ + '/test_'+tresult+'_pl_work.txt ' + pwd.rstrip() \ + '/test_'+tresult+'_def_grad.txt' + pwd.rstrip() \ - + '/test_'+tresult+'_dp_tensor.txt' + + '/test_'+tresult+'_euler_strain.txt' + pwd.rstrip() result = subprocess.run(cmd.rstrip(), stdout=subprocess.PIPE, shell=True) params = zip(test_cases, test_results) @@ -162,12 +170,8 @@ def runExtra(): class TestUnits(unittest.TestCase): def test_all_cases(self): actual = run() - # For some reason this test is giving issues on the Github CI - # I can't reproduce the issue on the multiple OS's, compiler, - # / systems I have access to. So, I'm going to disable it... 
-        if not is_on_github_actions():
-            actualExtra = runExtra()
-            self.assertTrue(actualExtra)
+        actualExtra = runExtra()
+        self.assertTrue(actualExtra)
         self.assertTrue(actual)
 
diff --git a/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/lib.rs b/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/lib.rs
index 98ad79d..97794af 100644
--- a/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/lib.rs
+++ b/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/lib.rs
@@ -6,8 +6,11 @@ extern crate data_reader;
 #[cfg(feature = "polars")]
 extern crate polars;
 
+#[cfg(feature = "python")]
 extern crate numpy;
+#[cfg(feature = "python")]
 extern crate pyo3;
 
 pub mod coarsen;
-pub mod pycoarsen;
+#[cfg(feature = "python")]
+pub mod pycoarsen;
\ No newline at end of file
diff --git a/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/pycoarsen/mod.rs b/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/pycoarsen/mod.rs
index 1cb286d..3ced67c 100644
--- a/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/pycoarsen/mod.rs
+++ b/workflows/Stage3/pre_main_post_script/voxel_coarsen/src/pycoarsen/mod.rs
@@ -11,10 +11,10 @@ fn rust_voxel_coarsen(_py: Python<'_>, m: &Bound<'_, PyModule>) -> PyResult<()>
         py: Python<'py>,
         file: &str,
         coarsen_size: usize,
-    ) -> anyhow::Result<((usize, usize, usize), Bound<'py, PyArray1>)> {
+    ) -> anyhow::Result<((usize, usize, usize), Bound<'py, PyArray1>)> {
         let result = voxel_coarsen(file, coarsen_size)?;
         Ok((result.0, PyArray1::from_vec_bound(py, result.1)))
     }
     Ok(())
-}
+}
\ No newline at end of file
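
As a quick cross-check of the new avg_euler_strain output introduced in SystemDriver::UpdateModel above (e = 1/2(I - F^(-T) F^(-1))) and of the column ordering written to each row of the text file, here is a minimal NumPy sketch; the sample deformation gradient below is made up purely for illustration and is not part of the patch.

```python
import numpy as np

# Made-up sample deformation gradient, for illustration only.
F = np.array([[1.010, 0.002, 0.000],
              [0.000, 0.998, 0.000],
              [0.000, 0.000, 1.003]])

Finv = np.linalg.inv(F)
# Eulerian (Almansi) strain: e = 1/2 (I - F^{-T} F^{-1}), matching the C++ loop in UpdateModel.
e = 0.5 * (np.eye(3) - Finv.T @ Finv)

# Column ordering used for each row of the avg_euler_strain file,
# taken from the euler_strain(0..5) assignments in the patch:
# [e11, e22, e33, e23, e13, e12]
row = [e[0, 0], e[1, 1], e[2, 2], e[1, 2], e[0, 2], e[0, 1]]
print(["{:.6e}".format(v) for v in row])
```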