diff --git a/.github/workflows/CI_python.yaml b/.github/workflows/CI_python.yaml new file mode 100644 index 0000000..c7df53f --- /dev/null +++ b/.github/workflows/CI_python.yaml @@ -0,0 +1,56 @@ +name: Python Code Checks + +on: + workflow_dispatch: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + cd ./hmatools + python -m pip install --use-pep517 -e . + python -m pip install black flake8 pylint mypy pytest + python -m pip install types-seaborn pandas-stubs matplotlib-stubs + + - name: Format code with Black + run: | + cd ./hmatools/python + black --check . + + - name: Lint with Flake8 + run: | + cd ./hmatools/python + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=100 --statistics + + - name: Lint with Pylint + run: | + cd ./hmatools/python + pylint . + + - name: Type check with Mypy + run: | + cd ./hmatools/python + mypy . --explicit-package-bases + + - name: Run tests + run: | + pytest diff --git a/.github/workflows/OVERVIEW.md b/.github/workflows/OVERVIEW.md new file mode 100644 index 0000000..52314d7 --- /dev/null +++ b/.github/workflows/OVERVIEW.md @@ -0,0 +1,51 @@ +# GitHub Actions Workflows Overview + +## 1. Run FastCaloSim Benchmarking (`benchmarking.yaml`) + +The `benchmarking.yaml` workflow is designed to automate the benchmarking of FastCaloSim on different environments, including Perlmutter and Exalearn. Steps: + +- **Build and Push Images**: + - This step is optional and controlled via workflow dispatch (`run_build` input). + - If triggered, the job builds and pushes Docker images using a self-hosted runner, and logs are uploaded for review. 
+ +- **Run FastCaloSim**: + - This job runs FastCaloSim on different platforms using a matrix strategy to handle multiple configurations (Perlmutter and Exalearn). + - It sets up environment variables specific to each runner and executes the simulation scripts. + - Log files from each run are compressed into tarballs and uploaded for later use. + +- **Postprocess Log Files**: + - This job depends on the completion of the `run` job. + - It downloads log files from the previous runs, decompresses them, and runs a postprocessing script using HMATools. + - The processed logs and resulting JSON files are uploaded as artifacts. + +- **Plot Results**: + - This job depends on the completion of the `postprocess` job. + - It downloads the processed JSON files and runs a plotting script using HMATools. + - The generated plots and associated logs are uploaded for analysis. + +For postprocessing and plotting, a Docker container with HMATools is built and run to execute the steps. + +## 2. Python Code Checks (`CI_python.yaml`) + +The `CI_python.yaml` workflow is a Continuous Integration (CI) pipeline focused on ensuring code quality and correctness in the HMATools project. It includes the following steps: + +- **Checkout Code**: + - The repository is checked out to the runner for subsequent operations. + +- **Set Up Python**: + - Python 3.11 is installed and configured for the environment. + +- **Install Dependencies**: + - All necessary Python packages, including project dependencies and development tools like `black`, `flake8`, `pylint`, `mypy`, and `pytest`, are installed. + +- **Code Formatting with Black**: + - The codebase is checked for compliance with `black` formatting standards. + +- **Linting with Flake8 and Pylint**: + - Code is linted to catch syntax errors, enforce coding standards, and assess code complexity. + +- **Type Checking with Mypy**: + - Static type checking is performed to catch type-related errors before runtime. 
+ +- **Run Tests**: + - Unit tests are executed using `pytest` to ensure the functionality is working as expected. diff --git a/.github/workflows/benchmarking.yaml b/.github/workflows/benchmarking.yaml new file mode 100644 index 0000000..4a1edec --- /dev/null +++ b/.github/workflows/benchmarking.yaml @@ -0,0 +1,167 @@ +name: Run FastCaloSim Benchmarking + +on: + workflow_dispatch: + + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + run: + runs-on: ${{ matrix.runner-label }} + name: Run FastCaloSim + strategy: + matrix: + runner-label: [pm-login, exalearn5] + env: + LOG_DIR: ${{ github.workspace }}/logs/run + NERSC_CONTAINER_REGISTRY_USER: ${{ secrets.NERSC_CONTAINER_REGISTRY_USER }} + NERSC_CONTAINER_REGISTRY_PASSWORD: ${{ secrets.NERSC_CONTAINER_REGISTRY_PASSWORD }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Run FastCaloSim on Perlmutter + if: matrix.runner-label == 'pm-login' + env: + RUNNER_LABEL: pm-login + FCS_DATAPATH: /global/cfs/cdirs/atlas/leggett/data/FastCaloSimInputs + run: | + cd ./scripts/run_scripts + ./run_images.sh + + - name: Run FastCaloSim on exalearn5 + if: matrix.runner-label == 'exalearn5' + env: + RUNNER_LABEL: exalearn5 + FCS_DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs + run: | + cd ./scripts/run_scripts + ./run_images.sh + + - name: Create tarball of log files + run: | + cd ${{ env.LOG_DIR }} + tar -czf ${{ github.workspace }}/log_files_${{ matrix.runner-label }}.tar.gz ./*.txt + + - name: Upload log files + uses: actions/upload-artifact@v4 + with: + name: Log Files - ${{ matrix.runner-label }} + path: log_files_${{ matrix.runner-label }}.tar.gz + + postprocess: + runs-on: ubuntu-latest + name: Postprocess log files + needs: run + env: + LOG_DIR: ${{ github.workspace }}/logs/postprocess + INPUT_DIR: ${{ github.workspace }}/logs/run + OUTPUT_DIR: ${{ github.workspace }}/logs/postprocess + SCRIPT: ./postprocess.sh + + steps: + - name: Checkout repository + uses: 
actions/checkout@v4 + + - name: Download pm-login log files + uses: actions/download-artifact@v4 + with: + name: Log Files - pm-login + path: ${{ env.INPUT_DIR }} + + - name: Download exalearn5 log files + uses: actions/download-artifact@v4 + with: + name: Log Files - exalearn5 + path: ${{ env.INPUT_DIR }} + + - name: Untar and uncompress logs + run: | + for runner in pm-login exalearn5; do + tar -xzf "${{ env.INPUT_DIR }}/log_files_${runner}.tar.gz" -C "${{ env.INPUT_DIR }}" + done + + - name: Debug input directory + run: | + ls -l ${{ env.INPUT_DIR }} + + - name: Build hmatools image + run: | + cd ./hmatools/scripts + ./build_image.sh + + - name: Run hmatools postprocessing + run: | + cd ./hmatools/scripts + ./run_image.sh + + - name: Upload logfiles + if: always() + uses: actions/upload-artifact@v4 + with: + name: Postprocess log file + path: ${{ env.LOG_DIR }}/*.txt + + - name: Upload json files + if: always() + uses: actions/upload-artifact@v4 + with: + name: FastCaloSim Results (JSON) + path: ${{ env.OUTPUT_DIR }}/*.json + + - name: Cleanup workspace + run: rm -rf ${{ github.workspace }}/* + + plot: + runs-on: ubuntu-latest + name: Plot results + needs: postprocess + env: + LOG_DIR: ${{ github.workspace }}/logs/plot + INPUT_DIR: ${{ github.workspace }}/logs/postprocess + OUTPUT_DIR: ${{ github.workspace }}/logs/plot + SCRIPT: ./plot.sh + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Download json files + uses: actions/download-artifact@v4 + with: + name: FastCaloSim Results (JSON) + path: ${{ env.INPUT_DIR }} + + - name: Debug input directory + run: | + ls -l ${{ env.INPUT_DIR }} + + - name: Build hmatools image + run: | + cd ./hmatools/scripts + ./build_image.sh + + - name: Run hmatools plotting + run: | + cd ./hmatools/scripts + ./run_image.sh + + - name: Upload logfiles + if: always() + uses: actions/upload-artifact@v4 + with: + name: Plot log file + path: ${{ env.LOG_DIR }}/*.txt + + - name: Upload plot 
files + if: always() + uses: actions/upload-artifact@v4 + with: + name: Result plots + path: ${{ env.OUTPUT_DIR }}/*.png diff --git a/.github/workflows/build_image.yaml b/.github/workflows/build_image.yaml new file mode 100644 index 0000000..28c1adc --- /dev/null +++ b/.github/workflows/build_image.yaml @@ -0,0 +1,41 @@ +name: Build Container images with FastCaloSim + +on: + workflow_dispatch: + inputs: + run_build: + description: "Run the build job?" + required: false + default: "false" + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + build: + runs-on: [self-hosted, pm-login] + name: Build and push images + if: ${{ github.event.inputs.run_build == 'true' }} + env: + LOG_DIR: ${{ github.workspace }}/logs/build + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Build images + run: | + cd ./scripts/build_scripts + ./build_images.sh + + - name: Upload log files + uses: actions/upload-artifact@v4 + with: + name: Log File - Building and Pushing container images + path: ${{ env.LOG_DIR }}/*.txt + + - name: Cleanup workspace + run: rm -rf ${{ github.workspace }}/* diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c28e92e --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +**/.DS_Store +**/__pycache__/ +**/*.egg-info/ +**/json/**/* +scripts/run_scripts/*.txt +**/*.txt +**/*.json +**/*.png +**/*.ipynb +!/hmatools/tests/test_data/** +!/hmatools/requirements.txt \ No newline at end of file diff --git a/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt b/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt index 4d2b459..c4093f8 100644 --- a/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt +++ b/FastCaloSimAnalyzer/FastCaloGpu/src/CMakeLists.txt @@ -1,11 +1,12 @@ # Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration # "First-class" CUDA support needs at least CMake 3.10. 
-cmake_minimum_required( VERSION 3.10 ) +cmake_minimum_required( VERSION 3.17 ) cmake_policy(SET CMP0074 NEW) set(FIND_CUDA ON) + # don't check for CUDA w/ Kokkos incase backend is something else if(USE_KOKKOS) set(FIND_CUDA OFF) @@ -24,12 +25,41 @@ endif() if(USE_STDPAR) if ( ${STDPAR_TARGET} STREQUAL "gpu" ) find_package(NVHPC REQUIRED COMPONENTS MATH) + set(CMAKE_THREAD_LIBS_INIT "-lpthread") + set(CMAKE_HAVE_THREADS_LIBRARY 1) + set(CMAKE_USE_WIN32_THREADS_INIT 0) + set(CMAKE_USE_PTHREADS_INIT 1) + set(THREADS_PREFER_PTHREAD_FLAG ON) set(FIND_CUDA ON) endif() endif() if(FIND_CUDA) find_package(CUDAToolkit REQUIRED) + if(TARGET CUDA::curand) + set(CURAND_LIB CUDA::curand) + else() + set(CURAND_LIB "curand") + endif() + + if(TARGET CUDA::nvToolsExt) + set(NVTOOLSEXT_LIB CUDA::nvToolsExt) + else() + set(NVTOOLSEXT_LIB "nvToolsExt") + endif() + + if(TARGET CUDA::cudart) + set(CUDART_LIB CUDA::cudart) + else() + set(CUDART_LIB "cudart") + endif() + + if(TARGET CUDA::cuda_driver) + set(CUDA_DRIVER_LIB CUDA::cuda_driver) + else() + set(CUDA_DRIVER_LIB "cuda") + endif() + enable_language( CUDA ) set(CUDA_LIBRARIES PUBLIC ${CUDA_LIBRARIES}) endif() @@ -125,7 +155,7 @@ endif() target_include_directories(${FastCaloGpu_LIB} PRIVATE ../FastCaloGpu/ ) if(USE_ALPAKA) - target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} alpaka::alpaka) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC alpaka::alpaka ${CURAND_LIB}) target_compile_definitions(${FastCaloGpu_LIB} PRIVATE ${FCS_CommonDefinitions}) elseif(USE_HIP) target_compile_definitions(${FastCaloGpu_LIB} PRIVATE ${FCS_CommonDefinitions}) @@ -138,7 +168,7 @@ elseif(USE_HIP) target_link_libraries(${FastCaloGpu_LIB} PUBLIC CUDA::cudart) endif() else() - target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${NVTOOLSEXT_LIB} ${CURAND_LIB}) endif() if(ENABLE_OMPGPU) @@ -155,7 +185,7 @@ if(USE_KOKKOS) endif() 
if(USE_STDPAR) - target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_LIBRARIES} ) + target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDART_LIB} ${CUDA_DRIVER_LIB} ) target_compile_definitions(${FastCaloGpu_LIB} PRIVATE -DUSE_STDPAR -DSTDPAR_TARGET=${STDPAR_TARGET} ) # @@ -184,8 +214,8 @@ if(RNDGEN_CPU) else() if(USE_HIP) if ( ${HIP_TARGET} STREQUAL "AMD" ) - target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/hiprand/include ) - target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/rocrand/include ) + target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/include/hiprand ) + target_include_directories(${FastCaloGpu_LIB} PRIVATE ${ROCM_PATH}/include/rocrand ) target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${ROCM_PATH}/lib/libhiprand.so) elseif( ${HIP_TARGET} STREQUAL "NVIDIA" ) target_link_libraries(${FastCaloGpu_LIB} PUBLIC ${CUDA_curand_LIBRARY} ${CUDA_nvToolsExt_LIBRARY}) diff --git a/README.md b/README.md index fe72338..c20c44b 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,6 @@ # FastCaloSim GPU Project + ## Table of contents * [Introduction](#Introduction) * [Build Instructions for Different Backends](#Build-Instructions-for-Different-Backends) diff --git a/hmatools/LICENSE b/hmatools/LICENSE new file mode 100644 index 0000000..f49a4e1 --- /dev/null +++ b/hmatools/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/hmatools/README.md b/hmatools/README.md new file mode 100644 index 0000000..f4dc2f5 --- /dev/null +++ b/hmatools/README.md @@ -0,0 +1,70 @@ +# HMATools + +HMATools is a Python package to parse and plot logfiles produced by [FastCaloSim](https://github.com/hep-cce/FCS-GPU) and ultimately other HEPMiniApps (HMA). This README contains general installation and usage information. + +For more in-depth technical information, please refer to the [Developer Guide](./hmatools_dev_guide.md). + +## Installation? Should it become its own repo? + +This is currently an open question, since HMATools is integrated inside a wrapper repository. +As of now, clone [FastCaloSim](https://github.com/hep-cce/FCS-GPU), navigate to the `hmatools` directory, and run: + +```sh +pip install -e . +``` + +However, in the future we want to support something along the lines of this: +!!!Currently not supported!!! +```sh +pip install git+https://github.com/lorenz369/self_hosted_runner#egg=hmatools +``` + +## Usage + +### Postprocessing +The `hma_result.py` module currently processes log files generated by FastCaloSim. To parse an entire directory or a single log file, provide its input path (-i/--input), and provide the output directory/filepath (-o/--output) where you'd like to save the processed JSON file. 
The script will search for log files in the specified path, and save the processed results as a JSON file in the output directory. + +Example usage of the command-line interface: + +```sh +postprocess --input ./logs --output ./output +``` + +If you are integrating `hma_result.py` into a larger workflow, you might use it as follows: + +```python +from hmatools.hma_result import HmaResult + +# Initialize the HmaResult with a directory or a single log file +result = HmaResult("./logs") + +# Retrieve the results +runs = result.get_results() + +# Optionally, dump the results to a JSON file +result.dump_to_json("./output/results.json") +``` + +### Plotting + +The `hma_plot.py` module generates visualizations from JSON files produced during the previous postprocessing step with `hma_result.py`. This module reads the JSON files from a specified path and creates bar plots comparing different technologies and kernel run times. The plots are saved in the specified output directory. + +Example usage of the command-line interface: + +```sh +postprocess --input ./logs --output ./output +``` + +If you are integrating `hma_plot.py` into a larger workflow, you might use it as follows: + +```python +from hmatools.hma_plot import HmaPlot + +# Initialize the HmaPlot with a directory or a single result file +plot = HmaPlot("./results") + +# Generate and save the plot +plot.plot(save_plot=True, filename="./output/plot.png") +``` + + diff --git a/hmatools/hmatools.Dockerfile b/hmatools/hmatools.Dockerfile new file mode 100755 index 0000000..98dc74a --- /dev/null +++ b/hmatools/hmatools.Dockerfile @@ -0,0 +1,13 @@ +FROM python:latest + +# Send Python output to the terminal +ENV PYTHONUNBUFFERED=1 + +WORKDIR /fcstools + +COPY . /fcstools + +RUN pip install --upgrade pip && \ + pip install --use-pep517 -e . 
+ + diff --git a/hmatools/hmatools_dev_guide.md b/hmatools/hmatools_dev_guide.md new file mode 100644 index 0000000..966da5a --- /dev/null +++ b/hmatools/hmatools_dev_guide.md @@ -0,0 +1,145 @@ +# Developer Guide + +This guide is intended to help collaborators and contributors. General usage information can be +found in `README.md`. + +# HMATools + +HMATools is a Python package to parse and plot logfiles produced by [FastCaloSim](https://github.com/hep-cce/FCS-GPU) and ultimately other HEPMiniApps (HMA). + +## Repository Structure + +- **LICENSE**: License file for the project. +- **README.md**: Project documentation. +- **hmatools.Dockerfile**: Dockerfile for building the project in a containerized environment. +- **pyproject.toml**: Configuration file for building and packaging the Python project. +- **requirements.txt**: List of dependencies required by the project. + +- **python/**: Main source directory for the HMATools package. + - `__init__.py`: Marks the directory as a Python package. + - `hma_metrics.py`: Module for handling metrics-related functionality. + - `hma_plot.py`: Module for plotting results. + - `hma_result.py`: Module for parsing and processing HMA results. + - `hma_run.py`: Module for managing individual HMA runs. + - `plotparser.py`: Parser module for plotting data. + - `postprocparser.py`: Parser module for postprocessing data. + - `run_info.py`: Module for handling run information like system, hardware, timestamp, etc. + - `util.py`: Utility functions used across the package. + +- **scripts/**: Shell scripts for various automation tasks. + - `build_image.sh`: Script to build the hmatools Docker image. + - `plot.sh`: Generic script to run plotting tasks inside containers. + - `postprocess.sh`: Generic script to run postprocessing tasks inside containers. + - `run_image.sh`: Script to run the hmatools Docker image, mount input/output and run scripts. + +- **setup.py**: Setup script for installing the package. 
+ +- **tests/**: Test suite for the project. + - `__init__.py`: Marks the directory as a Python package. + - **test_data/**: Directory containing test data. + - **Log Files - Run FastCaloSim/**: Example log files for testing. + - `fcs_results.json`: JSON file with test results. + - `fcs_results.png`: Plot image generated from test results. + - `test_fcs_result.py`: Test script for validating `hma_result.py` and `hma_plot.py`. + +## Main modules + +The core functionalities of postprocessing log files and plotting results are handled by `hma_result.py` and `hma_plot.py`, respectively. The following sections give a detailed breakdown of these two modules. + +### hma_result.py + +The `hma_result.py` module provides functionality to handle HMA results, primarily through the `HmaResult` class. Below is an overview of the key components of this module: + +#### Classes and Functions + +##### `HmaResult` +The `HmaResult` class represents the results of one or more HMA runs, consisting of multiple `HmaRun` instances. + +- **`__init__(input_path: Union[str, os.PathLike])`**: + Initializes the `HmaResult` object with the path to an input directory or file. Depending on whether the input is a directory or a single file, it calls the appropriate private method to retrieve the results. + +- **`_get_results_from_directory() -> List[HmaRun]`**: + A private method that reads log files from the specified input directory, initializes `HmaRun` objects for each log file, and stores them in a list. It raises a `ValueError` if no valid results are found. + +- **`_get_results_from_file(file_path: Union[str, os.PathLike]) -> List[HmaRun]`**: + A private method that reads a single log file, initializes a `HmaRun` object, and stores it in a list. + +- **`dump_to_json(filename: Optional[str] = None) -> None`**: + Dumps the results to a JSON file. If no filename is specified, it saves the file in the current working directory with a timestamp-based name. 
The method raises a `ValueError` if the output path is invalid. + +- **`get_results() -> List[HmaRun]`**: + Returns the list of `HmaRun` objects representing the results. + +- **`get_input_dir() -> Union[str, os.PathLike]`**: + Returns the path to the input directory. + +##### `json_serial` +A helper function for JSON serialization of various object types that are not serializable by default (e.g., `datetime`, `pd.DataFrame`, `HmaRun`). + +##### `main(input_path: str, output: str) -> None` +A standalone function that initializes a `HmaResult` object and triggers the postprocessing by calling `dump_to_json()`. + +### Example Usage in Code + +If you are integrating `hma_result.py` into a larger workflow, you might use it as follows: + +```python +from hmatools.hma_result import HmaResult + +# Initialize the HmaResult with a directory or a single log file +result = HmaResult("./logs") + +# Retrieve the results +runs = result.get_results() + +# Optionally, dump the results to a JSON file +result.dump_to_json("./output/results.json") +``` + +### hma_plot.py + +The `hma_plot.py` module is part of the HMATools package and is designed for plotting results parsed from logfiles produced by FastCaloSim and other HEPMiniApps (HMA). Below is an overview of its key components: + + +#### Classes and Functions + +##### `HmaPlot` +The `HmaPlot` class provides functionality to load and plot HMA results. It includes methods for reading input data, preparing the data for plotting, and generating comparative plots of kernel run times. + +- **`__init__(input_path: str) -> None`**: + Initializes the `HmaPlot` object with a specified input path, which can be a file or directory. Reads and deserializes the HMA data, extracts kernel information, and prepares a DataFrame for plotting. + +- **`read_hma_input() -> List[Dict[str, Any]]`**: + Reads and deserializes HMA result data from a given file or directory. 
+ +- **`get_kernel_data() -> Tuple[List[pd.DataFrame], List[Tuple[str, int]]]`**: + Extracts kernel timing data and launch counts from the HMA results, returning a list of DataFrames and a list of launch counts. + +- **`prepare_dataframe() -> pd.DataFrame`**: + Combines multiple kernel result DataFrames into one, standardizing kernel names and preparing the data for plotting. + +- **`plot(group_by: Optional[str] = None, save_plot: bool = True, filename: Optional[str] = None) -> None`**: + Generates bar plots comparing the total elapsed run time and average kernel run time for different technologies and kernels. It includes error bars for the average run time and can either save the plot to a file or display it. With `group_by="runner"` or `group_by="image_type"`, one column of plots is drawn per runner or per image type; otherwise all results share a single pair of plots. + +- **`save_or_show_plot(save_plot: bool = True, filename: Optional[str] = None, group_by: Optional[str] = None) -> None`**: + Saves the generated plot to a file if `save_plot` is `True`, otherwise displays the plot. If `group_by` is given, it is appended to the output file name. + +##### `json_deserial(dct: Dict[str, Any]) -> Union[Dict[str, Any], Metric]` +A helper function that deserializes JSON objects into Python objects, including custom `Metric` objects used in HMA results. + +##### `main(input_path: str, output: str) -> None` +A standalone function that initializes an `HmaPlot` object and triggers the plotting process three times (ungrouped, grouped by runner, and grouped by image type), saving each plot under the specified output path. 
+ +### Example Usage in Code + +If you are integrating `hma_plot.py` into a larger workflow, you might use it as follows: + +```python +from hmatools.hma_plot import HmaPlot + +# Initialize the HmaPlot with a directory or a single result file +plot = HmaPlot("./results") + +# Generate and save the plot +plot.plot(save_plot=True, filename="./output/plot.png") +``` diff --git a/hmatools/pyproject.toml b/hmatools/pyproject.toml new file mode 100644 index 0000000..737e278 --- /dev/null +++ b/hmatools/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools>=64", "wheel"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/hmatools/python/__init__.py b/hmatools/python/__init__.py new file mode 100644 index 0000000..6ac82cb --- /dev/null +++ b/hmatools/python/__init__.py @@ -0,0 +1,3 @@ +""" +This file marks the directory as a Python package. +""" diff --git a/hmatools/python/hma_metrics.py b/hmatools/python/hma_metrics.py new file mode 100644 index 0000000..28c0c72 --- /dev/null +++ b/hmatools/python/hma_metrics.py @@ -0,0 +1,141 @@ +""" +This module provides classes for parsing and storing metrics from log files. + +Classes: + Metric: A data class representing a single metric. + HmaMetrics: A class for parsing and storing metrics from a log file. +""" + +import re +import io +import os + +from dataclasses import dataclass +from typing import Union, List, Optional + +import pandas as pd + + +@dataclass +class Metric: + """ + Represents a single metric with a name, data type, value, and optional unit. + """ + + name: str + data_type: type + value: Union[float, int, str, pd.DataFrame] + unit: Optional[str] = None + + +class HmaMetrics: + """ + Parses and stores metrics from a log file. 
+ """ + + def __init__(self, logfile: Union[str, os.PathLike], data: str) -> None: + self.logfile = logfile + self.metrics = self._parse_metrics(data) + + def _parse_metrics(self, data: str) -> List[Metric]: + kernel_lines = [] + capture_kernel_timing = False + + for line in data.splitlines(): + if not capture_kernel_timing and "Event: 9750" in line: + capture_kernel_timing = True + continue + + if capture_kernel_timing: + if "exiting early" in line: + break + kernel_lines.append(line.strip()) + + if kernel_lines: + return self._parse_kernel_data(kernel_lines) + print(f"No kernel information found for {self.logfile}") + return [] + + def _parse_kernel_lines(self, kernel_lines: List[str]) -> List[Metric]: + metrics = [] + kernel_timing_lines = [] + for line in kernel_lines: + if ":" in line: + m = self._process_kernel_line(line) + if m is not None: + metrics.append(m) + elif line.split()[0] in [ + "kernel", + "sim_clean", + "sim_A", + "sim_ct", + "sim_cp", + ]: + elements = line.strip() + kernel_timing_lines.append(elements) + elif "Time for Chain" in line: + chain_time_match = re.match( + r"Time for Chain (\d+) is ([0-9.]+) (\w+)", line.strip() + ) + if chain_time_match: + chain_id, value_num, unit = chain_time_match.groups() + m = Metric( + f"Time for Chain {chain_id}", float, float(value_num), unit + ) + metrics.append(m) + elif "launch count" in line: + match = re.search(r"launch count\s+(\d+)\s*(\+\d+)?", line) + if match: + launch_count = int(match.group(1)) + m = Metric("launch count", str, str(launch_count), None) + metrics.append(m) + + if kernel_timing_lines: + name = "kernel timing" + m = Metric( + name, + pd.DataFrame, + self._kernel_timing_df(kernel_timing_lines[1:]), + None, + ) + metrics.append(m) + return metrics + + def _parse_kernel_data(self, kernel_lines: List[str]) -> List[Metric]: + metrics = self._parse_kernel_lines(kernel_lines) + if metrics: + print(f"Found {len(metrics)} metrics for {self.logfile}") + return metrics + print(f"No kernel 
timing information found for {self.logfile}") + return [] + + def _process_kernel_line(self, line: str) -> Optional[Metric]: + if "GPU memory used(MB)" in line: + special_case_match = re.match(r"(.+?)\((\w+)\):\s*([0-9.]+)", line.strip()) + if special_case_match: + name, unit, value = special_case_match.groups() + return Metric(name, int, int(value), unit) + else: + name, value = map(str.strip, line.split(":", 1)) + match = re.match(r"([0-9.]+)\s*(\w*)", value) + if match: + value_num, unit = match.groups() + return Metric(name, float, float(value_num), unit) + + return None + + def _kernel_timing_df(self, lines: List[str]) -> pd.DataFrame: + data = "\n".join(lines) + return pd.read_csv(io.StringIO(data), sep=r"\s{2,}", header=0, engine="python") + + def get_logfile(self) -> Union[str, os.PathLike]: + """ + Returns the logfile associated with the metrics. + """ + return self.logfile + + def get_metrics(self) -> Optional[List[Metric]]: + """ + Returns the list of metrics. + """ + return self.metrics diff --git a/hmatools/python/hma_plot.py b/hmatools/python/hma_plot.py new file mode 100644 index 0000000..a31f046 --- /dev/null +++ b/hmatools/python/hma_plot.py @@ -0,0 +1,311 @@ +""" +Functionality to plot HMA results: + +- Load HMA results from a given result file +- Plot a comparison of multiple kernel run times with total elapsed run time and average run time +""" + +import json +import os +from typing import Dict, Any, Union, Optional, List, Tuple +from datetime import datetime +from io import StringIO + +import pandas as pd +import seaborn as sns +import matplotlib.pyplot as plt + +from hma_metrics import Metric +from util import make_launch_count_str, make_df_for_errorbar + + +def json_deserial(dct: Dict[str, Any]) -> Union[Dict[str, Any], Metric]: + """JSON deserializer for custom objects""" + + if "data_type" in dct: + if "float" in dct["data_type"]: + return Metric(dct["name"], float, float(dct["value"]), dct["unit"]) + if "int" in dct["data_type"]: + return 
Metric(dct["name"], int, int(dct["value"]), dct["unit"]) + if "str" in dct["data_type"]: + return Metric(dct["name"], str, str(dct["value"]), dct["unit"]) + if "DataFrame" in dct["data_type"]: + return Metric( + dct["name"], + pd.DataFrame, + pd.read_json(StringIO(dct["value"])), + dct["unit"], + ) + if "timestamp" in dct: + dct["timestamp"] = datetime.fromisoformat(dct["timestamp"]) + + return dct + + +class HmaPlot: + """Class for plotting HEPMiniApps results""" + + descriptive_names = { + "sim_clean": "workspace_reset", + "sim_A": "simulate", + "sim_ct": "reduce", + "sim_cp": "copy d->h", + } + + def __init__(self, input_path: str) -> None: + """Initialize HmaPlot with a file or directory""" + self.input_path: str = input_path + self.hma_data: List[Dict[str, Any]] = self.read_hma_input() + self.kernels: List[pd.DataFrame] + self.launch_count_list: List[Tuple[str, int]] + self.kernels, self.launch_count_list = self.get_kernel_data() + self.df_all: pd.DataFrame = self.prepare_dataframe() + + def read_hma_input(self) -> List[Dict[str, Any]]: + """Read and deserialize input from a file or directory""" + data: List[Dict[str, Any]] = [] + + if os.path.isdir(self.input_path): + input_files = [ + x for x in os.listdir(self.input_path) if x.endswith(".json") + ] + input_files.sort() + input_files = [os.path.join(self.input_path, file) for file in input_files] + for input_file in input_files: + with open(input_file, "r", encoding="utf-8") as f: + result_list = json.load(f, object_hook=json_deserial) + data = result_list if not data else data + result_list + elif os.path.isfile(self.input_path): + with open(self.input_path, "r", encoding="utf-8") as f: + data = json.load(f, object_hook=json_deserial) + else: + raise ValueError( + f"Provided input path '{self.input_path}' is neither a valid file nor a directory." 
+ ) + + return data + + def _prepare_kernel_info(self, kernel_run: Dict[str, Any]) -> str: + """Prepare kernel information from the HMA data""" + runner_label = kernel_run["info"]["runner_label"] + image_info = kernel_run["info"]["image_info"] + image_type = image_info["image_type"] + image_tag = image_info["image_tag"] + short_tag = image_tag.split("-")[0] + kernel_info = runner_label + ":" + image_type + "_" + short_tag + return kernel_info + + def get_kernel_data(self) -> Tuple[List[pd.DataFrame], List[Tuple[str, int]]]: + """Extract kernel data and launch counts from the HMA data""" + pr_models: List[pd.DataFrame] = [] + launch_count_list: List[Tuple[str, int]] = [] + kernel_runs = [ + hma_run for hma_run in self.hma_data if hma_run["metrics"] is not None + ] + + for kernel_run in kernel_runs: + metrics = kernel_run["metrics"] + kernel_timing_idx = None + launch_count_idx = None + + for m in metrics: + if m.name == "kernel timing": + kernel_timing_idx = metrics.index(m) + elif m.name == "launch count": + launch_count_idx = metrics.index(m) + + if kernel_timing_idx is not None: + kernel_info = self._prepare_kernel_info(kernel_run) + df_kernel = metrics[kernel_timing_idx].value + df_kernel["kernel_info"] = kernel_info + df_kernel["runner"] = kernel_run["info"]["runner_label"] + df_kernel["image_type"] = kernel_run["info"]["image_info"]["image_type"] + print(f"Kernel timing metrics found for {kernel_info}") + pr_models.append(df_kernel) + else: + print(f"No kernel timing metrics found for {kernel_info}") + + if launch_count_idx is not None: + launch_count = metrics[launch_count_idx].value + launch_count_list.append((kernel_info, launch_count)) + else: + print(f"No launch count found for {kernel_info}") + + return pr_models, launch_count_list + + def prepare_dataframe(self) -> pd.DataFrame: + """Prepare a combined DataFrame with all kernel results""" + kernel_df = pd.concat(self.kernels, ignore_index=True) + kernel_df["kernel_info"] = 
kernel_df["kernel_info"].str.replace( + r"^fcs-", "", regex=True + ) + kernel_df["kernel"] = kernel_df["kernel"].replace(self.descriptive_names) + return kernel_df + + def save_or_show_plot( + self, + save_plot: bool = True, + filename: Optional[str] = None, + group_by: Optional[str] = None, + ) -> None: + """Save or show the plot""" + plt.tight_layout() + + if save_plot: + if filename is None: + plot_filename = ( + f"results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.png" + ) + elif os.path.isdir(filename): + plot_filename = os.path.join( + filename, + f"results_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.png", + ) + else: + plot_filename = filename + + if group_by is not None: + plot_filename = plot_filename.replace(".png", f"_{group_by}.png") + + plt.savefig(plot_filename) + print(f"Plot saved to {plot_filename}") + else: + plt.show() + plt.close() + + def _make_total_run_time_plot( + self, ax: plt.Axes, df: pd.DataFrame, title: str + ) -> None: + + sns.barplot( + ax=ax, + x="kernel", + y="total /s", + hue="kernel_info", + data=df, + errorbar=None, + capsize=0.1, + ) + ax.set_title(title) + ax.set_ylabel("Total elapsed run time [s]") + ax.legend(title="Technology", loc="upper right", fontsize=12) + + def _make_avg_run_time_plot( + self, + ax: plt.Axes, + df: pd.DataFrame, + launch_count_list: List[Tuple[str, int]], + title: Optional[str] = None, + ) -> None: + + sns.barplot( + ax=ax, + x="kernel", + y="avg launch /us", + hue="kernel_info", + data=df, + errorbar=None, + capsize=0.1, + ) + ax.set_title(title or "Average Kernel Run time in us") + + ax.set_ylabel("Average Run time and standard deviation [us]") + ax.legend(title="Technology", loc="upper right", fontsize=12) + + props = {"boxstyle": "round", "facecolor": "wheat", "alpha": 0.5} + launch_count_str = make_launch_count_str(launch_count_list) + ax.text( + 0.02, + 0.95, + launch_count_str, + transform=ax.transAxes, + fontsize=12, + verticalalignment="top", + horizontalalignment="left", + bbox=props, 
+ ) + + df_errorbar = make_df_for_errorbar(df, ax) + + ax.errorbar( + df_errorbar["bar_x"], + df_errorbar["bar_y"], + yerr=df_errorbar["std dev /us"], + fmt="none", + c="red", + capsize=5, + ) + ax.set_ylim(bottom=0) + + def plot( + self, + group_by: Optional[str] = None, + save_plot: bool = True, + filename: Optional[str] = None, + ) -> None: + """Plot the kernel run time comparisons, include error bars for average kernel run time""" + if group_by == "runner": + n = len(self.df_all["runner"].unique()) + _, axes = plt.subplots(2, n, figsize=((10 * n), 9)) + for i, runner in enumerate(self.df_all["runner"].unique()): + df_runner = self.df_all[self.df_all["runner"] == runner] + self._make_total_run_time_plot( + axes[0, i], + df_runner, + f"FastCaloSim Run Time comparison on {runner} for different technologies", + ) + self._make_avg_run_time_plot( + axes[1, i], + df_runner, + [x for x in self.launch_count_list if runner in x[0]], + f"Average Run Time comparison: {runner} kernels", + ) + filename = filename if filename is not None else "per_runner.png" + elif group_by == "image_type": + n = len(self.df_all["image_type"].unique()) + _, axes = plt.subplots(2, n, figsize=((10 * n), 9)) + for i, image_type in enumerate(self.df_all["image_type"].unique()): + df_image_type = self.df_all[self.df_all["image_type"] == image_type] + self._make_total_run_time_plot( + axes[0, i], + df_image_type, + f"FastCaloSim Run Time comparison of {image_type} across systems", + ) + self._make_avg_run_time_plot( + axes[1, i], + df_image_type, + [x for x in self.launch_count_list if image_type in x[0]], + f"Average Run Time comparison: {image_type} kernels", + ) + filename = filename if filename is not None else "per_image_type.png" + else: + _, axes = plt.subplots(2, 1, figsize=(10, 9)) + self._make_total_run_time_plot( + axes[0], + self.df_all, + "FastCaloSim Run Time comparison for different technologies", + ) + self._make_avg_run_time_plot(axes[1], self.df_all, self.launch_count_list) + 
filename = filename if filename is not None else "all.png" + + self.save_or_show_plot(save_plot, filename, group_by) + + +def main(input_path, output): + """Main function to create and save the plot from the HMA result file.""" + print(f"Plotting: input_path={input_path}, output={output}") + plot = HmaPlot(input_path) + plot.plot( + save_plot=True, + filename=output, + ) + plot.plot( + group_by="runner", + save_plot=True, + filename=output, + ) + plot.plot( + group_by="image_type", + save_plot=True, + filename=output, + ) diff --git a/hmatools/python/hma_result.py b/hmatools/python/hma_result.py new file mode 100644 index 0000000..c1c2735 --- /dev/null +++ b/hmatools/python/hma_result.py @@ -0,0 +1,111 @@ +""" +Functionality to handle HMA results: + +- HmaResult class which is responsible for initializing results from a given input directory, +retrieving these results, and dumping them to a JSON file +- Helper function for JSON serialization of various object types +""" + +import os +import json + +from datetime import datetime +from typing import Union, Optional, List + +import pandas as pd + +from hma_run import HmaRun +from util import get_current_timestamp + + +def json_serial(obj: object) -> Union[str, dict]: + """JSON serializer for objects not serializable by default""" + if isinstance(obj, datetime): + return obj.isoformat() + if isinstance(obj, pd.DataFrame): + return obj.to_json() + if isinstance(obj, type): + return str(obj) + if isinstance(obj, HmaRun): + return obj.to_dict() + raise TypeError(f"Type {type(obj)} not serializable") + + +class HmaResult: + """Representation of one HmaResult, consisting of multiple HmaRuns""" + + def __init__(self, input_path: Union[str, os.PathLike]) -> None: + """Initialize hma_results with the path to the input directory or file.""" + if os.path.isdir(input_path): + self.input_dir = input_path + self.results = self._get_results_from_directory() + elif os.path.isfile(input_path): + self.input_dir = 
os.path.dirname(input_path) + self.results = self._get_results_from_file(input_path) + else: + raise ValueError( + f"The provided input path '{input_path}' is neither a file nor a directory." + ) + + def _get_results_from_directory(self) -> List[HmaRun]: + """Get the individual HmaRun objects from the input directory""" + results = [] + input_files = [ + x + for x in os.listdir(self.input_dir) + if x.endswith(".txt") and x.startswith("run_log_") + ] + input_files.sort() + input_files = [os.path.join(self.input_dir, file) for file in input_files] + for logfile in input_files: + print(f"Reading {logfile}") + run = HmaRun(logfile) + results.append(run) + if not results: + raise ValueError(f"No HMA results found in directory {self.input_dir}") + return results + + def _get_results_from_file( + self, file_path: Union[str, os.PathLike] + ) -> List[HmaRun]: + """Get the HmaRun object from a single file""" + print(f"Reading {file_path}") + run = HmaRun(file_path) + return [run] + + def dump_to_json(self, filename: Optional[str] = None) -> None: + """Dump the results to a JSON file in the specified file or a default directory""" + + if filename is None: + current_dir = os.getcwd() + json_filename = os.path.join( + current_dir, f"results_{get_current_timestamp()}.json" + ) + elif os.path.isdir(filename): + json_filename = os.path.join( + filename, f"results_{get_current_timestamp()}.json" + ) + elif os.path.isdir(os.path.dirname(filename)): + json_filename = filename + else: + raise ValueError(f"Invalid output path: {filename}") + + with open(json_filename, "w", encoding="utf-8") as f: + json.dump(self.results, f, indent=4, sort_keys=True, default=json_serial) + + print(f"Results successfully saved to {json_filename}") + + def get_results(self) -> List[HmaRun]: + """Public method to get the results""" + return self.results + + def get_input_dir(self) -> Union[str, os.PathLike]: + """Public method to get the input directory path""" + return self.input_dir + + +def 
main(input_path, output): + """Main function for postprocessing HMA results.""" + print(f"Postprocessing: input_path={input_path}, output={output}") + result = HmaResult(input_path) + result.dump_to_json(output) diff --git a/hmatools/python/hma_run.py b/hmatools/python/hma_run.py new file mode 100644 index 0000000..a8171b2 --- /dev/null +++ b/hmatools/python/hma_run.py @@ -0,0 +1,57 @@ +""" +Module for handling FastCaloSim run results. + +This module defines the HmaRun class, which encapsulates the information and metrics +from a FastCaloSim run log file. It provides methods to initialize the run data, +convert it to a dictionary, and access the run info and metrics. +""" + +import os +from typing import Union, Dict, List, Optional +from collections.abc import Iterable + +from run_info import RunInfo +from hma_metrics import HmaMetrics + + +class HmaRun: + """Representation of one HmaRun, including relevant info and metrics""" + + def __init__(self, logfile: Union[str, os.PathLike]) -> None: + """Initialize FastCaloSim result object with the path to the input file.""" + if not os.path.isfile(logfile): + raise ValueError(f"The provided input path '{logfile}' is not a file.") + self.logfile = logfile + with open(logfile, "r", encoding="utf-8") as f: + logfile_content = f.read() + info_data, kernel_data = logfile_content.split("- Setup") + + self.info = RunInfo(logfile, info_data) + self.metrics = HmaMetrics(logfile, kernel_data) + + def to_dict( + self, + ) -> Dict[ + str, Union[Dict[str, str], Optional[List[Dict[str, Union[str, int, float]]]]] + ]: + """Convert the FastCaloSim result object to a dictionary.""" + if isinstance(self.metrics.metrics, Iterable): + metrics = ( + [m.__dict__ for m in self.metrics.metrics] + if self.metrics.metrics + else None + ) + return { + "info": self.info.__dict__, + "metrics": metrics, + } + print(f"No metrics found for {self.logfile}") + return {"info": self.info.__dict__, "metrics": None} + + def get_info(self) -> RunInfo: + 
def parse_args(argv=None):
    """Parse command line arguments for the plotting script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing callers are unaffected.

    Returns:
        The parsed namespace with ``input`` and ``output`` attributes.
    """
    plot_parser = argparse.ArgumentParser(description="Plot command line arguments.")
    # The plotting code accepts a single JSON file or a directory of JSON
    # files, and an output file or existing directory; say so in the help.
    plot_parser.add_argument(
        "-i", "--input", required=True, help="Input JSON file or directory"
    )
    plot_parser.add_argument(
        "-o", "--output", required=True, help="Output plot file or directory"
    )

    return plot_parser.parse_args(argv)
def parse_args(argv=None):
    """Parse command line arguments for the postprocessing script.

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, so existing callers are unaffected.

    Returns:
        The parsed namespace with ``input`` and ``output`` attributes.
    """
    postproc_parser = argparse.ArgumentParser(
        description="Postprocess command line arguments."
    )
    postproc_parser.add_argument(
        "-i", "--input", required=True, help="Input dir/file"
    )
    postproc_parser.add_argument(
        "-o", "--output", required=True, help="Output dir/file"
    )

    return postproc_parser.parse_args(argv)
+ """ + + def __init__(self, logfile: Union[str, os.PathLike], info_data: str) -> None: + runner_label, description, timestamp = self._split_log_name(logfile) + self.runner_label = runner_label + self.image_info = self._parse_description(description) + self.timestamp = self._parse_timestamp(timestamp) + self.run_cmd = self._parse_run_cmd(info_data) + self.system_info = self._parse_system_info(info_data) + self.cpu_info = self._parse_cpu_info(info_data) + self.nvidia_info = self._parse_nvidia_info(info_data) + + def _split_log_name(self, logfile: Union[str, os.PathLike]) -> Tuple[str, str, str]: + tag, _ = os.path.splitext(os.path.basename(logfile)) + if tag.startswith("run_log_"): + tag = tag[8:] + tags = tag.split("_") + if len(tags) == 2: + description, timestamp = tags + runner_label = "" + elif len(tags) == 3: + runner_label, description, timestamp = tag.split("_") + else: + print(f"Could not parse log name {tag}") + runner_label, description, timestamp = "", "", "" + return runner_label, description, timestamp + + def _parse_description(self, description: str) -> Dict[str, str]: + # Assuming the description is in the format "image_type-root_version-image_tag" + description_parts = description.split("-") + for i, part in enumerate(description_parts): + if part[0].isdigit(): + image_info = { + "image_type": "-".join(description_parts[:i]), + "root_version": part, + "image_tag": "-".join(description_parts[i + 1 :]), + } + return image_info + print(f"Could not parse description {description}") + return {"image_type": "", "root_version": "", "image_tag": ""} + + def _parse_timestamp(self, timestamp: str) -> Optional[datetime]: + timestamp_match = re.search(r"\d{14}", timestamp) + if timestamp_match: + timestamp_str = timestamp_match.group() + parsed_timestamp = datetime.strptime(timestamp_str, "%Y%m%d%H%M%S") + return parsed_timestamp + return None + + def _parse_run_cmd(self, data: str) -> Optional[str]: + command_match = re.search(r"runTFCSSimulation\s.*", data) 
+ if command_match: + return command_match.group(0) + return None + + def _parse_system_info( + self, file: str + ) -> Optional[Dict[str, Union[str, int, float]]]: + json_string = str() + hits = 0 + for line in file.splitlines(): + if "{" in line: + hits += 1 + + if hits >= 1: + json_string += line + + if "}" in line: + hits -= 1 + + if json_string: + json_string = remove_trailing_commas(json_string) + return json.loads(json_string) + print( + f"No system information found for {self.image_info['image_type']} " + f"with tag {self.image_info['image_tag']}" + ) + return None + + def _parse_cpu_info(self, file: str) -> Dict[str, Union[str, int, float]]: + cpu_info = {} + patterns = { + "Architecture": r"Architecture:\s+(.+)", + "CPU op-mode(s)": r"CPU op-mode\(s\):\s+(.+)", + "Address sizes": r"Address sizes:\s+(.+)", + "Byte Order": r"Byte Order:\s+(.+)", + "CPU(s)": r"CPU\(s\):\s+(\d+)", + "On-line CPU(s) list": r"On-line CPU\(s\) list:\s+(.+)", + "Vendor ID": r"Vendor ID:\s+(.+)", + "Model name": r"Model name:\s+(.+)", + "CPU family": r"CPU family:\s+(\d+)", + "Model": r"Model:\s+(\d+)", + "Thread(s) per core": r"Thread\(s\) per core:\s+(\d+)", + "Core(s) per socket": r"Core\(s\) per socket:\s+(\d+)", + "Socket(s)": r"Socket\(s\):\s+(\d+)", + "Stepping": r"Stepping:\s+(\d+)", + "Frequency boost": r"Frequency boost:\s+(.+)", + "CPU max MHz": r"CPU max MHz:\s+([\d.]+)", + "CPU min MHz": r"CPU min MHz:\s+([\d.]+)", + "BogoMIPS": r"BogoMIPS:\s+([\d.]+)", + "Flags": r"Flags:\s+(.+)", + "Virtualization": r"Virtualization:\s+(.+)", + "L1d cache": r"L1d cache:\s+(.+)", + "L1i cache": r"L1i cache:\s+(.+)", + "L2 cache": r"L2 cache:\s+(.+)", + "L3 cache": r"L3 cache:\s+(.+)", + "NUMA node(s)": r"NUMA node\(s\):\s+(\d+)", + "NUMA node0 CPU(s)": r"NUMA node0 CPU\(s\):\s+(.+)", + "NUMA node1 CPU(s)": r"NUMA node1 CPU\(s\):\s+(.+)", + "Vulnerability Gather data sampling": r"Vulnerability Gather data sampling:\s+(.+)", + "Vulnerability Itlb multihit": r"Vulnerability Itlb 
multihit:\s+(.+)", + "Vulnerability L1tf": r"Vulnerability L1tf:\s+(.+)", + "Vulnerability Mds": r"Vulnerability Mds:\s+(.+)", + "Vulnerability Meltdown": r"Vulnerability Meltdown:\s+(.+)", + "Vulnerability Mmio stale data": r"Vulnerability Mmio stale data:\s+(.+)", + "Vulnerability Retbleed": r"Vulnerability Retbleed:\s+(.+)", + "Vulnerability Spec rstack overflow": r"Vulnerability Spec rstack overflow:\s+(.+)", + "Vulnerability Spec store bypass": r"Vulnerability Spec store bypass:\s+(.+)", + "Vulnerability Spectre v1": r"Vulnerability Spectre v1:\s+(.+)", + "Vulnerability Spectre v2": r"Vulnerability Spectre v2:\s+(.+)", + "Vulnerability Srbds": r"Vulnerability Srbds:\s+(.+)", + "Vulnerability Tsx async abort": r"Vulnerability Tsx async abort:\s+(.+)", + } + + for key, pattern in patterns.items(): + match = re.search(pattern, file) + if match: + cpu_info[key] = match.group(1) + + return cpu_info + + def _parse_nvidia_info(self, file: str) -> Optional[Dict[str, str]]: + pattern = re.compile( + r"Executing: nvidia-smi --query-gpu=.*? --format=csv\n(.*?)(?=\nINFO -|\Z)", + re.DOTALL, + ) + match = pattern.search(file) + + if not match: + print("No nvidia-smi output found in the logfile.") + return None + + csv_data_block = match.group(1).strip() + + lines = csv_data_block.split("\n") + if len(lines) < 2: + print("Incomplete GPU information.") + return None + + csv_data = "\n".join(lines) + gpu_info = parse_csv_data(csv_data) + return gpu_info + + def get_image_info(self) -> Dict[str, str]: + """ + Get the image information. + """ + return self.image_info + + def get_timestamp(self) -> Optional[datetime]: + """ + Get the timestamp of the run. + """ + return self.timestamp diff --git a/hmatools/python/util.py b/hmatools/python/util.py new file mode 100644 index 0000000..6557917 --- /dev/null +++ b/hmatools/python/util.py @@ -0,0 +1,87 @@ +""" +Utility functions for time, CSV parsing, JSON manipulation, and data processing. 
def make_launch_count_str(launch_count_list: List[Tuple[str, int]]) -> str:
    """Create a string summarizing launch counts.

    Args:
        launch_count_list: ``(label, launch_count)`` pairs.

    Returns:
        ``"n = <count> launches per kernel"`` when all counts agree, a
        per-label listing when they differ, and a placeholder text when the
        list is empty.
    """
    if not launch_count_list:
        # Indexing element 0 of an empty list raised IndexError.
        return "No launch counts available"

    first_count = launch_count_list[0][1]
    if all(count == first_count for _, count in launch_count_list):
        return f"n = {first_count} launches per kernel"

    print("Warning: Not all launch counts are equal")
    return "Launches per kernel: " + ",\n".join(
        f"{image}: {count}" for image, count in launch_count_list
    )
def make_df_for_errorbar(df: pd.DataFrame, axis) -> pd.DataFrame:
    """Create a DataFrame with error bar coordinates.

    Rows are sorted by kernel (in order of first appearance) and then by
    ``kernel_info``, and the bar coordinates read from ``axis`` are attached as
    ``bar_x`` / ``bar_y`` columns.

    Args:
        df: Kernel results with ``kernel`` and ``kernel_info`` columns. The
            frame is not modified.
        axis: Matplotlib axis whose bar patches supply the coordinates.

    Returns:
        A new, sorted DataFrame with the added ``bar_x`` and ``bar_y`` columns.

    Raises:
        ValueError: If the number of bars differs from the number of rows.
    """
    bar_x, bar_y = get_x_y_for_errorbar(axis)

    unique_kernels = df["kernel"].unique()
    # assign() returns a copy. Writing the categorical column into `df` itself
    # altered the caller's frame, which is often a filtered slice.
    sorted_df = (
        df.assign(
            kernel=pd.Categorical(df["kernel"], categories=unique_kernels, ordered=True)
        )
        .sort_values(by=["kernel", "kernel_info"])
        .reset_index(drop=True)
    )

    if len(bar_x) != len(sorted_df):
        raise ValueError(
            f"Found {len(bar_x)} bars for {len(sorted_df)} rows; "
            "cannot align error bars with the plotted data."
        )

    sorted_df["bar_x"] = bar_x
    sorted_df["bar_y"] = bar_y
    return sorted_df
diff --git a/hmatools/scripts/plot.sh b/hmatools/scripts/plot.sh new file mode 100755 index 0000000..6da4c81 --- /dev/null +++ b/hmatools/scripts/plot.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +logfile=${LOGFILE} + +log_command() { + local command="$1" + echo "INFO - $(date) - Executing: ${command}" | tee -a ${logfile} + eval ${command} | tee -a ${logfile} +} + +log_command "plot -i /input -o /output" diff --git a/hmatools/scripts/postprocess.sh b/hmatools/scripts/postprocess.sh new file mode 100755 index 0000000..b4ecb06 --- /dev/null +++ b/hmatools/scripts/postprocess.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +logfile=${LOGFILE} + +log_command() { + local command="$1" + echo "INFO - $(date) - Executing: ${command}" | tee -a ${logfile} + eval ${command} | tee -a ${logfile} +} + +log_command "postprocess -i /input -o /output" diff --git a/hmatools/scripts/run_image.sh b/hmatools/scripts/run_image.sh new file mode 100755 index 0000000..f72c762 --- /dev/null +++ b/hmatools/scripts/run_image.sh @@ -0,0 +1,48 @@ +#!/bin/bash + +set -e # Exit immediately if a command exits with a non-zero status. +set -x # Print commands and their arguments as they are executed. + +# Ensure the directories exists +mkdir -p "${LOG_DIR}" +mkdir -p "${OUTPUT_DIR}" + +logfile="postprocess_log_$(date +%Y%m%d%H%M%S).txt" + +script="${SCRIPT}" +input_dir="${INPUT_DIR}" +output_dir="${OUTPUT_DIR}" + +check_command_exists() { + echo "Checking if command $1 exists..." + command -v "$1" >>${logfile} 2>&1 +} + +echo "Starting script..." + +if check_command_exists podman-hpc; then + CONTAINER_CMD="podman-hpc" +elif check_command_exists docker; then + CONTAINER_CMD="docker" +else + echo "ERROR: Neither podman-hpc nor docker is installed on this system." 
+ exit 1 +fi + +echo "Using container command: ${CONTAINER_CMD}" + +echo "INFO - $(date) - Run postprocessing" +$CONTAINER_CMD run \ + --attach STDOUT \ + --rm \ + -v $PWD:/workspace \ + -v $input_dir:/input \ + -v $output_dir:/output \ + -v "${LOG_DIR}":/log_dir \ + -w /workspace \ + -e LOGFILE=/log_dir/${logfile} \ + hmatools \ + ${script} \ +| tee -a ${logfile} + +echo "Script completed successfully!" \ No newline at end of file diff --git a/hmatools/setup.py b/hmatools/setup.py new file mode 100644 index 0000000..bdeb238 --- /dev/null +++ b/hmatools/setup.py @@ -0,0 +1,16 @@ +from setuptools import setup, find_packages + +setup( + name="HMATools", + version="0.1", + packages=find_packages(where="python"), + package_dir={"": "python"}, + install_requires=["argparse", "pandas", "seaborn", "matplotlib"], + entry_points={ + "console_scripts": [ + "postprocess=postprocparser:main", + "plot=plotparser:main", + ], + }, + test_suite="python/tests", +) diff --git a/hmatools/tests/__init.py__ b/hmatools/tests/__init.py__ new file mode 100644 index 0000000..4f54c83 --- /dev/null +++ b/hmatools/tests/__init.py__ @@ -0,0 +1 @@ +# tests/__init__.py \ No newline at end of file diff --git a/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-cuda-6.30.04-cuda12.2.2-devel-ubuntu22.04_20240801101357.txt b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-cuda-6.30.04-cuda12.2.2-devel-ubuntu22.04_20240801101357.txt new file mode 100644 index 0000000..6b4221d --- /dev/null +++ b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-cuda-6.30.04-cuda12.2.2-devel-ubuntu22.04_20240801101357.txt @@ -0,0 +1,265 @@ +INFO - Thu Aug 1 17:14:06 UTC 2024 - DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs:/input +INFO - Thu Aug 1 17:14:06 UTC 2024 - System information: +INFO - Thu Aug 1 17:14:06 UTC 2024 - Executing: perl /run/sysinfo.pl +{ + "hostname": "da8d6a272186", + "OS": "unknown", + "sockets": "1", + "cpu_id": "AMD EPYC 7302P 16-Core 
Processor", + "threads_per_core": "2", + "max_clock": "3000.0000", + "cores": "32", + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz", + }, + ], +} +md5 sum: 57edbed40aa047bcbed937be8ee393a7 57edbed4 +INFO - Thu Aug 1 17:14:08 UTC 2024 - CPU information: +INFO - Thu Aug 1 17:14:08 UTC 2024 - Executing: lscpu +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 43 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 32 +On-line CPU(s) list: 0-31 +Vendor ID: AuthenticAMD +Model name: AMD EPYC 7302P 16-Core Processor +CPU family: 23 +Model: 49 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU max MHz: 3000.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5988.87 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +Virtualization: AMD-V +L1d cache: 512 KiB (16 instances) +L1i cache: 512 KiB (16 instances) +L2 cache: 8 MiB (16 instances) +L3 cache: 128 MiB (8 instances) +NUMA 
node(s): 1 +NUMA node0 CPU(s): 0-31 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection +Vulnerability Spec rstack overflow: Mitigation; Safe RET +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +INFO - Thu Aug 1 17:14:08 UTC 2024 - GPU information: +INFO - Thu Aug 1 17:14:08 UTC 2024 - Executing: nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv +name, driver_version, count, clocks.max.sm [MHz], clocks.max.memory [MHz], memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu, temperature.memory +NVIDIA A100 80GB PCIe, 550.54.15, 1, 1410 MHz, 1512 MHz, 81920 MiB, 0 MiB, 81037 MiB, 20 %, 0 %, 38, 51 +INFO - Thu Aug 1 17:14:11 UTC 2024 - Setup +INFO - Thu Aug 1 17:14:11 UTC 2024 - Executing: export FCS_DATAPATH=/input +INFO - Thu Aug 1 17:14:11 UTC 2024 - Executing: source /hep-mini-apps/root/install/bin/thisroot.sh +INFO - Thu Aug 1 17:14:11 UTC 2024 - Executing: source /hep-mini-apps/FCS-GPU/install/setup.sh +FastCaloSim Standalone + Installation path: /hep-mini-apps/FCS-GPU/install + Added '$FCSSTANDALONE/bin' to $PATH + Added '$FCSSTANDALONE/lib' to $LD_LIBRARY_PATH +INFO - Thu Aug 1 17:14:11 UTC 2024 - TFCSSimulation +INFO - Thu Aug 1 
17:14:11 UTC 2024 - Executing: runTFCSSimulation --earlyReturn --energy 65536 +Reading input files from /input +Initialising DSID DB... +DB ready +Parametrization File: '/input/BigParamFiles/TFCSparam_v010.root' + energy = 65536 + eta_label = 020_025 +* Running on linux system +432704 /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 + * Prepare to run on: /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 with entries = 10000 + * Running over 10000 events. + * 1stPCA file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.firstPCA_App.ver07.root + * AvgShape file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.AvgSimShape.ver07.root +generating random numbers on GPU +R4H m_rand_ptr: 0x7fd73f200000 + -- R4H ncells: 200000 cells_energy: 0x7fd73f630000 hitcells_E: 0x7fd73f6f3600 hitcells_ct: 0x7fd73f6fb400 +Time of R4hit: 1.613e-06,2.294e-06,1.725,2e-08,1.9517e-05 s +using CUDA +Time of Rand4Hit_init: 1.72504 s +============================= +Loading cellId_vs_cellHashId_map +Line: 10000 id 2cac3c1e00000000 hash_id 9999 +Line: 20000 id 2cc4213e00000000 hash_id 19999 +Line: 30000 id 2ce0195e00000000 hash_id 29999 +Line: 40000 id 2d20847e00000000 hash_id 39999 +Line: 50000 id 2d21be1e00000000 hash_id 49999 +Line: 60000 id 2d22f63e00000000 hash_id 59999 +Line: 70000 id 2d4025de00000000 hash_id 69999 +Line: 80000 id 2d6001fe00000000 hash_id 79999 +Line: 90000 id 2d80681e00000000 hash_id 89999 +Line: 100000 id 2da1283e00000000 hash_id 99999 +Line: 110000 id 2da2605e00000000 hash_id 109999 +Line: 120000 id 2dc0007e00000000 hash_id 119999 +Line: 130000 id 2dc04e9e00000000 hash_id 
129999 +Line: 140000 id 2de02abe00000000 hash_id 139999 +Line: 150000 id 2e2c2a5e00000000 hash_id 149999 +Line: 160000 id 2e441cfe00000000 hash_id 159999 +Line: 170000 id 2e60151e00000000 hash_id 169999 +Line: 180000 id 3485e00000000000 hash_id 179999 +Done. +Checking loading cells from file +0 : 6, 3179541336923570176 +Checking loading cells from file +25000 : 6, 3225782741538701312 +Checking loading cells from file +50000 : 1, 3252089450586439680 +Checking loading cells from file +75000 : 2, 3260690723892101120 +Checking loading cells from file +100000 : 1, 3287953458300190720 +Checking loading cells from file +125000 : 2, 3296678426663976960 +Checking loading cells from file +150000 : 5, 3327080816525180928 +Checking loading cells from file +175000 : 10, 3535184969997484032 +Result of PostProcessGeometry(): 1 + + +Testing whether CaloGeoGeometry is loaded properly +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x560eef4116f0 + +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x560eef4116f0 + +Loading FCal electrode #1 +Loading FCal electrode #2 +Loading FCal electrode #3 +Total GeoRegions= 78 +Total cells= 187652 +Executing on GPU: NVIDIA A100 80GB PCIe +cuMalloc 187652 cells +device Memcpy 187652/187652 cells Total:21017024 Bytes +CUDA GEO +ncells: 187652 +regions: 78 +================================= +GPU Geometry loaded!!! 
+TFCSShapeValidation::LoopEvents(): Running on layer = 2, pcabin = -1 +======================================================== +======================================================== + + +Event: 0 + +Event: 250 + +Event: 500 + +Event: 750 + +Event: 1000 + +Event: 1250 + +Event: 1500 + +Event: 1750 + +Event: 2000 + +Event: 2250 + +Event: 2500 + +Event: 2750 + +Event: 3000 + +Event: 3250 + +Event: 3500 + +Event: 3750 + +Event: 4000 + +Event: 4250 + +Event: 4500 + +Event: 4750 + +Event: 5000 + +Event: 5250 + +Event: 5500 + +Event: 5750 + +Event: 6000 + +Event: 6250 + +Event: 6500 + +Event: 6750 + +Event: 7000 + +Event: 7250 + +Event: 7500 + +Event: 7750 + +Event: 8000 + +Event: 8250 + +Event: 8500 + +Event: 8750 + +Event: 9000 + +Event: 9250 + +Event: 9500 + +Event: 9750 +GPU memory used(MB): 527 +kernel timing + kernel total /s avg launch /us std dev /us + sim_clean 0.20832632 10.4 1.3 + sim_A 0.48336262 24.2 3.2 + sim_ct 0.19353935 9.7 1.2 + sim_cp 0.37192848 18.6 2.2 +launch count 19973 +2 +Time of LoadGeo cpu IO:0.726031 s +Time of GPU GeoLg() :0.0247775 s +Time of InitInputTree :0.0177158 s +Time of resizeTruth :0.00503153 s +Time of eventloop GPU load FH :0.00453769 s +Time of eventloop LateralShapeParamHitChain :2.02886 s +Time of eventloop :2.39977 s +Time of eventloop GPU ChainA:1.68442 s +Time of eventloop GPU ChainB:0.00340656 s +Time of eventloop host Chain0:0.339178 s +Time of eventloop before chain simul:0.00647577 s +Time of eventloop I/O read from tree:0.303344 s +Time for Chain 0 is 2.07586 s +Time for Chain 1 is 0 s +Time for Chain 2 is 0 s +Time for Chain 3 is 0 s +Time for Chain 4 is 0 s +exiting early diff --git a/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-cuda-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101240.txt b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-cuda-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101240.txt new file mode 100644 index 0000000..919ff2c --- /dev/null +++ 
b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-cuda-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101240.txt @@ -0,0 +1,265 @@ +INFO - Thu Aug 1 17:12:49 UTC 2024 - DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs:/input +INFO - Thu Aug 1 17:12:49 UTC 2024 - System information: +INFO - Thu Aug 1 17:12:49 UTC 2024 - Executing: perl /run/sysinfo.pl +{ + "hostname": "aa5ef7c9c27e", + "OS": "unknown", + "threads_per_core": "2", + "max_clock": "3000.0000", + "cores": "32", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "sockets": "1", + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz", + }, + ], +} +md5 sum: 47703f0f9cb09820c9c2bdf46acd37be 47703f0f +INFO - Thu Aug 1 17:12:52 UTC 2024 - CPU information: +INFO - Thu Aug 1 17:12:52 UTC 2024 - Executing: lscpu +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 43 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 32 +On-line CPU(s) list: 0-31 +Vendor ID: AuthenticAMD +Model name: AMD EPYC 7302P 16-Core Processor +CPU family: 23 +Model: 49 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU max MHz: 3000.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5988.87 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap 
clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +Virtualization: AMD-V +L1d cache: 512 KiB (16 instances) +L1i cache: 512 KiB (16 instances) +L2 cache: 8 MiB (16 instances) +L3 cache: 128 MiB (8 instances) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-31 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection +Vulnerability Spec rstack overflow: Mitigation; Safe RET +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +INFO - Thu Aug 1 17:12:52 UTC 2024 - GPU information: +INFO - Thu Aug 1 17:12:52 UTC 2024 - Executing: nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv +name, driver_version, count, clocks.max.sm [MHz], clocks.max.memory [MHz], memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu, temperature.memory +NVIDIA A100 80GB PCIe, 550.54.15, 1, 1410 MHz, 1512 MHz, 81920 MiB, 0 MiB, 81037 MiB, 20 %, 0 %, 38, 51 +INFO - Thu Aug 1 17:12:54 UTC 2024 - Setup +INFO - Thu 
Aug 1 17:12:54 UTC 2024 - Executing: export FCS_DATAPATH=/input +INFO - Thu Aug 1 17:12:54 UTC 2024 - Executing: source /hep-mini-apps/root/install/bin/thisroot.sh +INFO - Thu Aug 1 17:12:54 UTC 2024 - Executing: source /hep-mini-apps/FCS-GPU/install/setup.sh +FastCaloSim Standalone + Installation path: /hep-mini-apps/FCS-GPU/install + Added '$FCSSTANDALONE/bin' to $PATH + Added '$FCSSTANDALONE/lib' to $LD_LIBRARY_PATH +INFO - Thu Aug 1 17:12:54 UTC 2024 - TFCSSimulation +INFO - Thu Aug 1 17:12:54 UTC 2024 - Executing: runTFCSSimulation --earlyReturn --energy 65536 +Reading input files from /input +Initialising DSID DB... +DB ready +Parametrization File: '/input/BigParamFiles/TFCSparam_v010.root' + energy = 65536 + eta_label = 020_025 +* Running on linux system +432704 /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 + * Prepare to run on: /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 with entries = 10000 + * Running over 10000 events. 
+ * 1stPCA file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.firstPCA_App.ver07.root + * AvgShape file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.AvgSimShape.ver07.root +generating random numbers on GPU +R4H m_rand_ptr: 0x7ff42f200000 + -- R4H ncells: 200000 cells_energy: 0x7ff42f630000 hitcells_E: 0x7ff42f6f3600 hitcells_ct: 0x7ff42f6fb400 +Time of R4hit: 1.854e-06,1.974e-06,1.73152,3e-08,1.9818e-05 s +using CUDA +Time of Rand4Hit_init: 1.73156 s +============================= +Loading cellId_vs_cellHashId_map +Line: 10000 id 2cac3c1e00000000 hash_id 9999 +Line: 20000 id 2cc4213e00000000 hash_id 19999 +Line: 30000 id 2ce0195e00000000 hash_id 29999 +Line: 40000 id 2d20847e00000000 hash_id 39999 +Line: 50000 id 2d21be1e00000000 hash_id 49999 +Line: 60000 id 2d22f63e00000000 hash_id 59999 +Line: 70000 id 2d4025de00000000 hash_id 69999 +Line: 80000 id 2d6001fe00000000 hash_id 79999 +Line: 90000 id 2d80681e00000000 hash_id 89999 +Line: 100000 id 2da1283e00000000 hash_id 99999 +Line: 110000 id 2da2605e00000000 hash_id 109999 +Line: 120000 id 2dc0007e00000000 hash_id 119999 +Line: 130000 id 2dc04e9e00000000 hash_id 129999 +Line: 140000 id 2de02abe00000000 hash_id 139999 +Line: 150000 id 2e2c2a5e00000000 hash_id 149999 +Line: 160000 id 2e441cfe00000000 hash_id 159999 +Line: 170000 id 2e60151e00000000 hash_id 169999 +Line: 180000 id 3485e00000000000 hash_id 179999 +Done. 
+Checking loading cells from file +0 : 6, 3179541336923570176 +Checking loading cells from file +25000 : 6, 3225782741538701312 +Checking loading cells from file +50000 : 1, 3252089450586439680 +Checking loading cells from file +75000 : 2, 3260690723892101120 +Checking loading cells from file +100000 : 1, 3287953458300190720 +Checking loading cells from file +125000 : 2, 3296678426663976960 +Checking loading cells from file +150000 : 5, 3327080816525180928 +Checking loading cells from file +175000 : 10, 3535184969997484032 +Result of PostProcessGeometry(): 1 + + +Testing whether CaloGeoGeometry is loaded properly +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x5569a3d472b0 + +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x5569a3d472b0 + +Loading FCal electrode #1 +Loading FCal electrode #2 +Loading FCal electrode #3 +Total GeoRegions= 78 +Total cells= 187652 +Executing on GPU: NVIDIA A100 80GB PCIe +cuMalloc 187652 cells +device Memcpy 187652/187652 cells Total:21017024 Bytes +CUDA GEO +ncells: 187652 +regions: 78 +================================= +GPU Geometry loaded!!! 
+TFCSShapeValidation::LoopEvents(): Running on layer = 2, pcabin = -1 +======================================================== +======================================================== + + +Event: 0 + +Event: 250 + +Event: 500 + +Event: 750 + +Event: 1000 + +Event: 1250 + +Event: 1500 + +Event: 1750 + +Event: 2000 + +Event: 2250 + +Event: 2500 + +Event: 2750 + +Event: 3000 + +Event: 3250 + +Event: 3500 + +Event: 3750 + +Event: 4000 + +Event: 4250 + +Event: 4500 + +Event: 4750 + +Event: 5000 + +Event: 5250 + +Event: 5500 + +Event: 5750 + +Event: 6000 + +Event: 6250 + +Event: 6500 + +Event: 6750 + +Event: 7000 + +Event: 7250 + +Event: 7500 + +Event: 7750 + +Event: 8000 + +Event: 8250 + +Event: 8500 + +Event: 8750 + +Event: 9000 + +Event: 9250 + +Event: 9500 + +Event: 9750 +GPU memory used(MB): 527 +kernel timing + kernel total /s avg launch /us std dev /us + sim_clean 0.20625765 10.3 1.0 + sim_A 0.48149875 24.1 2.8 + sim_ct 0.19245834 9.6 0.9 + sim_cp 0.36716598 18.4 1.3 +launch count 19973 +2 +Time of LoadGeo cpu IO:0.707652 s +Time of GPU GeoLg() :0.0246851 s +Time of InitInputTree :0.0176983 s +Time of resizeTruth :0.00498405 s +Time of eventloop GPU load FH :0.00442894 s +Time of eventloop LateralShapeParamHitChain :2.01168 s +Time of eventloop :2.37223 s +Time of eventloop GPU ChainA:1.67147 s +Time of eventloop GPU ChainB:0.00332568 s +Time of eventloop host Chain0:0.335094 s +Time of eventloop before chain simul:0.00654652 s +Time of eventloop I/O read from tree:0.307414 s +Time for Chain 0 is 2.05658 s +Time for Chain 1 is 0 s +Time for Chain 2 is 0 s +Time for Chain 3 is 0 s +Time for Chain 4 is 0 s +exiting early diff --git a/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-kokkos-cuda-6.30.04-cuda12.2.2-devel-ubuntu22.04_20240801101434.txt b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-kokkos-cuda-6.30.04-cuda12.2.2-devel-ubuntu22.04_20240801101434.txt new file mode 100644 index 0000000..49bd41d --- /dev/null +++ 
b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-kokkos-cuda-6.30.04-cuda12.2.2-devel-ubuntu22.04_20240801101434.txt @@ -0,0 +1,264 @@ +INFO - Thu Aug 1 17:14:43 UTC 2024 - DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs:/input +INFO - Thu Aug 1 17:14:43 UTC 2024 - System information: +INFO - Thu Aug 1 17:14:43 UTC 2024 - Executing: perl /run/sysinfo.pl +{ + "hostname": "1a3ae75ea67e", + "OS": "unknown", + "max_clock": "3000.0000", + "sockets": "1", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "threads_per_core": "2", + "cores": "32", + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz", + }, + ], +} +md5 sum: ced329da134f193479b18066f045443f ced329da +INFO - Thu Aug 1 17:14:45 UTC 2024 - CPU information: +INFO - Thu Aug 1 17:14:45 UTC 2024 - Executing: lscpu +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 43 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 32 +On-line CPU(s) list: 0-31 +Vendor ID: AuthenticAMD +Model name: AMD EPYC 7302P 16-Core Processor +CPU family: 23 +Model: 49 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU max MHz: 3000.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5988.87 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap 
clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +Virtualization: AMD-V +L1d cache: 512 KiB (16 instances) +L1i cache: 512 KiB (16 instances) +L2 cache: 8 MiB (16 instances) +L3 cache: 128 MiB (8 instances) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-31 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection +Vulnerability Spec rstack overflow: Mitigation; Safe RET +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +INFO - Thu Aug 1 17:14:45 UTC 2024 - GPU information: +INFO - Thu Aug 1 17:14:45 UTC 2024 - Executing: nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv +name, driver_version, count, clocks.max.sm [MHz], clocks.max.memory [MHz], memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu, temperature.memory +NVIDIA A100 80GB PCIe, 550.54.15, 1, 1410 MHz, 1512 MHz, 81920 MiB, 0 MiB, 81037 MiB, 20 %, 0 %, 38, 51 +INFO - Thu Aug 1 17:14:48 UTC 2024 - Setup +INFO - Thu 
Aug 1 17:14:48 UTC 2024 - Executing: export FCS_DATAPATH=/input +INFO - Thu Aug 1 17:14:48 UTC 2024 - Executing: export LD_LIBRARY_PATH=/hep-mini-apps/Kokkos/install/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64 +INFO - Thu Aug 1 17:14:48 UTC 2024 - Executing: source /hep-mini-apps/root/install/bin/thisroot.sh +INFO - Thu Aug 1 17:14:48 UTC 2024 - Executing: source /hep-mini-apps/FCS-GPU/install/setup.sh +FastCaloSim Standalone + Installation path: /hep-mini-apps/FCS-GPU/install + Added '$FCSSTANDALONE/bin' to $PATH + Added '$FCSSTANDALONE/lib' to $LD_LIBRARY_PATH +INFO - Thu Aug 1 17:14:48 UTC 2024 - TFCSSimulation +INFO - Thu Aug 1 17:14:48 UTC 2024 - Executing: runTFCSSimulation --earlyReturn --energy 65536 +Kokkos::OpenMP::initialize WARNING: OMP_PROC_BIND environment variable not set + In general, for best performance with OpenMP 4.0 or better set OMP_PROC_BIND=spread and OMP_PLACES=threads + For best performance with OpenMP 3.1 set OMP_PROC_BIND=true + For unit testing set OMP_PROC_BIND=false + +Reading input files from /input +Initialising DSID DB... +DB ready +Parametrization File: '/input/BigParamFiles/TFCSparam_v010.root' + energy = 65536 + eta_label = 020_025 +* Running on linux system +432704 /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 + * Prepare to run on: /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 with entries = 10000 + * Running over 10000 events. 
+ * 1stPCA file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.firstPCA_App.ver07.root + * AvgShape file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.AvgSimShape.ver07.root +generating random numbers on GPU +Time of R4hit: 2.004e-06,2.515e-06,0.00903719,3.1e-08,0.000118768 s +using Kokkos +Time of Rand4Hit_init: 0.00918876 s +============================= +Loading cellId_vs_cellHashId_map +Line: 10000 id 2cac3c1e00000000 hash_id 9999 +Line: 20000 id 2cc4213e00000000 hash_id 19999 +Line: 30000 id 2ce0195e00000000 hash_id 29999 +Line: 40000 id 2d20847e00000000 hash_id 39999 +Line: 50000 id 2d21be1e00000000 hash_id 49999 +Line: 60000 id 2d22f63e00000000 hash_id 59999 +Line: 70000 id 2d4025de00000000 hash_id 69999 +Line: 80000 id 2d6001fe00000000 hash_id 79999 +Line: 90000 id 2d80681e00000000 hash_id 89999 +Line: 100000 id 2da1283e00000000 hash_id 99999 +Line: 110000 id 2da2605e00000000 hash_id 109999 +Line: 120000 id 2dc0007e00000000 hash_id 119999 +Line: 130000 id 2dc04e9e00000000 hash_id 129999 +Line: 140000 id 2de02abe00000000 hash_id 139999 +Line: 150000 id 2e2c2a5e00000000 hash_id 149999 +Line: 160000 id 2e441cfe00000000 hash_id 159999 +Line: 170000 id 2e60151e00000000 hash_id 169999 +Line: 180000 id 3485e00000000000 hash_id 179999 +Done. 
+Checking loading cells from file +0 : 6, 3179541336923570176 +Checking loading cells from file +25000 : 6, 3225782741538701312 +Checking loading cells from file +50000 : 1, 3252089450586439680 +Checking loading cells from file +75000 : 2, 3260690723892101120 +Checking loading cells from file +100000 : 1, 3287953458300190720 +Checking loading cells from file +125000 : 2, 3296678426663976960 +Checking loading cells from file +150000 : 5, 3327080816525180928 +Checking loading cells from file +175000 : 10, 3535184969997484032 +Result of PostProcessGeometry(): 1 + + +Testing whether CaloGeoGeometry is loaded properly +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x558ef170c350 + +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x558ef170c350 + +Loading FCal electrode #1 +Loading FCal electrode #2 +Loading FCal electrode #3 +Total GeoRegions= 78 +Total cells= 187652 +Executing on Kokkos: Cuda device +NVIDIA A100 80GB PCIe +device Memcpy 187652/187652 cells Total:21017024 Bytes +GPU Geometry loaded!!! 
+TFCSShapeValidation::LoopEvents(): Running on layer = 2, pcabin = -1 +======================================================== +======================================================== + + +Event: 0 + +Event: 250 + +Event: 500 + +Event: 750 + +Event: 1000 + +Event: 1250 + +Event: 1500 + +Event: 1750 + +Event: 2000 + +Event: 2250 + +Event: 2500 + +Event: 2750 + +Event: 3000 + +Event: 3250 + +Event: 3500 + +Event: 3750 + +Event: 4000 + +Event: 4250 + +Event: 4500 + +Event: 4750 + +Event: 5000 + +Event: 5250 + +Event: 5500 + +Event: 5750 + +Event: 6000 + +Event: 6250 + +Event: 6500 + +Event: 6750 + +Event: 7000 + +Event: 7250 + +Event: 7500 + +Event: 7750 + +Event: 8000 + +Event: 8250 + +Event: 8500 + +Event: 8750 + +Event: 9000 + +Event: 9250 + +Event: 9500 + +Event: 9750 +kernel timing + kernel total /s avg launch /us std dev /us + sim_clean 0.51891751 26.0 4.1 + sim_A 0.70366945 35.2 2.8 + sim_ct 0.23812975 11.9 0.8 + sim_cp 0.59703218 29.9 2.0 +launch count 19973 +2 +Time of LoadGeo cpu IO:0.707511 s +Time of GPU GeoLg() :0.0576766 s +Time of InitInputTree :0.0181598 s +Time of resizeTruth :0.00701503 s +Time of eventloop GPU load FH :0.0109189 s +Time of eventloop LateralShapeParamHitChain :2.88591 s +Time of eventloop :3.27094 s +Time of eventloop GPU ChainA:2.5357 s +Time of eventloop GPU ChainB:0.00333486 s +Time of eventloop host Chain0:0.344944 s +Time of eventloop before chain simul:0.00662612 s +Time of eventloop I/O read from tree:0.321609 s +Time for Chain 0 is 2.94052 s +Time for Chain 1 is 0 s +Time for Chain 2 is 0 s +Time for Chain 3 is 0 s +Time for Chain 4 is 0 s +exiting early diff --git a/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-kokkos-cuda-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101318.txt b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-kokkos-cuda-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101318.txt new file mode 100644 index 0000000..e7f0917 --- /dev/null +++ 
b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-kokkos-cuda-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101318.txt @@ -0,0 +1,264 @@ +INFO - Thu Aug 1 17:13:27 UTC 2024 - DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs:/input +INFO - Thu Aug 1 17:13:28 UTC 2024 - System information: +INFO - Thu Aug 1 17:13:28 UTC 2024 - Executing: perl /run/sysinfo.pl +{ + "hostname": "35323be8c88b", + "OS": "unknown", + "sockets": "1", + "max_clock": "3000.0000", + "cores": "32", + "threads_per_core": "2", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz", + }, + ], +} +md5 sum: cd3b94227bcf496fe3528b1d4e11e066 cd3b9422 +INFO - Thu Aug 1 17:13:30 UTC 2024 - CPU information: +INFO - Thu Aug 1 17:13:30 UTC 2024 - Executing: lscpu +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 43 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 32 +On-line CPU(s) list: 0-31 +Vendor ID: AuthenticAMD +Model name: AMD EPYC 7302P 16-Core Processor +CPU family: 23 +Model: 49 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU max MHz: 3000.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5988.87 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx 
smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +Virtualization: AMD-V +L1d cache: 512 KiB (16 instances) +L1i cache: 512 KiB (16 instances) +L2 cache: 8 MiB (16 instances) +L3 cache: 128 MiB (8 instances) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-31 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection +Vulnerability Spec rstack overflow: Mitigation; Safe RET +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +INFO - Thu Aug 1 17:13:30 UTC 2024 - GPU information: +INFO - Thu Aug 1 17:13:30 UTC 2024 - Executing: nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv +name, driver_version, count, clocks.max.sm [MHz], clocks.max.memory [MHz], memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu, temperature.memory +NVIDIA A100 80GB PCIe, 550.54.15, 1, 1410 MHz, 1512 MHz, 81920 MiB, 0 MiB, 81037 MiB, 20 %, 0 %, 38, 51 +INFO - Thu Aug 1 17:13:32 UTC 2024 - Setup +INFO 
- Thu Aug 1 17:13:32 UTC 2024 - Executing: export FCS_DATAPATH=/input +INFO - Thu Aug 1 17:13:32 UTC 2024 - Executing: export LD_LIBRARY_PATH=/hep-mini-apps/Kokkos/install/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/comm_libs/nvshmem/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/comm_libs/nccl/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/math_libs/lib64:/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/compilers/lib:/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/cuda/lib64: +INFO - Thu Aug 1 17:13:32 UTC 2024 - Executing: source /hep-mini-apps/root/install/bin/thisroot.sh +INFO - Thu Aug 1 17:13:32 UTC 2024 - Executing: source /hep-mini-apps/FCS-GPU/install/setup.sh +FastCaloSim Standalone + Installation path: /hep-mini-apps/FCS-GPU/install + Added '$FCSSTANDALONE/bin' to $PATH + Added '$FCSSTANDALONE/lib' to $LD_LIBRARY_PATH +INFO - Thu Aug 1 17:13:32 UTC 2024 - TFCSSimulation +INFO - Thu Aug 1 17:13:32 UTC 2024 - Executing: runTFCSSimulation --earlyReturn --energy 65536 +Kokkos::OpenMP::initialize WARNING: OMP_PROC_BIND environment variable not set + In general, for best performance with OpenMP 4.0 or better set OMP_PROC_BIND=spread and OMP_PLACES=threads + For best performance with OpenMP 3.1 set OMP_PROC_BIND=true + For unit testing set OMP_PROC_BIND=false + +Reading input files from /input +Initialising DSID DB... +DB ready +Parametrization File: '/input/BigParamFiles/TFCSparam_v010.root' + energy = 65536 + eta_label = 020_025 +* Running on linux system +432704 /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 + * Prepare to run on: /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 with entries = 10000 + * Running over 10000 events. 
+ * 1stPCA file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.firstPCA_App.ver07.root + * AvgShape file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.AvgSimShape.ver07.root +generating random numbers on GPU +Time of R4hit: 1.984e-06,2.174e-06,0.00869637,3e-08,0.000117887 s +using Kokkos +Time of Rand4Hit_init: 0.00884478 s +============================= +Loading cellId_vs_cellHashId_map +Line: 10000 id 2cac3c1e00000000 hash_id 9999 +Line: 20000 id 2cc4213e00000000 hash_id 19999 +Line: 30000 id 2ce0195e00000000 hash_id 29999 +Line: 40000 id 2d20847e00000000 hash_id 39999 +Line: 50000 id 2d21be1e00000000 hash_id 49999 +Line: 60000 id 2d22f63e00000000 hash_id 59999 +Line: 70000 id 2d4025de00000000 hash_id 69999 +Line: 80000 id 2d6001fe00000000 hash_id 79999 +Line: 90000 id 2d80681e00000000 hash_id 89999 +Line: 100000 id 2da1283e00000000 hash_id 99999 +Line: 110000 id 2da2605e00000000 hash_id 109999 +Line: 120000 id 2dc0007e00000000 hash_id 119999 +Line: 130000 id 2dc04e9e00000000 hash_id 129999 +Line: 140000 id 2de02abe00000000 hash_id 139999 +Line: 150000 id 2e2c2a5e00000000 hash_id 149999 +Line: 160000 id 2e441cfe00000000 hash_id 159999 +Line: 170000 id 2e60151e00000000 hash_id 169999 +Line: 180000 id 3485e00000000000 hash_id 179999 +Done. 
+Checking loading cells from file +0 : 6, 3179541336923570176 +Checking loading cells from file +25000 : 6, 3225782741538701312 +Checking loading cells from file +50000 : 1, 3252089450586439680 +Checking loading cells from file +75000 : 2, 3260690723892101120 +Checking loading cells from file +100000 : 1, 3287953458300190720 +Checking loading cells from file +125000 : 2, 3296678426663976960 +Checking loading cells from file +150000 : 5, 3327080816525180928 +Checking loading cells from file +175000 : 10, 3535184969997484032 +Result of PostProcessGeometry(): 1 + + +Testing whether CaloGeoGeometry is loaded properly +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x5588e89149d0 + +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x5588e89149d0 + +Loading FCal electrode #1 +Loading FCal electrode #2 +Loading FCal electrode #3 +Total GeoRegions= 78 +Total cells= 187652 +Executing on Kokkos: Cuda device +NVIDIA A100 80GB PCIe +device Memcpy 187652/187652 cells Total:21017024 Bytes +GPU Geometry loaded!!! 
+TFCSShapeValidation::LoopEvents(): Running on layer = 2, pcabin = -1 +======================================================== +======================================================== + + +Event: 0 + +Event: 250 + +Event: 500 + +Event: 750 + +Event: 1000 + +Event: 1250 + +Event: 1500 + +Event: 1750 + +Event: 2000 + +Event: 2250 + +Event: 2500 + +Event: 2750 + +Event: 3000 + +Event: 3250 + +Event: 3500 + +Event: 3750 + +Event: 4000 + +Event: 4250 + +Event: 4500 + +Event: 4750 + +Event: 5000 + +Event: 5250 + +Event: 5500 + +Event: 5750 + +Event: 6000 + +Event: 6250 + +Event: 6500 + +Event: 6750 + +Event: 7000 + +Event: 7250 + +Event: 7500 + +Event: 7750 + +Event: 8000 + +Event: 8250 + +Event: 8500 + +Event: 8750 + +Event: 9000 + +Event: 9250 + +Event: 9500 + +Event: 9750 +kernel timing + kernel total /s avg launch /us std dev /us + sim_clean 0.51431751 25.8 4.0 + sim_A 0.70151908 35.1 2.8 + sim_ct 0.23960878 12.0 0.7 + sim_cp 0.59184934 29.6 1.9 +launch count 19973 +2 +Time of LoadGeo cpu IO:0.729442 s +Time of GPU GeoLg() :0.0271369 s +Time of InitInputTree :0.0179467 s +Time of resizeTruth :0.00612663 s +Time of eventloop GPU load FH :0.0113988 s +Time of eventloop LateralShapeParamHitChain :2.8824 s +Time of eventloop :3.26986 s +Time of eventloop GPU ChainA:2.53221 s +Time of eventloop GPU ChainB:0.00334502 s +Time of eventloop host Chain0:0.344875 s +Time of eventloop before chain simul:0.0066083 s +Time of eventloop I/O read from tree:0.32318 s +Time for Chain 0 is 2.93794 s +Time for Chain 1 is 0 s +Time for Chain 2 is 0 s +Time for Chain 3 is 0 s +Time for Chain 4 is 0 s +exiting early diff --git a/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-stdpar-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101512.txt b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-stdpar-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101512.txt new file mode 100644 index 0000000..9448d63 --- /dev/null +++ 
b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-stdpar-6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04_20240801101512.txt @@ -0,0 +1,81 @@ +INFO - Thu Aug 1 17:15:21 UTC 2024 - DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs:/input +INFO - Thu Aug 1 17:15:21 UTC 2024 - System information: +INFO - Thu Aug 1 17:15:21 UTC 2024 - Executing: perl /run/sysinfo.pl +{ + "hostname": "ffd161669a69", + "OS": "unknown", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "max_clock": "3000.0000", + "sockets": "1", + "threads_per_core": "2", + "cores": "32", + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz", + }, + ], +} +md5 sum: 976d06b6a3478e6ca97d9dd22d43b116 976d06b6 +INFO - Thu Aug 1 17:15:23 UTC 2024 - CPU information: +INFO - Thu Aug 1 17:15:23 UTC 2024 - Executing: lscpu +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 43 bits physical, 48 bits virtual +Byte Order: Little Endian +CPU(s): 32 +On-line CPU(s) list: 0-31 +Vendor ID: AuthenticAMD +Model name: AMD EPYC 7302P 16-Core Processor +CPU family: 23 +Model: 49 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU max MHz: 3000.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5988.87 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap 
clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +Virtualization: AMD-V +L1d cache: 512 KiB (16 instances) +L1i cache: 512 KiB (16 instances) +L2 cache: 8 MiB (16 instances) +L3 cache: 128 MiB (8 instances) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-31 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection +Vulnerability Spec rstack overflow: Mitigation; Safe RET +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +INFO - Thu Aug 1 17:15:23 UTC 2024 - GPU information: +INFO - Thu Aug 1 17:15:23 UTC 2024 - Executing: nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv +name, driver_version, count, clocks.max.sm [MHz], clocks.max.memory [MHz], memory.total [MiB], memory.used [MiB], memory.free [MiB], utilization.gpu [%], utilization.memory [%], temperature.gpu, temperature.memory +NVIDIA A100 80GB PCIe, 550.54.15, 1, 1410 MHz, 1512 MHz, 81920 MiB, 0 MiB, 81037 MiB, 20 %, 0 %, 37, 51 +INFO - Thu Aug 1 17:15:25 UTC 2024 - Setup +INFO - Thu 
Aug 1 17:15:25 UTC 2024 - Executing: export FCS_DATAPATH=/input +INFO - Thu Aug 1 17:15:25 UTC 2024 - Executing: source /hep-mini-apps/root/install/bin/thisroot.sh +INFO - Thu Aug 1 17:15:26 UTC 2024 - Executing: source /hep-mini-apps/FCS-GPU/install/setup.sh +/run/run_fcs-gpu.sh: line 13: /hep-mini-apps/FCS-GPU/install/setup.sh: No such file or directory +INFO - Thu Aug 1 17:15:26 UTC 2024 - TFCSSimulation +INFO - Thu Aug 1 17:15:26 UTC 2024 - Executing: runTFCSSimulation --earlyReturn --energy 65536 +Reading input files from /input +Initialising DSID DB... +terminate called after throwing an instance of 'std::logic_error' + what(): basic_string::_M_construct null not valid +/run/run_fcs-gpu.sh: line 13: 56 Aborted (core dumped) runTFCSSimulation --earlyReturn --energy 65536 diff --git a/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-x86-6.30.04-ubuntu22.04_20240801101159.txt b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-x86-6.30.04-ubuntu22.04_20240801101159.txt new file mode 100644 index 0000000..041bf58 --- /dev/null +++ b/hmatools/tests/test_data/Log Files - Run FastCaloSim/run_log_fcs-x86-6.30.04-ubuntu22.04_20240801101159.txt @@ -0,0 +1,225 @@ +Logfile: /run/run_log_fcs-x86-6.30.04-ubuntu22.04_20240801101159.txt +INFO - Thu Aug 1 17:12:00 UTC 2024 - DATAPATH: /local/scratch/cgleggett/data/FastCaloSimInputs:/input +INFO - Thu Aug 1 17:12:00 UTC 2024 - System information: +INFO - Thu Aug 1 17:12:00 UTC 2024 - Executing: perl /run/sysinfo.pl +{ + "hostname": "cd8ef94064f4", + "OS": "unknown", + "sockets": "1", + "max_clock": "3000.0000", + "threads_per_core": "2", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "cores": "32", + "GPUs": [ + ], +} +md5 sum: d5acee724c31e8c79d0f3b47669b0067 d5acee72 +INFO - Thu Aug 1 17:12:00 UTC 2024 - CPU information: +INFO - Thu Aug 1 17:12:00 UTC 2024 - Executing: lscpu +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Address sizes: 43 bits physical, 48 bits virtual +Byte Order: 
Little Endian +CPU(s): 32 +On-line CPU(s) list: 0-31 +Vendor ID: AuthenticAMD +Model name: AMD EPYC 7302P 16-Core Processor +CPU family: 23 +Model: 49 +Thread(s) per core: 2 +Core(s) per socket: 16 +Socket(s): 1 +Stepping: 0 +Frequency boost: enabled +CPU max MHz: 3000.0000 +CPU min MHz: 1500.0000 +BogoMIPS: 5988.87 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es +Virtualization: AMD-V +L1d cache: 512 KiB (16 instances) +L1i cache: 512 KiB (16 instances) +L2 cache: 8 MiB (16 instances) +L3 cache: 128 MiB (8 instances) +NUMA node(s): 1 +NUMA node0 CPU(s): 0-31 +Vulnerability Gather data sampling: Not affected +Vulnerability Itlb multihit: Not affected +Vulnerability L1tf: Not affected +Vulnerability Mds: Not affected +Vulnerability Meltdown: Not affected +Vulnerability Mmio stale data: Not affected +Vulnerability Retbleed: Mitigation; untrained return thunk; SMT enabled with STIBP protection +Vulnerability Spec rstack overflow: Mitigation; Safe RET +Vulnerability Spec store bypass: Mitigation; Speculative Store Bypass disabled via prctl +Vulnerability Spectre v1: Mitigation; usercopy/swapgs barriers and __user 
pointer sanitization +Vulnerability Spectre v2: Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected +Vulnerability Srbds: Not affected +Vulnerability Tsx async abort: Not affected +INFO - Thu Aug 1 17:12:00 UTC 2024 - Setup +INFO - Thu Aug 1 17:12:00 UTC 2024 - Executing: export FCS_DATAPATH=/input +INFO - Thu Aug 1 17:12:00 UTC 2024 - Executing: source /hep-mini-apps/root/install/bin/thisroot.sh +INFO - Thu Aug 1 17:12:00 UTC 2024 - Executing: source /hep-mini-apps/FCS-GPU/install/setup.sh +FastCaloSim Standalone + Installation path: /hep-mini-apps/FCS-GPU/install + Added '$FCSSTANDALONE/bin' to $PATH + Added '$FCSSTANDALONE/lib' to $LD_LIBRARY_PATH +INFO - Thu Aug 1 17:12:00 UTC 2024 - TFCSSimulation +INFO - Thu Aug 1 17:12:00 UTC 2024 - Executing: runTFCSSimulation --earlyReturn --energy 65536 +Reading input files from /input +Initialising DSID DB... +DB ready +Parametrization File: '/input/BigParamFiles/TFCSparam_v010.root' + energy = 65536 + eta_label = 020_025 +* Running on linux system +432704 /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 + * Prepare to run on: /input/InputSamplesProdsysProduction/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.deriv.NTUP_FCS.e6555_e5984_s3259_r10283_p3449/NTUP_FCS.13289109._000001.pool.root.1 with entries = 10000 + * Running over 10000 events. 
+ * 1stPCA file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.firstPCA_App.ver07.root + * AvgShape file: /input/ParametrizationProduction07/mc16_13TeV.432704.ParticleGun_pid11_E65536_disj_eta_m25_m20_20_25_zv_0.AvgSimShape.ver07.root +============================= +Loading cellId_vs_cellHashId_map +Line: 10000 id 2cac3c1e00000000 hash_id 9999 +Line: 20000 id 2cc4213e00000000 hash_id 19999 +Line: 30000 id 2ce0195e00000000 hash_id 29999 +Line: 40000 id 2d20847e00000000 hash_id 39999 +Line: 50000 id 2d21be1e00000000 hash_id 49999 +Line: 60000 id 2d22f63e00000000 hash_id 59999 +Line: 70000 id 2d4025de00000000 hash_id 69999 +Line: 80000 id 2d6001fe00000000 hash_id 79999 +Line: 90000 id 2d80681e00000000 hash_id 89999 +Line: 100000 id 2da1283e00000000 hash_id 99999 +Line: 110000 id 2da2605e00000000 hash_id 109999 +Line: 120000 id 2dc0007e00000000 hash_id 119999 +Line: 130000 id 2dc04e9e00000000 hash_id 129999 +Line: 140000 id 2de02abe00000000 hash_id 139999 +Line: 150000 id 2e2c2a5e00000000 hash_id 149999 +Line: 160000 id 2e441cfe00000000 hash_id 159999 +Line: 170000 id 2e60151e00000000 hash_id 169999 +Line: 180000 id 3485e00000000000 hash_id 179999 +Done. 
+Checking loading cells from file +0 : 6, 3179541336923570176 +Checking loading cells from file +25000 : 6, 3225782741538701312 +Checking loading cells from file +50000 : 1, 3252089450586439680 +Checking loading cells from file +75000 : 2, 3260690723892101120 +Checking loading cells from file +100000 : 1, 3287953458300190720 +Checking loading cells from file +125000 : 2, 3296678426663976960 +Checking loading cells from file +150000 : 5, 3327080816525180928 +Checking loading cells from file +175000 : 10, 3535184969997484032 +Result of PostProcessGeometry(): 1 + + +Testing whether CaloGeoGeometry is loaded properly +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x560552fad050 + +Identifier 3179554531063103488 sampling 6 eta: -3.15858 phi: 0.0545135 CaloDetDescrElement=0x560552fad050 + +Loading FCal electrode #1 +Loading FCal electrode #2 +Loading FCal electrode #3 +TFCSShapeValidation::LoopEvents(): Running on layer = 2, pcabin = -1 +======================================================== +======================================================== + + +Event: 0 + +Event: 250 + +Event: 500 + +Event: 750 + +Event: 1000 + +Event: 1250 + +Event: 1500 + +Event: 1750 + +Event: 2000 + +Event: 2250 + +Event: 2500 + +Event: 2750 + +Event: 3000 + +Event: 3250 + +Event: 3500 + +Event: 3750 + +Event: 4000 + +Event: 4250 + +Event: 4500 + +Event: 4750 + +Event: 5000 + +Event: 5250 + +Event: 5500 + +Event: 5750 + +Event: 6000 + +Event: 6250 + +Event: 6500 + +Event: 6750 + +Event: 7000 + +Event: 7250 + +Event: 7500 + +Event: 7750 + +Event: 8000 + +Event: 8250 + +Event: 8500 + +Event: 8750 + +Event: 9000 + +Event: 9250 + +Event: 9500 + +Event: 9750 +Time of LoadGeo cpu IO:0.715597 s +Time of eventloop LateralShapeParamHitChain :21.5122 s +Time of eventloop :21.8746 s +Time of eventloop GPU ChainA:0 s +Time of eventloop GPU ChainB:0 s +Time of eventloop host Chain0:21.5104 s +Time of eventloop before chain simul:0.00645115 s +Time of 
eventloop I/O read from tree:0.312925 s +Time for Chain 0 is 21.5537 s +Time for Chain 1 is 0 s +Time for Chain 2 is 0 s +Time for Chain 3 is 0 s +Time for Chain 4 is 0 s +exiting early diff --git a/hmatools/tests/test_data/fcs_results.json b/hmatools/tests/test_data/fcs_results.json new file mode 100644 index 0000000..addeee7 --- /dev/null +++ b/hmatools/tests/test_data/fcs_results.json @@ -0,0 +1,979 @@ +[ + { + "info": { + "cpu_info": { + "Address sizes": "43 bits physical, 48 bits virtual", + "Architecture": "x86_64", + "BogoMIPS": "5988.87", + "Byte Order": "Little Endian", + "CPU family": "23", + "CPU max MHz": "3000.0000", + "CPU min MHz": "1500.0000", + "CPU op-mode(s)": "32-bit, 64-bit", + "CPU(s)": "32", + "Core(s) per socket": "16", + "Flags": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es", + "Frequency boost": "enabled", + "L1d cache": "512 KiB (16 instances)", + "L1i cache": "512 KiB (16 instances)", + "L2 cache": "8 MiB (16 instances)", + "L3 cache": "128 MiB (8 instances)", + "Model": "49", + "Model name": "AMD EPYC 7302P 16-Core Processor", + "NUMA node(s)": "1", + "NUMA node0 CPU(s)": "0-31", + 
"On-line CPU(s) list": "0-31", + "Socket(s)": "1", + "Stepping": "0", + "Thread(s) per core": "2", + "Vendor ID": "AuthenticAMD", + "Virtualization": "AMD-V", + "Vulnerability Gather data sampling": "Not affected", + "Vulnerability Itlb multihit": "Not affected", + "Vulnerability L1tf": "Not affected", + "Vulnerability Mds": "Not affected", + "Vulnerability Meltdown": "Not affected", + "Vulnerability Mmio stale data": "Not affected", + "Vulnerability Retbleed": "Mitigation; untrained return thunk; SMT enabled with STIBP protection", + "Vulnerability Spec rstack overflow": "Mitigation; Safe RET", + "Vulnerability Spec store bypass": "Mitigation; Speculative Store Bypass disabled via prctl", + "Vulnerability Spectre v1": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization", + "Vulnerability Spectre v2": "Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected", + "Vulnerability Srbds": "Not affected", + "Vulnerability Tsx async abort": "Not affected" + }, + "datapath": "/local/scratch/cgleggett/data/FastCaloSimInputs:/input", + "image_info": { + "image_tag": "cuda12.2.2-devel-ubuntu22.04", + "image_type": "cuda", + "root_version": "6.30.04" + }, + "nvidia_info": { + " clocks.max.memory [MHz]": " 1512 MHz", + " clocks.max.sm [MHz]": " 1410 MHz", + " count": " 1", + " driver_version": " 550.54.15", + " memory.free [MiB]": " 81037 MiB", + " memory.total [MiB]": " 81920 MiB", + " memory.used [MiB]": " 0 MiB", + " temperature.gpu": " 38", + " temperature.memory": " 51", + " utilization.gpu [%]": " 20 %", + " utilization.memory [%]": " 0 %", + "name": "NVIDIA A100 80GB PCIe" + }, + "run_cmd": null, + "system_info": { + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz" + } + ], + "OS": "unknown", + "cores": "32", + "cpu_id": "AMD EPYC 7302P 16-Core 
Processor", + "hostname": "da8d6a272186", + "max_clock": "3000.0000", + "sockets": "1", + "threads_per_core": "2" + }, + "timestamp": "2024-08-01T10:13:57" + }, + "metrics": [ + { + "data_type": "", + "name": "GPU memory used", + "unit": "MB", + "value": 527 + }, + { + "data_type": "", + "name": "launch count", + "unit": null, + "value": "19973" + }, + { + "data_type": "", + "name": "Time of LoadGeo cpu IO", + "unit": "s", + "value": 0.726031 + }, + { + "data_type": "", + "name": "Time of GPU GeoLg()", + "unit": "s", + "value": 0.0247775 + }, + { + "data_type": "", + "name": "Time of InitInputTree", + "unit": "s", + "value": 0.0177158 + }, + { + "data_type": "", + "name": "Time of resizeTruth", + "unit": "s", + "value": 0.00503153 + }, + { + "data_type": "", + "name": "Time of eventloop GPU load FH", + "unit": "s", + "value": 0.00453769 + }, + { + "data_type": "", + "name": "Time of eventloop LateralShapeParamHitChain", + "unit": "s", + "value": 2.02886 + }, + { + "data_type": "", + "name": "Time of eventloop", + "unit": "s", + "value": 2.39977 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainA", + "unit": "s", + "value": 1.68442 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainB", + "unit": "s", + "value": 0.00340656 + }, + { + "data_type": "", + "name": "Time of eventloop host Chain0", + "unit": "s", + "value": 0.339178 + }, + { + "data_type": "", + "name": "Time of eventloop before chain simul", + "unit": "s", + "value": 0.00647577 + }, + { + "data_type": "", + "name": "Time of eventloop I/O read from tree", + "unit": "s", + "value": 0.303344 + }, + { + "data_type": "", + "name": "Time for Chain 0", + "unit": "s", + "value": 2.07586 + }, + { + "data_type": "", + "name": "Time for Chain 1", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 2", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 3", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": 
"Time for Chain 4", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "kernel timing", + "unit": null, + "value": "{\"kernel\":{\"0\":\"sim_clean\",\"1\":\"sim_A\",\"2\":\"sim_ct\",\"3\":\"sim_cp\"},\"total \\/s\":{\"0\":0.20832632,\"1\":0.48336262,\"2\":0.19353935,\"3\":0.37192848},\"avg launch \\/us\":{\"0\":10.4,\"1\":24.2,\"2\":9.7,\"3\":18.6},\"std dev \\/us\":{\"0\":1.3,\"1\":3.2,\"2\":1.2,\"3\":2.2}}" + } + ] + }, + { + "info": { + "cpu_info": { + "Address sizes": "43 bits physical, 48 bits virtual", + "Architecture": "x86_64", + "BogoMIPS": "5988.87", + "Byte Order": "Little Endian", + "CPU family": "23", + "CPU max MHz": "3000.0000", + "CPU min MHz": "1500.0000", + "CPU op-mode(s)": "32-bit, 64-bit", + "CPU(s)": "32", + "Core(s) per socket": "16", + "Flags": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es", + "Frequency boost": "enabled", + "L1d cache": "512 KiB (16 instances)", + "L1i cache": "512 KiB (16 instances)", + "L2 cache": "8 MiB (16 instances)", + "L3 cache": "128 MiB (8 instances)", + "Model": "49", + "Model name": "AMD EPYC 7302P 16-Core Processor", + "NUMA node(s)": "1", + 
"NUMA node0 CPU(s)": "0-31", + "On-line CPU(s) list": "0-31", + "Socket(s)": "1", + "Stepping": "0", + "Thread(s) per core": "2", + "Vendor ID": "AuthenticAMD", + "Virtualization": "AMD-V", + "Vulnerability Gather data sampling": "Not affected", + "Vulnerability Itlb multihit": "Not affected", + "Vulnerability L1tf": "Not affected", + "Vulnerability Mds": "Not affected", + "Vulnerability Meltdown": "Not affected", + "Vulnerability Mmio stale data": "Not affected", + "Vulnerability Retbleed": "Mitigation; untrained return thunk; SMT enabled with STIBP protection", + "Vulnerability Spec rstack overflow": "Mitigation; Safe RET", + "Vulnerability Spec store bypass": "Mitigation; Speculative Store Bypass disabled via prctl", + "Vulnerability Spectre v1": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization", + "Vulnerability Spectre v2": "Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected", + "Vulnerability Srbds": "Not affected", + "Vulnerability Tsx async abort": "Not affected" + }, + "datapath": "/local/scratch/cgleggett/data/FastCaloSimInputs:/input", + "image_info": { + "image_tag": "nvhpc23.9-devel-cuda12.2-ubuntu22.04", + "image_type": "cuda", + "root_version": "6.30.04" + }, + "nvidia_info": { + " clocks.max.memory [MHz]": " 1512 MHz", + " clocks.max.sm [MHz]": " 1410 MHz", + " count": " 1", + " driver_version": " 550.54.15", + " memory.free [MiB]": " 81037 MiB", + " memory.total [MiB]": " 81920 MiB", + " memory.used [MiB]": " 0 MiB", + " temperature.gpu": " 38", + " temperature.memory": " 51", + " utilization.gpu [%]": " 20 %", + " utilization.memory [%]": " 0 %", + "name": "NVIDIA A100 80GB PCIe" + }, + "run_cmd": null, + "system_info": { + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz" + } + ], + "OS": "unknown", + "cores": 
"32", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "hostname": "aa5ef7c9c27e", + "max_clock": "3000.0000", + "sockets": "1", + "threads_per_core": "2" + }, + "timestamp": "2024-08-01T10:12:40" + }, + "metrics": [ + { + "data_type": "", + "name": "GPU memory used", + "unit": "MB", + "value": 527 + }, + { + "data_type": "", + "name": "launch count", + "unit": null, + "value": "19973" + }, + { + "data_type": "", + "name": "Time of LoadGeo cpu IO", + "unit": "s", + "value": 0.707652 + }, + { + "data_type": "", + "name": "Time of GPU GeoLg()", + "unit": "s", + "value": 0.0246851 + }, + { + "data_type": "", + "name": "Time of InitInputTree", + "unit": "s", + "value": 0.0176983 + }, + { + "data_type": "", + "name": "Time of resizeTruth", + "unit": "s", + "value": 0.00498405 + }, + { + "data_type": "", + "name": "Time of eventloop GPU load FH", + "unit": "s", + "value": 0.00442894 + }, + { + "data_type": "", + "name": "Time of eventloop LateralShapeParamHitChain", + "unit": "s", + "value": 2.01168 + }, + { + "data_type": "", + "name": "Time of eventloop", + "unit": "s", + "value": 2.37223 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainA", + "unit": "s", + "value": 1.67147 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainB", + "unit": "s", + "value": 0.00332568 + }, + { + "data_type": "", + "name": "Time of eventloop host Chain0", + "unit": "s", + "value": 0.335094 + }, + { + "data_type": "", + "name": "Time of eventloop before chain simul", + "unit": "s", + "value": 0.00654652 + }, + { + "data_type": "", + "name": "Time of eventloop I/O read from tree", + "unit": "s", + "value": 0.307414 + }, + { + "data_type": "", + "name": "Time for Chain 0", + "unit": "s", + "value": 2.05658 + }, + { + "data_type": "", + "name": "Time for Chain 1", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 2", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 3", + "unit": "s", + "value": 
0.0 + }, + { + "data_type": "", + "name": "Time for Chain 4", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "kernel timing", + "unit": null, + "value": "{\"kernel\":{\"0\":\"sim_clean\",\"1\":\"sim_A\",\"2\":\"sim_ct\",\"3\":\"sim_cp\"},\"total \\/s\":{\"0\":0.20625765,\"1\":0.48149875,\"2\":0.19245834,\"3\":0.36716598},\"avg launch \\/us\":{\"0\":10.3,\"1\":24.1,\"2\":9.6,\"3\":18.4},\"std dev \\/us\":{\"0\":1.0,\"1\":2.8,\"2\":0.9,\"3\":1.3}}" + } + ] + }, + { + "info": { + "cpu_info": { + "Address sizes": "43 bits physical, 48 bits virtual", + "Architecture": "x86_64", + "BogoMIPS": "5988.87", + "Byte Order": "Little Endian", + "CPU family": "23", + "CPU max MHz": "3000.0000", + "CPU min MHz": "1500.0000", + "CPU op-mode(s)": "32-bit, 64-bit", + "CPU(s)": "32", + "Core(s) per socket": "16", + "Flags": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es", + "Frequency boost": "enabled", + "L1d cache": "512 KiB (16 instances)", + "L1i cache": "512 KiB (16 instances)", + "L2 cache": "8 MiB (16 instances)", + "L3 cache": "128 MiB (8 instances)", + "Model": "49", + "Model name": "AMD EPYC 7302P 
16-Core Processor", + "NUMA node(s)": "1", + "NUMA node0 CPU(s)": "0-31", + "On-line CPU(s) list": "0-31", + "Socket(s)": "1", + "Stepping": "0", + "Thread(s) per core": "2", + "Vendor ID": "AuthenticAMD", + "Virtualization": "AMD-V", + "Vulnerability Gather data sampling": "Not affected", + "Vulnerability Itlb multihit": "Not affected", + "Vulnerability L1tf": "Not affected", + "Vulnerability Mds": "Not affected", + "Vulnerability Meltdown": "Not affected", + "Vulnerability Mmio stale data": "Not affected", + "Vulnerability Retbleed": "Mitigation; untrained return thunk; SMT enabled with STIBP protection", + "Vulnerability Spec rstack overflow": "Mitigation; Safe RET", + "Vulnerability Spec store bypass": "Mitigation; Speculative Store Bypass disabled via prctl", + "Vulnerability Spectre v1": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization", + "Vulnerability Spectre v2": "Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected", + "Vulnerability Srbds": "Not affected", + "Vulnerability Tsx async abort": "Not affected" + }, + "datapath": "/local/scratch/cgleggett/data/FastCaloSimInputs:/input", + "image_info": { + "image_tag": "cuda12.2.2-devel-ubuntu22.04", + "image_type": "kokkos-cuda", + "root_version": "6.30.04" + }, + "nvidia_info": { + " clocks.max.memory [MHz]": " 1512 MHz", + " clocks.max.sm [MHz]": " 1410 MHz", + " count": " 1", + " driver_version": " 550.54.15", + " memory.free [MiB]": " 81037 MiB", + " memory.total [MiB]": " 81920 MiB", + " memory.used [MiB]": " 0 MiB", + " temperature.gpu": " 38", + " temperature.memory": " 51", + " utilization.gpu [%]": " 20 %", + " utilization.memory [%]": " 0 %", + "name": "NVIDIA A100 80GB PCIe" + }, + "run_cmd": null, + "system_info": { + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz" 
+ } + ], + "OS": "unknown", + "cores": "32", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "hostname": "1a3ae75ea67e", + "max_clock": "3000.0000", + "sockets": "1", + "threads_per_core": "2" + }, + "timestamp": "2024-08-01T10:14:34" + }, + "metrics": [ + { + "data_type": "", + "name": "launch count", + "unit": null, + "value": "19973" + }, + { + "data_type": "", + "name": "Time of LoadGeo cpu IO", + "unit": "s", + "value": 0.707511 + }, + { + "data_type": "", + "name": "Time of GPU GeoLg()", + "unit": "s", + "value": 0.0576766 + }, + { + "data_type": "", + "name": "Time of InitInputTree", + "unit": "s", + "value": 0.0181598 + }, + { + "data_type": "", + "name": "Time of resizeTruth", + "unit": "s", + "value": 0.00701503 + }, + { + "data_type": "", + "name": "Time of eventloop GPU load FH", + "unit": "s", + "value": 0.0109189 + }, + { + "data_type": "", + "name": "Time of eventloop LateralShapeParamHitChain", + "unit": "s", + "value": 2.88591 + }, + { + "data_type": "", + "name": "Time of eventloop", + "unit": "s", + "value": 3.27094 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainA", + "unit": "s", + "value": 2.5357 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainB", + "unit": "s", + "value": 0.00333486 + }, + { + "data_type": "", + "name": "Time of eventloop host Chain0", + "unit": "s", + "value": 0.344944 + }, + { + "data_type": "", + "name": "Time of eventloop before chain simul", + "unit": "s", + "value": 0.00662612 + }, + { + "data_type": "", + "name": "Time of eventloop I/O read from tree", + "unit": "s", + "value": 0.321609 + }, + { + "data_type": "", + "name": "Time for Chain 0", + "unit": "s", + "value": 2.94052 + }, + { + "data_type": "", + "name": "Time for Chain 1", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 2", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 3", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time 
for Chain 4", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "kernel timing", + "unit": null, + "value": "{\"kernel\":{\"0\":\"sim_clean\",\"1\":\"sim_A\",\"2\":\"sim_ct\",\"3\":\"sim_cp\"},\"total \\/s\":{\"0\":0.51891751,\"1\":0.70366945,\"2\":0.23812975,\"3\":0.59703218},\"avg launch \\/us\":{\"0\":26.0,\"1\":35.2,\"2\":11.9,\"3\":29.9},\"std dev \\/us\":{\"0\":4.1,\"1\":2.8,\"2\":0.8,\"3\":2.0}}" + } + ] + }, + { + "info": { + "cpu_info": { + "Address sizes": "43 bits physical, 48 bits virtual", + "Architecture": "x86_64", + "BogoMIPS": "5988.87", + "Byte Order": "Little Endian", + "CPU family": "23", + "CPU max MHz": "3000.0000", + "CPU min MHz": "1500.0000", + "CPU op-mode(s)": "32-bit, 64-bit", + "CPU(s)": "32", + "Core(s) per socket": "16", + "Flags": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es", + "Frequency boost": "enabled", + "L1d cache": "512 KiB (16 instances)", + "L1i cache": "512 KiB (16 instances)", + "L2 cache": "8 MiB (16 instances)", + "L3 cache": "128 MiB (8 instances)", + "Model": "49", + "Model name": "AMD EPYC 7302P 16-Core Processor", + "NUMA node(s)": "1", + "NUMA 
node0 CPU(s)": "0-31", + "On-line CPU(s) list": "0-31", + "Socket(s)": "1", + "Stepping": "0", + "Thread(s) per core": "2", + "Vendor ID": "AuthenticAMD", + "Virtualization": "AMD-V", + "Vulnerability Gather data sampling": "Not affected", + "Vulnerability Itlb multihit": "Not affected", + "Vulnerability L1tf": "Not affected", + "Vulnerability Mds": "Not affected", + "Vulnerability Meltdown": "Not affected", + "Vulnerability Mmio stale data": "Not affected", + "Vulnerability Retbleed": "Mitigation; untrained return thunk; SMT enabled with STIBP protection", + "Vulnerability Spec rstack overflow": "Mitigation; Safe RET", + "Vulnerability Spec store bypass": "Mitigation; Speculative Store Bypass disabled via prctl", + "Vulnerability Spectre v1": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization", + "Vulnerability Spectre v2": "Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected", + "Vulnerability Srbds": "Not affected", + "Vulnerability Tsx async abort": "Not affected" + }, + "datapath": "/local/scratch/cgleggett/data/FastCaloSimInputs:/input", + "image_info": { + "image_tag": "nvhpc23.9-devel-cuda12.2-ubuntu22.04", + "image_type": "kokkos-cuda", + "root_version": "6.30.04" + }, + "nvidia_info": { + " clocks.max.memory [MHz]": " 1512 MHz", + " clocks.max.sm [MHz]": " 1410 MHz", + " count": " 1", + " driver_version": " 550.54.15", + " memory.free [MiB]": " 81037 MiB", + " memory.total [MiB]": " 81920 MiB", + " memory.used [MiB]": " 0 MiB", + " temperature.gpu": " 38", + " temperature.memory": " 51", + " utilization.gpu [%]": " 20 %", + " utilization.memory [%]": " 0 %", + "name": "NVIDIA A100 80GB PCIe" + }, + "run_cmd": null, + "system_info": { + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz" + } + ], + "OS": "unknown", + "cores": 
"32", + "cpu_id": "AMD EPYC 7302P 16-Core Processor", + "hostname": "35323be8c88b", + "max_clock": "3000.0000", + "sockets": "1", + "threads_per_core": "2" + }, + "timestamp": "2024-08-01T10:13:18" + }, + "metrics": [ + { + "data_type": "", + "name": "launch count", + "unit": null, + "value": "19973" + }, + { + "data_type": "", + "name": "Time of LoadGeo cpu IO", + "unit": "s", + "value": 0.729442 + }, + { + "data_type": "", + "name": "Time of GPU GeoLg()", + "unit": "s", + "value": 0.0271369 + }, + { + "data_type": "", + "name": "Time of InitInputTree", + "unit": "s", + "value": 0.0179467 + }, + { + "data_type": "", + "name": "Time of resizeTruth", + "unit": "s", + "value": 0.00612663 + }, + { + "data_type": "", + "name": "Time of eventloop GPU load FH", + "unit": "s", + "value": 0.0113988 + }, + { + "data_type": "", + "name": "Time of eventloop LateralShapeParamHitChain", + "unit": "s", + "value": 2.8824 + }, + { + "data_type": "", + "name": "Time of eventloop", + "unit": "s", + "value": 3.26986 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainA", + "unit": "s", + "value": 2.53221 + }, + { + "data_type": "", + "name": "Time of eventloop GPU ChainB", + "unit": "s", + "value": 0.00334502 + }, + { + "data_type": "", + "name": "Time of eventloop host Chain0", + "unit": "s", + "value": 0.344875 + }, + { + "data_type": "", + "name": "Time of eventloop before chain simul", + "unit": "s", + "value": 0.0066083 + }, + { + "data_type": "", + "name": "Time of eventloop I/O read from tree", + "unit": "s", + "value": 0.32318 + }, + { + "data_type": "", + "name": "Time for Chain 0", + "unit": "s", + "value": 2.93794 + }, + { + "data_type": "", + "name": "Time for Chain 1", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 2", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 3", + "unit": "s", + "value": 0.0 + }, + { + "data_type": "", + "name": "Time for Chain 4", + "unit": "s", + "value": 0.0 
+ }, + { + "data_type": "", + "name": "kernel timing", + "unit": null, + "value": "{\"kernel\":{\"0\":\"sim_clean\",\"1\":\"sim_A\",\"2\":\"sim_ct\",\"3\":\"sim_cp\"},\"total \\/s\":{\"0\":0.51431751,\"1\":0.70151908,\"2\":0.23960878,\"3\":0.59184934},\"avg launch \\/us\":{\"0\":25.8,\"1\":35.1,\"2\":12.0,\"3\":29.6},\"std dev \\/us\":{\"0\":4.0,\"1\":2.8,\"2\":0.7,\"3\":1.9}}" + } + ] + }, + { + "info": { + "cpu_info": { + "Address sizes": "43 bits physical, 48 bits virtual", + "Architecture": "x86_64", + "BogoMIPS": "5988.87", + "Byte Order": "Little Endian", + "CPU family": "23", + "CPU max MHz": "3000.0000", + "CPU min MHz": "1500.0000", + "CPU op-mode(s)": "32-bit, 64-bit", + "CPU(s)": "32", + "Core(s) per socket": "16", + "Flags": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es", + "Frequency boost": "enabled", + "L1d cache": "512 KiB (16 instances)", + "L1i cache": "512 KiB (16 instances)", + "L2 cache": "8 MiB (16 instances)", + "L3 cache": "128 MiB (8 instances)", + "Model": "49", + "Model name": "AMD EPYC 7302P 16-Core Processor", + "NUMA node(s)": "1", + "NUMA node0 CPU(s)": "0-31", + "On-line CPU(s) 
list": "0-31", + "Socket(s)": "1", + "Stepping": "0", + "Thread(s) per core": "2", + "Vendor ID": "AuthenticAMD", + "Virtualization": "AMD-V", + "Vulnerability Gather data sampling": "Not affected", + "Vulnerability Itlb multihit": "Not affected", + "Vulnerability L1tf": "Not affected", + "Vulnerability Mds": "Not affected", + "Vulnerability Meltdown": "Not affected", + "Vulnerability Mmio stale data": "Not affected", + "Vulnerability Retbleed": "Mitigation; untrained return thunk; SMT enabled with STIBP protection", + "Vulnerability Spec rstack overflow": "Mitigation; Safe RET", + "Vulnerability Spec store bypass": "Mitigation; Speculative Store Bypass disabled via prctl", + "Vulnerability Spectre v1": "Mitigation; usercopy/swapgs barriers and __user pointer sanitization", + "Vulnerability Spectre v2": "Mitigation; Retpolines, IBPB conditional, STIBP always-on, RSB filling, PBRSB-eIBRS Not affected", + "Vulnerability Srbds": "Not affected", + "Vulnerability Tsx async abort": "Not affected" + }, + "datapath": "/local/scratch/cgleggett/data/FastCaloSimInputs:/input", + "image_info": { + "image_tag": "nvhpc23.9-devel-cuda12.2-ubuntu22.04", + "image_type": "stdpar", + "root_version": "6.30.04" + }, + "nvidia_info": { + " clocks.max.memory [MHz]": " 1512 MHz", + " clocks.max.sm [MHz]": " 1410 MHz", + " count": " 1", + " driver_version": " 550.54.15", + " memory.free [MiB]": " 81037 MiB", + " memory.total [MiB]": " 81920 MiB", + " memory.used [MiB]": " 0 MiB", + " temperature.gpu": " 37", + " temperature.memory": " 51", + " utilization.gpu [%]": " 20 %", + " utilization.memory [%]": " 0 %", + "name": "NVIDIA A100 80GB PCIe" + }, + "run_cmd": null, + "system_info": { + "GPUs": [ + { + "compute_capability": "8.0", + "driver": "550.54.15", + "gpu_id": "NVIDIA A100 80GB PCIe", + "graphics_clock": "1512 MHz", + "memory": "81920 MiB", + "memory_clock": "1410 MHz", + "sm_clock": "1410 MHz" + } + ], + "OS": "unknown", + "cores": "32", + "cpu_id": "AMD EPYC 7302P 16-Core 
Processor", + "hostname": "ffd161669a69", + "max_clock": "3000.0000", + "sockets": "1", + "threads_per_core": "2" + }, + "timestamp": "2024-08-01T10:15:12" + }, + "metrics": null + }, + { + "info": { + "cpu_info": { + "Address sizes": "43 bits physical, 48 bits virtual", + "Architecture": "x86_64", + "BogoMIPS": "5988.87", + "Byte Order": "Little Endian", + "CPU family": "23", + "CPU max MHz": "3000.0000", + "CPU min MHz": "1500.0000", + "CPU op-mode(s)": "32-bit, 64-bit", + "CPU(s)": "32", + "Core(s) per socket": "16", + "Flags": "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid aperfmperf pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw ibs skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb cat_l3 cdp_l3 hw_pstate ssbd mba ibrs ibpb stibp vmmcall fsgsbase bmi1 avx2 smep bmi2 cqm rdt_a rdseed adx smap clflushopt clwb sha_ni xsaveopt xsavec xgetbv1 xsaves cqm_llc cqm_occup_llc cqm_mbm_total cqm_mbm_local clzero irperf xsaveerptr wbnoinvd arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif v_spec_ctrl umip rdpid overflow_recov succor smca sme sev sev_es", + "Frequency boost": "enabled", + "L1d cache": "512 KiB (16 instances)", + "L1i cache": "512 KiB (16 instances)", + "L2 cache": "8 MiB (16 instances)", + "L3 cache": "128 MiB (8 instances)", + "Model": "49", + "Model name": "AMD EPYC 7302P 16-Core Processor", + "NUMA node(s)": "1", + "NUMA node0 CPU(s)": "0-31", + "On-line CPU(s) list": "0-31", + "Socket(s)": "1", + "Stepping": "0", + "Thread(s) per core": "2", + "Vendor ID": "AuthenticAMD", + "Virtualization": "AMD-V", + "Vulnerability Gather data sampling": "Not affected", + "Vulnerability 
import sys
import os
import json

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'python')))

from hma_result import HmaResult
from hma_plot import HmaPlot


def _fcs_log_dir():
    """Return the bundled FastCaloSim log directory, asserting that it is present."""
    input_dir = os.path.join(os.path.dirname(__file__), "test_data/Log Files - Run FastCaloSim")
    assert os.path.exists(input_dir), f"Input directory {input_dir} does not exist"
    return input_dir


def test_get_results():
    """get_results() yields one entry per regular file in the log directory."""
    log_dir = _fcs_log_dir()
    # Count the files before building the result object, as the reference count.
    expected_count = sum(
        1 for entry in os.listdir(log_dir) if os.path.isfile(os.path.join(log_dir, entry))
    )

    parsed = HmaResult(log_dir).get_results()
    assert len(parsed) == expected_count


def test_dump_to_json(tmp_path):
    """dump_to_json() writes a JSON document holding six entries for the bundled logs."""
    collector = HmaResult(_fcs_log_dir())

    # Write into pytest's per-test temporary directory.
    json_path = tmp_path / 'fcs_results.json'
    collector.dump_to_json(json_path)

    # The file must exist and parse as JSON.
    assert json_path.exists()
    with open(json_path, 'r') as handle:
        entries = json.load(handle)

    assert len(entries) == 6


def test_plot(tmp_path):
    """HmaPlot turns a dumped JSON file in tmp_path into a PNG at the requested path."""
    # Produce the JSON input inside the temporary directory first.
    collector = HmaResult(_fcs_log_dir())
    dumped_json = tmp_path / 'fcs_results.json'
    collector.dump_to_json(dumped_json)

    # HmaPlot is pointed at the directory, not at the JSON file itself.
    png_path = tmp_path / "fcs_results.png"
    plotter = HmaPlot(tmp_path)
    plotter.plot(save_plot=True, filename=png_path)

    # The plot file must have been written.
    assert png_path.exists()
# ---------------------------------------------------------------------------
# Build configuration
# ---------------------------------------------------------------------------
OS_BASE=ubuntu22.04
CUDA_VERSION=12.2.2-devel-${OS_BASE}
NVHPC_VERSION=23.9-devel-cuda12.2-${OS_BASE}
ROOT_VERSION=v6-30-04
FCS_BRANCH=dingpf/packaging


# v6-30-04 -> 6.30.04 (strip the "v", turn dashes into dots)
ROOT_DOT_VERSION=${ROOT_VERSION//v/}
ROOT_DOT_VERSION=${ROOT_DOT_VERSION//-/.}
NVHPC_BASE_IMAGE=nvcr.io/nvidia/nvhpc:${NVHPC_VERSION}
CUDA_BASE_IMAGE=docker.io/nvidia/cuda:${CUDA_VERSION}
UBUNTU_BASE_IMAGE=docker.io/library/ubuntu:22.04
ROCM_BASE_IMAGE=rocm/rocm-terminal:6.2.1
#REGISTRY_PROJECT=docker.io/dingpf
REGISTRY_PROJECT=registry.nersc.gov/m2845

# Overridable from the environment:
#   CHECK_REPO=1        skip images whose tag already exists in the registry
#   BUILD_ROOT_IMAGE=1  also (re)build the ROOT base images
#   LOG_DIR             directory receiving the timestamped build log
CHECK_REPO=${CHECK_REPO:-0}
BUILD_ROOT_IMAGE=${BUILD_ROOT_IMAGE:-0}
LOG_DIR=${LOG_DIR:-"."}

logfile="${LOG_DIR}/build_image_log_$(date +%Y%m%d%H%M%S).txt"

# Ensure the log directory exists
mkdir -p "${LOG_DIR}"

# Succeed when command $1 can be found; the lookup output goes to the log only.
check_command_exists() {
    echo "Checking if command $1 exists..." | tee -a "${logfile}"
    command -v "$1" >>"${logfile}" 2>&1
}

# Return 0 when the manifest of image $1 can be inspected, 1 otherwise.
# The probe is evaluated as an `if` condition: run as a plain command, a
# missing image would make `set -e` abort the script before $? could be read.
check_image_exists() {
    local image_tag=$1

    echo "Checking if image ${image_tag} exists..." | tee -a "${logfile}"
    if "${CONTAINER_CMD}" manifest inspect "${image_tag}" >>"${logfile}" 2>&1; then
        return 0
    fi
    return 1
}

# Run "$@" with stdout mirrored into the log and return the command's own
# exit status. A bare `cmd | tee` reports tee's status, which hides failures.
_run_logged() {
    "$@" | tee -a "${logfile}"
    return "${PIPESTATUS[0]}"
}

# Build and push the ROOT image for base image $1.
# No-op unless BUILD_ROOT_IMAGE is non-zero; with CHECK_REPO=1 an image that
# is already in the registry is left alone.
build_and_push_root_image() {
    if [ "${BUILD_ROOT_IMAGE}" -eq 0 ]; then
        return 0
    fi
    local base_image=$1
    local image_tag
    image_tag=$(basename "${base_image}")
    image_tag=${image_tag//:/}
    local root_image_tag=${REGISTRY_PROJECT}/root:${ROOT_DOT_VERSION}-${image_tag}

    # Calling check_image_exists inside the condition keeps `set -e` from
    # terminating the script when the image is simply not there yet.
    if [ "${CHECK_REPO}" -eq 1 ] && check_image_exists "${root_image_tag}"; then
        echo "INFO - $(date) - Skipping build. Image ${root_image_tag} already exists in the registry." | tee -a "${logfile}"
        return 0
    fi

    echo "INFO - $(date) - Building image: ${root_image_tag}" | tee -a "${logfile}"
    _run_logged "${CONTAINER_CMD}" build -f root.Dockerfile \
        --build-arg=BASE="${base_image}" \
        --build-arg=ROOT_VERSION="${ROOT_VERSION}" \
        -t "${root_image_tag}" .

    echo "INFO - $(date) - Pushing image: ${root_image_tag}" | tee -a "${logfile}"
    _run_logged "${CONTAINER_CMD}" push "${root_image_tag}"
}

# Build and push the FCS image of flavour $2 (e.g. fcs-cuda) on top of the
# ROOT image derived from base image $1. The Dockerfile used is "$2.Dockerfile".
build_and_push_fcs_image() {
    local base_image=$1
    local image_type=$2
    local image_tag
    image_tag=$(basename "${base_image}")
    image_tag=${image_tag//:/}
    local root_image_tag=${REGISTRY_PROJECT}/root:${ROOT_DOT_VERSION}-${image_tag}
    local fcs_image_tag=${REGISTRY_PROJECT}/${image_type}:${ROOT_DOT_VERSION}-${image_tag}

    if [ "${CHECK_REPO}" -eq 1 ] && check_image_exists "${fcs_image_tag}"; then
        echo "INFO - $(date) - Image ${fcs_image_tag} already exists in the registry. Skipping build." | tee -a "${logfile}"
        return 0
    fi

    echo "INFO - $(date) - Building image: ${fcs_image_tag}" | tee -a "${logfile}"
    _run_logged "${CONTAINER_CMD}" build -f "${image_type}.Dockerfile" \
        --build-arg=BASE="${root_image_tag}" \
        --build-arg=FCS_BRANCH="${FCS_BRANCH}" \
        -t "${fcs_image_tag}" \
        .

    echo "INFO - $(date) - Pushing image: ${fcs_image_tag}" | tee -a "${logfile}"
    _run_logged "${CONTAINER_CMD}" push "${fcs_image_tag}"
}
echo "INFO - $(date) - Starting script..." | tee -a "${logfile}"

# Pick the container runtime: podman-hpc when present, docker as the fallback.
if check_command_exists podman-hpc; then
    echo "INFO - $(date) - Using podman-hpc" | tee -a "${logfile}"
    CONTAINER_CMD="podman-hpc"
elif check_command_exists docker; then
    echo "INFO - $(date) - Using docker" | tee -a "${logfile}"
    CONTAINER_CMD="docker"
else
    echo "ERROR - $(date) - Neither podman-hpc nor docker is installed on this system." | tee -a "${logfile}"
    exit 1
fi

echo "INFO - $(date) - Using container command: ${CONTAINER_CMD}" | tee -a "${logfile}"

for base_image in "${UBUNTU_BASE_IMAGE}" "${NVHPC_BASE_IMAGE}" "${CUDA_BASE_IMAGE}"; do
    echo "INFO - $(date) - Processing base image: ${base_image}" | tee -a "${logfile}"
    build_and_push_root_image "${base_image}"

    # Build FCS images using different base images
    for image_type in fcs-x86 fcs-cuda fcs-kokkos-cuda; do
        # Only build FCS x86 variant using Ubuntu base image
        if [ "${image_type}" = "fcs-x86" ] && [ "${base_image}" != "${UBUNTU_BASE_IMAGE}" ]; then
            echo "INFO - $(date) - Skipping FCS x86 variant for non-Ubuntu base image" | tee -a "${logfile}"
            continue
        fi
        # Only build FCS GPU variant using non-Ubuntu base image
        if [ "${image_type}" != "fcs-x86" ] && [ "${base_image}" = "${UBUNTU_BASE_IMAGE}" ]; then
            echo "INFO - $(date) - Skipping FCS GPU variant for Ubuntu base image" | tee -a "${logfile}"
            continue
        fi
        echo "INFO - $(date) - Building FCS image: ${image_type} with base image: ${base_image}" | tee -a "${logfile}"
        build_and_push_fcs_image "${base_image}" "${image_type}"
    done
done

echo "INFO - $(date) - Building additional FCS images..." | tee -a "${logfile}"
build_and_push_fcs_image "${NVHPC_BASE_IMAGE}" fcs-stdpar-cuda
build_and_push_fcs_image "${CUDA_BASE_IMAGE}" fcs-hip-cuda

# The ROCm ROOT image uses its own Dockerfile (root-rocm.Dockerfile), so it is
# built here directly rather than through build_and_push_root_image.
echo "INFO - $(date) - Building ROCm ROOT image..." | tee -a "${logfile}"
rocm_img_tag=$(basename "${ROCM_BASE_IMAGE}")
rocm_img_tag=${rocm_img_tag//:/}
root_rocm_image_tag=${REGISTRY_PROJECT}/root:${ROOT_DOT_VERSION}-${rocm_img_tag}
"${CONTAINER_CMD}" build -f root-rocm.Dockerfile \
    --build-arg=BASE="${ROCM_BASE_IMAGE}" \
    --build-arg=ROOT_VERSION="${ROOT_VERSION}" \
    -t "${root_rocm_image_tag}" . | tee -a "${logfile}"
# The pipeline's status is tee's, so `set -e` never sees a failed build;
# inspect PIPESTATUS to stop before pushing a stale or missing image.
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    echo "ERROR - $(date) - Failed to build image: ${root_rocm_image_tag}" | tee -a "${logfile}"
    exit 1
fi
"${CONTAINER_CMD}" push "${root_rocm_image_tag}" | tee -a "${logfile}"
if [ "${PIPESTATUS[0]}" -ne 0 ]; then
    echo "ERROR - $(date) - Failed to push image: ${root_rocm_image_tag}" | tee -a "${logfile}"
    exit 1
fi

echo "INFO - $(date) - Building ROCm HIP FCS image..." | tee -a "${logfile}"
build_and_push_fcs_image "${ROCM_BASE_IMAGE}" fcs-hip-rocm

echo "INFO - $(date) - Script completed successfully!" | tee -a "${logfile}"
# FastCaloSim (FCS-GPU) built with the native CUDA backend on top of a
# ROOT-enabled CUDA base image.
ARG BASE=registry.nersc.gov/m2845/root:6.30.04-cuda12.2.2-devel-ubuntu22.04
FROM $BASE

# Alternative base images:
# ARG BASE=docker.io/dingpf/root:6.30.04-cuda12.2.2-ubuntu22.04
# ARG BASE=docker.io/dingpf/root:6.30.04-nvhpc23.9-cuda12.2-ubuntu22.04

ARG WORK_DIR=/hep-mini-apps
ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install

ARG FCS_SRC_DIR=$WORK_DIR/FCS-GPU/source
ARG FCS_BUILD_DIR=$WORK_DIR/FCS-GPU/build
ARG FCS_INSTALL_DIR=$WORK_DIR/FCS-GPU/install
ARG FCS_BRANCH=dingpf/packaging
# Value passed to CMAKE_CUDA_ARCHITECTURES; override with --build-arg for other GPUs.
ARG CUDA_ARCH=80

# thisroot.sh is sourced from inside its own directory.
# NOTE(review): presumably so the script can locate itself under /bin/sh - confirm before dropping the cd.
RUN \
    cd $ROOT_INSTALL_DIR/bin && \
    . $ROOT_INSTALL_DIR/bin/thisroot.sh && \
    mkdir -p $FCS_BUILD_DIR && \
    mkdir -p $FCS_INSTALL_DIR && \
    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
    cd $FCS_BUILD_DIR && \
    cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
    -DENABLE_XROOTD=Off \
    -DCMAKE_CXX_STANDARD=17 \
    -DCMAKE_CXX_EXTENSIONS=Off \
    -DENABLE_GPU=on \
    -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH} \
    $FCS_SRC_DIR/FastCaloSimAnalyzer && \
    # Size the parallel build to the build host instead of a fixed 128 jobs.
    make -j "$(nproc)" install && \
    # Step out of the build tree before deleting it (as fcs-hip-rocm does).
    cd $WORK_DIR && \
    rm -rf $FCS_BUILD_DIR
# HIP-on-NVIDIA toolchain (hipcc + CLR built with HIP_PLATFORM=nvidia) on top
# of the ROOT + CUDA image, plus a checkout of the FCS-GPU sources.
ARG BASE=registry.nersc.gov/m2845/root:6.30.04-cuda12.2.2-devel-ubuntu22.04
FROM $BASE

# ARG needs the name=value form; written as "ARG DEBIAN_FRONTEND noninteractive"
# the variable received no value. Build args are visible to RUN as environment
# variables, so every apt call below runs non-interactively.
ARG DEBIAN_FRONTEND=noninteractive

RUN \
    apt-get update && \
    apt-get upgrade --yes && \
    apt-get install --yes \
    build-essential \
    cmake \
    wget \
    vim \
    python3 \
    git && \
    apt-get clean all

# Register the ROCm apt repository and install hipcc. Steps are chained with
# && so a failed key download stops the build instead of being ignored.
RUN \
    mkdir -p /etc/apt/keyrings && \
    wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | tee /etc/apt/keyrings/rocm.gpg > /dev/null && \
    echo "deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.1.1 jammy main" | tee --append /etc/apt/sources.list.d/rocm.list && \
    apt-get update && \
    apt-get install --yes hipcc

ENV CUDA_PATH=/usr/local/cuda/

ARG ROCM_BRANCH=rocm-6.1.x

# Build the HIP runtime (CLR) against the NVIDIA backend; installed under
# /opt/clr/build/install.
RUN \
    mkdir -p /opt && cd /opt && \
    git clone -b ${ROCM_BRANCH} https://github.com/ROCm/clr.git && \
    git clone -b ${ROCM_BRANCH} https://github.com/ROCm/hip.git && \
    git clone -b ${ROCM_BRANCH} https://github.com/ROCm/hipother.git && \
    export CLR_DIR="$(readlink -f clr)" && \
    export HIP_DIR="$(readlink -f hip)" && \
    export HIP_OTHER="$(readlink -f hipother)" && \
    cd $CLR_DIR && \
    mkdir -p build && cd build && \
    export HIP_PLATFORM=nvidia && \
    cmake -DHIP_COMMON_DIR=$HIP_DIR -DHIP_PLATFORM=nvidia -DCMAKE_INSTALL_PREFIX=$PWD/install -DHIP_CATCH_TEST=0 -DCLR_BUILD_HIP=ON -DCLR_BUILD_OCL=OFF -DHIPNV_DIR=$HIP_OTHER/hipnv .. && \
    make -j 10 && \
    make install

ENV HIP_PLATFORM=nvidia


ARG WORK_DIR=/hep-mini-apps
ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install

ARG FCS_SRC_DIR=$WORK_DIR/FCS-GPU/source
ARG FCS_BUILD_DIR=$WORK_DIR/FCS-GPU/build
ARG FCS_INSTALL_DIR=$WORK_DIR/FCS-GPU/install
ARG FCS_BRANCH=dingpf/packaging

# Only the sources are fetched for now; the FCS build itself is disabled below.
RUN \
    export PATH=/opt/clr/build/install/bin:$PATH && \
    cd $ROOT_INSTALL_DIR/bin && \
    . $ROOT_INSTALL_DIR/bin/thisroot.sh && \
    mkdir -p $FCS_BUILD_DIR && \
    mkdir -p $FCS_INSTALL_DIR && \
    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR
#    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
#    cd $FCS_BUILD_DIR && \
# currently not working due to missing patches of fcs
#    cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
#    -DENABLE_XROOTD=Off \
#    -DCMAKE_CXX_STANDARD=17 \
#    -DCMAKE_CXX_EXTENSIONS=Off \
#    -DENABLE_GPU=on \
#    -DCMAKE_CXX_COMPILER=hipcc \
#    -DCMAKE_CUDA_ARCHITECTURES=80 \
#    $FCS_SRC_DIR/FastCaloSimAnalyzer && \
#    make -j 128 install && \
#    rm -rf $FCS_BUILD_DIR
$ROOT_INSTALL_DIR/bin/thisroot.sh && \
+    mkdir -p $FCS_BUILD_DIR && \
+    mkdir -p $FCS_INSTALL_DIR && \
+    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR
+# git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
+# cd $FCS_BUILD_DIR && \
+# currently not working due to missing patches of fcs
+# cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
+# -DENABLE_XROOTD=Off \
+# -DCMAKE_CXX_STANDARD=17 \
+# -DCMAKE_CXX_EXTENSIONS=Off \
+# -DENABLE_GPU=on \
+# -DCMAKE_CXX_COMPILER=hipcc \
+# -DCMAKE_CUDA_ARCHITECTURES=80 \
+# $FCS_SRC_DIR/FastCaloSimAnalyzer && \
+# make -j 128 install && \
+# rm -rf $FCS_BUILD_DIR
+
+
diff --git a/scripts/build_scripts/fcs-hip-rocm.Dockerfile b/scripts/build_scripts/fcs-hip-rocm.Dockerfile
new file mode 100644
index 0000000..16992e9
--- /dev/null
+++ b/scripts/build_scripts/fcs-hip-rocm.Dockerfile
@@ -0,0 +1,52 @@
+ARG BASE=registry.nersc.gov/m2845/root:6.30.04-rocm-terminal6.2.1
+ARG REFRESHED_AT=2024-11-28
+FROM $BASE
+
+USER root
+
+# ARG takes NAME=value; written as "ARG DEBIAN_FRONTEND noninteractive" the value was never assigned,
+# so the ${DEBIAN_FRONTEND} prefixes below expanded to an empty string.
+ARG DEBIAN_FRONTEND=noninteractive
+RUN \
+    DEBIAN_FRONTEND=${DEBIAN_FRONTEND} \
+    apt-get update && \
+    DEBIAN_FRONTEND=${DEBIAN_FRONTEND} \
+    apt-get upgrade --yes && \
+    apt-get install --yes \
+        wget && \
+    apt-get clean all
+
+RUN \
+    wget https://github.com/Kitware/CMake/releases/download/v3.31.1/cmake-3.31.1-linux-x86_64.sh &&\
+    chmod +x cmake-3.31.1-linux-x86_64.sh && \
+    mkdir -p /opt/cmake && \
+    ./cmake-3.31.1-linux-x86_64.sh --prefix=/opt/cmake --skip-license --exclude-subdir && \
+    rm -f ./cmake-3.31.1-linux-x86_64.sh
+
+ENV PATH=/opt/cmake/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/opt/rocm/bin
+
+ARG WORK_DIR=/hep-mini-apps
+ARG FCS_SRC_DIR=$WORK_DIR/FCS-GPU/source
+ARG FCS_BUILD_DIR=$WORK_DIR/FCS-GPU/build
+ARG FCS_INSTALL_DIR=$WORK_DIR/FCS-GPU/install
+ARG FCS_BRANCH=dingpf/packaging
+ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install
+
+RUN \
+    cd $ROOT_INSTALL_DIR/bin && \
+    . 
$ROOT_INSTALL_DIR/bin/thisroot.sh && \
+    mkdir -p $FCS_BUILD_DIR && \
+    mkdir -p $FCS_INSTALL_DIR && \
+    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
+    cd $FCS_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
+        -DUSE_HIP=on \
+        -DCMAKE_CXX_COMPILER=hipcc \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        -DENABLE_GPU=on \
+        $FCS_SRC_DIR/FastCaloSimAnalyzer && \
+    make -j 128 install && \
+    cd $WORK_DIR && \
+    rm -rf $FCS_BUILD_DIR
+
diff --git a/scripts/build_scripts/fcs-kokkos-cuda.Dockerfile b/scripts/build_scripts/fcs-kokkos-cuda.Dockerfile
new file mode 100644
index 0000000..6e0018a
--- /dev/null
+++ b/scripts/build_scripts/fcs-kokkos-cuda.Dockerfile
@@ -0,0 +1,58 @@
+ARG BASE=registry.nersc.gov/m2845/root:6.30.04-cuda12.2.2-devel-ubuntu22.04
+FROM $BASE
+# Alternative base images, kept for reference (commented out):
+# ARG BASE=docker.io/dingpf/root:6.30.04-cuda12.2.2-ubuntu22.04
+#ARG BASE=docker.io/dingpf/root:6.30.04-nvhpc23.9-cuda12.2-ubuntu22.04
+
+ARG WORK_DIR=/hep-mini-apps
+ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install
+# Step 1: clone, build, and install Kokkos (CUDA + OpenMP + Serial backends, AMPERE80 arch, shared libraries), compiling with the nvcc_wrapper from the Kokkos sources.
+ARG Kokkos_SRC_DIR=$WORK_DIR/Kokkos/source
+ARG Kokkos_BUILD_DIR=$WORK_DIR/Kokkos/build
+ARG Kokkos_INSTALL_DIR=$WORK_DIR/Kokkos/install
+ARG Kokkos_BRANCH=4.2.01
+RUN \
+    mkdir -p $Kokkos_BUILD_DIR && \
+    mkdir -p $Kokkos_INSTALL_DIR && \
+    git clone https://github.com/kokkos/kokkos.git -b ${Kokkos_BRANCH} $Kokkos_SRC_DIR && \
+    cd $Kokkos_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=${Kokkos_INSTALL_DIR} \
+        -DCMAKE_CXX_COMPILER=${Kokkos_SRC_DIR}/bin/nvcc_wrapper \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        -DKokkos_ENABLE_COMPILER_WARNINGS=ON \
+        -DKokkos_ENABLE_CUDA=ON \
+        -DKokkos_ENABLE_CUDA_LAMBDA=ON \
+        -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=Off \
+        -DKokkos_ENABLE_OPENMP=On \
+        -DKokkos_ENABLE_SERIAL=On \
+        -DKokkos_ENABLE_TESTS=Off \
+        -DKokkos_ARCH_AMPERE80=ON \
+        -DBUILD_SHARED_LIBS=ON \
+        ${Kokkos_SRC_DIR} && \
+    make -j 32 install && \
+    rm -rf $Kokkos_BUILD_DIR
+
+ARG 
FCS_SRC_DIR=$WORK_DIR/FCS-GPU/source
+ARG FCS_BUILD_DIR=$WORK_DIR/FCS-GPU/build
+ARG FCS_INSTALL_DIR=$WORK_DIR/FCS-GPU/install
+ARG FCS_BRANCH=dingpf/packaging
+ENV CMAKE_PREFIX_PATH="${Kokkos_INSTALL_DIR}:${CMAKE_PREFIX_PATH}"
+RUN \
+    cd $ROOT_INSTALL_DIR/bin && \
+    . $ROOT_INSTALL_DIR/bin/thisroot.sh && \
+    mkdir -p $FCS_BUILD_DIR && \
+    mkdir -p $FCS_INSTALL_DIR && \
+    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
+    cd $FCS_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
+        -DENABLE_XROOTD=Off \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        -DENABLE_GPU=on \
+        -DCMAKE_CUDA_ARCHITECTURES=80 \
+        -DUSE_KOKKOS=ON \
+        -DCMAKE_CXX_COMPILER=$Kokkos_INSTALL_DIR/bin/nvcc_wrapper \
+        $FCS_SRC_DIR/FastCaloSimAnalyzer && \
+    make -j 16 install && \
+    rm -rf $FCS_BUILD_DIR
\ No newline at end of file
diff --git a/scripts/build_scripts/fcs-stdpar-cuda.Dockerfile b/scripts/build_scripts/fcs-stdpar-cuda.Dockerfile
new file mode 100644
index 0000000..fff8f52
--- /dev/null
+++ b/scripts/build_scripts/fcs-stdpar-cuda.Dockerfile
@@ -0,0 +1,32 @@
+ARG BASE=registry.nersc.gov/m2845/root:6.30.04-nvhpc23.9-devel-cuda12.2-ubuntu22.04
+FROM $BASE
+# Paths used by the build; ROOT itself is not built here, so it is presumably provided by the base image under $ROOT_INSTALL_DIR.
+ARG WORK_DIR=/hep-mini-apps
+ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install
+
+ARG FCS_SRC_DIR=$WORK_DIR/FCS-GPU/source
+ARG FCS_BUILD_DIR=$WORK_DIR/FCS-GPU/build
+ARG FCS_INSTALL_DIR=$WORK_DIR/FCS-GPU/install
+ARG FCS_BRANCH=dingpf/packaging
+ARG NVHPC_ROOT=/opt/nvidia/hpc_sdk/Linux_x86_64/23.9
+# Copy the NVHPC localrc to localrc_gcc114 (NOTE(review): presumably read by the nvc++_p wrapper - confirm), then build FCS-GPU with USE_STDPAR=ON for the GPU target.
+RUN \
+    cp $NVHPC_ROOT/compilers/bin/localrc $NVHPC_ROOT/compilers/bin/localrc_gcc114 && \
+    cd $ROOT_INSTALL_DIR/bin && \
+    . 
$ROOT_INSTALL_DIR/bin/thisroot.sh && \
+    mkdir -p $FCS_BUILD_DIR && \
+    mkdir -p $FCS_INSTALL_DIR && \
+    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
+    cd $FCS_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
+        -DENABLE_XROOTD=Off \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        -DENABLE_GPU=on \
+        -DUSE_STDPAR=ON \
+        -DSTDPAR_TARGET=gpu \
+        -DCMAKE_CUDA_ARCHITECTURES=80 \
+        -DCMAKE_CXX_COMPILER=$FCS_SRC_DIR/scripts/nvc++_p \
+        $FCS_SRC_DIR/FastCaloSimAnalyzer && \
+    make -j 16 install && \
+    rm -rf $FCS_BUILD_DIR
diff --git a/scripts/build_scripts/fcs-x86.Dockerfile b/scripts/build_scripts/fcs-x86.Dockerfile
new file mode 100644
index 0000000..bc14045
--- /dev/null
+++ b/scripts/build_scripts/fcs-x86.Dockerfile
@@ -0,0 +1,28 @@
+ARG BASE=registry.nersc.gov/m2845/root:6.30.04-ubuntu22.04
+FROM $BASE
+# Alternative base image, kept for reference (commented out):
+# ARG BASE=docker.io/dingpf/root:6.30.04-ubuntu22.04
+
+ARG WORK_DIR=/hep-mini-apps
+ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install
+
+ARG FCS_SRC_DIR=$WORK_DIR/FCS-GPU/source
+ARG FCS_BUILD_DIR=$WORK_DIR/FCS-GPU/build
+ARG FCS_INSTALL_DIR=$WORK_DIR/FCS-GPU/install
+ARG FCS_BRANCH=dingpf/packaging
+# Source the ROOT environment, then clone, configure (no -DENABLE_GPU), build, and install FCS-GPU; the build tree is removed afterwards.
+RUN \
+    cd $ROOT_INSTALL_DIR/bin && \
+    . 
$ROOT_INSTALL_DIR/bin/thisroot.sh && \
+    mkdir -p $FCS_BUILD_DIR && \
+    mkdir -p $FCS_INSTALL_DIR && \
+    git clone https://github.com/hep-cce/FCS-GPU.git -b ${FCS_BRANCH} $FCS_SRC_DIR && \
+    cd $FCS_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=$FCS_INSTALL_DIR \
+        -DENABLE_XROOTD=Off \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        $FCS_SRC_DIR/FastCaloSimAnalyzer && \
+    make -j 128 install && \
+    rm -rf $FCS_BUILD_DIR
+
diff --git a/scripts/build_scripts/perlmutter/build_FCS_with_cuda.sh b/scripts/build_scripts/perlmutter/build_FCS_with_cuda.sh
new file mode 100755
index 0000000..b27a944
--- /dev/null
+++ b/scripts/build_scripts/perlmutter/build_FCS_with_cuda.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Build and install FCS-GPU with CUDA under $SCRATCH/hep-mini-apps.
+# load the default python module, ROOT was built with it.
+
+module load python
+
+WORK_DIR=$SCRATCH/hep-mini-apps
+
+SRC_DIR=$WORK_DIR/FCS-GPU_src
+BUILD_DIR=$WORK_DIR/FCS-GPU_gpu_build
+INSTALL_DIR=$WORK_DIR/FCS-GPU_gpu_install
+# A ROOT installation is expected at $WORK_DIR/root_install; its environment is sourced before building.
+ROOT_DIR=$WORK_DIR/root_install
+source $ROOT_DIR/bin/thisroot.sh
+# Start from a clean build tree, then fetch the sources (NOTE(review): git clone will fail if $SRC_DIR already exists; the script carries on regardless).
+cd $WORK_DIR
+rm -rf $BUILD_DIR
+mkdir -p $BUILD_DIR
+mkdir -p $INSTALL_DIR
+git clone https://github.com/cgleggett/FCS-GPU.git -b dingpf/packaging $SRC_DIR
+# Configure with GPU support for CUDA architecture 80 and build.
+cd $BUILD_DIR
+cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \
+    -DENABLE_XROOTD=Off \
+    -DCMAKE_CXX_STANDARD=17 \
+    -DCMAKE_CXX_EXTENSIONS=Off \
+    -DENABLE_GPU=on \
+    -DCMAKE_CUDA_ARCHITECTURES=80 \
+    $SRC_DIR/FastCaloSimAnalyzer
+
+make -j 128 install
diff --git a/scripts/build_scripts/perlmutter/build_FCS_with_kokkos_cuda.sh b/scripts/build_scripts/perlmutter/build_FCS_with_kokkos_cuda.sh
new file mode 100755
index 0000000..3fecbd6
--- /dev/null
+++ b/scripts/build_scripts/perlmutter/build_FCS_with_kokkos_cuda.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Build FCS-GPU against an existing Kokkos install; ROOT, the FCS-GPU sources, and Kokkos are expected at the paths below (nothing is cloned here).
+ROOT_DIR=$SCRATCH/hep-mini-apps/root_install
+SRC_DIR=$SCRATCH/hep-mini-apps/FCS-GPU_src
+KOKKOS_DIR=$SCRATCH/hep-mini-apps-kokkos/kokkos_install
+
+module load python
+module load cmake/3.24.3
+
+
+WORK_DIR=$SCRATCH/hep-mini-apps-kokkos
+
+BUILD_DIR=$WORK_DIR/FCS-GPU_build
+INSTALL_DIR=$WORK_DIR/FCS-GPU_install
+
+source $ROOT_DIR/bin/thisroot.sh
+
+rm -rf $BUILD_DIR
+mkdir -p $BUILD_DIR
+mkdir -p $INSTALL_DIR
+cd $BUILD_DIR
+
+export CMAKE_PREFIX_PATH=$KOKKOS_DIR:$CMAKE_PREFIX_PATH
+
+cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \
+    -DENABLE_XROOTD=Off \
+    -DCMAKE_CXX_EXTENSIONS=Off \
+    -DENABLE_GPU=on \
+    -DCMAKE_CUDA_ARCHITECTURES=80 \
+    -DUSE_KOKKOS=ON \
+    -DCMAKE_CXX_COMPILER=$KOKKOS_DIR/bin/nvcc_wrapper \
+    -DCMAKE_CXX_STANDARD=17 \
+    $SRC_DIR/FastCaloSimAnalyzer
+
+make -j 16 install
+# Print the runtime setup; \$LD_LIBRARY_PATH is escaped so the hint shows the variable itself instead of this shell's build-time value.
+echo "Run the following before calling the application"
+echo "module load python"
+echo "source $ROOT_DIR/bin/thisroot.sh"
+echo "export FCS_DATAPATH=/global/cfs/cdirs/atlas/leggett/data/FastCaloSimInputs"
+echo "export LD_LIBRARY_PATH=$KOKKOS_DIR/lib64:\$LD_LIBRARY_PATH"
+echo "source $INSTALL_DIR/setup.sh"
diff --git a/scripts/build_scripts/perlmutter/build_FCS_with_stdpar.sh b/scripts/build_scripts/perlmutter/build_FCS_with_stdpar.sh
new file mode 100755
index 0000000..3433270
--- /dev/null
+++ b/scripts/build_scripts/perlmutter/build_FCS_with_stdpar.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Scratch area holding ROOT and the FCS-GPU sources. Defaults to the original
+# author's directory; export DPF_SCRATCH beforehand to build somewhere else.
+DPF_SCRATCH=${DPF_SCRATCH:-/pscratch/sd/d/dingpf}
+ROOT_DIR=$DPF_SCRATCH/hep-mini-apps/root_install
+SRC_DIR=$DPF_SCRATCH/hep-mini-apps/FCS-GPU_src
+
+module load python
+module load cmake/3.24.3
+module load PrgEnv-nvhpc
+
+source $ROOT_DIR/bin/thisroot.sh
+
+WORK_DIR=$DPF_SCRATCH/hep-mini-apps-stdpar
+
+BUILD_DIR=$WORK_DIR/FCS-GPU_build
+INSTALL_DIR=$WORK_DIR/FCS-GPU_install
+
+rm -rf $BUILD_DIR
+mkdir -p $BUILD_DIR
+mkdir -p $INSTALL_DIR
+cd $BUILD_DIR
+
+export CMAKE_PREFIX_PATH=/opt/nvidia/hpc_sdk/Linux_x86_64/23.9/cmake:$CMAKE_PREFIX_PATH
+export LIBRARY_PATH=$LD_LIBRARY_PATH
+
+cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \
+    -DENABLE_XROOTD=Off \
+    -DCMAKE_CXX_STANDARD=17 \
+    -DCMAKE_CXX_EXTENSIONS=Off \
+    -DENABLE_GPU=on \
+    -DUSE_STDPAR=ON \
+    -DSTDPAR_TARGET=gpu \
+    -DCMAKE_CUDA_ARCHITECTURES=80 \
+    
-DCMAKE_CXX_COMPILER=$SRC_DIR/scripts/nvc++_p \
+    $SRC_DIR/FastCaloSimAnalyzer
+
+make -j 16 install
+
diff --git a/scripts/build_scripts/perlmutter/build_kokkos.sh b/scripts/build_scripts/perlmutter/build_kokkos.sh
new file mode 100755
index 0000000..11f7197
--- /dev/null
+++ b/scripts/build_scripts/perlmutter/build_kokkos.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Configure, build, and install Kokkos (CUDA + OpenMP + Serial backends, AMPERE80 arch, shared libraries).
+module load python
+module load cmake/3.24.3
+
+INSTALL_DIR=$SCRATCH/hep-mini-apps-kokkos/kokkos_install
+SRC_DIR=$SCRATCH/hep-mini-apps-kokkos/kokkos
+BUILD_DIR=$SCRATCH/hep-mini-apps-kokkos/kokkos_build
+# NOTE(review): the Kokkos sources must already be present in $SRC_DIR; this script does not fetch them.
+mkdir -p $INSTALL_DIR
+mkdir -p $BUILD_DIR
+
+pushd $BUILD_DIR
+
+cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_DIR} \
+    -DCMAKE_CXX_COMPILER=${SRC_DIR}/bin/nvcc_wrapper \
+    -DCMAKE_CXX_STANDARD=17 \
+    -DCMAKE_CXX_EXTENSIONS=Off \
+    -DKokkos_ENABLE_COMPILER_WARNINGS=ON \
+    -DKokkos_ENABLE_CUDA=ON \
+    -DKokkos_ENABLE_CUDA_LAMBDA=ON \
+    -DKokkos_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE=Off \
+    -DKokkos_ENABLE_OPENMP=On \
+    -DKokkos_ENABLE_SERIAL=On \
+    -DKokkos_ENABLE_TESTS=Off \
+    -DKokkos_ARCH_AMPERE80=ON \
+    -DBUILD_SHARED_LIBS=ON \
+    ${SRC_DIR}
+
+make -j 32 install
+
+popd
diff --git a/scripts/build_scripts/perlmutter/build_root.sh b/scripts/build_scripts/perlmutter/build_root.sh
new file mode 100755
index 0000000..e60f057
--- /dev/null
+++ b/scripts/build_scripts/perlmutter/build_root.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+# On Perlmutter
+# Clones ROOT at $ROOT_VERSION and builds it from source with many optional components switched off.
+
+
+ROOT_VERSION=v6-30-04
+
+module load python
+
+WORK_DIR=$SCRATCH/hep-mini-apps
+
+SRC_DIR=$WORK_DIR/root_src
+INSTALL_DIR=$WORK_DIR/root_install
+BUILD_DIR=$WORK_DIR/root_build
+mkdir -p $WORK_DIR
+git clone --branch $ROOT_VERSION --depth=1 https://github.com/root-project/root.git $SRC_DIR
+
+rm -rf $BUILD_DIR
+mkdir -p $BUILD_DIR
+mkdir -p $INSTALL_DIR
+
+cd $BUILD_DIR
+cmake -DCMAKE_INSTALL_PREFIX=$INSTALL_DIR \
+    -DCMAKE_CXX_FLAGS=-std=c++17 \
+    -Dx11=OFF -Dtbb=OFF \
+    -Dopengl=OFF -Dgviz=OFF \
+    -Dimt=OFF -Ddavix=OFF \
+    -Dvdt=OFF -Dxrootd=OFF \
+    -Dwebgui=OFF 
-Dsqlite=OFF \
+    -Dssl=OFF -Dmysql=OFF \
+    -Doracle=OFF -Dpgsql=OFF \
+    -Ddavix=OFF -Dgfal=OFF \
+    -Dimt=OFF \
+    -DCMAKE_CXX_STANDARD=17 \
+    -DCMAKE_CXX_EXTENSIONS=Off \
+    $SRC_DIR
+make -j 128 install
+# took about 15 minutes with -j 128, source, build, and install dir are all on $PSCRATCH
diff --git a/scripts/build_scripts/root-rocm.Dockerfile b/scripts/build_scripts/root-rocm.Dockerfile
new file mode 100644
index 0000000..50e7c70
--- /dev/null
+++ b/scripts/build_scripts/root-rocm.Dockerfile
@@ -0,0 +1,46 @@
+ARG BASE=rocm/rocm-terminal:6.2.1
+ARG REFRESHED_AT=2024-11-28
+FROM $BASE
+
+# ARG takes NAME=value; written as "ARG DEBIAN_FRONTEND noninteractive" the value was never assigned,
+# so the ${DEBIAN_FRONTEND} prefixes below expanded to an empty string.
+ARG DEBIAN_FRONTEND=noninteractive
+
+USER root
+RUN \
+    DEBIAN_FRONTEND=${DEBIAN_FRONTEND} \
+    apt-get update && \
+    DEBIAN_FRONTEND=${DEBIAN_FRONTEND} \
+    apt-get upgrade --yes && \
+    apt-get install --yes \
+        hiprand \
+        rocrand && \
+    apt-get clean all
+
+# Clone ROOT at $ROOT_VERSION, build and install it with many optional components switched off, then remove the build tree.
+ARG ROOT_VERSION=v6-30-04
+ARG WORK_DIR=/hep-mini-apps
+ARG ROOT_SRC_DIR=$WORK_DIR/root/source
+ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install
+ARG ROOT_BUILD_DIR=$WORK_DIR/build
+RUN \
+    mkdir -p $ROOT_BUILD_DIR && \
+    git clone --branch $ROOT_VERSION --depth=1 https://github.com/root-project/root.git $ROOT_SRC_DIR && \
+    mkdir -p $ROOT_INSTALL_DIR && \
+    cd $ROOT_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=$ROOT_INSTALL_DIR \
+        -DCMAKE_CXX_FLAGS=-std=c++17 \
+        -Dx11=OFF -Dtbb=OFF \
+        -Dopengl=OFF -Dgviz=OFF \
+        -Dimt=OFF -Ddavix=OFF \
+        -Dvdt=OFF -Dxrootd=OFF \
+        -Dwebgui=OFF -Dsqlite=OFF \
+        -Dssl=OFF -Dmysql=OFF \
+        -Doracle=OFF -Dpgsql=OFF \
+        -Ddavix=OFF -Dgfal=OFF \
+        -Dimt=OFF \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        $ROOT_SRC_DIR && \
+    make -j 64 install && \
+    rm -rf $ROOT_BUILD_DIR
diff --git a/scripts/build_scripts/root.Dockerfile b/scripts/build_scripts/root.Dockerfile
new file mode 100644
index 0000000..060a0dc
--- /dev/null
+++ b/scripts/build_scripts/root.Dockerfile
@@ -0,0 +1,51 @@
+ARG CUDA_VERSION=12.2.2
+ARG CUDA_BASE=docker.io/nvidia/cuda:$CUDA_VERSION-devel-ubuntu22.04
+ARG 
NVHPC_CUDA_VERSION=23.9-devel-cuda12.2
+ARG NVHPC_BASE=nvcr.io/nvidia/nvhpc:${NVHPC_CUDA_VERSION}-ubuntu22.04
+ARG UBUNTU_BASE=docker.io/library/ubuntu:22.04
+
+ARG BASE=$CUDA_BASE
+FROM $BASE
+# ARG takes NAME=value ("ARG DEBIAN_FRONTEND noninteractive" never assigned it); as a build arg it is visible to the RUN steps below.
+ARG DEBIAN_FRONTEND=noninteractive
+
+RUN \
+    apt-get update && \
+    apt-get upgrade --yes && \
+    apt-get install --yes \
+        build-essential \
+        cmake \
+        wget \
+        vim \
+        python3 \
+        git && \
+    apt-get clean all
+
+ENV CUDA_PATH=/usr/local/cuda/
+# Clone ROOT at $ROOT_VERSION, build and install it with many optional components switched off, then remove the build tree.
+ARG ROOT_VERSION=v6-30-04
+ARG WORK_DIR=/hep-mini-apps
+ARG ROOT_SRC_DIR=$WORK_DIR/root/source
+ARG ROOT_INSTALL_DIR=$WORK_DIR/root/install
+ARG ROOT_BUILD_DIR=$WORK_DIR/build
+RUN \
+    mkdir -p $ROOT_BUILD_DIR && \
+    git clone --branch $ROOT_VERSION --depth=1 https://github.com/root-project/root.git $ROOT_SRC_DIR && \
+    mkdir -p $ROOT_INSTALL_DIR && \
+    cd $ROOT_BUILD_DIR && \
+    cmake -DCMAKE_INSTALL_PREFIX=$ROOT_INSTALL_DIR \
+        -DCMAKE_CXX_FLAGS=-std=c++17 \
+        -Dx11=OFF -Dtbb=OFF \
+        -Dopengl=OFF -Dgviz=OFF \
+        -Dimt=OFF -Ddavix=OFF \
+        -Dvdt=OFF -Dxrootd=OFF \
+        -Dwebgui=OFF -Dsqlite=OFF \
+        -Dssl=OFF -Dmysql=OFF \
+        -Doracle=OFF -Dpgsql=OFF \
+        -Ddavix=OFF -Dgfal=OFF \
+        -Dimt=OFF \
+        -DCMAKE_CXX_STANDARD=17 \
+        -DCMAKE_CXX_EXTENSIONS=Off \
+        $ROOT_SRC_DIR && \
+    make -j 64 install && \
+    rm -rf $ROOT_BUILD_DIR
diff --git a/scripts/run_scripts/run_fcs-gpu.sh b/scripts/run_scripts/run_fcs-gpu.sh
new file mode 100755
index 0000000..3ec3c1d
--- /dev/null
+++ b/scripts/run_scripts/run_fcs-gpu.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+logfile=${LOGFILE}
+
+echo "Logfile: ${logfile}" | tee -a ${logfile} # Debug line to check LOGFILE value
+
+# Print an INFO-prefixed, timestamped message and append it to ${logfile}.
+log() {
+    local message="$1"
+    echo "INFO - $(date) - ${message}" | tee -a ${logfile}
+}
+
+# Run a command line via eval, mirroring its stdout and stderr into ${logfile}.
+# Returns the exit status of the command itself rather than that of tee.
+log_command() {
+    local command="$1"
+    echo "INFO - $(date) - Executing: ${command}" | tee -a ${logfile}
+    eval ${command} 2>&1 | tee -a ${logfile}
+    return "${PIPESTATUS[0]}"
+}
+
+# Succeed when "$1" resolves to a command known to the shell.
+check_command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+
+# Log the output of command line $1 under the heading $2, or a fallback note
+# when the first word of $1 is not an available command.
+log_info() {
+    local command=$1
+    local description=$2
+    local fallback_message="No 
${description} information found. ${command} is not available."
+
+    if check_command_exists $command; then
+        log "${description} information:"
+        log_command "$command"
+    else
+        log "$fallback_message"
+    fi
+}
+
+log "DATAPATH: $DATAPATH"
+
+log_info "perl ${SYSINFO}" "System"
+log_info "lscpu" "CPU"
+log_info "nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv" "GPU"
+
+log "Setup"
+export FCS_DATAPATH=/input
+source /hep-mini-apps/root/install/bin/thisroot.sh
+source /hep-mini-apps/FCS-GPU/install/setup.sh
+
+log "TFCSSimulation"
+log_command "runTFCSSimulation --earlyReturn --energy 65536"
diff --git a/scripts/run_scripts/run_fcs-kokkos-cuda.sh b/scripts/run_scripts/run_fcs-kokkos-cuda.sh
new file mode 100755
index 0000000..0afd99a
--- /dev/null
+++ b/scripts/run_scripts/run_fcs-kokkos-cuda.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+logfile=${LOGFILE}
+
+echo "Logfile: ${logfile}" | tee -a ${logfile} # Debug line to check LOGFILE value
+
+# Print an INFO-prefixed, timestamped message and append it to ${logfile}.
+log() {
+    local message="$1"
+    echo "INFO - $(date) - ${message}" | tee -a ${logfile}
+}
+
+# Run a command line via eval, mirroring its stdout and stderr into ${logfile}.
+# Returns the exit status of the command itself rather than that of tee.
+log_command() {
+    local command="$1"
+    echo "INFO - $(date) - Executing: ${command}" | tee -a ${logfile}
+    eval ${command} 2>&1 | tee -a ${logfile}
+    return "${PIPESTATUS[0]}"
+}
+
+# Succeed when "$1" resolves to a command known to the shell.
+check_command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+
+# Log the output of command line $1 under the heading $2, or a fallback note
+# when the first word of $1 is not an available command.
+log_info() {
+    local command=$1
+    local description=$2
+    local fallback_message="No ${description} information found. ${command} is not available."
+
+    if check_command_exists $command; then
+        log "${description} information:"
+        log_command "$command"
+    else
+        log "$fallback_message"
+    fi
+}
+
+log "DATAPATH: $DATAPATH"
+
+log_info "perl ${SYSINFO}" "System"
+log_info "lscpu" "CPU"
+log_info "nvidia-smi --query-gpu=name,driver_version,count,clocks.max.sm,clocks.max.memory,memory.total,memory.used,memory.free,utilization.gpu,utilization.memory,temperature.gpu,temperature.memory --format=csv" "GPU"
+
+log "Setup"
+export FCS_DATAPATH=/input
+export LD_LIBRARY_PATH=/hep-mini-apps/Kokkos/install/lib:$LD_LIBRARY_PATH
+source /hep-mini-apps/root/install/bin/thisroot.sh
+source /hep-mini-apps/FCS-GPU/install/setup.sh
+
+log "TFCSSimulation"
+log_command "runTFCSSimulation --earlyReturn --energy 65536"
diff --git a/scripts/run_scripts/run_fcs-x86.sh b/scripts/run_scripts/run_fcs-x86.sh
new file mode 100755
index 0000000..6ba991c
--- /dev/null
+++ b/scripts/run_scripts/run_fcs-x86.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+logfile=${LOGFILE}
+
+echo "Logfile: ${logfile}" | tee -a ${logfile} # Debug line to check LOGFILE value
+
+# Print an INFO-prefixed, timestamped message and append it to ${logfile}.
+log() {
+    local message="$1"
+    echo "INFO - $(date) - ${message}" | tee -a ${logfile}
+}
+
+# Run a command line via eval, mirroring its stdout and stderr into ${logfile}.
+# Logs an ERROR line and exits the script with status 1 when the command fails.
+log_command() {
+    local command="$1"
+    echo "INFO - $(date) - Executing: ${command}" | tee -a ${logfile}
+    eval ${command} 2>&1 | tee -a ${logfile}
+    # Check the command's own status; the pipeline status is that of tee.
+    if [ "${PIPESTATUS[0]}" -ne 0 ]; then
+        echo "ERROR - $(date) - Command failed: ${command}" | tee -a ${logfile}
+        exit 1
+    fi
+}
+
+# Log the output of command line $1 under the heading $2, or a fallback note
+# when the first word of $1 is not an available command.
+log_info() {
+    local command=$1
+    local description=$2
+    local fallback_message="No ${description} information found. ${command} is not available."
+
+    if check_command_exists $command; then
+        log "${description} information:"
+        log_command "$command"
+    else
+        log "$fallback_message"
+    fi
+}
+# Succeed when "$1" resolves to a command known to the shell.
+check_command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+
+log "DATAPATH: $DATAPATH"
+
+log_info "perl ${SYSINFO}" "System"
+log_info "lscpu" "CPU"
+
+log "Setup"
+export FCS_DATAPATH=/input
+source /hep-mini-apps/root/install/bin/thisroot.sh
+source /hep-mini-apps/FCS-GPU/install/setup.sh
+
+log "TFCSSimulation"
+log_command "runTFCSSimulation --earlyReturn --energy 65536"
diff --git a/scripts/run_scripts/run_images.sh b/scripts/run_scripts/run_images.sh
new file mode 100755
index 0000000..0c305b9
--- /dev/null
+++ b/scripts/run_scripts/run_images.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# Run the FCS container images with podman-hpc or docker; container logs are written into ${LOG_DIR}.
+# set -e # Exit immediately if a command exits with a non-zero status.
+#set -x # Print commands and their arguments as they are executed.
+
+OS_BASE=ubuntu22.04
+CUDA_VERSION=12.2.2-devel-${OS_BASE}
+NVHPC_VERSION=23.9-devel-cuda12.2-${OS_BASE}
+ROOT_VERSION=v6-30-04
+FCS_BRANCH=dingpf/packaging
+# v6-30-04 -> 6.30.04, the form used in the image tags built below.
+ROOT_DOT_VERSION=${ROOT_VERSION//v/}
+ROOT_DOT_VERSION=${ROOT_DOT_VERSION//-/.}
+NVHPC_BASE_IMAGE=nvcr.io/nvidia/nvhpc:${NVHPC_VERSION}
+CUDA_BASE_IMAGE=docker.io/nvidia/cuda:${CUDA_VERSION}
+UBUNTU_BASE_IMAGE=docker.io/library/ubuntu:22.04
+REGISTRY_PROJECT=registry.nersc.gov/m2845
+MOUNTED_RUN_DIR=/run
+DATAPATH="${FCS_DATAPATH}"
+RUNNER_LABEL="${RUNNER_LABEL}"
+
+CHECK_LOCAL_IMAGES=${CHECK_LOCAL_IMAGES:-0}
+
+# Ensure the log directory exists
+mkdir -p "${LOG_DIR}"
+
+echo RUNNER_LABEL: ${RUNNER_LABEL}
+# Stop and force-remove every container listed by "${CONTAINER_CMD} ps -aq".
+clean() {
+    $CONTAINER_CMD ps -aq | xargs -r $CONTAINER_CMD stop | xargs -r $CONTAINER_CMD rm --force
+}
+# Succeed when "$1" resolves to a command known to the shell.
+check_command_exists() {
+    command -v "$1" >/dev/null 2>&1
+}
+# Run one FCS image: $1 is the base image reference, $2 the FCS image type (it selects the run script and the registry tag).
+run_fcs_image() {
+    local base_image=$1
+    local image_type=$2
+    local image_tag=$(basename ${base_image})
+    image_tag=${image_tag//:/}
+    local fcs_image_tag=${REGISTRY_PROJECT}/${image_type}:${ROOT_DOT_VERSION}-${image_tag}
+    local 
container_name="${image_type}_${image_tag}"
+    local container_script
+    local run_cmd
+    local backend=${BACKEND_OPTION}
+    local container_log_file="run_log_${RUNNER_LABEL}_${image_type}-${ROOT_DOT_VERSION}-${image_tag}_$(date +%Y%m%d%H%M%S).txt"
+    # Pick the in-container run script (and, for some image types, a different registry tag).
+    case ${image_type} in
+    fcs-cuda)
+        container_script="${MOUNTED_RUN_DIR}/run_fcs-gpu.sh"
+        ;;
+    fcs-kokkos-cuda)
+        container_script="${MOUNTED_RUN_DIR}/run_fcs-kokkos-cuda.sh"
+        ;;
+    fcs-x86)
+        container_script="${MOUNTED_RUN_DIR}/run_fcs-x86.sh"
+        backend=""
+        fcs_image_tag=${REGISTRY_PROJECT}/fcs:${ROOT_DOT_VERSION}-${image_tag}
+        ;;
+    fcs-stdpar)
+        container_script="${MOUNTED_RUN_DIR}/run_fcs-gpu.sh"
+        fcs_image_tag=${REGISTRY_PROJECT}/fcs-stdpar:${ROOT_DOT_VERSION}-${image_tag}
+        ;;
+    fcs-hip-cuda)
+        container_script="${MOUNTED_RUN_DIR}/run_fcs-gpu.sh"
+        fcs_image_tag=${REGISTRY_PROJECT}/hip-cuda:${ROOT_DOT_VERSION}-${image_tag}
+        ;;
+    esac
+    # Build the invocation as an array so paths with spaces survive; ${backend} stays unquoted on purpose so "--gpus 1" splits into two words and an empty value vanishes.
+    run_cmd=("${CONTAINER_CMD}" run
+            --attach STDOUT
+            --rm
+            ${backend}
+            -v "$PWD:${MOUNTED_RUN_DIR}"
+            -v "${DATAPATH}:/input"
+            -v "${LOG_DIR}:/log_dir"
+            -e "SYSINFO=${MOUNTED_RUN_DIR}/sysinfo.pl"
+            -e "DATAPATH=${DATAPATH}"
+            -e "LOGFILE=/log_dir/${container_log_file}"
+            "${fcs_image_tag}"
+            "${container_script}")
+
+    echo "Current directory: $(pwd)"
+    echo "INFO - $(date) - Running image: ${fcs_image_tag}"
+    echo "INFO - $(date) - CMD: ${run_cmd[*]}"
+    "${run_cmd[@]}" || echo "ERROR - $(date) - Container exited with status $? for image: ${fcs_image_tag}"
+    echo "INFO - $(date) - Finished running image: ${fcs_image_tag}"
+}
+# Select the container runtime and the matching GPU option.
+if check_command_exists podman-hpc; then
+    echo "Using podman-hpc"
+    CONTAINER_CMD="podman-hpc"
+    BACKEND_OPTION="--gpu "
+elif check_command_exists docker; then
+    echo "Using docker"
+    CONTAINER_CMD="docker"
+    BACKEND_OPTION="--gpus 1"
+else
+    echo "ERROR: Neither podman-hpc nor docker is installed on this system."
+    exit 1
+fi
+# Log in to the registry; credentials come from NERSC_CONTAINER_REGISTRY_USER and NERSC_CONTAINER_REGISTRY_PASSWORD.
+echo "Logging into ${REGISTRY_PROJECT}"
+echo "${NERSC_CONTAINER_REGISTRY_PASSWORD}" | $CONTAINER_CMD login -u "${NERSC_CONTAINER_REGISTRY_USER}" --password-stdin ${REGISTRY_PROJECT}
+
+for base_image in ${UBUNTU_BASE_IMAGE} ${NVHPC_BASE_IMAGE} ${CUDA_BASE_IMAGE}; do
+
+    # Run FCS images using different base images
+    for image_type in fcs-x86 fcs-cuda fcs-kokkos-cuda; do
+        # Only run FCS x86 variant using Ubuntu base image
+        if [ "${image_type}" = "fcs-x86" ] && [ "${base_image}" != ${UBUNTU_BASE_IMAGE} ]; then
+            continue
+        fi
+        # Only run FCS GPU variant using non-Ubuntu base image
+        if [ "${image_type}" != "fcs-x86" ] && [ "${base_image}" = ${UBUNTU_BASE_IMAGE} ]; then
+            continue
+        fi
+        run_fcs_image ${base_image} ${image_type}
+    done
+done
+
+run_fcs_image ${NVHPC_BASE_IMAGE} fcs-stdpar
+
+# run_fcs_image ${CUDA_BASE_IMAGE} fcs-hip-cuda
+# Finally stop and remove whatever containers the runtime still lists.
+clean
diff --git a/scripts/run_scripts/sysinfo.pl b/scripts/run_scripts/sysinfo.pl
new file mode 100755
index 0000000..f6716a2
--- /dev/null
+++ b/scripts/run_scripts/sysinfo.pl
@@ -0,0 +1,163 @@
+#!/usr/bin/perl
+
+use Digest::MD5 qw(md5 md5_hex md5_base64);
+
+#
+## Extracts information about system, including details of CPU and
+## GPU hardware and driver versions
+## Will produce a JSON-like output, followed by an md5 digest of that text
+#
+# author: C. Leggett
+# date: 2022/06/01
+#
+# Host name(s) as reported by "hostname -A", with trailing whitespace and newline stripped.
+$HOST = `hostname -A`;
+$HOST =~ s/\s*\n$//;
+
+$MSG = "";
+
+#
+## Use lsb_release to extract OS information
+#
+$REL = "unknown";
+$LSBREL = `which lsb_release 2> /dev/null`;
+if ( $? == 0 ) {
+    chop $LSBREL;
+    $rel = `$LSBREL -d`;
+    chop $rel;
+    ($REL = $rel) =~ s/Description:\s*//;
+    $REL =~ s/\s*$//;
+};
+
+#
+## Use lscpu to extract CPU information
+#
+$LSCPU = `which lscpu`;
+chop $LSCPU;
+if ( $? 
!= 0 ) {
+    print STDERR "ERROR: no lscpu found\n";
+    exit(1);
+}
+
+@CPU = `$LSCPU`;
+
+# print "@CPU\n";
+foreach (@CPU) {
+    if (/Model name:\s+(.*)/) {
+        $CPU{"cpu_id"} = $1;
+    } elsif (/^CPU\(s\):\s+(.*)/) {
+        $CPU{"cores"} = $1;
+    } elsif (/^Socket\(s\):\s+(.*)/) {
+        $CPU{"sockets"} = $1;
+    } elsif (/^Thread\(s\) per core:\s+(.*)/) {
+        $CPU{"threads_per_core"} = $1;
+    } elsif (/^CPU max MHz:\s+(.*)/) {
+        $CPU{"max_clock"} = $1;
+    }
+}
+
+
+# NOTE(review): every entry below is emitted with a trailing comma, so the output is JSON-like rather than strict JSON.
+$MSG .= "{\n \"hostname\": \"$HOST\",\n";
+$MSG .= " \"OS\": \"$REL\",\n";
+foreach $k (keys %CPU) {
+    $MSG .= " \"$k\": \"$CPU{$k}\",\n";
+}
+$MSG .= " \"GPUs\": [\n";
+
+#
+## Use nvidia-smi to extract details of NVIDIA GPUs
+#
+$NVS= `which nvidia-smi 2> /dev/null`;
+if ( $? == 0 ) {
+    @NV = `nvidia-smi --query-gpu=name,driver_version,memory.total,compute_cap,clocks.current.graphics,clocks.current.memory,clocks.current.sm --format=csv,noheader`;
+    if ( $? != 0 ) {
+        @NV = `nvidia-smi --query-gpu=name,driver_version,memory.total,clocks.current.graphics,clocks.current.memory,clocks.current.sm --format=csv,noheader`;
+        foreach (@NV) {
+            chop $_;
+            ($GPU{"gpu_id"},$GPU{"driver"},$GPU{"memory"},$GPU{"graphics_clock"},$GPU{"memory_clock"},$GPU{"sm_clock"}) = split(",",$_);
+            # Keys above follow the --query-gpu field order (graphics, memory, sm clocks); retry path without compute_cap.
+            $MSG .= " {\n";
+            foreach $k (sort keys %GPU) {
+                $v = $GPU{$k};
+                $v =~ s/^\s*//;
+                $MSG .= " \"$k\": \"$v\",\n";
+            }
+            $MSG .= " },\n";
+        }
+    } else {
+        foreach (@NV) {
+            chop $_;
+            ($GPU{"gpu_id"},$GPU{"driver"},$GPU{"memory"},$GPU{"compute_capability"},$GPU{"graphics_clock"},$GPU{"memory_clock"},$GPU{"sm_clock"}) = split(",",$_);
+            # Keys above follow the --query-gpu field order (compute_cap, then graphics, memory, sm clocks).
+            $MSG .= " {\n";
+            foreach $k (sort keys %GPU) {
+                $v = $GPU{$k};
+                $v =~ s/^\s*//;
+                $MSG .= " \"$k\": \"$v\",\n";
+            }
+            $MSG .= " },\n";
+        }
+    }
+} else {
+# print STDERR "no nvidia-smi found\n";
+}
+
+#
+## Use rocm-smi to extract details of AMD GPUs
+#
+$ROC = `which rocm-smi 2> /dev/null`;
+if ( $? == 0 ) {
+    chop $ROC;
+    @X = `$ROC | grep -v "========" | grep -v "GPU"`;
+    $N = $#X - 1;
+    foreach $i ( 1 .. 
$N ) {
+        $MSG .= " {\n";
+        $n = `rocm-smi -d $i --showproductname | grep series`;
+        # Only read $1 after a successful match; a failed match would otherwise reuse an earlier capture.
+        $name = ($n =~ /Card series:\s+(.*)$/) ? $1 : "unknown";
+        $d = `rocm-smi -d $i --showdriverversion | grep version`;
+        # Same guard for the driver version, which previously could pick up the card name.
+        $dv = ($d =~ /version:\s+(.*)$/) ? $1 : "unknown";
+        $MSG .= " \"gpu_id\": \"$name\",\n";
+        $MSG .= " \"driver\": \"$dv\",\n";
+
+        $fc = 0;
+        $mc = 0;
+        $sc = 0;
+        $so = 0;
+        # Take the parenthesised value from each clock line of "rocm-smi -d <i> -c"; clocks not reported stay 0.
+        @Y = `$ROC -d $i -c`;
+        foreach ( @Y ) {
+            if (/fclk.*\((.*)\)/) {
+                $fc = $1;
+            }
+            if (/mclk.*\((.*)\)/) {
+                $mc = $1;
+            }
+            if (/sclk.*\((.*)\)/) {
+                $sc = $1;
+            }
+            if (/socclk.*\((.*)\)/) {
+                $so = $1;
+            }
+        }
+
+        $MSG .= " \"fclk\": \"$fc\",\n";
+        $MSG .= " \"mclk\": \"$mc\",\n";
+        $MSG .= " \"sclk\": \"$sc\",\n";
+        $MSG .= " \"socclk\": \"$so\",\n";
+
+        $MSG .= " },\n";
+    }
+}
+
+$MSG .= " ],\n";
+$MSG .= "}\n";
+
+print $MSG;
+# Append an md5 digest of the report plus its first 8 hex characters as a short id.
+$digest = md5_hex($MSG);
+$short = substr($digest, 0, 8);
+
+print "md5 sum: $digest $short\n";