diff --git a/.github/workflows/a100_profiler.yml b/.github/workflows/a100_profiler.yml new file mode 100644 index 0000000000..cc768f63b8 --- /dev/null +++ b/.github/workflows/a100_profiler.yml @@ -0,0 +1,40 @@ +name: A100 Performance Profiler + +on: + schedule: + - cron: '00 00 * * *' + +jobs: + sycl_A100_Profiling: + name: SYCL A100 Profiling + env: + SYCL_NAME_PREFIX: sycl_AMD-Epyc-7313_a100_gcc-11.3_cuda-12.0.1 + ENABLE_CI_PROFILER: 1 + + MADGRAPH4GPU_DB_URL: ${{ secrets.MADGRAPH4GPU_DB_URL }} + MADGRAPH4GPU_DB_SECRET: ${{ secrets.MADGRAPH4GPU_DB_SECRET }} + runs-on: [self-hosted, linux, a100] + steps: + - uses: actions/checkout@v2 + - name: Runs SYCL performanceProfiler.py script + run: cd tools/profiling/; + python3 performanceProfiler.py -l 'SYCL' -b 'master' + - name: Uploads SYCL JSON files to DB + run: cd tools/profiling/; python3 sendData.py --absLayer SYCL --profiler 1 --branch master + + cuda_a100_Profiling: + name: CUDA A100 Profiling + env: + CUDA_NAME_PREFIX: cudacpp_AMD-Epyc-7313_a100_gcc-11.2.1_cuda-12.0.1 + ENABLE_CI_PROFILER: 1 + + MADGRAPH4GPU_DB_URL: ${{ secrets.MADGRAPH4GPU_DB_URL }} + MADGRAPH4GPU_DB_SECRET: ${{ secrets.MADGRAPH4GPU_DB_SECRET }} + runs-on: [self-hosted, linux, a100] + steps: + - uses: actions/checkout@v2 + - name: Runs CUDA performanceProfiler.py script + run: cd tools/profiling/; + python3 performanceProfiler.py -l 'CUDA' -b 'master' + - name: Uploads CUDA JSON files to DB + run: cd tools/profiling/; python3 sendData.py --absLayer CUDA --profiler 1 --branch master \ No newline at end of file diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 34cff0be37..b38d44bd13 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -11,17 +11,26 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch1/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum , epoch2/cuda/ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum ] + folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, + epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx, + epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg, + epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg, + epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg ] fail-fast: false steps: + - uses: actions/checkout@v2 - - name: make epoch1 + - name: make epochX run: make -C ${{ matrix.folder }} debug CPU: runs-on: ubuntu-latest strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ] + folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, + epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx, + epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg, + epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg, + epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg ] precision: [ d , f , m ] fail-fast: false steps: @@ -38,7 +47,11 @@ jobs: FC: gfortran-11 strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ] + folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, + epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx, + epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg, + epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg, + epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg ] precision: [ d 
, f , m ] fail-fast: false steps: @@ -50,20 +63,22 @@ jobs: - name: make check run: make AVX=none OMPFLAGS= FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check GPU: - runs-on: self-hosted + runs-on: [self-hosted, linux, a100] env: CUDA_HOME: /usr/local/cuda/ - FC: gfortran REQUIRE_CUDA: 1 + FC: gfortran strategy: matrix: - folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum , epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg ] + folder: [ epochX/cudacpp/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, + epochX/cudacpp/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx, + epochX/cudacpp/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg, + epochX/cudacpp/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg, + epochX/cudacpp/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg ] precision: [ d , f , m ] fail-fast: false steps: - uses: actions/checkout@v2 - - name: path - run: echo "PATH=$PATH" - name: make info run: make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info - name: make diff --git a/.github/workflows/mi250x_profiler.yml b/.github/workflows/mi250x_profiler.yml new file mode 100644 index 0000000000..e5a97c7d31 --- /dev/null +++ b/.github/workflows/mi250x_profiler.yml @@ -0,0 +1,80 @@ +name: MI250X Performance Profiler + +on: + push: + branches: [ gpu_abstraction ] + +jobs: + Container_Setup_and_Execution: + runs-on: [self-hosted, linux, a100] + name: Container Setup and Execution + steps: + - name: Generate runner token + id: generate_token + run: | + TOKEN=$(curl -XPOST -fsSL \ + -H "Authorization: token ${{ secrets.PAT }}" \ + -H "Accept: application/vnd.github.v3+json" \ + "https://api.github.com/repos/${{ github.repository }}/actions/runners/registration-token" \ + | grep -o '"token": *"[^"]*"' | cut -d '"' -f 4) + echo "token=$TOKEN" >> $GITHUB_OUTPUT + - name: SSH and run Docker container + env: + SSH_PRIVATE_KEY: ${{ secrets.SSH_KEY }} + MI250X_PROFILING_HOST: ${{ secrets.MI250X_PROFILING_HOST }} + MI250X_PROFILING_USER: ${{ secrets.MI250X_PROFILING_USER }} + HPC_ACCOUNT: ${{ secrets.HPC_ACCOUNT }} + HPC_PROJECT: ${{ secrets.HPC_PROJECT }} + SINGULARITY_CACHEDIR: /scratch/$HPC_ACCOUNT/$MI250X_PROFILING_USER/ + SINGULARITY_TMPDIR: /scratch/$HPC_ACCOUNT/$MI250X_PROFILING_USER/ + continue-on-error: true + run: | + echo "$SSH_PRIVATE_KEY" > id_rsa + chmod 600 id_rsa + ssh -o StrictHostKeyChecking=no -i id_rsa $MI250X_PROFILING_USER@$MI250X_PROFILING_HOST "\ + cd /scratch/$HPC_ACCOUNT/$MI250X_PROFILING_USER/ && \ + singularity pull --force oras://ghcr.io/${{ github.repository_owner }}/github_runner_mi250x:latest && \ + srun --account=$HPC_ACCOUNT -p $HPC_PROJECT --gpus=1 --time=03:00:00 singularity run --rocm \ + --env GITHUB_TOKEN=${{ steps.generate_token.outputs.token }} \ + --env REPO_URL=https://github.com/${{ github.repository }} \ + --env RUNNER_NAME=github_runner_mi250x \ + --env GITHUB_RUNNER_TAGS='Linux,x64,mi250x' \ + --env RUNNER_URL=https://github.com/actions/runner/releases/download/v2.303.0/actions-runner-linux-x64-2.303.0.tar.gz \ + github_runner_mi250x_latest.sif" + + HIP_MI250X_Profiling: + runs-on: [self-hosted, linux, mi250x] + name: HIP MI250X Profiling + env: + HIP_NAME_PREFIX: hip_AMD-Epyc-7A53_MI250X_gcc-11.2.1_rocm-5.2.3 + ENABLE_CI_PROFILER: 1 + steps: + - uses: actions/checkout@v2 + - name: Runs HIP performanceProfiler.py script + run: cd tools/profiling/; + python3 performanceProfiler.py -l 'HIP' -b 'master' + + - name: Uploads workplace_mg4gpu directory as an artifact + uses: actions/upload-artifact@v3 + with: + name: 
profiling-results + path: tools/profiling/workplace_mg4gpu + + Upload_JSON_files: + needs: HIP_MI250X_Profiling + runs-on: [self-hosted, linux] + name: Upload JSON files to DB + env: + HIP_NAME_PREFIX: hip_AMD-Epyc-7A53_MI250X_gcc-11.2.1_rocm-5.2.3 + ENABLE_CI_PROFILER: 1 + MADGRAPH4GPU_DB_URL: ${{ secrets.MADGRAPH4GPU_DB_URL }} + MADGRAPH4GPU_DB_SECRET: ${{ secrets.MADGRAPH4GPU_DB_SECRET }} + steps: + - uses: actions/checkout@v2 + - name: Download artifact containing profiling data + uses: actions/download-artifact@v3 + with: + name: profiling-results + path: tools/profiling + - name: Uploads HIP JSON files to DB + run: cd tools/profiling; python3 sendData.py --absLayer HIP --profiler 1 --branch master \ No newline at end of file diff --git a/.github/workflows/sycl.yml b/.github/workflows/sycl.yml new file mode 100644 index 0000000000..3af91957f8 --- /dev/null +++ b/.github/workflows/sycl.yml @@ -0,0 +1,49 @@ +name: SYCL CI + +on: + push: + branches: [ master ] + paths: + - 'epochX/sycl/**' + pull_request: + branches: [ master ] + paths: + - 'epochX/sycl/**' + +jobs: + GPU: + runs-on: [self-hosted, linux, a100] + env: + FC: gfortran + REQUIRE_CUDA: 1 + SYCLFLAGS: -fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend --cuda-gpu-arch=sm_80 -Xclang -fdenormal-fp-math=ieee + ENABLE_CI_PROFILER: 1 + strategy: + matrix: + folder: [ epochX/sycl/ee_mumu.sa/SubProcesses/P1_Sigma_sm_epem_mupmum, + epochX/sycl/gg_tt.sa/SubProcesses/P1_Sigma_sm_gg_ttx, + epochX/sycl/gg_ttg.sa/SubProcesses/P1_Sigma_sm_gg_ttxg, + epochX/sycl/gg_ttgg.sa/SubProcesses/P1_Sigma_sm_gg_ttxgg, + epochX/sycl/gg_ttggg.sa/SubProcesses/P1_Sigma_sm_gg_ttxggg ] + precision: [ d , f ] + fail-fast: false + steps: + - uses: actions/checkout@v2 + - name: make info + run: source /cvmfs/sft.cern.ch/lcg/releases/gcc/11.3.0-ad0f5/x86_64-centos8/setup.sh; + source /cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/setvars.sh --include-intel-llvm; + CXX=/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/compiler/2023.0.0/linux/bin-llvm/clang++; + LD_LIBRARY_PATH=${{ github.workspace }}/${{ matrix.folder }}/../../lib:$LD_LIBRARY_PATH; + make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} info + - name: make + run: source /cvmfs/sft.cern.ch/lcg/releases/gcc/11.3.0-ad0f5/x86_64-centos8/setup.sh; + source /cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/setvars.sh --include-intel-llvm; + CXX=/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/compiler/2023.0.0/linux/bin-llvm/clang++; + LD_LIBRARY_PATH=${{ github.workspace }}/${{ matrix.folder }}/../../lib:$LD_LIBRARY_PATH; + make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} + - name: make check + run: source /cvmfs/sft.cern.ch/lcg/releases/gcc/11.3.0-ad0f5/x86_64-centos8/setup.sh; + source /cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/setvars.sh --include-intel-llvm; + CXX=/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/compiler/2023.0.0/linux/bin-llvm/clang++; + LD_LIBRARY_PATH=${{ github.workspace }}/${{ matrix.folder }}/../../lib:$LD_LIBRARY_PATH; + make FPTYPE=${{ matrix.precision }} -C ${{ matrix.folder }} check \ No newline at end of file diff --git a/.github/workflows/v100s_profiler.yml b/.github/workflows/v100s_profiler.yml new file mode 100644 index 0000000000..1ca108660e --- /dev/null +++ b/.github/workflows/v100s_profiler.yml @@ -0,0 +1,41 @@ +name: V100s Performance Profiler + +on: + schedule: + - cron: '00 00 * * *' + +jobs: + + sycl_v100s_Profiling: + name: SYCL V100S Profiling + env: + SYCL_NAME_PREFIX: 
sycl_Xeon-Silver-4216_v100s_gcc-11.3_cuda-12.0.1
+      ENABLE_CI_PROFILER: 1
+
+      MADGRAPH4GPU_DB_URL: ${{ secrets.MADGRAPH4GPU_DB_URL }}
+      MADGRAPH4GPU_DB_SECRET: ${{ secrets.MADGRAPH4GPU_DB_SECRET }}
+    runs-on: [self-hosted, linux, v100s]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Runs SYCL performanceProfiler.py script
+        run: cd tools/profiling/;
+             python3 performanceProfiler.py -l 'SYCL' -b 'master'
+      - name: Uploads SYCL JSON files to DB
+        run: cd tools/profiling/; python3 sendData.py --absLayer SYCL --profiler 1 --branch master
+
+  cuda_v100s_Profiling:
+    name: CUDA V100S Profiling
+    env:
+      CUDA_NAME_PREFIX: cudacpp_Xeon-Silver-4216_v100s_gcc-11.3_cuda-12.0.1
+      ENABLE_CI_PROFILER: 1
+
+      MADGRAPH4GPU_DB_URL: ${{ secrets.MADGRAPH4GPU_DB_URL }}
+      MADGRAPH4GPU_DB_SECRET: ${{ secrets.MADGRAPH4GPU_DB_SECRET }}
+    runs-on: [self-hosted, linux, v100s]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Runs CUDA performanceProfiler.py script
+        run: cd tools/profiling/;
+             python3 performanceProfiler.py -l 'CUDA' -b 'master'
+      - name: Uploads CUDA JSON files to DB
+        run: cd tools/profiling/; python3 sendData.py --absLayer CUDA --profiler 1 --branch master
\ No newline at end of file
diff --git a/tools/profiling/README.md b/tools/profiling/README.md
new file mode 100644
index 0000000000..1a5251d93b
--- /dev/null
+++ b/tools/profiling/README.md
@@ -0,0 +1,163 @@
+# Documentation
+
+We currently use [GitHub Actions](https://docs.github.com/en/actions) in conjunction with onsite self-hosted [GitHub Runners](https://docs.github.com/en/actions/hosting-your-own-runners/about-self-hosted-runners) to automate compiling, testing and performance profiling in SYCL and CUDA on A100 and V100s GPUs.
+
+## Grafana link: [madgraph4gpu-db.web.cern.ch](https://madgraph4gpu-db.web.cern.ch/)
+
+## Performance Profiling
+
+### Profiling baseline currently used
+
+**GCC - 11.3.0**
+
+**CUDA - 12.0.1**
+
+**Clang - 16**
+
+### GitHub Actions Runner
+
+A [GitHub Runner](https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners) is a tool that automates workflows by running [actions](https://docs.github.com/en/actions) or tasks in response to specific events on GitHub, such as running tests, building and deploying code, or publishing artifacts. Runners are configured and managed through the GitHub website and help keep the code continuously built and tested. In our case we use them to automate CI and nightly performance profiling.
+
+### performanceProfiler.py
+
+This is the main entrypoint for the profiler. It executes the two bash build scripts, `buildSYCLProcess.sh` for SYCL and `buildCUDAProcess.sh` for CUDA, with the chosen ThreadsPerBlock, BlocksPerGrid and iteration count.
+
+#### Usage:
+
+Go to the `tools/profiling` directory and run:
+
+```
+python3 performanceProfiler.py -l <abstraction layer> -b <branch>
+```
+
+The following options are available for this script:
+
+`-l`: This option specifies the abstraction layer to use for profiling. The supported values are "SYCL" and "CUDA". The default value is "SYCL".
+
+`-b`: This option specifies the branch of the madgraph4gpu repository that is being profiled. The default value is "master".
+
+`-p`: This option selects which processes to profile. The default is all five standalone processes (`ee_mumu.sa`, `gg_tt.sa`, `gg_ttg.sa`, `gg_ttgg.sa`, `gg_ttggg.sa`).
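+
+For reference, the sweep that `performanceProfiler.py` performs boils down to the following nested loop (a simplified sketch of the logic in this script; the real script also prints a timestamp and the result of every run):
+
+```python
+import subprocess
+
+ITERATIONS = 10
+THREADS_PER_BLOCK = [256]
+BLOCKS_PER_GRID = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384]
+
+for process in ["ee_mumu.sa", "gg_tt.sa", "gg_ttg.sa", "gg_ttgg.sa", "gg_ttggg.sa"]:
+    for tpb in THREADS_PER_BLOCK:
+        for bpg in BLOCKS_PER_GRID:
+            # Only configurations above the script's DOUBLE_PRECISION_CONSTANT (2560) are run
+            if tpb * bpg > 2560:
+                # buildSYCLProcess.sh (or buildCUDAProcess.sh for -l CUDA) builds and runs
+                # the process once for this configuration
+                subprocess.run(["./buildSYCLProcess.sh", "-n", process, "-i", str(ITERATIONS),
+                                "-t", str(tpb), "-b", str(bpg), "-r", "master"], check=True)
+```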
+
+#### Example:
+
+To run the script with the default arguments:
+
+```
+python3 performanceProfiler.py
+```
+
+To run the script with a different abstraction layer and branch:
+
+```
+python3 performanceProfiler.py -l CUDA -b my_branch
+```
+
+### buildSYCLProcess.sh
+
+This bash script compiles and executes standalone physics processes using the MadGraph5_aMC@NLO GPU development framework with oneAPI/SYCL.
+
+#### Usage
+
+Go to the `tools/profiling` directory and run:
+
+```
+./buildSYCLProcess.sh -n <process> -b <blocks per grid> -t <threads per block> -i <iterations> [-r <branch>] [-d <device id>]
+```
+
+#### Arguments:
+
+* `-n`: Name of the physics process being built and run (e.g., gg_ttgg).
+
+* `-b`: Number of blocks per grid.
+
+* `-t`: Number of threads per block.
+
+* `-i`: Number of iterations.
+
+* `-r`: (Optional) Branch name. Default: not displayed in the report folder prefix.
+
+* `-d`: (Optional) Flag for setting the device ID. Default: "--device_id 2" for oneAPI toolkit runs on GPUs, otherwise "--device_id 0" for the LLVM DPC++ compiler. You can also use `-d info` to list the device IDs available on that host.
+
+#### Example:
+
+```
+./buildSYCLProcess.sh -n gg_ttgg -b 1024 -t 128 -i 10 -r master -d 2
+```
+
+**Note**:
+
+To also compile for CPUs you need to enable more backends in the DPC++ toolchain (following the LLVM DPC++ instructions for CUDA alone does not install the dependencies needed to expose the other devices on the host). You can read more on how to enable additional backends [here](https://intel.github.io/llvm-docs/GetStartedGuide.html#build-dpc-toolchain).
+
+### buildCUDAProcess.sh
+
+This script compiles and executes physics processes using the MadGraph5_aMC@NLO GPU development framework with CUDA.
+
+#### Usage
+
+Go to the `tools/profiling` directory and run:
+
+```
+./buildCUDAProcess.sh -n <process> -b <blocks per grid> -t <threads per block> -i <iterations> -r <branch> -m <makefile arguments>
+```
+
+#### Arguments:
+
+* `-n`: Name of the physics process being built and run.
+
+* `-b`: Number of blocks per grid.
+
+* `-t`: Number of threads per block.
+
+* `-i`: Number of iterations.
+
+* `-r`: Branch name.
+
+* `-m`: Makefile arguments.
+
+#### Example:
+
+```
+./buildCUDAProcess.sh -n gg_ttgg -b 1024 -t 128 -i 10 -r master -m avx2
+```
+
+#### Notes
+
+This script assumes that it is run from the `tools/profiling` directory in the repository.
+Make sure to set the correct CUDA path according to your system.
+You may need to modify the script to set the correct GPU architecture or compiler options for your system.
+
+### sendData.py
+
+This script uploads the JSON reports produced by the profiler to the InfluxDB instance behind the Grafana dashboard.
+
+#### Usage:
+
+Go to the `tools/profiling` directory and run:
+
+```
+python3 sendData.py -r <report path> -b <branch>
+```
+
+The following arguments are available for this script:
+
+* `-r` or `--reportPath`: This argument specifies the path of the reports that will be sent to the database.
+
+* `-f` or `--fields`: This argument specifies the fields in the JSON data that will be sent to the database. The default value is `['EvtsPerSec[MatrixElems] (3)', 'EvtsPerSec[MECalcOnly] (3)']`.
+
+* `-a` or `--absLayer`: This argument specifies the abstraction layer (SYCL, CUDA or HIP) the reports were produced with. The default value is `SYCL`.
+
+* `-b` or `--branch`: This argument specifies the branch that the profiler data belongs to. The default value is `master`.
+
+* `-p` or `--profiler`: This argument enables the CI profiling defaults (the report folder is derived from the `*_NAME_PREFIX` environment variables). The default value is `0` (disabled).
+
+For example, to run the script with the default arguments:
+
+```
+python3 sendData.py
+```
+
+To run the script with a custom report path and branch:
+
+```
+python3 sendData.py -r /path/to/reports -b my_branch
+```
+
+Note that some options are only meaningful in certain situations: for example, `--profiler 1` relies on the CI environment variables (such as `SYCL_NAME_PREFIX`, `CUDA_NAME_PREFIX` or `HIP_NAME_PREFIX`) being set, as they are in the GitHub Actions workflows.
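+
+Each JSON report is converted into a single InfluxDB line-protocol record and POSTed to the database. As an illustration (the measured values below are invented), a `gg_ttgg` report produced with SYCL on the A100 node turns into a record of roughly this form:
+
+```
+gg_ttgg,CPU=AMD-Epyc-7313,GPU=a100,AbstractionLayer=SYCL,GCCVersion=11.3,GPUVersion=12.0.1,NumThreadsPerBlock=256,NumBlocksPerGrid=1024,NumIterations=10 Gridsize=262144,SYCL_EvtsPerSec[MatrixElems]_(3)=439000.0,SYCL_EvtsPerSec[MECalcOnly]_(3)=512000.0
+```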
+
+## Known issues:
+
+### Bug in GCC 11.3.0/11.3.1 using the LLVM DPC++ compiler
+
+There is a [bug](https://bugs.gentoo.org/842405) affecting GCC versions 11.3.0/11.3.1 when compiling the standalone physics processes, which results in two compilation errors in `fs_path.h`: `.../fs_path.h:1209:9: error: 'end' is missing exception specification 'noexcept'` and `.../fs_path.h:1217:9: error: 'end' is missing exception specification 'noexcept'`. GCC version 11.2.0 is not affected, and the issue appears to be fixed in later GCC versions (not yet verified here).
+
+### libmg5amc_common.so: cannot open shared object file: No such file or directory
+
+The directory containing `libmg5amc_common.so` is missing from `LD_LIBRARY_PATH`; add the build's `lib` directory to `LD_LIBRARY_PATH` before running the executable.
+
+### Wrong linker/GCC toolchain picked up by the DPC++ compiler
+
+If the wrong linker is being used, check which GCC installation the compiler picks up with `./sycl_workspace/llvm/build/bin/clang++ -v`. If it is not the intended one, add `--gcc-toolchain=/cvmfs/sft.cern.ch/lcg/releases/gcc/11.3.0-ad0f5/x86_64-centos8/lib/gcc/x86_64-pc-linux-gnu/11.3.0` to `CXXFLAGS`; this pins the GCC installation used by the compiler to the desired one. Setting `ENABLE_CI_PROFILER=1` adds this flag automatically in the makefiles of all the standalone physics processes, both in SYCL and in CUDA.
diff --git a/tools/profiling/buildCUDAProcess.sh b/tools/profiling/buildCUDAProcess.sh
new file mode 100755
index 0000000000..0923aca9ab
--- /dev/null
+++ b/tools/profiling/buildCUDAProcess.sh
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+#
+# __ __ _ ____ _ _ _ ____ ____ _ _
+# | \/ | __ _ __| | / ___| _ __ __ _ _ __ | |__ | || | / ___| | _ \ | | | |
+# | |\/| | / _` | / _` | | | _ | '__| / _` | | '_ \ | '_ \ | || |_ | | _ | |_) | | | | |
+# | | | | | (_| | | (_| | | |_| | | | | (_| | | |_) | | | | | |__ _| | |_| | | __/ | |_| |
+# |_| |_| \__,_| \__,_| \____| |_| \__,_| | .__/ |_| |_| |_| \____| |_| \___/
+# |_|
+#
+#
+# Bash script for compiling and executing physics processes using the MadGraph5_aMC@NLO GPU development framework
+# using CUDA/HIP
+#
+# Author: Jorgen Teig, CERN 2023
+#
+
+helpFunction()
+{
+    echo ""
+    echo "Usage: $0 -n gg_ttgg -b 1024 -t 128 -i 10"
+    echo -e "\t-n Name of the physics process being built and run"
+    echo -e "\t-b Blocks per grid"
+    echo -e "\t-t Threads per block"
+    echo -e "\t-i Iterations"
+    echo -e "\t-r Branch"
+    echo -e "\t-m Makefile arguments"
+    exit 1 # Exit script after printing help
+}
+
+while getopts "n:b:t:i:r:m:a:" opt
+do
+    case "$opt" in
+        n ) MG_PROC="$OPTARG" ;; #process to target
+        b ) blocksPerGrid="$OPTARG" ;;
+        t ) threadsPerBlock="$OPTARG" ;;
+        i ) iterations="$OPTARG" ;;
+        r ) branch="$OPTARG" ;;
+        m ) makeArgs="$OPTARG" ;;
+        ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent
+    esac
+done
+
+# Print helpFunction in case parameters are empty
+if [ -z "${MG_PROC}" ] || [ -z "${blocksPerGrid}" ] || [ -z "${threadsPerBlock}" ] || [ -z "${iterations}" ]
+then
+    echo "Some or all of the parameters are empty";
+    helpFunction
+fi
+
+# Begin script in case all parameters are correct
+
+# Warn if neither CUDA_NAME_PREFIX nor HIP_NAME_PREFIX is set
+if [ -z "$CUDA_NAME_PREFIX" ] && [ -z "$HIP_NAME_PREFIX" ]; then
+    echo "WARNING: CUDA_NAME_PREFIX/HIP_NAME_PREFIX is not set. Cannot append system info to JSON file names!"
+fi + +################################################################## + +# Set variables for later use + +# CUDA +# Check if CUDA_HOME has not been set from the outside, usefull in CI/CD +if [[ -z "$CUDA_HOME" ]]; then + COMPILER=$(which nvcc 2>/dev/null) + while [ -L "$COMPILER" ]; do + COMPILER=$(readlink "$COMPILER") + done + export COMPILER_PATH=$COMPILER + + if [[ "$COMPILER_PATH" ]]; then + export CUDA_HOME=$(dirname $(dirname $COMPILER_PATH)) + export PATH=$CUDA_HOME${PATH:+:${PATH}} + fi +fi + +# HIP +# Check if HIP_HOME has not been set from the outside, usefull in CI/CD +if [[ -z "$HIP_HOME" ]]; then + COMPILER=$(which hipcc 2>/dev/null) + while [ -L "$COMPILER" ]; do + COMPILER=$(readlink "$COMPILER") + done + export COMPILER_PATH=$COMPILER + + if [[ "$COMPILER_PATH" ]]; then + export HIP_HOME=$(dirname $(dirname $COMPILER_PATH)) + export PATH=$HIP_HOME${PATH:+:${PATH}} + fi +fi + +# Prefix for saving the JSON files in workspace folder in the tools/profiling directory +prefix="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" + +export USEBUILDDIR=1 +export NTPBMAX=1024 +export CXX=`which g++` +export FC=`which gfortran` + +export MG_EXE="./gcheck.exe" #GPU +#export MG_EXE="./check.exe" #CPU + +export WORKSPACE=$prefix/workspace_mg4gpu + +REPORT_FOLDER="${WORKSPACE}/$(date +"%y-%m-%d")_${CUDA_NAME_PREFIX}_${branch}" + +mkdir $WORKSPACE 2>/dev/null; true +mkdir $REPORT_FOLDER 2>/dev/null; true + +export MG_PROC_DIR=$prefix/../../epochX/cudacpp/$MG_PROC +export MG_SP_DIR=$MG_PROC_DIR/SubProcesses/P1_* +export MG5AMC_CARD_PATH=$MG_PROC_DIR/Cards + +# Build executable + +cd $MG_SP_DIR +make -j $makeArgs + +# Run executable + +cd build.${makeArgs:3}* +mkdir -p perf/data/ 2>/dev/null; true +$MG_EXE -j $blocksPerGrid $threadsPerBlock $iterations + +echo "${MG_EXE} -j ${blocksPerGrid} ${threadsPerBlock} ${iterations}" + +cd perf/data/ + +if [ -n "$CUDA_NAME_PREFIX" ]; then + mv 0-perf-test-run0.json "${REPORT_FOLDER}/test_${MG_PROC}_${CUDA_NAME_PREFIX}_${blocksPerGrid}_${threadsPerBlock}_${iterations}.json" +elif [ -n "$HIP_NAME_PREFIX" ]; then + mv 0-perf-test-run0.json "${REPORT_FOLDER}/test_${MG_PROC}_${HIP_NAME_PREFIX}_${blocksPerGrid}_${threadsPerBlock}_${iterations}.json" +else + mv 0-perf-test-run0.json "${REPORT_FOLDER}/test_${MG_PROC}_undefined_${blocksPerGrid}_${threadsPerBlock}_${iterations}.json" +fi \ No newline at end of file diff --git a/tools/profiling/buildSYCLProcess.sh b/tools/profiling/buildSYCLProcess.sh new file mode 100755 index 0000000000..efdfd66c71 --- /dev/null +++ b/tools/profiling/buildSYCLProcess.sh @@ -0,0 +1,154 @@ +#!/bin/bash + +# +# __ __ _ ____ _ _ _ ____ ____ _ _ +# | \/ | __ _ __| | / ___| _ __ __ _ _ __ | |__ | || | / ___| | _ \ | | | | +# | |\/| | / _` | / _` | | | _ | '__| / _` | | '_ \ | '_ \ | || |_ | | _ | |_) | | | | | +# | | | | | (_| | | (_| | | |_| | | | | (_| | | |_) | | | | | |__ _| | |_| | | __/ | |_| | +# |_| |_| \__,_| \__,_| \____| |_| \__,_| | .__/ |_| |_| |_| \____| |_| \___/ +# |_| +# +# +# Bash script for compiling and executing physics processes using the MadGraph5_aMC@NLO GPU development framework +# using oneAPI/SYCL +# +# Author: Jorgen Teig, CERN 2023 +# + +helpFunction() +{ + echo "" + echo "Usage: $0 -n gg_ttgg -b 1024 -t 128 -i 10" + echo -e "\t-n Name of the physics process being built and run" + echo -e "\t-b Blocks per grid" + echo -e "\t-t Threads per block" + echo -e "\t-i Iterations" + echo -e "\t-r Branch" + echo -e "\t-d Flag for setting device id" + exit 1 # Exit script after printing help +} + +while 
getopts "n:b:t:i:r:d:" opt +do + case "$opt" in + n ) MG_PROC="$OPTARG" ;; #process to target + b ) blocksPerGrid="$OPTARG" ;; + t ) threadsPerBlock="$OPTARG" ;; + i ) iterations="$OPTARG" ;; + r ) branch="$OPTARG" ;; + d ) DEVICE_ID="$OPTARG" ;; + ? ) helpFunction ;; # Print helpFunction in case parameter is non-existent + esac +done + +# Print helpFunction in case parameters are empty +if [ -z "${MG_PROC}" ] || [ -z "${blocksPerGrid}" ] || [ -z "${threadsPerBlock}" ] || [ -z "${iterations}" ] +then + echo "Some or all of the parameters are empty"; + helpFunction +fi + +# Added check if the SYCL_NAME_PREFIX variable are not set +if [ -z "$SYCL_NAME_PREFIX" ]; then + echo "WARNING: SYCL_NAME_PREFIX is not set. Cannot append system info to JSON file names!" +fi + +################################################################## + +# Assign correct SM level for NVIDIA GPUs + +# Check if nvidia-smi command exists +if command -v nvidia-smi > /dev/null 2>&1; then + + # Get the name of the GPU + GPU_NAME=$(nvidia-smi --query-gpu=name --format=csv,noheader) + + # GPU (DEVICE_ID=2 for oneAPI toolkit runs on GPUs, else DEVICE_ID=0 with LLVM compiler) + export DEVICE_ID=2 + # CPU + #export DEVICE_ID=1 +else + echo "nvidia-smi non existent on system, Nvidia GPU possibly not present!" + exit +fi + +case $GPU_NAME in + *V100S* ) export SM_LEVEL="sm_70" ;; + *A100* ) export SM_LEVEL="sm_80" ;; +esac + +################################################################## + +# Begin script in case all parameters and GPU specific settings are set + +################################################################## + +# Set variables for later use + +# Assumes that this is run from profiling directory in the repo +prefix=$(pwd) + +export USEBUILDDIR=1 +export NTPBMAX=1024 +export CUDA_PATH=/usr/local/cuda-12.0/ +export WORKSPACE=$prefix/workspace_mg4gpu + +export CXTYPE="thrust" + +# Old SYCLFLAGS +# export SYCLFLAGS="-fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xsycl-target-backend '--cuda-gpu-arch=$SM_LEVEL' -fgpu-rdc --cuda-path=$CUDA_PATH" + +export SYCLFLAGS="-fsycl -fsycl-targets=nvptx64-nvidia-cuda -Xcuda-ptxas --maxrregcount=255 -Xcuda-ptxas --verbose -Xsycl-target-backend --cuda-gpu-arch=$SM_LEVEL" + +# Compilation using OneAPI Toolkit through CVMFS +#export CXX=/cvmfs/projects.cern.ch/intelsw/oneAPI/linux/x86_64/2023/compiler/2023.0.0/linux/bin-llvm/clang++ + +# Compilation with LLVM DPC++ compiler +export DPCPP_HOME=/afs/cern.ch/work/j/jteig/sycl_workspace +export CXX=$DPCPP_HOME/llvm/llvm-20230418-fea99cc9ad67-gcc-11.2.1-cuda-12.0/bin/clang++ + +# Sets CUDA in PATH +export PATH=$CUDA_HOME:$PATH + +# Branch should be enviroment variable in main script and then passed down if none then it is not displayed in prefix +REPORT_FOLDER="${WORKSPACE}/$(date +"%y-%m-%d")_${SYCL_NAME_PREFIX}_${branch}" + +mkdir -p $WORKSPACE/mg4gpu/lib 2>/dev/null; true +mkdir -p $WORKSPACE/mg4gpu/bin 2>/dev/null; true +mkdir $REPORT_FOLDER 2>/dev/null; true + +export MG4GPU_LIB=$WORKSPACE/mg4gpu/lib +export MG4GPU_BIN=$WORKSPACE/mg4gpu/bin + +export MG_PROC_DIR=$prefix/../../epochX/sycl/$MG_PROC +export MG_SP_DIR=$MG_PROC_DIR/SubProcesses/P1_* + +export MG_LIBS_DIR="${MG4GPU_LIB}/build_${MG_PROC}_${SYCL_NAME_PREFIX}" + +if [[ -z "${DPCPP_HOME}" ]]; then + export MG_LIBS="$MG_LIBS_DIR" +else + export MG_LIBS="$DPCPP_HOME/llvm/build/lib:$MG_LIBS_DIR" +fi + +export MG_EXE_DIR="${MG4GPU_BIN}/build_${MG_PROC}_${SYCL_NAME_PREFIX}" +export MG_EXE="$MG_EXE_DIR/check.exe" +export MG5AMC_CARD_PATH=$MG_PROC_DIR/Cards + +# Build 
executable +cd $MG_SP_DIR +make -j build.d_inl0_hrd1/check.exe +mv -f ../../lib/build.*/ $MG_LIBS_DIR #2>/dev/null; true +mv -f build.*/ $MG_EXE_DIR + +# Run executable +cd $WORKSPACE + +if [ $DEVICE_ID == "info" ]; then + # Add MG Libs to linker library path and display the devices + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$MG_LIBS $MG_EXE --param_card $MG5AMC_CARD_PATH/param_card.dat --device_info 32 32 10 + +else + # Add MG Libs to linker library path and run the executable + LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$MG_LIBS $MG_EXE -j --json_file ${REPORT_FOLDER}/test_${MG_PROC}_${SYCL_NAME_PREFIX}_${blocksPerGrid}_${threadsPerBlock}_${iterations}.json --param_card $MG5AMC_CARD_PATH/param_card.dat --device_id $DEVICE_ID $blocksPerGrid $threadsPerBlock $iterations +fi diff --git a/tools/profiling/evaluation.py b/tools/profiling/evaluation.py index a3c7dfc8c1..a1fa06490b 100755 --- a/tools/profiling/evaluation.py +++ b/tools/profiling/evaluation.py @@ -4,6 +4,7 @@ Created on Tue Mar 30 09:59:03 2021 @author: andy +@edited: Jorgen Teig """ import json import os @@ -14,13 +15,65 @@ #from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox import seaborn as sns import configparser +import argparse import re import math +############################# +# # +# Argument defaults # +# # +############################# +physicsProcesses = ['ee_mumu', 'gg_tt', 'gg_ttg', 'gg_ttgg', 'gg_ttggg'] +reportPath = 'C:\\Users\\jteig\\cernbox\\Documents\\Report folder 2023\\Merged_23-02-07' + +savePath = 'C:\\Users\\jteig\\cernbox\\Documents\\Report folder 2023\\Graphs\\Graphs but big\\' + +filePrefix = 'test_A100_sycl_11.5' + +# 'test_v100s_sycl_11.5' + +hardware = 'Nvidia A100' +#hardware = 'NVIDIA v100s' + +############################# +# +# Compare graphs +# +############################# + +compare = True + +processToCompare = 'gg_ttgg.mad' + +graphsToCompare = ['test_' + processToCompare , 'test_A100_CUDA_' + processToCompare] + +stat = 'MECalcOnly' +#stat = 'MatrixElems' + +############################# + +parser = argparse.ArgumentParser(description='A program for profiling GPUs using MadGraph.') + +parser.add_argument("-p", help="Physic process used for making the graphs.", default=physicsProcesses[0], choices=physicsProcesses) +parser.add_argument("-r", help="Path for the directory containing the reports.", default=reportPath) +parser.add_argument("-s", help="Path for the directory where the graphs will be saved.", default=savePath) +parser.add_argument("-n", help="The prefix in the name of the files of the reports e.g test_v100s_sycl-11.5.", default=filePrefix) +parser.add_argument("-c", help="Option for comparing graphs instead of plotting them.", default=compare) +parser.add_argument("-d", help="What device/hardware has been used in the profiling, used as a descriptor in the plots", default=hardware) +parser.add_argument("-g", help="Graphs to use with the compare option.") + +args = parser.parse_args() + +#exit(0) class Evaluation: + + # Remove warnings regarding chained assignment using pandas dataframes + # The code is still working as expected + pd.set_option('mode.chained_assignment', None) list_results=[] #List results Data=pd.DataFrame() #To store all results in one DataFrame @@ -44,7 +97,7 @@ def load_df(self,path): listfolders = os.listdir() for datafolder in listfolders: - os.chdir(path+'/'+datafolder) #Jump in datafolder + os.chdir(path+'\\'+datafolder) #Jump in datafolder df_dict[datafolder]=pd.DataFrame() Data=pd.DataFrame() list_results =[] @@ -123,10 +176,10 @@ def 
convertunits_2(self): temp_df =pd.DataFrame() temp_df = dataframes[df][['NumIterations','NumThreadsPerBlock', 'NumBlocksPerGrid', 'EvtsPerSec[MatrixElems] (3)','EvtsPerSec[Rnd+Rmb+ME](123)', - 'EvtsPerSec[Rmb+ME] (23)']] + 'EvtsPerSec[Rmb+ME] (23)', 'EvtsPerSec[MECalcOnly] (3)']] columns_to_convert = ['EvtsPerSec[MatrixElems] (3)','EvtsPerSec[Rnd+Rmb+ME](123)', - 'EvtsPerSec[Rmb+ME] (23)'] + 'EvtsPerSec[Rmb+ME] (23)', 'EvtsPerSec[MECalcOnly] (3)'] for column in columns_to_convert: for val in range(len(temp_df[column])): @@ -156,8 +209,9 @@ def plots(self,df,plotlist): ax.spines['top'].set_visible(False) #enable grid - plt.rcParams['grid.linestyle']=':' - ax.yaxis.grid() + #plt.rcParams['grid.linestyle']=':' + #ax.yaxis.grid() + plt.grid(which='both',axis = 'y') #setup x-axis ax.set_xscale('log') @@ -175,7 +229,9 @@ def plots(self,df,plotlist): #Labels and titel plt.xlabel('Gridsize',fontsize=15) plt.ylabel('Troughput\n'+yaxis,fontsize=13.5) - plt.title(yaxis,fontsize=15) + + + plt.title('SYCL (GCC 11.3) on ATS-P',fontsize=15) # plt.ylabel(yaxis,fontsize=30) # plt.xlabel('NumThreadsPerBlock*NumBlocksPerGrid',fontsize=30) @@ -206,7 +262,13 @@ def plots(self,df,plotlist): #plt.rcParams['legend.title_fontsize']='large' #plt.text(16400, 250000, 'Here we have space for some\nfurther information like:\n\nCuda\nepoch2\ngg_ttgg',fontsize=25) plt.show() - fig.savefig('/home/andy/cernbox/data/raw/data'+'epoch2_ee_mumu_gcheck_float'+yaxis) + + # Adjusts labels to fit + plt.tight_layout() + plt.autoscale() + + # Savepath and physics process set by arguments + fig.savefig(args.s + args.p + '_' + yaxis) def data_compare(self,df_dict,compare_list,stat): #This function takes the dictinary of data frames and plots the selected df from the list @@ -227,8 +289,8 @@ def data_compare(self,df_dict,compare_list,stat): if temp_df.empty: pass else: - df_to_be_plotted=df_to_be_plotted.append(temp_df[(temp_df['EvtsPerSec[MatrixElems] (3)'] - ==eval(stat)(temp_df['EvtsPerSec[MatrixElems] (3)']))]) + df_to_be_plotted = pd.concat([df_to_be_plotted, temp_df[(temp_df['EvtsPerSec[MatrixElems] (3)'] + ==eval(stat)(temp_df['EvtsPerSec[MatrixElems] (3)']))]]) df_to_be_plotted=df_to_be_plotted.astype({'gridsize':int}) @@ -256,6 +318,12 @@ def data_compare(self,df_dict,compare_list,stat): def data_compare2(self,df_dict,compare_list): + + # Get names of files to compare + + graph1 = graphsToCompare[0].split('_') + graph2 = graphsToCompare[1].split('_') + #Takes a dictionary with dataframes and plots it in the same scatter plot fig = plt.figure() @@ -269,54 +337,85 @@ def data_compare2(self,df_dict,compare_list): #enable grid plt.rcParams['grid.linestyle']=':' + plt.rc('font', size=15) + plt.rc('axes', labelsize=50) plt.grid() #setup x axis ax1.set_xscale('log') - plt.xticks(df_dict[list(df_dict.keys())[0]]['gridsize']) + plt.xticks(df_dict[list(df_dict.keys())[0]]['gridsize'],size=15) ax1.set_xticklabels(df_dict[list(df_dict.keys())[0]]['gridsize'],rotation=75) #setup y axis #get maximum value of all df for ylim - max_y = [max(df_dict[df]['EvtsPerSec[MatrixElems] (3)']) for df in df_dict] + #max_y = max(df_dict[compare_list[0]]['EvtsPerSec[MatrixElems] (3)'], df_dict[compare_list[1]]['EvtsPerSec[MatrixElems] (3)']) + + #print(max_y) + + #min_y = [min(df_dict[df]['EvtsPerSec[MatrixElems] (3)']) for df in df_dict] + #plt.ylim(-0.1*10**9,max(max_y)*1.3) - plt.ylim(10**5,max(max_y)*10) + #plt.ylim(min(min_y),max(max_y)*10) ax1.set_yscale('log') #Add labels and title - plt.ylabel('Throughput\nMatrix Elements [s-1]') - 
plt.xlabel('Gridsize') - plt.title('Cuda throughput for ee_mumu on NVIDIA T4\n') + plt.ylabel('Throughput\n'+ stat +' [s-1]', size=30) + plt.xlabel('Gridsize (nBlocksGPU * nThreadsGPU)', size=30) + plt.title("SYCL vs CUDA throughput for "+ graph1[3] + '_' + graph1[4] +" on " + hardware + "\n", size=30,wrap=True) #Change colormap. More info here https://matplotlib.org/stable/tutorials/colors/colormaps.html cmap=plt.get_cmap('Set1') - i=1 + i=2 for data in compare_list: + + tempVar = 'EvtsPerSec['+ stat +'] (3)' + #Get maximum values for each dataset - maxima_y=max(df_dict[data]['EvtsPerSec[MatrixElems] (3)']) - maxima_x=df_dict[data].loc[df_dict[data]['EvtsPerSec[MatrixElems] (3)']==maxima_y,'gridsize'].item() + maxima_y=max(df_dict[data][tempVar]) + maxima_x=df_dict[data].loc[df_dict[data][tempVar]==maxima_y,'gridsize'].item() #label maximum values length=len(str(maxima_y))-1 label_maximas=str(round(maxima_y*10**-(length),3))+'e'+str(length) + if i == 2: + markerType='o' + else: + markerType='X' + #plot datasets - ax1.scatter(df_dict[data]['gridsize'].to_list(),df_dict[data]['EvtsPerSec[MatrixElems] (3)'].to_list(), + ax1.scatter(df_dict[data]['gridsize'].to_list(),df_dict[data][tempVar].to_list(), label=data+ ' (max = %s)'%label_maximas, color=cmap(i), - s=150,alpha=0.9) + s=150,alpha=0.9, marker=markerType) + + ax1.plot(df_dict[data]['gridsize'].to_list(),df_dict[data][tempVar].to_list(), color=cmap(i)) + #Get next cmap color - i+=1 + i+=2 #plot max values ax1.scatter(maxima_x,maxima_y,c='r',marker='o',s=50) - ax1.legend(loc='upper left') + ax1.legend(loc='best') + + + plt.autoscale() + plt.tight_layout() + plt.show() + + graph1 = graphsToCompare[0].split('_') + + graph2 = graphsToCompare[1].split('_') + + # args.s + graph1[3] + '_' + graph1[4] + '_vs_' + graph2[3] + '_' + graph2[4] + + fig.savefig(args.s + 'SYCL_' + graph1[3] + '_' + graph1[4] + '_vs_CUDA_' + graph2[3] + '_' + graph2[4] + '_' + stat +'.png', bbox_inches="tight") def dataframes_statistical_transfomation(self,df_dict,stat): #This functions takes a dictionary of dataframes and returns a dictionary with dataframes @@ -334,8 +433,8 @@ def dataframes_statistical_transfomation(self,df_dict,stat): if temp_df.empty: pass else: - df_dict_to_return[df]=df_dict_to_return[df].append(temp_df[(temp_df['EvtsPerSec[MatrixElems] (3)'] - ==eval(stat)(temp_df['EvtsPerSec[MatrixElems] (3)']))]) + df_dict_to_return[df]=pd.concat([df_dict_to_return[df], temp_df[(temp_df['EvtsPerSec[MatrixElems] (3)'] + ==eval(stat)(temp_df['EvtsPerSec[MatrixElems] (3)']))]]) df_dict_to_return[df]=df_dict_to_return[df].astype({'gridsize':int}) return df_dict_to_return @@ -368,28 +467,34 @@ def color(self,value): Ev.readConfig() #logo=mpimg.imread('/home/andy/cernbox/Madgraph/profiler/Logo/Logo_CERN.png') #imagebox=OffsetImage(logo) - path='/home/andy/cernbox/data/Andrea' + + # Gets directory containing the reports from -r argument + path = args.r + dataframes=Ev.load_df(path) #returns a directory that contains df for all data given in the path plotlist= [item for item in Ev.plot_confi['plots']if Ev.plot_confi['plots'][item] == 'on'] - - dataframes_conv=Ev.convertunits_2() #returns a df directory with converted units - dataframes_statisical=Ev.dataframes_statistical_transfomation(dataframes_conv,'max') - - ''' - Ev.plots(dataframes_conv['gcheck.exe_epoch1_cuda_ee_mumu_double'],plotlist) - ''' - #max(df_adj_units['EvtsPerSec[MatrixElems] (3)']) - # To be done - list_to_compare=['gcheck.exe_epoch1_cuda_ee_mumu_float','gcheck.exe_epoch1_cuda_ee_mumu_double'] - 
#test_df=Ev.data_compare(dataframes_conv,list_to_compare,'max') - Ev.data_compare2(dataframes_statisical,list_to_compare) + if not compare: + + print(dataframes_conv) + + # Plots the graphs in the supplied directories with the info from the config file + Ev.plots(dataframes_conv[args.n + '_' + args.p],plotlist) - dataframes_statisical[list(dataframes_statisical.keys())[0]] - dataframes_statisical[list(dataframes_statisical.keys())[0]]['gridsize'] - dataframes_statisical['gcheck.exe_epoch1_cuda_ee_mumu_float'].dtypes + else: + # Compare graphs + dataframes_statisical=Ev.dataframes_statistical_transfomation(dataframes_conv,'max') + + #max(df_adj_units['EvtsPerSec[MatrixElems] (3)']) + # To be done + #test_df=Ev.data_compare(dataframes_conv,list_to_compare,'max') + print(dataframes_statisical) + + Ev.data_compare2(dataframes_statisical,graphsToCompare) - \ No newline at end of file + #dataframes_statisical[list(dataframes_statisical.keys())[0]] + #dataframes_statisical[list(dataframes_statisical.keys())[0]]['gridsize'] + #dataframes_statisical['check.exe_epochx_cuda_ee_mumu_float'].dtypes \ No newline at end of file diff --git a/tools/profiling/performanceProfiler.py b/tools/profiling/performanceProfiler.py new file mode 100644 index 0000000000..f1b5fcf33f --- /dev/null +++ b/tools/profiling/performanceProfiler.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# +# __ __ _ ____ _ _ _ ____ ____ _ _ +# | \/ | __ _ __| | / ___| _ __ __ _ _ __ | |__ | || | / ___| | _ \ | | | | +# | |\/| | / _` | / _` | | | _ | '__| / _` | | '_ \ | '_ \ | || |_ | | _ | |_) | | | | | +# | | | | | (_| | | (_| | | |_| | | | | (_| | | |_) | | | | | |__ _| | |_| | | __/ | |_| | +# |_| |_| \__,_| \__,_| \____| |_| \__,_| | .__/ |_| |_| |_| \____| |_| \___/ +# |_| +# +# +# Python script for performance profiling using the MadGraph5_aMC@NLO GPU development framework +# +# Author: Jorgen Teig, CERN 2023 +# + +import sys +import subprocess +import datetime +import argparse + +# Parser arguments defaults +ABS_LAYER = "SYCL" +BRANCH = "master" + +# Physics processes defaults +MG_PROCESSES_SA = ["ee_mumu.sa", "gg_tt.sa", "gg_ttg.sa", "gg_ttgg.sa", "gg_ttggg.sa"] + +DOUBLE_PRECISION_CONSTANT = 2560 +ITERATIONS = 10 +THREADS_PER_BLOCK = [256] +#THREADS_PER_BLOCK = [32, 64, 128, 256] +BLOCKS_PER_GRID = [16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384] + +# Parser +parser = argparse.ArgumentParser(description='A program for profiling GPUs using MadGraph.') + +parser.add_argument("-l", help="Choose which abstraction layer you want to use (CUDA/SYCL).", default=ABS_LAYER) +parser.add_argument("-p", help="Choose which processes you want to profile.", default=MG_PROCESSES_SA, nargs='+') +parser.add_argument("-b", help="Choose which branch the madgraph4gpu repo is in.", default=BRANCH) + +pyArgs = parser.parse_args() + +# How many runs in total the program made +count = 0 + +for process in pyArgs.p: + for TPB in THREADS_PER_BLOCK: + for BPG in BLOCKS_PER_GRID: + if TPB * BPG > DOUBLE_PRECISION_CONSTANT: + + if pyArgs.l.upper() == 'SYCL': + + # There is no .sa in br_golden_epochX4 + # so it makes sure that .sa is included in everything other than that branch + # if pyArgs.b != 'br_golden_epochX4': + #if ".sa" not in process: + # process = process + ".sa" + + bashArgs = ["./buildSYCLProcess.sh", + "-n", process, + "-i", str(ITERATIONS), + "-t", str(TPB), + "-b", str(BPG), + "-r", str(pyArgs.b).lower()] + + elif pyArgs.l.upper() == 'CUDA' or pyArgs.l.upper() == 'HIP': + + bashArgs = ["./buildCUDAProcess.sh", 
+ "-n", process, + "-i", str(ITERATIONS), + "-t", str(TPB), + "-b", str(BPG), + "-r", str(pyArgs.b).lower()] + + else: sys.exit("No abstraction layer matching the supplied string!") + + time = str(datetime.datetime.now().strftime("%H:%M:%S")) + + print(time + " Started " + process + " with TPB("+ str(TPB) +") * BPG("+ str(BPG) +"): " + str(TPB * BPG) + "!") + + build = subprocess.run(bashArgs, check=True)#, stdout=subprocess.DEVNULL) + if build.returncode != 0: + print(time + " " + process + + " FAILED!, threadsPerBlock: " + str(TPB) + + ", blocksPerGrid: " + str(BPG) + + ", Product: " + str(TPB * BPG)) + else: + print(time + " " + process + + " COMPLETED!, threadsPerBlock: " + str(TPB) + + ", blocksPerGrid: " + str(BPG) + + ", Product: " + str(TPB * BPG)) + + count += 1 + +print("Builded " + str(count) + " processes!") \ No newline at end of file diff --git a/tools/profiling/profileconfig.ini b/tools/profiling/profileconfig.ini index a233430420..09ccf41895 100755 --- a/tools/profiling/profileconfig.ini +++ b/tools/profiling/profileconfig.ini @@ -3,12 +3,12 @@ make = make #____________________________________________ #REMOVE # whether you want to execute gcheck.exe or ccheck.exe #sys = ccheck.exe -sys = gcheck.exe +#sys = gcheck.exe # check.exe still in development -#sys = check.exe +sys = check.exe #____________________________________________ -epoch = epoch2 -abstr_layer = cuda +epoch = epochx +abstr_layer = sycl process = ee_mumu sigma = P1_Sigma_sm_epem_mupmum #process = gg_ttgg @@ -27,9 +27,10 @@ threads_max = 4 # creats a plot with (NumThreadsPerBlock * BlocksPerGrid) # # on the x-axis # ################################################################## -EvtsPerSec[Rnd+Rmb+ME](123) = on -EvtsPerSec[Rmb+ME] (23) = on +EvtsPerSec[Rnd+Rmb+ME](123) = off +EvtsPerSec[Rmb+ME] (23) = off EvtsPerSec[MatrixElems] (3) = on +EvtsPerSec[MECalcOnly] (3) = on NumMatrixElements(notNan) = off MatrixElemEventsPerSec = off NumIterations = off diff --git a/tools/profiling/sendData.py b/tools/profiling/sendData.py new file mode 100644 index 0000000000..dcc34a0004 --- /dev/null +++ b/tools/profiling/sendData.py @@ -0,0 +1,163 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# +# __ __ _ ____ _ _ _ ____ ____ _ _ +# | \/ | __ _ __| | / ___| _ __ __ _ _ __ | |__ | || | / ___| | _ \ | | | | +# | |\/| | / _` | / _` | | | _ | '__| / _` | | '_ \ | '_ \ | || |_ | | _ | |_) | | | | | +# | | | | | (_| | | (_| | | |_| | | | | (_| | | |_) | | | | | |__ _| | |_| | | __/ | |_| | +# |_| |_| \__,_| \__,_| \____| |_| \__,_| | .__/ |_| |_| |_| \____| |_| \___/ +# |_| +# +# +# Python script for sending generated reports from performance profiling to InfluxDB instance +# using the MadGraph5_aMC@NLO GPU development framework +# +# Author: Jorgen Teig, CERN 2023 +# + +import os +import glob +import json +import re +import logging +import subprocess +import datetime +import argparse +import sys + +# Parameter defaults +URL = os.environ.get('MADGRAPH4GPU_DB_URL') +secret = os.environ.get('MADGRAPH4GPU_DB_SECRET') +AUTH = ['db_user', secret] +PHYS_PROCESSES = ['ee_mumu', 'gg_ttggg', 'gg_ttgg', 'gg_ttg', 'gg_tt'] +ABS_LAYERS = ['SYCL', 'CUDA', 'HIP'] +BRANCH = 'master' +FIELDS = ['EvtsPerSec[MatrixElems] (3)', 'EvtsPerSec[MECalcOnly] (3)'] + +# Default reportPath (Useful for testing) +REPORT_PATH = 'C:\\Users\\jteig\\cernbox\\Documents\\test\\22-12-07_cudacpp_Xeon-Silver-4216_v100s_gcc-11.3_cuda-11.6.2_master' + +# Argument parser +parser = argparse.ArgumentParser(description='A script for sending data from profiler to 
InfluxDB.')
+
+parser.add_argument('-r', '--reportPath', help="Path for the reports that are being put into the database.", default=REPORT_PATH)
+parser.add_argument('-f', '--fields', help="Fields in the JSON to be put into the database.", default=FIELDS)
+parser.add_argument('-a', '--absLayer', help="Abstraction layer used when profiling.", default=ABS_LAYERS[0])
+parser.add_argument('-b', '--branch', help="Branch the profiler data is in.", default=BRANCH)
+parser.add_argument('-p', '--profiler', help="Enable CI profiling defaults.", default='0')
+
+args = parser.parse_args()
+
+#
+# Main
+#
+if __name__=='__main__':
+
+    # Sets report path for extracting the reports generated from performanceProfiler.py
+    if args.profiler == '1':
+
+        if args.absLayer.upper() == "SYCL":
+
+            syclNamePrefix = os.getenv('SYCL_NAME_PREFIX')
+
+            if syclNamePrefix is None:
+                logging.error('SYCL name prefix has not been set!')
+                sys.exit(1)
+
+            reportfolder = "workspace_mg4gpu/" + datetime.datetime.now().strftime('%y-%m-%d') + '_' + syclNamePrefix + '_' + args.branch
+
+            if not os.path.exists(reportfolder):
+                logging.error('SYCL report path does not exist!')
+                sys.exit(1)
+
+        elif args.absLayer.upper() == "CUDA":
+
+            cudaNamePrefix = os.getenv('CUDA_NAME_PREFIX')
+
+            if cudaNamePrefix is None:
+                logging.error('CUDA name prefix has not been set!')
+                sys.exit(1)
+
+            reportfolder = "workspace_mg4gpu/" + datetime.datetime.now().strftime('%y-%m-%d') + '_' + cudaNamePrefix + '_' + args.branch
+
+            if not os.path.exists(reportfolder):
+                logging.error('CUDA report path does not exist!')
+                sys.exit(1)
+
+        elif args.absLayer.upper() == "HIP":
+
+            hipNamePrefix = os.getenv('HIP_NAME_PREFIX')
+
+            if hipNamePrefix is None:
+                logging.error('HIP name prefix has not been set!')
+                sys.exit(1)
+
+            reportfolder = "workspace_mg4gpu/" + datetime.datetime.now().strftime('%y-%m-%d') + '_' + hipNamePrefix + '_' + args.branch
+
+            if not os.path.exists(reportfolder):
+                logging.error('HIP report path does not exist!')
+                sys.exit(1)
+
+        else:
+            logging.error('No abstraction layer that is supported has been selected!')
+            sys.exit(1)
+
+    else:
+        reportfolder = args.reportPath
+
+    filePath = []
+    filePath.append(glob.glob(reportfolder + '/test_*.json'))
+    filePath.append(glob.glob(reportfolder + '/*/test_*.json'))
+
+    # Flatten the list
+    files = [p for sublist in filePath for p in sublist]
+
+    for file in files:
+
+        with open(file, "r", encoding='utf-8') as f:
+
+            fileContents = f.read()
+
+            if fileContents != '':
+                data = json.loads(fileContents)
+
+                fileName = (os.path.basename(file))
+
+                for process in PHYS_PROCESSES:
+                    if process in fileName.lower():
+                        physicsProcess = process
+                        break
+
+                fileNameParts = fileName.split('_')
+
+                CPU = fileNameParts[4]
+
+                GPU = fileNameParts[5]
+
+                GCCVersion = fileNameParts[6].split('-')[1]
+
+                GPUVersion = fileNameParts[7].split('-')[1]
+
+                gridsize = data[0]["NumThreadsPerBlock"] * data[0]["NumBlocksPerGrid"]
+
+                DBdata = f'{physicsProcess},CPU={CPU},GPU={GPU},AbstractionLayer={args.absLayer},GCCVersion={GCCVersion},GPUVersion={GPUVersion},NumThreadsPerBlock={data[0]["NumThreadsPerBlock"]},NumBlocksPerGrid={data[0]["NumBlocksPerGrid"]},NumIterations={data[0]["NumIterations"]} Gridsize={gridsize}'
+
+                for field in FIELDS:
+                    value = float(re.findall(r'[\d.]+',data[0][field])[0])
+
+                    DBdata = DBdata + ',' + args.absLayer + "_" + field.replace(" ", "_") + '=' + str(value)
+
+                requestInfo = ["curl", "-i", "-k", '-XPOST', "-i", URL, "--header", "Authorization: Token "+AUTH[0]+":"+AUTH[1], "--data-raw", DBdata]
+
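+                # The curl call below POSTs the assembled line-protocol record to the endpoint in
+                # MADGRAPH4GPU_DB_URL; -k skips TLS certificate verification and the Authorization
+                # header is built from AUTH as "Token db_user:<MADGRAPH4GPU_DB_SECRET>".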
+ request = subprocess.run(requestInfo, stdout=subprocess.DEVNULL, check=True) + + f.close() + + if request.returncode != 0: + print(str(datetime.datetime.now().strftime("%H:%M:%S")) + " Request FAILED! Data: " + DBdata) + else: + print(str(datetime.datetime.now().strftime("%H:%M:%S")) + " Request COMPLETED! Data: " + DBdata) + + + else: logging.error('No information/fields in the JSON report!')