diff --git a/slurm/2_ApplicationSpecific/README.md b/slurm/2_ApplicationSpecific/README.md index f6de005..d906827 100644 --- a/slurm/2_ApplicationSpecific/README.md +++ b/slurm/2_ApplicationSpecific/README.md @@ -12,6 +12,7 @@ This directory contains batch scripts for a variety of applications that have sp | [LSDYNA](./lsdyna) | LSDYNA examples for both single and multi node message passing parallel jobs as well as single node shared memory parallel jobs (See [README](./lsdyna/README.md) for details) | | [MATLAB](./matlab) | The MATLAB directory includes example bash scripts and MATLAB functions for running [serial](./matlab/serial), [multithreaded](./matlab/multithreaded), and [GPU](./matlab/GPU) MATLAB jobs | | [Python](./python) | The Python directory includes examples bash scripts and Python functions for [serial](./python/serial) Python job, with multithreaded and GPU examples coming soon | +| [Quantum ESPRESSO](./quantum_espresso) | Quantum ESPRESSO examples for single and multi node jobs using both the CPU and GPU versions of QE | | [R](./R) | R example using RNA-seq data for testing and differential gene expression analysis | ## Additional Information diff --git a/slurm/2_ApplicationSpecific/quantum_espresso/README.md b/slurm/2_ApplicationSpecific/quantum_espresso/README.md new file mode 100644 index 0000000..b8e636a --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/README.md @@ -0,0 +1,25 @@ +# Quantum ESPRESSO on the CCR Clusters + +This directory includes examples of single and multi-node Quantum Espresso CPU and GPU Slurm scipts + +In these examples the Quantum ESPRESSO binary "pw.x" is only provided the input file as a parameter. 
+To effectively use Quantum ESPRESSO in parallel there are additional parameters that should be set +(specific to your use case) to distribute the processing over the requested resources, that is: +-nimage, -npools, -nband, -ntg, -ndiag or -northo (shorthands, respectively: -ni, -nk, -nb, -nt, -nd) + +See the [Quantum ESPRESSO Parallelization levels documentation](https://www.quantum-espresso.org/Doc/user_guide/node20.html) for more information + + +## Example CPU Scripts + +[quantum_espresso_CPU_1_node.bash](./quantum_espresso_CPU_1_node.bash) +[quantum_espresso_CPU_2_node.bash](./quantum_espresso_CPU_2_node.bash) + +## Example GPU Scripts + +[quantum_espresso_1_GPU_1_node.bash](./quantum_espresso_1_GPU_1_node.bash) +[quantum_espresso_1_GPU_2_node.bash](./quantum_espresso_1_GPU_2_node.bash) +[quantum_espresso_2_GPU_1_node.bash](./quantum_espresso_2_GPU_1_node.bash) +[quantum_espresso_2_GPU_2_node.bash](./quantum_espresso_2_GPU_2_node.bash) + + diff --git a/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_1_GPU_1_node.bash b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_1_GPU_1_node.bash new file mode 100644 index 0000000..00cddad --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_1_GPU_1_node.bash @@ -0,0 +1,95 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Similar tested GPUs on both nodes :- +## [A16|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +#SBATCH --constraint="[A16|A40|A100]&AVX512" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Similar tested GPUs on both nodes :- +## [A2|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +##SBATCH --constraint="[A2|A40|A100]&AVX512" +## +############################################################################### + +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +#SBATCH --exclusive + +module load ccrsoft/2023.01 + +# GPU version +module load nvhpc/22.7-CUDA-11.8.0 openmpi/4.1.4 quantumespresso/7.2 + +# report the GPU in the job 
+nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +# Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +# Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" + +# OpenMPI environment variables for PMIx over shared memory, then +# CUDA shared memory +export OMPI_MCA_pml=ucx && export OMPI_MCA_btl="self,vader,smcuda" +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +# Run Quantum ESPRESSO +LD_LIBRARY_PATH=/opt/software/slurm/lib64 srun --mpi=pmix pw.x -in "${INFILE}" > "${OUTFILE}" + +# Optional - save the config files for the run: +mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" + diff --git a/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_1_GPU_2_node.bash b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_1_GPU_2_node.bash new file mode 100644 index 0000000..9542c03 --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_1_GPU_2_node.bash @@ -0,0 +1,118 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Infiniband with ccrsoft/2023.01 :- +## [ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Similar tested GPUs on both nodes: +## [A16|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +## The constraint should be: +## --constraint="[ICE-LAKE-IB|CASCADE-LAKE-IB]&[A16|A40|A100]&AVX512" +## +## ...but multiple square bracket "exclusive or" sections are not supported +## +## The ICE-LAKE-IB nodes with a GPU are all A100 +## The CASCADE-LAKE-IB nodes with a GPU are all V100 +## The V100 does not work with Quantum ESPRESSO, hence we can use the following +## for two nodes with similar tested GPUs: +#SBATCH --constraint="ICE-LAKE-IB&[A16|A40|A100]&AVX512" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Infiniband +## IB +## +## Similar tested GPUs on both nodes: +## [A2|A40|A100] 
+## CUDA version of Quantum ESPRESSO only (currently) built for AVX512 :-
b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_2_GPU_1_node.bash new file mode 100644 index 0000000..475f505 --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_2_GPU_1_node.bash @@ -0,0 +1,95 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Similar tested GPUs on both nodes :- +## [A16|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +#SBATCH --constraint="[A16|A40|A100]&AVX512" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Similar tested GPUs on both nodes :- +## [A2|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +##SBATCH --constraint="[A2|A40|A100]&AVX512" +## 
+############################################################################### + +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=2 +## One MPI task per GPU +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=20 +#SBATCH --exclusive + +module load ccrsoft/2023.01 + +# GPU version +module load nvhpc/22.7-CUDA-11.8.0 openmpi/4.1.4 quantumespresso/7.2 + +# report the GPUs in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +# Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +# Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" + +# OpenMPI environment variables for PMIx over shared memory, then +# CUDA shared memory +export OMPI_MCA_pml=ucx && export OMPI_MCA_btl="self,vader,smcuda" +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +# Run Quantum ESPRESSO +LD_LIBRARY_PATH=/opt/software/slurm/lib64 srun --mpi=pmix pw.x -in "${INFILE}" > "${OUTFILE}" + +# Optional - save the config files for the run: +mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" + diff --git a/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_2_GPU_2_node.bash b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_2_GPU_2_node.bash new file mode 100644 index 0000000..8716ef9 --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_2_GPU_2_node.bash @@ -0,0 +1,117 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Infiniband with ccrsoft/2023.01 :- +## [ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Similar tested GPUs on both nodes :- +## [A16|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +## The constraint should be: +## --constraint="[ICE-LAKE-IB|CASCADE-LAKE-IB]&[A16|A40|A100]&AVX512]" +## +## ...but multiple square bracket "exclusive or" sections are not supported +## +## The ICE-LAKE-IB nodes with a GPU are all A100 +## The CASCADE-LAKE-IB nodes with a GPU are all V100 +## The V100 does not work with Quantum ESPRESSO, hence we can use the following +## for two nodes with similar tested GPUs: +#SBATCH --constraint="ICE-LAKE-IB&[A16|A40|A100]&AVX512" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Infiniband +## IB +## +## Similar tested GPUs on both nodes :- +## 
[A2|A40|A100] +## +## CUDA version of Quantum ESPRESSO only (currenlty) built for AVX512 :- +## AVX512 +## +##SBATCH --constraint="IB&[A2|A40|A100]&AVX512" +## +############################################################################### + +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=2 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=20 +#SBATCH --exclusive + +module load ccrsoft/2023.01 + +# GPU version +module load nvhpc/22.7-CUDA-11.8.0 openmpi/4.1.4 quantumespresso/7.2 ucx/1.13.1 + +# report the GPUs in the job +srun --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +# Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +# use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +# OpenMPI environment variables for PMIx over shared memory, then +# CUDA shared memory and finally over OpenFabrics Interface (Infiniband) +export OMPI_MCA_pml=ucx && export OMPI_MCA_btl="self,vader,smcuda,ofi" +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +# Run Quantum ESPRESSO +LD_LIBRARY_PATH=/opt/software/slurm/lib64 srun --mpi=pmix pw.x -in "${INFILE}" > "${OUTFILE}" + +# Optional - save the config files for the run: +mv "${GS}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" + +# Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi diff --git a/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_CPU_1_node.bash 
b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_CPU_1_node.bash new file mode 100644 index 0000000..c8df4d6 --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_CPU_1_node.bash @@ -0,0 +1,60 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 +#SBATCH --nodes=1 +## One MPI task per core +#SBATCH --ntasks-per-node=64 +#SBATCH --cpus-per-task=1 +#SBATCH --exclusive + +module load ccrsoft/2023.01 + +# CPU version +module load gcc/11.2.0 openmpi/4.1.1 quantumespresso/7.1 ucx/1.13.1 + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +# Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +# Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" + +# OpenMPI environment variables for PMIx over shared memory +export OMPI_MCA_pml=ucx && export OMPI_MCA_btl="self,vader" +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +# Run Quantum ESPRESSO +srun --mpi=pmix pw.x -in "${INFILE}" > "${OUTFILE}" + +# Optional - save the config files for the run: +mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" + diff --git a/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_CPU_2_node.bash b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_CPU_2_node.bash new file mode 100644 index 0000000..c02b8de --- /dev/null +++ b/slurm/2_ApplicationSpecific/quantum_espresso/quantum_espresso_CPU_2_node.bash @@ -0,0 +1,106 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Infiniband with ccrsoft/2023.01 +## [ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Use all AVX512 nodes or all AVX2 nodes (so both nodes are running the same +## Quantum Espresso binary) +## [AVX512|AVX2] +## +## The constraint should be: +## --constraint="[ICE-LAKE-IB|CASCADE-LAKE-IB]&[AVX512|AVX2]" +## +## ...but multiple square bracket "exclusive or" sections are not supported, +## +## All the ICE-LAKE-IB and CASCADE-LAKE-IB nodes are AVX512 +## Hence this is sufficient to guarantee we get two Infiniband nodes with AVX512 +## CPUs: +#SBATCH --constraint="[ICE-LAKE-IB|CASCADE-LAKE-IB]" +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Infiniband +## IB +## +## Use all AVX512 nodes or all AVX2 nodes (so both nodes are running the same +## Quantum Espresso binary) +## [AVX512|AVX2] +## +##SBATCH --constraint="IB&[AVX512|AVX2]" 
+############################################################################### + +#SBATCH --nodes=2 +## One MPI task per core on each node +#SBATCH --ntasks-per-node=40 +#SBATCH --cpus-per-task=1 +#SBATCH --exclusive + +module load ccrsoft/2023.01 + +# CPU version +module load gcc/11.2.0 openmpi/4.1.1 quantumespresso/7.1 ucx/1.13.1 + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +# Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +# use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +# OpenMPI environment variables for PMIx over shared memory first, then +# OpenFabrics Interface (Infiniband) +export OMPI_MCA_pml=ucx && export OMPI_MCA_btl="self,vader,ofi" +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +# Run Quantum ESPRESSO +srun --mpi=pmix pw.x -in "${INFILE}" > "${OUTFILE}" + +# Optional - save the config files for the run: +mv "${GS}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" + +# Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi +