diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md new file mode 100644 index 0000000..48b26cf --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md @@ -0,0 +1,69 @@ +# Example Quantum ESPRESSO Slurm scripts using NVIDIA's QE container + +Quantum ESPRESSO is an integrated suite of Open-Source computer codes for +electronic-structure calculations and materials modeling at the nanoscale, +based on density-functional theory, plane waves, and pseudopotentials. + +## Slurm scripts + +These Slurm scripts are templates. +You will have to provide cluster, partition, qos, account information, +and possibly "cpus-per-task" depending on the nodes you wish to use. + +There are many "--constraint=" line examples in each Slurm script. +There must be only one "--constraint=" line starting with "#SBATCH". +If you use multiple "#SBATCH --constraint=" lines, only the last one +will be used to constrain the job. + +e.g. + +``` +#SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +``` + +This example requests an H100 GPU node connected to the "EMERALD-RAPIDS-IB" +Infiniband network. 
+ + +For the single node examples, replace "[CCRgroupname]" in the "CONTAINER_DIR=" line: + +``` +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" +``` + +For the multi node examples ALSO replace "[CCRgroupname]" in the +"GS" Global Scratch line: + +``` +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" +``` + +## Quantum ESPRESSO Slurm script examples + +### One node scripts + +x86_64 + +[QE 1 GPU 1 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash) +[QE 2 GPUs 1 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash) + +ARM64 + +[QE 1 GPU 1 ARM64 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash) + + +### Two node scripts: + +x86_64 + +[QE 1 GPU 2 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash) +[QE 2 GPU 2 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash) + +ARM64 + +[QE 1 GPU 2 ARM64 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash) + + +See the [Quantum ESPRESSO website](https://www.quantum-espresso.org) and [Quantum ESPRESSO Documentation](https://www.quantum-espresso.org/documentation/) for more information on Quantum ESPRESSO. +For more info on the container image, see the [nvidia Quantum ESPRESSO container page](https://catalog.ngc.nvidia.com/orgs/hpc/containers/quantum_espresso) on their NGC Catalogue website. 
+ diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash new file mode 100644 index 0000000..bd822a0 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash @@ -0,0 +1,148 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +## +## Note: This example requires about 21000 MiB of GPU RAM +## +## The following GPU types require more than one GPU to run this example: +## +## GPU GPU RAM +## A2 15356 MiB ==> 2 GPUs +## A16 15356 MiB ==> 2 GPUs +## P4000 8192 MiB ==> 3 GPUs +## T4 15360 MiB ==> 2 GPUs +## + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container version 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +#SBATCH --constraint="[A40|A100|GH200|H100|V100]" +## +############################################################################### + +############################################################################### +## "faculty" cluster 
constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +##SBATCH --constraint="[A40|A100|H100|V100]" +## +############################################################################### + +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${CONTAINER_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## report the GPU in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" +sed -E -i "/^[[:space:]]*wfcdir([[:space:]]|=)/d" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B 
/projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${CONTAINER_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash new file mode 100644 index 0000000..f1aca75 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash @@ -0,0 +1,115 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select an account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --cluster="ub-hpc" +#SBATCH --partition="arm64" +#SBATCH --qos="arm64" +#SBATCH --export=HOME,TERM,SHELL +#SBATCH --constraint="GH200" +#SBATCH --time=01:00:00 +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" + 
+qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${CONTAINER_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## report the GPU in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" +sed -E -i "/^[[:space:]]*wfcdir([[:space:]]|=)/d" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" 
>&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${CONTAINER_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash new file mode 100644 index 0000000..fe5bf87 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash @@ -0,0 +1,212 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## Infininband constraints +## [EMERALD-RAPIDS-IB|SAPPHIRE-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Similar GPUs on both nodes: +## [A100|H100|V100]" +## +## The constraint should be: +## --constraint="[EMERALD-RAPIDS-IB|SAPPHIRE-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB]&[A100|H100|V100]" +## +## ...but multiple square bracket "exclusive or" sections are not supported +## +## Hence, pick one of: +## +#SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +## +##SBATCH --constraint="SAPPHIRE-RAPIDS-IB&H100" +## +##SBATCH --constraint="ICE-LAKE-IB&A100" +## +##SBATCH --constraint="CASCADE-LAKE-IB&V100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) 
+##---------------------------------------------------------------------------- +## +##SBATCH --constraint="[A16|A40|GH200]" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## The only Infiniband connected GPU nodes [at the time of writing] have A100 +## GPUs: +## +## Infininband constraints +## IB +## +## GPU constraints +## A100 +## +##SBATCH --constraint="IB&A100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) +##---------------------------------------------------------------------------- +## +## Similar GPUs on both nodes: +## [A2|A40|A100|H100|T4|V100] +## +##SBATCH --constraint="[A2|A40|A100|H100|T4|V100]" +## +############################################################################### + +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" + +TIMESTAMP="$(date "+%F_%T")" + +## use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" 
+container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${CONTAINER_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## report the GPUs in the job +srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Set the output directory "outdir" to the Global Scratch directory "${GS}" +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +## Optional: +## +## Set "wfcdir" the directory to store per process files (*.wfc{N}, *.igk{N}, etc.) 
+## to ${SLURMTMPDIR} (local scratch on each node) +## +## Note: You probably don't want to do this if you are planning to use "restart" +## or you need to perform further calculations using these files +if grep -E -q '^[[:space:]]*wfcdir([[:space:]]|=)' "${INFILE}" +then + # modify "wfcdir" setting + sed -E -i "/^[[:space:]]*wfcdir/s|^([[:space:]]*).*$|\1wfcdir = '${SLURMTMPDIR}'|" "${INFILE}" +else + # add "wfcdir" setting + sed -E -i "/^[[:space:]]*outdir/a \ wfcdir = '${SLURMTMPDIR}'" "${INFILE}" +fi + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${CONTAINER_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## 
Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + +## Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash new file mode 100644 index 0000000..ea6d84c --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash @@ -0,0 +1,140 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select an account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --cluster="ub-hpc" +#SBATCH --partition="arm64" +#SBATCH --qos="arm64" +#SBATCH --export=HOME,TERM,SHELL +#SBATCH --constraint="GH200" +#SBATCH --time=01:00:00 +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" + +TIMESTAMP="$(date "+%F_%T")" + +## use Global Scratch for run files 
+GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${CONTAINER_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## report the GPUs in the job +srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Set the output directory "outdir" to the Global Scratch directory "${GS}" +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +## Optional: +## +## Set "wfcdir" the directory to store per process files (*.wfc{N}, *.igk{N}, etc.) 
+## to ${SLURMTMPDIR} (local scratch on each node) +## +## Note: You probably don't want to do this if you are planning to use "restart" +## or you need to perform further calculations using these files +if grep -E -q '^[[:space:]]*wfcdir([[:space:]]|=)' "${INFILE}" +then + # modify "wfcdir" setting + sed -E -i "/^[[:space:]]*wfcdir/s|^([[:space:]]*).*$|\1wfcdir = '${SLURMTMPDIR}'|" "${INFILE}" +else + # add "wfcdir" setting + sed -E -i "/^[[:space:]]*outdir/a \ wfcdir = '${SLURMTMPDIR}'" "${INFILE}" +fi + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${CONTAINER_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## 
Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + +## Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash new file mode 100644 index 0000000..945c52d --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash @@ -0,0 +1,140 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +#SBATCH --constraint="[A16|A40|A100|GH200|H100|V100]" +## +############################################################################### + 
+############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +## Note: This example requires about 21000 MiB of GPU RAM +## To use P4000 GPUs (with 8192 MiB of GPU RAM) change this script to +## request at least 3 GPUs +## +##SBATCH --constraint="[A2|A40|A100|H100|P4000|T4|V100]" +## +############################################################################### + +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=2 +## One MPI task per GPU +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=20 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${CONTAINER_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## report the GPUs in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" +sed -E -i "/^[[:space:]]*wfcdir([[:space:]]|=)/d" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B 
/projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${CONTAINER_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash new file mode 100644 index 0000000..3e1a352 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash @@ -0,0 +1,210 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## 
+############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## Infiniband constraints +## [EMERALD-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Similar GPUs on both nodes: +## [A100|H100|V100] +## +## The constraint should be: +## --constraint="[EMERALD-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB]&[A100|H100|V100]" +## +## ...but multiple square bracket "exclusive or" sections are not supported +## +## Hence, pick one of: +## +#SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +## +##SBATCH --constraint="ICE-LAKE-IB&A100" +## +##SBATCH --constraint="CASCADE-LAKE-IB&V100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) +##---------------------------------------------------------------------------- +## +##SBATCH --constraint="[A16|A40]" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container version 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## The only Infiniband connected GPU nodes [at the time of writing] have A100 +## GPUs: +## +## Infiniband constraints +## IB +## +## GPU constraints +## A100 +## +##SBATCH --constraint="IB&A100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) 
+##---------------------------------------------------------------------------- +## +## Similar GPUs on both nodes: +## [A2|A40|A100|H100|T4|V100] +## +##SBATCH --constraint="[A2|A40|A100|H100|T4|V100]" +## +############################################################################### + +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=2 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=20 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" +
+TIMESTAMP="$(date "+%F_%T")" + +## use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${CONTAINER_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## required PMIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## report the GPUs in the job +srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Set the output directory "outdir" to the Global Scratch directory "${GS}" +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +## Optional: +## +## Set "wfcdir" the directory to store per process files (*.wfc{N}, *.igk{N}, etc.) +## to ${SLURMTMPDIR} (local scratch on each node) +## +## Note: You probably don't want to do this if you are planning to use "restart" +## or you need to perform further calculations using these files +if grep -E -q '^[[:space:]]*wfcdir([[:space:]]|=)' "${INFILE}" +then + # modify "wfcdir" setting + sed -E -i "/^[[:space:]]*wfcdir/s|^([[:space:]]*).*$|\1wfcdir = '${SLURMTMPDIR}'|" "${INFILE}" +else + # add "wfcdir" setting + sed -E -i "/^[[:space:]]*outdir/a \ wfcdir = '${SLURMTMPDIR}'" "${INFILE}" +fi + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requirements +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a single GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of 
GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${CONTAINER_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + +## Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi + diff --git a/containers/2_ApplicationSpecific/README.md b/containers/2_ApplicationSpecific/README.md index 144a5b8..efd89c4 100644 --- a/containers/2_ApplicationSpecific/README.md +++ b/containers/2_ApplicationSpecific/README.md @@ -19,6 +19,7 @@ Please refer to CCR's [container documentation](https://docs.ccr.buffalo.edu/en/ | [OpenFF-Toolkit](./Open_Force_Field_toolkit) | Open Force Field toolkit container with steps for building and running via Apptainer | | [OpenFOAM](./OpenFOAM) | OpenFOAM container with steps for building and running via Apptainer and Slurm | | [OpenSees](./OpenSees) | OpenSees container with steps for building and running via Apptainer | +| [Quantum ESPRESSO](./Quantum_ESPRESSO) | Example Slurm scripts to run the nvidia Quantum ESPRESSO container with Apptainer | | [SAS](./sas) | Guide for running SAS using Apptainer via Slurm batch script, command line, and GUI access | | [Seurat](./seurat) | Seurat container with example scRNA analysis | | [VASP](./vasp) | Example VASP container with steps for building and running via Apptainer |