From bd639bf2801e8332f170f565c62bd8a2113a3ae6 Mon Sep 17 00:00:00 2001 From: Tony Kew Date: Tue, 3 Feb 2026 18:19:17 -0500 Subject: [PATCH 1/6] Quantum ESPRESSO nvidia container Slurm example scripts Initial commit Tested on all the GPUs in the Center - does not run on L40S GPUs Tested succesfully with both ccrsoft/2023.01 ans crsoft/2024.04 both single and multi-node jobs Tony --- .../Quantum_ESPRESSO/README.md | 55 +++++ .../quantum_espresso_1_GPU_1_node.bash | 149 +++++++++++++ .../quantum_espresso_1_GPU_2_nodes.bash | 197 ++++++++++++++++++ .../quantum_espresso_2_GPU_1_node.bash | 141 +++++++++++++ .../quantum_espresso_2_GPU_2_nodes.bash | 195 +++++++++++++++++ containers/2_ApplicationSpecific/README.md | 1 + 6 files changed, 738 insertions(+) create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md new file mode 100644 index 0000000..363a016 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md @@ -0,0 +1,55 @@ +# Example Quantum ESPRESSO Slurm scripts using nvidia's QE container + +Quantum ESPRESSO is an integrated suite of Open-Source computer codes for +electronic-structure calculations and materials modeling at the nanoscale, +plane waves, and pseudopotentials + +## Slurm scripts + +These Slurm scripts are templates. +You will have to provide cluster, partition, qos, account information, +and possibly "cpus-per-task" depending on the nodes you wish to use. 
+ +There are many "--constraint=" line examples in each Slurm script. +There must be only one "--constraint=" line starting with "#SBATCH" +If you use multiple "#SBATCH --constraint=" lines, only the last one +will be used to constrain the job. + +e.g. + +> ``` +> #SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +> ``` + +This example requests an H100 GPU node connected to the "EMERALD-RAPIDS-IB" +Infiniband network." + + +For the single node examples, update the "BASE_DIR=" line: + +> ``` +> BASE_DIR="/projects/academic/[CCRgroupname]/QE" +> ``` + +For the multi node examples also update the "GS" Global Scratch line: + +> ``` +> GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" +> ``` + +## Quantum ESPRESSO Slurm script examples + +One node scripts: + +[QE 1 GPU 1 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash) +[QE 2 GPUs 1 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash) + +Two node scripts: + +[QE 1 GPU 2 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash) +[QE 2 GPU 2 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash) + + +See the [Quantum ESPRESSO website](https://www.quantum-espresso.org) and [Quantum ESPRESSO Documentation](https://www.quantum-espresso.org/documentation/) for more information on Quantum ESPRESSO. +For more info on the container image, see the [nvidia Quantum ESPRESSO container page](https://catalog.ngc.nvidia.com/orgs/hpc/containers/quantum_espresso) on their NGC Catalogue website. 
+ diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash new file mode 100644 index 0000000..00b0372 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash @@ -0,0 +1,149 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +## +## Note: This example requires about 21000 MiB of GPU RAM +## +## The following GPUs types requuire more than one GPU to run this example: +## +## GPU GPU RAM +## A2 15356 MiB ==> 2 GPUs +## A16 15356 MiB ==> 2 GPUs +## P4000 8192 MiB ==> 3 GPUs +## T4 15360 MiB ==> 2 GPUs +## + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +#SBATCH --constraint="[A40|A100|GH200|H100|V100]" +## +############################################################################### + 
+############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +##SBATCH --constraint="[A40|A100|H100|V100]" +## +############################################################################### + +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## BASE_DIR == directory with the Quantum ESPRESSO container image +BASE_DIR="/projects/academic/[CCRgroupname]/QE" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${BASE_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## report the GPU in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv 
\ + "${BASE_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash new file mode 100644 index 0000000..6fbd335 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash @@ -0,0 +1,197 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## 
+############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## Infininband constraints +## [EMERALD-RAPIDS-IB|SAPPHIRE-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Similar GPUs on both nodes: +## [A100|H100|V100]" +## +## The constraint should be: +## --constraint="[EMERALD-RAPIDS-IB|SAPPHIRE-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB]&[A100|H100|V100]" +## +## ...but multiple square bracket "exclusive or" sections are not supported +## +## Hence, pick one of: +## +#SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +## +##SBATCH --constraint="SAPPHIRE-RAPIDS-IB&H100" +## +##SBATCH --constraint="ICE-LAKE-IB&A100" +## +##SBATCH --constraint="CASCADE-LAKE-IB&V100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) +##---------------------------------------------------------------------------- +## +##SBATCH --constraint="[A16|A40|GH200]" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## The only Infiniband connected GPU nodes [at the time of writing] have A100 +## GPUs: +## +## Infininband constraints +## IB +## +## GPU constraints +## A100 +## +##SBATCH --constraint="IB&A100" +## +##---------------------------------------------------------------------------- +## 
Non Infiniband connected GPU nodes (MPI over Ethernet) +##---------------------------------------------------------------------------- +## +## Similar GPUs on both nodes: +## [A2|A40|A100|H100|T4|V100] +## +##SBATCH --constraint="[A2|A40|A100|H100|T4|V100]" +## +############################################################################### + +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## BASE_DIR == directory with the Quantum ESPRESSO container image +BASE_DIR="/projects/academic/[CCRgroupname]/QE" + +TIMESTAMP="$(date "+%F_%T")" + +## use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${BASE_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## report the GPUs in the job +srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${BASE_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then 
+ mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + +## Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash new file mode 100644 index 0000000..2c01be4 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash @@ -0,0 +1,141 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +#SBATCH --constraint="[A16|A40|A100|GH200|H100|V100]" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints 
+############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +## Note: This example requires about 21000 MiB of GPU RAM +## To use P4000 GPUs (with 8192 MiB of GPU RAM) change this script to +## request at least 3 GPUs +## +##SBATCH --constraint="[A2|A40|A100|H100|P4000|T4|V100]" +## +############################################################################### + +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=2 +## One MPI task per GPU +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=20 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## BASE_DIR == directory with the Quantum ESPRESSO container image +BASE_DIR="/projects/academic/[CCRgroupname]/QE" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${BASE_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## report the GPU in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv 
\ + "${BASE_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash new file mode 100644 index 0000000..8e93325 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash @@ -0,0 +1,195 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. +## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release + +## Select a cluster, partition, qos and account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --cluster="[cluster]" +#SBATCH --partition="[partition]" +#SBATCH --qos="[qos]" +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --time=01:00:00 + +############################################################################### +## "ub-hpc" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## 
+############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## Infininband constraints +## [EMERALD-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB] +## +## Similar GPUs on both nodes: +## [A100|H100|V100]" +## +## The constraint should be: +## --constraint="[EMERALD-RAPIDS-IB|ICE-LAKE-IB|CASCADE-LAKE-IB]&[A100|H100|V100]" +## +## ...but multiple square bracket "exclusive or" sections are not supported +## +## Hence, pick one of: +## +#SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +## +##SBATCH --constraint="ICE-LAKE-IB&A100" +## +##SBATCH --constraint="CASCADE-LAKE-IB&V100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) +##---------------------------------------------------------------------------- +## +##SBATCH --constraint="[A16|A40]" +## +############################################################################### + +############################################################################### +## "faculty" cluster constraints +############################################################################### +## +## Note: The Quantum ESPRESSO container verison 7.3.1 does not support L40S +## GPU - this may be resolved in newer versions compiled with "-gpu=cc89" +## +############################################################################### +## Infiniband connected GPU nodes +##---------------------------------------------------------------------------- +## +## The only Infiniband connected GPU nodes [at the time of writing] have A100 +## GPUs: +## +## Infininband constraints +## IB +## +## GPU constraints +## A100 +## +##SBATCH --constraint="IB&A100" +## +##---------------------------------------------------------------------------- +## Non Infiniband connected GPU nodes (MPI over Ethernet) 
+##---------------------------------------------------------------------------- +## +## Similar GPUs on both nodes: +## [A2|A40|A100|H100|T4|V100] +## +##SBATCH --constraint="[A2|A40|A100|H100|T4|V100]" +## +############################################################################### + +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=2 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=2 +#SBATCH --cpus-per-task=20 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## BASE_DIR == directory with the Quantum ESPRESSO container image +BASE_DIR="/projects/academic/[CCRgroupname]/QE" + +TIMESTAMP="$(date "+%F_%T")" + +## use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${BASE_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## report the GPUs in the job +srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ + --sharens \ + --nv \ + "${BASE_DIR}/${container_image}" \ + pw.x -in "${INFILE}" > "${OUTFILE}" + +## Optional: +## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then 
+ mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + +## Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi + diff --git a/containers/2_ApplicationSpecific/README.md b/containers/2_ApplicationSpecific/README.md index 144a5b8..efd89c4 100644 --- a/containers/2_ApplicationSpecific/README.md +++ b/containers/2_ApplicationSpecific/README.md @@ -19,6 +19,7 @@ Please refer to CCR's [container documentation](https://docs.ccr.buffalo.edu/en/ | [OpenFF-Toolkit](./Open_Force_Field_toolkit) | Open Force Field toolkit container with steps for building and running via Apptainer | | [OpenFOAM](./OpenFOAM) | OpenFOAM container with steps for building and running via Apptainer and Slurm | | [OpenSees](./OpenSees) | OpenSees container with steps for building and running via Apptainer | +| [Quantum ESPRESSO](./Quantum_ESPRESSO) | Example Slurm scripts to run the nvidia Quantum ESPRESSO container with Apptainer | | [SAS](./sas) | Guide for running SAS using Apptainer via Slurm batch script, command line, and GUI access | | [Seurat](./seurat) | Seurat container with example scRNA analysis | | [VASP](./vasp) | Example VASP container with steps for building and running via Apptainer | From fd2a9795ca77ba42c4c182695f2714beaa79136b Mon Sep 17 00:00:00 2001 From: Tony Kew Date: Wed, 4 Feb 2026 11:34:38 -0500 Subject: [PATCH 2/6] Performance improvement for multi node jobs For 2 node jobs, added the "wfcdir" parameter to the input file. 
This change uses ${SLURMTMPDIR} to store per process files on local scratch rather than global scratch Tony --- .../quantum_espresso_1_GPU_2_nodes.bash | 17 +++++++++++++++++ .../quantum_espresso_2_GPU_2_nodes.bash | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash index 6fbd335..316b5dd 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash @@ -145,9 +145,26 @@ cd "benchmarks/AUSURF112" OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" echo "OUTFILE=${OUTFILE}" +## Set the output directory "outdir" to the Global Scratch directory "${GS}" mkdir -p "${GS}" sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" +## Optional: +## +## Set "wfcdir" the directory to store per process files (*.wfc{N}, *.igk{N}, etc.) 
+## to ${SLURMTMPDIR} (local scratch on each node) +## +## Note: You probably don't want to do this if you are planning to use "restart" +## or you need to perform further calculations using these files +if grep -E -q '^[[:space:]]*wfcdir([[:space:]]|=)' "${INFILE}" +then + # modify "wfcdir" setting + sed -E -i "/^[[:space:]]*wfcdir/s|^([[:space:]]*).*$|\1wfcdir = '${SLURMTMPDIR}'|" "${INFILE}" +else + # add "wfcdir" setting + sed -E -i "/^[[:space:]]*outdir/a \ wfcdir = '${SLURMTMPDIR}'" "${INFILE}" +fi + ## There are several options to save data file and the charge density files to ## disk - in this case the files will be written to the scratch space defined above ## Generally, the more data written to disk, the lower the RAM requiremenets diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash index 8e93325..44e85ca 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash @@ -143,9 +143,26 @@ cd "benchmarks/AUSURF112" OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" echo "OUTFILE=${OUTFILE}" +## Set the output directory "outdir" to the Global Scratch directory "${GS}" mkdir -p "${GS}" sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" +## Optional: +## +## Set "wfcdir" the directory to store per process files (*.wfc{N}, *.igk{N}, etc.) 
+## to ${SLURMTMPDIR} (local scratch on each node) +## +## Note: You probably don't want to do this if you are planning to use "restart" +## or you need to perform further calculations using these files +if grep -E -q '^[[:space:]]*wfcdir([[:space:]]|=)' "${INFILE}" +then + # modify "wfcdir" setting + sed -E -i "/^[[:space:]]*wfcdir/s|^([[:space:]]*).*$|\1wfcdir = '${SLURMTMPDIR}'|" "${INFILE}" +else + # add "wfcdir" setting + sed -E -i "/^[[:space:]]*outdir/a \ wfcdir = '${SLURMTMPDIR}'" "${INFILE}" +fi + ## There are several options to save data file and the charge density files to ## disk - in this case the files will be written to the scratch space defined above ## Generally, the more data written to disk, the lower the RAM requiremenets From 25505dc6a9817efbb0ed36f26a3bf8b0bdc02bd0 Mon Sep 17 00:00:00 2001 From: Tony Kew Date: Wed, 4 Feb 2026 14:27:29 -0500 Subject: [PATCH 3/6] Pagination and layout fixes Tony --- .../Quantum_ESPRESSO/README.md | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md index 363a016..5824763 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md @@ -10,16 +10,16 @@ These Slurm scripts are templates. You will have to provide cluster, partition, qos, account information, and possibly "cpus-per-task" depending on the nodes you wish to use. -There are many "--constraint=" line examples in each Slurm script. -There must be only one "--constraint=" line starting with "#SBATCH" +There are many "--constraint=" line examples in each Slurm script. +There must be only one "--constraint=" line starting with "#SBATCH" If you use multiple "#SBATCH --constraint=" lines, only the last one will be used to constrain the job. e.g. 
-> ``` -> #SBATCH --constraint="EMERALD-RAPIDS-IB&H100" -> ``` +``` +#SBATCH --constraint="EMERALD-RAPIDS-IB&H100" +``` This example requests an H100 GPU node connected to the "EMERALD-RAPIDS-IB" Infiniband network." @@ -27,15 +27,15 @@ Infiniband network." For the single node examples, update the "BASE_DIR=" line: -> ``` -> BASE_DIR="/projects/academic/[CCRgroupname]/QE" -> ``` +``` +BASE_DIR="/projects/academic/[CCRgroupname]/QE" +``` For the multi node examples also update the "GS" Global Scratch line: -> ``` -> GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" -> ``` +``` +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" +``` ## Quantum ESPRESSO Slurm script examples From c6dd7e8c7b8ae711fae6b6acef61737ec03861f9 Mon Sep 17 00:00:00 2001 From: Tony Kew Date: Wed, 4 Feb 2026 14:31:06 -0500 Subject: [PATCH 4/6] Clarify text to replace Tony --- containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md index 5824763..2e7cb57 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md @@ -25,13 +25,14 @@ This example requests an H100 GPU node connected to the "EMERALD-RAPIDS-IB" Infiniband network." 
-For the single node examples, update the "BASE_DIR=" line: +For the single node examples, replace "[CCRgroupname]" in the "BASE_DIR=" line: ``` BASE_DIR="/projects/academic/[CCRgroupname]/QE" ``` -For the multi node examples also update the "GS" Global Scratch line: +For the multi node examples ALSO replace "[CCRgroupname]" in the +"GS" Global Scratch line: ``` GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" From 12a16d3c7bd1ac8ed64f6a419dca1000091b7a66 Mon Sep 17 00:00:00 2001 From: Tony Kew Date: Wed, 4 Feb 2026 17:10:41 -0500 Subject: [PATCH 5/6] Added ARM64 example scripts Fixed some layout issues All script work under both ccrsoft/2023.01 and ccrsoft/2024.4 Tony --- .../Quantum_ESPRESSO/README.md | 17 ++- .../quantum_espresso_1_GPU_1_node.bash | 3 +- .../quantum_espresso_1_GPU_1_node_ARM64.bash | 115 ++++++++++++++ .../quantum_espresso_1_GPU_2_nodes.bash | 2 - .../quantum_espresso_1_GPU_2_nodes_ARM64.bash | 140 ++++++++++++++++++ .../quantum_espresso_2_GPU_1_node.bash | 5 +- .../quantum_espresso_2_GPU_2_nodes.bash | 2 - 7 files changed, 273 insertions(+), 11 deletions(-) create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash create mode 100644 containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md index 2e7cb57..d81f5f5 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md @@ -40,16 +40,29 @@ GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" ## Quantum ESPRESSO Slurm script examples -One node scripts: +### One node scripts + +x86_64 [QE 1 GPU 1 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash) [QE 2 GPUs 1 node Slurm 
Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash) -Two node scripts: +ARM64 + +[QE 1 GPU 1 ARM64 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash) + + +### Two node scripts: + +x86_64 [QE 1 GPU 2 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash) [QE 2 GPU 2 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash) +ARM64 + +[QE 1 GPU 2 ARM64 node Slurm Script](https://raw.githubusercontent.com/ubccr/ccr-examples/refs/heads/main/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash) + See the [Quantum ESPRESSO website](https://www.quantum-espresso.org) and [Quantum ESPRESSO Documentation](https://www.quantum-espresso.org/documentation/) for more information on Quantum ESPRESSO. For more info on the container image, see the [nvidia Quantum ESPRESSO container page](https://catalog.ngc.nvidia.com/orgs/hpc/containers/quantum_espresso) on their NGC Catalogue website. 
diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash index 00b0372..21fa0c9 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash @@ -5,8 +5,6 @@ ## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md ## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs -## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release - ## Select a cluster, partition, qos and account that is appropriate for your use case ## Available options and more details are provided in CCR's documentation: ## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos @@ -99,6 +97,7 @@ echo "OUTFILE=${OUTFILE}" ## Use ${SLURMTMPDIR} for run files sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" +sed -E -i "/^[[:space:]]*wfcdir([[:space:]]|=)/d" "${INFILE}" ## There are several options to save data file and the charge density files to ## disk - in this case the files will be written to the scratch space defined above diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash new file mode 100644 index 0000000..abd0738 --- /dev/null +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash @@ -0,0 +1,115 @@ +#!/bin/bash -l + +## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select an account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --cluster="ub-hpc" +#SBATCH --partition="arm64" +#SBATCH --qos="arm64" +#SBATCH --export=HOME,TERM,SHELL +#SBATCH --constraint="GH200" +#SBATCH --time=01:00:00 +#SBATCH --nodes=1 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## BASE_DIR == directory with the Quantum ESPRESSO container image +BASE_DIR="/projects/academic/[CCRgroupname]/QE" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${BASE_DIR}" > /dev/null +if ! test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## report the GPU in the job +nvidia-smi -L +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! 
[ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +TIMESTAMP="$(date "+%F_%T")" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Use ${SLURMTMPDIR} for run files +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" +sed -E -i "/^[[:space:]]*wfcdir([[:space:]]|=)/d" "${INFILE}" + +## There are several options to save data file and the charge density files to +## disk - in this case the files will be written to the scratch space defined above +## Generally, the more data written to disk, the lower the RAM requiremenets +## +## see: +## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20 +## +## This problem requires about 21000 MiB of GPU ram to run in GPU memory +## Using the disk_io = 'high' will reduce this requirement, but the job still +## fails in our tests on a sinlge GPU with ~15000 MiB of GPU ram +## +total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))" +if [ ${total_gpu_memory} -lt 21000 ] +then + # Set disk_io to "high" to minimise needed GPU RAM + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}" + echo "Warning: This job requires about 21000 MiB of GPU RAM and will likley fail" >&2 + echo " with a cuMemAlloc Out of memory error" >&2 +else + # Set "disk_io" to the default setting of "low" + sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}" +fi + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## Run Quantum ESPRESSO +srun --mpi=pmix \ + --export=ALL \ + apptainer exec \ + -B 
/projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \
+        --sharens \
+        --nv \
+        "${BASE_DIR}/${container_image}" \
+        pw.x -in "${INFILE}" > "${OUTFILE}"
+
+## Optional:
+## If the "disk_io" option is set to anything other than "none" a .save
+## directory is created - move this directory from scratch space
+if test -d "${SLURMTMPDIR}/${BASE}.save"
+then
+    mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save"
+fi
+
diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash
index 316b5dd..3361bb0 100644
--- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash
+++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash
@@ -5,8 +5,6 @@
 ## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md
 ## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs
 
-## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release
-
 ## Select a cluster, partition, qos and account that is appropriate for your use case
 ## Available options and more details are provided in CCR's documentation:
 ## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos
diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash
new file mode 100644
index 0000000..a5ade00
--- /dev/null
+++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash
@@ -0,0 +1,140 @@
+#!/bin/bash -l
+
+## This file is intended to serve as a template to be downloaded and modified for your use case. 
+## For more information, refer to the following resources whenever referenced in the script- +## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md +## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs + +## Select an account that is appropriate for your use case +## Available options and more details are provided in CCR's documentation: +## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos +#SBATCH --account="[SlurmAccountName]" + +#SBATCH --cluster="ub-hpc" +#SBATCH --partition="arm64" +#SBATCH --qos="arm64" +#SBATCH --export=HOME,TERM,SHELL +#SBATCH --constraint="GH200" +#SBATCH --time=01:00:00 +#SBATCH --nodes=2 +#SBATCH --gpus-per-node=1 +## One MPI task per GPU on each node +#SBATCH --ntasks-per-node=1 +#SBATCH --cpus-per-task=40 +## Note: Use "--exclusive" for shared memory/shared namespace with apptainer +#SBATCH --exclusive + +## BASE_DIR == directory with the Quantum ESPRESSO container image +BASE_DIR="/projects/academic/[CCRgroupname]/QE" + +TIMESTAMP="$(date "+%F_%T")" + +## use Global Scratch for run files +GS="/vscratch/[CCRgroupname]/QE/${TIMESTAMP}" + +qe_version="7.3.1" + + +qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" +container_image="quantum_espresso-${qe_version}-$(arch).sif" +## Fetch the Quantum ESPRESSO container, if necessary +pushd "${BASE_DIR}" > /dev/null +if ! 
test -f "${container_image}" +then + apptainer pull "${container_image}" "${qe_url}" +fi +popd > /dev/null + +## Use the OpenMPI UCX Point-to-point Messaging Layer +export OMPI_MCA_pml=ucx + +## requerted MPIx environment variables for authentication (or srun can fail) +export PMIX_MCA_psec=native && export PMIX_MCA_gds=hash + +## report the GPUs in the job +srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- bash -c 'printf "hostname: %s\n%s\n\n" "$(hostname -s)" "$(nvidia-smi -L)"' +echo + +export OMP_NUM_THREADS=${SLURM_CPUS_PER_TASK} + +BASE=ausurf +INFILE=${BASE}.in + +## Get the test files if necessary... +if ! [ -f "benchmarks/AUSURF112/${INFILE}" ] +then + git clone "https://github.com/QEF/benchmarks.git" +fi + +cd "benchmarks/AUSURF112" + +OUTFILE="${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.out" +echo "OUTFILE=${OUTFILE}" + +## Set the output directory "outdir" to the Global Scratch directory "${GS}" +mkdir -p "${GS}" +sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${GS}'|" "${INFILE}" + +## Optional: +## +## Set "wfcdir" the directory to store per process files (*.wfc{N}, *.igk{N}, etc.) 
+## to ${SLURMTMPDIR} (local scratch on each node)
+##
+## Note: You probably don't want to do this if you are planning to use "restart"
+## or you need to perform further calculations using these files
+if grep -E -q '^[[:space:]]*wfcdir([[:space:]]|=)' "${INFILE}"
+then
+    # modify "wfcdir" setting
+    sed -E -i "/^[[:space:]]*wfcdir/s|^([[:space:]]*).*$|\1wfcdir = '${SLURMTMPDIR}'|" "${INFILE}"
+else
+    # add "wfcdir" setting
+    sed -E -i "/^[[:space:]]*outdir/a \ wfcdir = '${SLURMTMPDIR}'" "${INFILE}"
+fi
+
+## There are several options to save data file and the charge density files to
+## disk - in this case the files will be written to the scratch space defined above
+## Generally, the more data written to disk, the lower the RAM requirements
+##
+## see:
+## https://www.quantum-espresso.org/Doc/INPUT_PW.html#id20
+##
+## This problem requires about 21000 MiB of GPU ram to run in GPU memory
+## Using the disk_io = 'high' will reduce this requirement, but the job still
+## fails in our tests on a single GPU with ~15000 MiB of GPU ram
+##
+total_gpu_memory="$(expr $(echo $(srun --export=ALL --ntasks-per-node=1 --nodes="${SLURM_JOB_NUM_NODES}" -- nvidia-smi --query-gpu=memory.total --format=csv,noheader | awk '{print $1, "+"}') 0))"
+if [ ${total_gpu_memory} -lt 21000 ]
+then
+    # Set disk_io to "high" to minimise needed GPU RAM
+    sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'high'|" "${INFILE}"
+    echo "Warning: This job requires about 21000 MiB of GPU RAM and will likely fail" >&2
+    echo "         with a cuMemAlloc Out of memory error" >&2
+else
+    # Set "disk_io" to the default setting of "low"
+    sed -E -i "/^[[:space:]]*disk_io/s|^([[:space:]]*).*$|\1disk_io = 'low'|" "${INFILE}"
+fi
+
+## Run Quantum ESPRESSO
+srun --mpi=pmix \
+    --export=ALL \
+    apptainer exec \
+        -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \
+        --sharens \
+        --nv \
+        "${BASE_DIR}/${container_image}" \
+        pw.x -in "${INFILE}" > "${OUTFILE}"
+
+## Optional: 
+## If the "disk_io" optoin is set to anything other than "none" a .save +## directory is created - move this directory from scratch space +if test -d "${SLURMTMPDIR}/${BASE}.save" +then + mv "${SLURMTMPDIR}/${BASE}.save" "${SLURM_SUBMIT_DIR}/${BASE}_${TIMESTAMP}.save" +fi + +## Cleanup - Remove run files +if [ -d "${GS}" ] +then + rm -rf "${GS}" +fi + diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash index 2c01be4..715a713 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash @@ -5,8 +5,6 @@ ## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md ## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs -## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release - ## Select a cluster, partition, qos and account that is appropriate for your use case ## Available options and more details are provided in CCR's documentation: ## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos @@ -67,7 +65,7 @@ then fi popd > /dev/null -## report the GPU in the job +## report the GPUs in the job nvidia-smi -L echo @@ -91,6 +89,7 @@ echo "OUTFILE=${OUTFILE}" ## Use ${SLURMTMPDIR} for run files sed -E -i "/^[[:space:]]*outdir/s|^([[:space:]]*).*$|\1outdir = '${SLURMTMPDIR}'|" "${INFILE}" +sed -E -i "/^[[:space:]]*wfcdir([[:space:]]|=)/d" "${INFILE}" ## There are several options to save data file and the charge density files to ## disk - in this case the files will be written to the scratch space defined above diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash index 44e85ca..9cd18f0 100644 --- 
a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash @@ -5,8 +5,6 @@ ## README- https://github.com/ubccr/ccr-examples/tree/main/slurm/README.md ## DOCUMENTATION- https://docs.ccr.buffalo.edu/en/latest/hpc/jobs -## NOTE: This Slurm script was tested with the ccrsoft/2024.04 software release - ## Select a cluster, partition, qos and account that is appropriate for your use case ## Available options and more details are provided in CCR's documentation: ## https://docs.ccr.buffalo.edu/en/latest/hpc/jobs/#slurm-directives-partitions-qos From 722db8d0b4e1a72034b360f7cd09c1512483b10c Mon Sep 17 00:00:00 2001 From: Tony Kew Date: Fri, 6 Feb 2026 09:09:05 -0500 Subject: [PATCH 6/6] Renamed variable for clarity Tony --- .../2_ApplicationSpecific/Quantum_ESPRESSO/README.md | 4 ++-- .../Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash | 8 ++++---- .../quantum_espresso_1_GPU_1_node_ARM64.bash | 8 ++++---- .../Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash | 8 ++++---- .../quantum_espresso_1_GPU_2_nodes_ARM64.bash | 8 ++++---- .../Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash | 8 ++++---- .../Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash | 8 ++++---- 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md index d81f5f5..48b26cf 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/README.md @@ -25,10 +25,10 @@ This example requests an H100 GPU node connected to the "EMERALD-RAPIDS-IB" Infiniband network." 
-For the single node examples, replace "[CCRgroupname]" in the "BASE_DIR=" line: +For the single node examples, replace "[CCRgroupname]" in the "CONTAINER_DIR=" line: ``` -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" ``` For the multi node examples ALSO replace "[CCRgroupname]" in the diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash index 21fa0c9..bd822a0 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node.bash @@ -57,8 +57,8 @@ ## Note: Use "--exclusive" for shared memory/shared namespace with apptainer #SBATCH --exclusive -## BASE_DIR == directory with the Quantum ESPRESSO container image -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" qe_version="7.3.1" @@ -66,7 +66,7 @@ qe_version="7.3.1" qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" container_image="quantum_espresso-${qe_version}-$(arch).sif" ## Fetch the Quantum ESPRESSO container, if necessary -pushd "${BASE_DIR}" > /dev/null +pushd "${CONTAINER_DIR}" > /dev/null if ! 
test -f "${container_image}" then apptainer pull "${container_image}" "${qe_url}" @@ -135,7 +135,7 @@ srun --mpi=pmix \ -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ --sharens \ --nv \ - "${BASE_DIR}/${container_image}" \ + "${CONTAINER_DIR}/${container_image}" \ pw.x -in "${INFILE}" > "${OUTFILE}" ## Optional: diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash index abd0738..f1aca75 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_1_node_ARM64.bash @@ -24,8 +24,8 @@ ## Note: Use "--exclusive" for shared memory/shared namespace with apptainer #SBATCH --exclusive -## BASE_DIR == directory with the Quantum ESPRESSO container image -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" qe_version="7.3.1" @@ -33,7 +33,7 @@ qe_version="7.3.1" qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" container_image="quantum_espresso-${qe_version}-$(arch).sif" ## Fetch the Quantum ESPRESSO container, if necessary -pushd "${BASE_DIR}" > /dev/null +pushd "${CONTAINER_DIR}" > /dev/null if ! 
test -f "${container_image}" then apptainer pull "${container_image}" "${qe_url}" @@ -102,7 +102,7 @@ srun --mpi=pmix \ -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ --sharens \ --nv \ - "${BASE_DIR}/${container_image}" \ + "${CONTAINER_DIR}/${container_image}" \ pw.x -in "${INFILE}" > "${OUTFILE}" ## Optional: diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash index 3361bb0..fe5bf87 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes.bash @@ -96,8 +96,8 @@ ## Note: Use "--exclusive" for shared memory/shared namespace with apptainer #SBATCH --exclusive -## BASE_DIR == directory with the Quantum ESPRESSO container image -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" TIMESTAMP="$(date "+%F_%T")" @@ -110,7 +110,7 @@ qe_version="7.3.1" qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" container_image="quantum_espresso-${qe_version}-$(arch).sif" ## Fetch the Quantum ESPRESSO container, if necessary -pushd "${BASE_DIR}" > /dev/null +pushd "${CONTAINER_DIR}" > /dev/null if ! 
test -f "${container_image}" then apptainer pull "${container_image}" "${qe_url}" @@ -193,7 +193,7 @@ srun --mpi=pmix \ -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ --sharens \ --nv \ - "${BASE_DIR}/${container_image}" \ + "${CONTAINER_DIR}/${container_image}" \ pw.x -in "${INFILE}" > "${OUTFILE}" ## Optional: diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash index a5ade00..ea6d84c 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_1_GPU_2_nodes_ARM64.bash @@ -24,8 +24,8 @@ ## Note: Use "--exclusive" for shared memory/shared namespace with apptainer #SBATCH --exclusive -## BASE_DIR == directory with the Quantum ESPRESSO container image -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" TIMESTAMP="$(date "+%F_%T")" @@ -38,7 +38,7 @@ qe_version="7.3.1" qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" container_image="quantum_espresso-${qe_version}-$(arch).sif" ## Fetch the Quantum ESPRESSO container, if necessary -pushd "${BASE_DIR}" > /dev/null +pushd "${CONTAINER_DIR}" > /dev/null if ! 
test -f "${container_image}" then apptainer pull "${container_image}" "${qe_url}" @@ -121,7 +121,7 @@ srun --mpi=pmix \ -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ --sharens \ --nv \ - "${BASE_DIR}/${container_image}" \ + "${CONTAINER_DIR}/${container_image}" \ pw.x -in "${INFILE}" > "${OUTFILE}" ## Optional: diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash index 715a713..945c52d 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_1_node.bash @@ -49,8 +49,8 @@ ## Note: Use "--exclusive" for shared memory/shared namespace with apptainer #SBATCH --exclusive -## BASE_DIR == directory with the Quantum ESPRESSO container image -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" qe_version="7.3.1" @@ -58,7 +58,7 @@ qe_version="7.3.1" qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" container_image="quantum_espresso-${qe_version}-$(arch).sif" ## Fetch the Quantum ESPRESSO container, if necessary -pushd "${BASE_DIR}" > /dev/null +pushd "${CONTAINER_DIR}" > /dev/null if ! 
test -f "${container_image}" then apptainer pull "${container_image}" "${qe_url}" @@ -127,7 +127,7 @@ srun --mpi=pmix \ -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ --sharens \ --nv \ - "${BASE_DIR}/${container_image}" \ + "${CONTAINER_DIR}/${container_image}" \ pw.x -in "${INFILE}" > "${OUTFILE}" ## Optional: diff --git a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash index 9cd18f0..3e1a352 100644 --- a/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash +++ b/containers/2_ApplicationSpecific/Quantum_ESPRESSO/quantum_espresso_2_GPU_2_nodes.bash @@ -94,8 +94,8 @@ ## Note: Use "--exclusive" for shared memory/shared namespace with apptainer #SBATCH --exclusive -## BASE_DIR == directory with the Quantum ESPRESSO container image -BASE_DIR="/projects/academic/[CCRgroupname]/QE" +## CONTAINER_DIR == directory with the Quantum ESPRESSO container image +CONTAINER_DIR="/projects/academic/[CCRgroupname]/QE" TIMESTAMP="$(date "+%F_%T")" @@ -108,7 +108,7 @@ qe_version="7.3.1" qe_url="docker://nvcr.io/hpc/quantum_espresso:qe-${qe_version}" container_image="quantum_espresso-${qe_version}-$(arch).sif" ## Fetch the Quantum ESPRESSO container, if necessary -pushd "${BASE_DIR}" > /dev/null +pushd "${CONTAINER_DIR}" > /dev/null if ! test -f "${container_image}" then apptainer pull "${container_image}" "${qe_url}" @@ -191,7 +191,7 @@ srun --mpi=pmix \ -B /projects:/projects,/scratch:/scratch,/util:/util,/vscratch:/vscratch \ --sharens \ --nv \ - "${BASE_DIR}/${container_image}" \ + "${CONTAINER_DIR}/${container_image}" \ pw.x -in "${INFILE}" > "${OUTFILE}" ## Optional: