From 042fa51bae724b1a9fc534a63473e6052119f1c3 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Tue, 4 Mar 2025 17:19:21 +0100 Subject: [PATCH 1/7] Resolve failing to submit to hortense GPU partition --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index a2327879..28cbbce2 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -256,7 +256,7 @@ def command(self, job): 'general': [ { 'remote_detect': True, - 'purge_environment': True, + 'purge_environment': False, 'resolve_module_conflicts': False, # avoid loading the module before submitting the job **common_general_config() } From 52b0030b51c97b172204d28d7de5fe1f12992576 Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Tue, 4 Mar 2025 17:46:47 +0100 Subject: [PATCH 2/7] add reference to issue in config --- config/vsc_hortense.py | 1 + 1 file changed, 1 insertion(+) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 28cbbce2..a25812fd 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -256,6 +256,7 @@ def command(self, job): 'general': [ { 'remote_detect': True, + # Needs to be false see https://github.com/EESSI/test-suite/issues/242 'purge_environment': False, 'resolve_module_conflicts': False, # avoid loading the module before submitting the job **common_general_config() From 6f40c19a474757686aae8fe5154d49247055776e Mon Sep 17 00:00:00 2001 From: Lara Ramona Peeters <49882639+laraPPr@users.noreply.github.com> Date: Tue, 4 Mar 2025 17:47:21 +0100 Subject: [PATCH 3/7] Update vsc_hortense.py --- config/vsc_hortense.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index a25812fd..2388c62d 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -256,7 +256,7 @@ def command(self, job): 'general': [ { 
'remote_detect': True, - # Needs to be false see https://github.com/EESSI/test-suite/issues/242 + # Purge_environment needs to be false see https://github.com/EESSI/test-suite/issues/242 'purge_environment': False, 'resolve_module_conflicts': False, # avoid loading the module before submitting the job **common_general_config() From 96e76152657c3fad61f8fd31e52a01519945e24f Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Mon, 10 Mar 2025 15:00:41 +0100 Subject: [PATCH 4/7] update the ci_config for running EESSI on Hortense --- CI/hortense_EESSI_ss/ci_config.sh | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh index 6ecaad39..a3e5f907 100644 --- a/CI/hortense_EESSI_ss/ci_config.sh +++ b/CI/hortense_EESSI_ss/ci_config.sh @@ -1,19 +1,21 @@ # Configurable items -if [ -z "${TEST_SUITE_PARTITION}" ]; then - echo "You have to indicate on which partition the test-suite will run on vsc-Hortense" - echo "This environment variable needs to be set TEST_SUITE_PARTITION=cpu_rome_256gb" - echo "Can only set to 'cpu_rome_256gb' untill new functionality of 'sched_options' is part of" - echo "the ReFrame release https://github.com/reframe-hpc/reframe/issues/2970" - exit 1 +if [ -z "${UNSET_MODULEPATH}" ]; then + export UNSET_MODULEPATH=False + module --force purge fi -if [ -z "${REFRAME_ARGS}" ]; then - REFRAME_ARGS="--tag CI --tag 1_core --system hortense:${TEST_SUITE_PARTITION}" +if [[ "$TEST_SUITE_PARTITION" == "GPU" ]]; then + if [ -z "${SET_LOCAL_MODULE_ENV}"]; then + export SET_LOCAL_MODULE_ENV=True + fi + if [ -z "${LOCAL_MODULES}"]; then + export LOCAL_MODULES="env/slurm/dodrio/gpu_rome_a100" + # module use to accellorator + fi fi -if [ -z "${UNSET_MODULEPATH}" ]; then - export UNSET_MODULEPATH=False - module --force purge +if [ -z "${REFRAME_ARGS}" ]; then + REFRAME_ARGS="--tag CI --tag 1_core" fi if [ -z "${USE_EESSI_SOFTWARE_STACK}" ]; then @@ -29,5 +31,5 
@@ if [ -z "${SET_LOCAL_MODULE_ENV}"]; then fi if [ -z "${LOCAL_MODULES}"]; then - export LOCAL_MODULES="env/vsc/dodrio/${TEST_SUITE_PARTITION} env/slurm/dodrio/${TEST_SUITE_PARTITION}" + export LOCAL_MODULES="env/slurm/dodrio/cpu_rome" fi From 291c136e86013f11c6bee16df004bc19265a14bd Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 17 Apr 2025 13:32:21 +0200 Subject: [PATCH 5/7] use env_vars instead of not purging the environment --- CI/hortense_EESSI_ss/ci_config.sh | 2 +- config/vsc_hortense.py | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/CI/hortense_EESSI_ss/ci_config.sh b/CI/hortense_EESSI_ss/ci_config.sh index a3e5f907..bd407515 100644 --- a/CI/hortense_EESSI_ss/ci_config.sh +++ b/CI/hortense_EESSI_ss/ci_config.sh @@ -10,7 +10,7 @@ if [[ "$TEST_SUITE_PARTITION" == "GPU" ]]; then fi if [ -z "${LOCAL_MODULES}"]; then export LOCAL_MODULES="env/slurm/dodrio/gpu_rome_a100" - # module use to accellorator + module use /cvmfs/software.eessi.io/versions/2023.06/software/linux/x86_64/amd/zen2/accel/nvidia/cc80/modules/all fi fi diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 2388c62d..67a0ad28 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -57,6 +57,7 @@ def command(self, job): 'descr': 'Hortense', 'hostnames': ['login.*.dodrio.os'], 'modules_system': 'lmod', + 'env_vars': [['SLURM_CONF','/etc/slurm/slurm.conf_dodrio']], 'partitions': [ { 'name': 'cpu_rome', @@ -145,7 +146,10 @@ def command(self, job): { 'name': 'gpu_rome_a100_40', 'scheduler': 'slurm', - 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], + 'prepare_cmds': [ + prepare_eessi_init, + common_eessi_init(), + ], 'access': hortense_access + ['--partition=gpu_rome_a100_40'], 'sched_options': { 'sched_access_in_submit': True, @@ -185,7 +189,10 @@ def command(self, job): { 'name': 'gpu_rome_a100_80', 'scheduler': 'slurm', - 'prepare_cmds': [prepare_eessi_init, common_eessi_init()], + 'prepare_cmds': [ + 
prepare_eessi_init, + common_eessi_init(), + ], 'access': hortense_access + ['--partition=gpu_rome_a100_80'], 'sched_options': { 'sched_access_in_submit': True, @@ -257,7 +264,7 @@ def command(self, job): { 'remote_detect': True, # Purge_environment needs to be false see https://github.com/EESSI/test-suite/issues/242 - 'purge_environment': False, + 'purge_environment': True, 'resolve_module_conflicts': False, # avoid loading the module before submitting the job **common_general_config() } From d537c4230d48278f401853a6b484305840e82908 Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 17 Apr 2025 13:35:13 +0200 Subject: [PATCH 6/7] make linter happy --- config/vsc_hortense.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 67a0ad28..c5b7493a 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -57,7 +57,7 @@ def command(self, job): 'descr': 'Hortense', 'hostnames': ['login.*.dodrio.os'], 'modules_system': 'lmod', - 'env_vars': [['SLURM_CONF','/etc/slurm/slurm.conf_dodrio']], + 'env_vars': [['SLURM_CONF', '/etc/slurm/slurm.conf_dodrio']], 'partitions': [ { 'name': 'cpu_rome', @@ -147,7 +147,7 @@ def command(self, job): 'name': 'gpu_rome_a100_40', 'scheduler': 'slurm', 'prepare_cmds': [ - prepare_eessi_init, + prepare_eessi_init, common_eessi_init(), ], 'access': hortense_access + ['--partition=gpu_rome_a100_40'], @@ -190,7 +190,7 @@ def command(self, job): 'name': 'gpu_rome_a100_80', 'scheduler': 'slurm', 'prepare_cmds': [ - prepare_eessi_init, + prepare_eessi_init, common_eessi_init(), ], 'access': hortense_access + ['--partition=gpu_rome_a100_80'], From 5804a7a6318c76b6e666cc4562b9f63be73f6b9c Mon Sep 17 00:00:00 2001 From: vsc46128 vscuser Date: Thu, 17 Apr 2025 13:38:27 +0200 Subject: [PATCH 7/7] fix comment --- config/vsc_hortense.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/config/vsc_hortense.py b/config/vsc_hortense.py index 
c5b7493a..f2426675 100644 --- a/config/vsc_hortense.py +++ b/config/vsc_hortense.py @@ -57,6 +57,8 @@ def command(self, job): 'descr': 'Hortense', 'hostnames': ['login.*.dodrio.os'], 'modules_system': 'lmod', + # SLURM_CONF needs to be set to be able to submit to the GPU partitions, + # see https://github.com/EESSI/test-suite/issues/242 'env_vars': [['SLURM_CONF', '/etc/slurm/slurm.conf_dodrio']], 'partitions': [ { @@ -263,7 +265,6 @@ def command(self, job): 'general': [ { 'remote_detect': True, - # Purge_environment needs to be false see https://github.com/EESSI/test-suite/issues/242 'purge_environment': True, 'resolve_module_conflicts': False, # avoid loading the module before submitting the job **common_general_config()