From 65f9c599e67dd3b31b02bb6bde79e77186b8292f Mon Sep 17 00:00:00 2001 From: "Benjamin T. Liu" Date: Mon, 26 Jan 2026 16:23:42 -0800 Subject: [PATCH 1/2] Prevent failure when running without same_node on the login node; same_node should raise an exception later if running on the login node --- ats/atsMachines/fluxScheduled.py | 12 ++++++++++-- ats/atsMachines/slurmProcessorScheduled.py | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py index a1492c9..90321d7 100755 --- a/ats/atsMachines/fluxScheduled.py +++ b/ats/atsMachines/fluxScheduled.py @@ -104,8 +104,16 @@ def init(self): log(("DEBUG: FluxScheduled init : self.numNodesAvailable =%i" % (self.numNodesAvailable)), echo=True) log(("DEBUG: FluxScheduled init : self.numGPUsAvailable =%i" % (self.numGPUs)), echo=True) - # Call get_physical_node to cache the hardware node listing before starting jobs - self.get_physical_node(0) + # Call get_physical_node to cache the hardware node listing before starting jobs. + # This is required for the same_node functionality. + try: + self.get_physical_node(0) + except RuntimeError: + # If you are not in an allocation, an exception will be thrown. + # We ignore the exception here and allow _cached_nodes to be None. + # If you are not using same_node, this is fine. If you are using same_node, + # it should throw an exception when setting up the command list. + pass def expand_nodelist(self, nodelist_field): """ diff --git a/ats/atsMachines/slurmProcessorScheduled.py b/ats/atsMachines/slurmProcessorScheduled.py index 7eceec5..91c4ee7 100644 --- a/ats/atsMachines/slurmProcessorScheduled.py +++ b/ats/atsMachines/slurmProcessorScheduled.py @@ -77,8 +77,16 @@ def init(self): super(SlurmProcessorScheduled, self).init() - # Call get_physical_node to cache the hardware node listing before starting jobs - self.get_physical_node(0) + # Call get_physical_node to cache the hardware node listing before starting jobs. + # This is required for the same_node functionality. + try: + self.get_physical_node(0) + except RuntimeError: + # If you are not in an allocation, an exception will be thrown. + # We ignore the exception here and allow _cached_nodes to be None. + # If you are not using same_node, this is fine. If you are using same_node, + # it should throw an exception when setting up the command list. + pass def expand_nodelist(self, nodelist_field): """ From 28aab0c0b20eb09ae8f02c141fe4505b86d903ac Mon Sep 17 00:00:00 2001 From: "Benjamin T. Liu" Date: Tue, 27 Jan 2026 08:29:39 -0800 Subject: [PATCH 2/2] Update error message --- ats/atsMachines/fluxScheduled.py | 2 +- ats/atsMachines/slurmProcessorScheduled.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ats/atsMachines/fluxScheduled.py b/ats/atsMachines/fluxScheduled.py index 90321d7..18754fe 100755 --- a/ats/atsMachines/fluxScheduled.py +++ b/ats/atsMachines/fluxScheduled.py @@ -153,7 +153,7 @@ def get_physical_node(self, rel_index): nodelist_field = parts[-1] break if nodelist_field is None: - raise RuntimeError("Could not find NODELIST field in flux resource list output.") + raise RuntimeError("Could not find NODELIST field in flux resource list output. Use of ATS same_node feature requires running ATS within an allocation.") FluxScheduled._cached_nodes = self.expand_nodelist(nodelist_field) log(("Info: Physical Hardware Nodes: %s" % FluxScheduled._cached_nodes), echo=True) diff --git a/ats/atsMachines/slurmProcessorScheduled.py b/ats/atsMachines/slurmProcessorScheduled.py index 91c4ee7..dca3b8e 100644 --- a/ats/atsMachines/slurmProcessorScheduled.py +++ b/ats/atsMachines/slurmProcessorScheduled.py @@ -120,7 +120,7 @@ def get_physical_node(self, rel_index): nodelist_str = os.environ.get("SLURM_JOB_NODELIST") if not nodelist_str: raise RuntimeError( - "SLURM_JOB_NODELIST is not set. Are you running inside a Slurm allocation/job?" + "SLURM_JOB_NODELIST is not set. Use of ATS same_node feature requires running ATS within an allocation." ) # Option 1: if your expand_nodelist already handles Slurm-style nodelists,