From 29a1067ce78cccc97fda04e524b55b3127950557 Mon Sep 17 00:00:00 2001 From: Wesley Mason Date: Tue, 27 Jan 2026 04:59:15 -0700 Subject: [PATCH 1/5] Fix Slurmrestd Version Inference (#1168) * Updated get_slurmrestd_version function to query api_version with new syntax * Updated job status query in SlurmrestdWorker * Updated test_slurm_worker.py with new slurmrestd syntax --- beeflow/client/core.py | 2 +- beeflow/common/worker/slurm_worker.py | 2 +- beeflow/common/worker/utils.py | 8 ++++---- beeflow/tests/test_slurm_worker.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/beeflow/client/core.py b/beeflow/client/core.py index 7bc2152a8..c5df9c59b 100644 --- a/beeflow/client/core.py +++ b/beeflow/client/core.py @@ -279,7 +279,7 @@ def start_slurm_restd(): slurmrestd_log = '/'.join([bee_workdir, 'logs', 'restd.log']) openapi_version = worker_utils.get_slurmrestd_version() print(f"Inferred slurmrestd version: {openapi_version}") - slurm_args = f'-s openapi/{openapi_version}' + slurm_args = f'-d {openapi_version} -s openapi/slurmctld' # The following adds the db plugin we opted not to use for now # slurm_args = f'-s openapi/{openapi_version},openapi/db{openapi_version}' slurm_socket = paths.slurm_socket() diff --git a/beeflow/common/worker/slurm_worker.py b/beeflow/common/worker/slurm_worker.py index 9fddd130b..064379c62 100644 --- a/beeflow/common/worker/slurm_worker.py +++ b/beeflow/common/worker/slurm_worker.py @@ -233,7 +233,7 @@ def query_task(self,job_id): check_slurm_error(data, f'Failed to query job {job_id}, slurm error.') # For some versions of slurm, the job_state isn't included on failure try: - job_state = data['jobs'][0]['job_state'] + job_state = data['jobs'][0]['job_state'][0] job_info = deepcopy(data['jobs'][0]) except (KeyError, IndexError) as exc: diff --git a/beeflow/common/worker/utils.py b/beeflow/common/worker/utils.py index 44d8814ed..c30a5f597 100644 --- a/beeflow/common/worker/utils.py +++ b/beeflow/common/worker/utils.py @@ -40,14 +40,14 @@ def parse_key_val(pair): def get_slurmrestd_version(): """Get the newest slurmrestd version.""" - resp = subprocess.run(["slurmrestd", "-s", "list"], check=True, stderr=subprocess.PIPE, - text=True).stderr + resp = subprocess.run(["slurmrestd", "-d", "list"], check=True, stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, text=True).stdout resp = resp.split("\n") # Confirm slurmrestd format is the same # If the slurmrestd list outputs has changed potentially something else has broken - if "Possible OpenAPI plugins" not in resp[0]: + if "Possible data_parser plugins" not in resp[0]: print("Slurmrestd OpenAPI format has changed and things may break") - api_versions = [line.split('/')[1] for line in resp[1:] if re.search(r"openapi/v\d+\.\d+\.\d+", + api_versions = [line.split('/')[1] for line in resp[1:] if re.search(r"data_parser/v\d+\.\d+\.\d+", line)] # Sort the versions and grab the newest one newest_api = sorted(api_versions, key=Version, reverse=True)[0] diff --git a/beeflow/tests/test_slurm_worker.py b/beeflow/tests/test_slurm_worker.py index 13dc332e0..34e6ed85b 100644 --- a/beeflow/tests/test_slurm_worker.py +++ b/beeflow/tests/test_slurm_worker.py @@ -56,7 +56,7 @@ def slurm_worker(request): bee_workdir = os.path.expanduser(f'/tmp/{uuid.uuid4().hex}.tmp') os.mkdir(bee_workdir) openapi_version = worker_utils.get_slurmrestd_version() - proc = subprocess.Popen(f'slurmrestd -s openapi/{openapi_version} unix:{slurm_socket}', + proc = subprocess.Popen(f'slurmrestd -d {openapi_version} -s openapi/slurmctld unix:{slurm_socket}', shell=True) time.sleep(1) worker_iface = WorkerInterface(worker=SlurmWorker, container_runtime='Charliecloud', From e0786115eb2fd42a58bd3ecdc13624c278736b7f Mon Sep 17 00:00:00 2001 From: Wesley Mason Date: Tue, 27 Jan 2026 05:23:21 -0700 Subject: [PATCH 2/5] Fix linting issue in beeflow/common/worker/utils.py --- beeflow/common/worker/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/beeflow/common/worker/utils.py b/beeflow/common/worker/utils.py index c30a5f597..7c166c9ec 100644 --- a/beeflow/common/worker/utils.py +++ b/beeflow/common/worker/utils.py @@ -47,8 +47,8 @@ def get_slurmrestd_version(): # If the slurmrestd list outputs has changed potentially something else has broken if "Possible data_parser plugins" not in resp[0]: print("Slurmrestd OpenAPI format has changed and things may break") - api_versions = [line.split('/')[1] for line in resp[1:] if re.search(r"data_parser/v\d+\.\d+\.\d+", - line)] + api_versions = [line.split('/')[1] for line in resp[1:] if + re.search(r"data_parser/v\d+\.\d+\.\d+", line)] # Sort the versions and grab the newest one newest_api = sorted(api_versions, key=Version, reverse=True)[0] return newest_api From b25bf2d6cf19b6aaf9a9a58276e08d454be95208 Mon Sep 17 00:00:00 2001 From: Wesley Mason Date: Tue, 27 Jan 2026 05:29:04 -0700 Subject: [PATCH 3/5] Fix linting issue in beeflow/common/worker/utils.py (trailing whitespace) --- beeflow/common/worker/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beeflow/common/worker/utils.py b/beeflow/common/worker/utils.py index 7c166c9ec..df2c9fbdf 100644 --- a/beeflow/common/worker/utils.py +++ b/beeflow/common/worker/utils.py @@ -47,7 +47,7 @@ def get_slurmrestd_version(): # If the slurmrestd list outputs has changed potentially something else has broken if "Possible data_parser plugins" not in resp[0]: print("Slurmrestd OpenAPI format has changed and things may break") - api_versions = [line.split('/')[1] for line in resp[1:] if + api_versions = [line.split('/')[1] for line in resp[1:] if re.search(r"data_parser/v\d+\.\d+\.\d+", line)] # Sort the versions and grab the newest one newest_api = sorted(api_versions, key=Version, reverse=True)[0] From af5be67c2ebddfc15fd8d0e9df2ee064d5d435a8 Mon Sep 17 00:00:00 2001 From: Wesley Mason Date: Tue, 27 Jan 2026 09:57:43 -0700 Subject: [PATCH 4/5] updating testing-coverage.yml to use ubuntu-latest --- .github/workflows/testing-coverage.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/testing-coverage.yml b/.github/workflows/testing-coverage.yml index 0d68a93ec..4fb78e257 100644 --- a/.github/workflows/testing-coverage.yml +++ b/.github/workflows/testing-coverage.yml @@ -23,7 +23,7 @@ jobs: BEE_WORKER: ${{ matrix.bee_worker }} # Note: Needs to run on 22.04 or later since slurmrestd doesn't seem to be # available on 20.04 - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install and Configure @@ -53,7 +53,7 @@ jobs: BEE_WORKER: Slurmrestd # Note: Needs to run on 22.04 or later since slurmrestd doesn't seem to be # available on 20.04 - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Install and Configure @@ -78,7 +78,7 @@ jobs: coverage: needs: [integration-test, unit-test] name: Coverage - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 From c02e437399c5b07fa23a697ab8a9ce3bd12bff7a Mon Sep 17 00:00:00 2001 From: Wesley Mason Date: Wed, 28 Jan 2026 12:49:54 -0700 Subject: [PATCH 5/5] Update CI for GitHub workflows; build slurm 24.11.7 from source in Ubuntu 22.04 container --- .github/workflows/testing-coverage.yml | 6 +++--- ci/deps_install.sh | 12 ++++++++++-- ci/env.sh | 2 ++ ci/slurm_start.sh | 10 +++++----- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/.github/workflows/testing-coverage.yml b/.github/workflows/testing-coverage.yml index 4fb78e257..0d68a93ec 100644 --- a/.github/workflows/testing-coverage.yml +++ b/.github/workflows/testing-coverage.yml @@ -23,7 +23,7 @@ jobs: BEE_WORKER: ${{ matrix.bee_worker }} # Note: Needs to run on 22.04 or later since slurmrestd doesn't seem to be # available on 20.04 - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - name: Install and Configure @@ -53,7 +53,7 @@ jobs: BEE_WORKER: Slurmrestd # Note: Needs to run on 22.04 or later since slurmrestd doesn't seem to be # available on 20.04 - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - name: Install and Configure @@ -78,7 +78,7 @@ jobs: coverage: needs: [integration-test, unit-test] name: Coverage - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 diff --git a/ci/deps_install.sh b/ci/deps_install.sh index a4f9d5d9b..c4e02119b 100755 --- a/ci/deps_install.sh +++ b/ci/deps_install.sh @@ -3,7 +3,7 @@ set -e sudo apt-get update -sudo apt-get install -y slurmctld slurmd slurmrestd munge python3 python3-venv \ +sudo apt-get install -y libhttp-parser-dev libjson-c-dev libjwt-dev munge python3 python3-venv \ curl build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev \ libssl-dev libsqlite3-dev libreadline-dev libffi-dev libbz2-dev \ libmunge-dev \ @@ -16,7 +16,15 @@ curl -O -L https://github.com/hpc/charliecloud/releases/download/v${CHARLIECLOUD tar -xvf charliecloud-${CHARLIECLOUD_VERSION}.tar.gz (cd charliecloud-${CHARLIECLOUD_VERSION} ./configure --prefix=/usr - make + make -j4 + sudo make install) + +# Install Slurm +curl -O -L https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2 +tar -xvf slurm-${SLURM_VERSION}.tar.bz2 +(cd slurm-${SLURM_VERSION} + ./configure --prefix=/usr + make -j4 sudo make install) # Install Python3 diff --git a/ci/env.sh b/ci/env.sh index 8a725fab4..fbcf2e91a 100644 --- a/ci/env.sh +++ b/ci/env.sh @@ -12,6 +12,8 @@ mkdir -p $SLURMD_SPOOL_DIR $SLURM_STATE_SAVE_LOCATION $LOG_DIR export SLURMCTLD_LOG=$LOG_DIR/slurmctld.log export SLURMD_LOG=$LOG_DIR/slurmd.log export SLURM_USER=`whoami` +export SLURMRESTD_SECURITY=disable_user_check +export SLURM_VERSION=24.11.7 export MUNGE_SOCKET=/tmp/munge.sock export MUNGE_LOG=/tmp/munge.log export MUNGE_PID=/tmp/munge.pid diff --git a/ci/slurm_start.sh b/ci/slurm_start.sh index 6221277d5..c2c81cba0 100755 --- a/ci/slurm_start.sh +++ b/ci/slurm_start.sh @@ -3,10 +3,6 @@ . ./ci/env.sh -printf "#### SLURM VERSION ####\n" -srun -V -printf "#######################\n" - # Determine config of CI host export NODE_CONFIG=`slurmd -C | head -n 1` @@ -55,6 +51,10 @@ $NODE_CONFIG PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP EOF +printf "#### SLURM VERSION ####\n" +srun -V +printf "#######################\n" + printf "\n\n" printf "#### slurm.conf ####\n" cat $SLURM_CONF @@ -82,5 +82,5 @@ srun --mpi=list printf "#######################\n" printf "\n" printf "#### OPENAPI VERSIONS ####\n" -slurmrestd -s list +slurmrestd -d list printf "##########################\n"