Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion beeflow/client/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def start_slurm_restd():
slurmrestd_log = '/'.join([bee_workdir, 'logs', 'restd.log'])
openapi_version = worker_utils.get_slurmrestd_version()
print(f"Inferred slurmrestd version: {openapi_version}")
slurm_args = f'-s openapi/{openapi_version}'
slurm_args = f'-d {openapi_version} -s openapi/slurmctld'
# The following adds the db plugin, which we opted not to use for now
# slurm_args = f'-s openapi/{openapi_version},openapi/db{openapi_version}'
slurm_socket = paths.slurm_socket()
Expand Down
2 changes: 1 addition & 1 deletion beeflow/common/worker/slurm_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ def query_task(self,job_id):
check_slurm_error(data, f'Failed to query job {job_id}, slurm error.')
# For some versions of slurm, the job_state isn't included on failure
try:
job_state = data['jobs'][0]['job_state']
job_state = data['jobs'][0]['job_state'][0]
job_info = deepcopy(data['jobs'][0])

except (KeyError, IndexError) as exc:
Expand Down
10 changes: 5 additions & 5 deletions beeflow/common/worker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,15 @@ def parse_key_val(pair):

def get_slurmrestd_version():
"""Get the newest slurmrestd version."""
resp = subprocess.run(["slurmrestd", "-s", "list"], check=True, stderr=subprocess.PIPE,
text=True).stderr
resp = subprocess.run(["slurmrestd", "-d", "list"], check=True, stdout=subprocess.PIPE,
stderr=subprocess.STDOUT, text=True).stdout
resp = resp.split("\n")
# Confirm the slurmrestd output format is the same
# If the slurmrestd list output has changed, something else has potentially broken
if "Possible OpenAPI plugins" not in resp[0]:
if "Possible data_parser plugins" not in resp[0]:
print("Slurmrestd OpenAPI format has changed and things may break")
api_versions = [line.split('/')[1] for line in resp[1:] if re.search(r"openapi/v\d+\.\d+\.\d+",
line)]
api_versions = [line.split('/')[1] for line in resp[1:] if
re.search(r"data_parser/v\d+\.\d+\.\d+", line)]
# Sort the versions and grab the newest one
newest_api = sorted(api_versions, key=Version, reverse=True)[0]
return newest_api
Expand Down
2 changes: 1 addition & 1 deletion beeflow/tests/test_slurm_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def slurm_worker(request):
bee_workdir = os.path.expanduser(f'/tmp/{uuid.uuid4().hex}.tmp')
os.mkdir(bee_workdir)
openapi_version = worker_utils.get_slurmrestd_version()
proc = subprocess.Popen(f'slurmrestd -s openapi/{openapi_version} unix:{slurm_socket}',
proc = subprocess.Popen(f'slurmrestd -d {openapi_version} -s openapi/slurmctld unix:{slurm_socket}',
shell=True)
time.sleep(1)
worker_iface = WorkerInterface(worker=SlurmWorker, container_runtime='Charliecloud',
Expand Down
12 changes: 10 additions & 2 deletions ci/deps_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
set -e

sudo apt-get update
sudo apt-get install -y slurmctld slurmd slurmrestd munge python3 python3-venv \
sudo apt-get install -y libhttp-parser-dev libjson-c-dev libjwt-dev munge python3 python3-venv \
curl build-essential zlib1g-dev libncurses5-dev libgdbm-dev libnss3-dev \
libssl-dev libsqlite3-dev libreadline-dev libffi-dev libbz2-dev \
libmunge-dev \
Expand All @@ -16,7 +16,15 @@ curl -O -L https://github.com/hpc/charliecloud/releases/download/v${CHARLIECLOUD
tar -xvf charliecloud-${CHARLIECLOUD_VERSION}.tar.gz
(cd charliecloud-${CHARLIECLOUD_VERSION}
./configure --prefix=/usr
make
make -j4
sudo make install)

# Install Slurm
curl -O -L https://download.schedmd.com/slurm/slurm-${SLURM_VERSION}.tar.bz2
tar -xvf slurm-${SLURM_VERSION}.tar.bz2
(cd slurm-${SLURM_VERSION}
./configure --prefix=/usr
make -j4
sudo make install)

# Install Python3
Expand Down
2 changes: 2 additions & 0 deletions ci/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ mkdir -p $SLURMD_SPOOL_DIR $SLURM_STATE_SAVE_LOCATION $LOG_DIR
export SLURMCTLD_LOG=$LOG_DIR/slurmctld.log
export SLURMD_LOG=$LOG_DIR/slurmd.log
export SLURM_USER=`whoami`
export SLURMRESTD_SECURITY=disable_user_check
export SLURM_VERSION=24.11.7
export MUNGE_SOCKET=/tmp/munge.sock
export MUNGE_LOG=/tmp/munge.log
export MUNGE_PID=/tmp/munge.pid
Expand Down
10 changes: 5 additions & 5 deletions ci/slurm_start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@

. ./ci/env.sh

printf "#### SLURM VERSION ####\n"
srun -V
printf "#######################\n"

# Determine config of CI host
export NODE_CONFIG=`slurmd -C | head -n 1`

Expand Down Expand Up @@ -55,6 +51,10 @@ $NODE_CONFIG
PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
EOF

printf "#### SLURM VERSION ####\n"
srun -V
printf "#######################\n"

printf "\n\n"
printf "#### slurm.conf ####\n"
cat $SLURM_CONF
Expand Down Expand Up @@ -82,5 +82,5 @@ srun --mpi=list
printf "#######################\n"
printf "\n"
printf "#### OPENAPI VERSIONS ####\n"
slurmrestd -s list
slurmrestd -d list
printf "##########################\n"
Loading