From 6ef7f497b2b1d12804f2ffa4611677c6d1e540d8 Mon Sep 17 00:00:00 2001 From: Alexey Rybalchenko Date: Tue, 21 Oct 2025 10:15:57 +0200 Subject: [PATCH] add pure vae25+fairm n-m example --- examples/n-m/SLURM_README.md | 256 ++++++++++++++++++++++ examples/n-m/fairmq-start-ex-n-m-slurm.sh | 110 ++++++++++ 2 files changed, 366 insertions(+) create mode 100644 examples/n-m/SLURM_README.md create mode 100755 examples/n-m/fairmq-start-ex-n-m-slurm.sh diff --git a/examples/n-m/SLURM_README.md b/examples/n-m/SLURM_README.md new file mode 100644 index 000000000..a17555a36 --- /dev/null +++ b/examples/n-m/SLURM_README.md @@ -0,0 +1,256 @@ +# Running FairMQ n-m Example on Slurm + +This guide explains how to run the n-m example topology on a Slurm-managed cluster. + +## For GSI vae25 Cluster Users + +### Accessing the Cluster + +1. **Connect to the submit node:** + ```bash + ssh vae25.hpc.gsi.de + ``` + + You'll see a message indicating the container has been launched: + ``` + Slurm Cluster – Virgo 3.0 Submit Node – Manual: https://hpc.gsi.de/virgo + Container launched: /cvmfs/vae.gsi.de/vae25/containers/vae25-user_container_20250827T1311.sif + ``` + +2. **Transfer the Slurm script to the cluster:** + + From your local machine: + ```bash + scp fairmq-start-ex-n-m-slurm.sh vae25.hpc.gsi.de:~/ + ``` + + Or create it directly on the cluster using your preferred editor. + +3. **Copy the script to your lustre workspace:** + + **Important:** The home directory (`/u/username/`) may not be accessible from compute nodes within the container. Use the shared lustre filesystem instead: + + ```bash + cp fairmq-start-ex-n-m-slurm.sh /lustre/rz/$USER/ + cd /lustre/rz/$USER/ + ``` + +5. **Verify FairMQ is available:** + ```bash + ls /cvmfs/fairsoft.gsi.de/debian12/fairsoft/jan24p5/bin/fairmq-ex-n-m-* + ``` + + You should see the three executables we need: + - `fairmq-ex-n-m-synchronizer` + - `fairmq-ex-n-m-sender` + - `fairmq-ex-n-m-receiver` + +6. 
**Submit the job:** + ```bash + sbatch fairmq-start-ex-n-m-slurm.sh + ``` + + Monitor with: + ```bash + squeue -u $USER + ``` + +**Notes:** +- The vae25 container is automatically loaded on all compute nodes, so no container configuration is needed in the script. +- The script uses the `main` partition by default (8 hour time limit). Other available partitions: `debug` (30 min), `grid` (3 days), `long` (7 days), `high_mem`, `gpu`. + +**Cluster Documentation:** https://hpc.gsi.de/virgo + +## General Prerequisites + +1. FairMQ must be built and installed (or available via CVMFS as on vae25) +2. The executables must be accessible on all compute nodes +3. Access to a Slurm cluster with at least 8 nodes + +## Quick Start (General) + +### Submit the job to Slurm: +```bash +sbatch fairmq-start-ex-n-m-slurm.sh +``` + +### Check job status: +```bash +squeue -u $USER +``` + +### View output (replace JOBID with your job ID): +```bash +# Get your job ID from squeue, then: +tail -f fairmq-n-m-JOBID.out + +# Or check the latest output file: +tail -f fairmq-n-m-*.out +``` + +### Cancel the job: +```bash +scancel JOBID +``` + +## Customization + +### For GSI vae25 Cluster + +The script is pre-configured to use: +```bash +FAIRSOFT_BIN="/cvmfs/fairsoft.gsi.de/debian12/fairsoft/jan24p5/bin" +``` + +If you're using a different FairSoft version on CVMFS, update this path. 
+ +### General Configuration + +You can modify the following parameters in the script: + +#### Resource Allocation +Edit the SBATCH directives at the top of the script: + +```bash +#SBATCH --partition=main # Partition (main, debug, grid, long, high_mem, gpu) +#SBATCH --nodes=8 # Total nodes needed (1 sync + N senders + M receivers) +#SBATCH --ntasks=8 # Total tasks +#SBATCH --time=01:00:00 # Wall time limit +``` + +**GSI vae25 partition limits:** +- `debug`: 30 minutes, 8 nodes max +- `main`: 8 hours (default) +- `grid`: 3 days +- `long`: 7 days + +#### Topology Configuration +Edit the configuration variables in the script: + +```bash +NUM_SENDERS=3 # Number of sender devices +NUM_RECEIVERS=4 # Number of receiver devices +SUBTIMEFRAME_SIZE=1000000 # Size of subtimeframes in bytes +RATE=100 # Rate of synchronizer in Hz +``` + +**Important:** If you change NUM_SENDERS or NUM_RECEIVERS, you must also update the SBATCH `--nodes` and `--ntasks` parameters to match: `nodes = 1 + NUM_SENDERS + NUM_RECEIVERS` + +#### Port Configuration + +The script uses the following default ports: +- Synchronizer: 8010 +- Receivers: 8021-8024 (incremental based on NUM_RECEIVERS) + +You can modify these by editing: +```bash +SYNC_PORT=8010 +RECEIVER_BASE_PORT=8021 +``` + +## How It Works + +### Node Allocation +The script allocates nodes in the following order: +1. **Node 0**: Synchronizer +2. **Nodes 1-3**: Senders +3. **Nodes 4-7**: Receivers + +### Device Startup Order +All devices are started in parallel: +1. **Synchronizer** binds to port 8010 +2. **Receivers** bind to their respective ports (8021-8024) +3. **Senders** connect to the synchronizer and all receivers + +ZeroMQ handles the bind/connect establishment automatically, so the startup order doesn't matter. Devices can start in any order and will establish connections when both sides are ready. 
+ +### Communication Pattern + +``` +Synchronizer (PUB) + | + | sync messages + v +Sender 1, 2, 3 (SUB -> PUSH) + | + | data distribution based on message ID + v +Receiver 1, 2, 3, 4 (PULL) +``` + +- The synchronizer publishes sync messages via PUB/SUB pattern +- Each sender subscribes to sync messages +- Senders distribute data to receivers using PUSH/PULL pattern +- Data is routed to specific receivers based on the ID in the sync message + +## Running on Fewer Nodes + +If you want to run multiple devices per node (e.g., for testing on a small cluster): + +```bash +#SBATCH --nodes=4 # Use 4 nodes instead of 8 +#SBATCH --ntasks=8 # Still 8 tasks total +#SBATCH --ntasks-per-node=2 # 2 tasks per node +``` + +Note: This may have performance implications due to shared resources. + +## Troubleshooting + +### Issue: "Unable to allocate resources" +**Solution:** Reduce the number of requested nodes or check cluster availability with `sinfo` + +### Issue: Job fails immediately with "couldn't chdir" error +**Symptoms:** +``` +slurmstepd: error: couldn't chdir to `/u/username/...': No such file or directory +``` + +**Solution:** +- The home directory is not accessible from compute nodes within the container +- Copy your script to the lustre filesystem: `/lustre/rz/$USER/` +- Submit the job from there + +### Issue: Devices can't connect +**Solution:** +- Check that firewall rules allow communication between nodes +- Verify hostnames are resolvable between nodes +- Check the output log for specific error messages + +### Issue: Port already in use +**Solution:** +- Change `SYNC_PORT` and `RECEIVER_BASE_PORT` to unused ports +- Wait for previous job to fully terminate + +### Issue: Devices exit immediately +**Solution:** +- Check that FairMQ executables are in PATH on all compute nodes +- Verify the build was successful and executables exist +- Use `--control static` mode to prevent interactive state machine (already set in script) + +## Advanced: Interactive Mode + +To run 
interactively for debugging (allocates resources and gives you a shell): + +```bash +salloc --nodes=8 --ntasks=8 --time=01:00:00 +# Then run the script content manually or modify for interactive use +``` + +## Monitoring + +While the job is running: + +```bash +# Watch job status +watch -n 1 squeue -u $USER + +# Monitor output in real-time +tail -f fairmq-n-m-JOBID.out + +# Check resource usage +sstat -j JOBID + +# After completion, view accounting info +sacct -j JOBID --format=JobID,JobName,Partition,State,Elapsed,MaxRSS +``` diff --git a/examples/n-m/fairmq-start-ex-n-m-slurm.sh b/examples/n-m/fairmq-start-ex-n-m-slurm.sh new file mode 100755 index 000000000..676358d45 --- /dev/null +++ b/examples/n-m/fairmq-start-ex-n-m-slurm.sh @@ -0,0 +1,110 @@ +#!/bin/bash +#SBATCH --job-name=fairmq-n-m +#SBATCH --partition=main +#SBATCH --nodes=8 +#SBATCH --ntasks=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --time=01:00:00 +#SBATCH --output=fairmq-n-m-%j.out +#SBATCH --error=fairmq-n-m-%j.err + +# FairMQ n-m Example for Slurm with vae25 container +# Topology: 1 synchronizer -> 3 senders -> 4 receivers +# Container is automatically loaded by the cluster + +# FairSoft configuration +FAIRSOFT_BIN="/cvmfs/fairsoft.gsi.de/debian12/fairsoft/jan24p5/bin" + +# Configuration +NUM_SENDERS=3 +NUM_RECEIVERS=4 +SUBTIMEFRAME_SIZE=1000000 +RATE=100 + +# Base port numbers +SYNC_PORT=8010 +RECEIVER_BASE_PORT=8021 + +# Get the list of allocated nodes +NODELIST=($(scontrol show hostname $SLURM_NODELIST)) + +# Assign nodes to devices +SYNC_NODE=${NODELIST[0]} +SENDER_NODES=(${NODELIST[1]} ${NODELIST[2]} ${NODELIST[3]}) +RECEIVER_NODES=(${NODELIST[4]} ${NODELIST[5]} ${NODELIST[6]} ${NODELIST[7]}) + +echo "===========================================" +echo "FairMQ n-m Example on Slurm" +echo "===========================================" +echo "Job ID: $SLURM_JOB_ID" +echo "Synchronizer node: $SYNC_NODE" +echo "Sender nodes: ${SENDER_NODES[@]}" +echo "Receiver nodes: ${RECEIVER_NODES[@]}" +echo 
"==========================================="

# Build the list of receiver endpoints that every sender connects to, e.g.
#   address=tcp://nodeA:8021,address=tcp://nodeB:8022,...
# One address= entry per receiver; order matters (index i -> Receiver i+1).
RECEIVER_ADDRESSES=""
for i in $(seq 0 $((NUM_RECEIVERS - 1))); do
    RECEIVER_PORT=$((RECEIVER_BASE_PORT + i))
    if [ "$i" -eq 0 ]; then
        RECEIVER_ADDRESSES="address=tcp://${RECEIVER_NODES[$i]}:${RECEIVER_PORT}"
    else
        RECEIVER_ADDRESSES="${RECEIVER_ADDRESSES},address=tcp://${RECEIVER_NODES[$i]}:${RECEIVER_PORT}"
    fi
done

# Track the PID of every background srun step so that device failures can be
# propagated to the job exit status (a bare 'wait' would always return 0 and
# the Slurm job would be marked COMPLETED even if a device crashed).
PIDS=()

# Start all devices in parallel (ZeroMQ handles bind/connect order automatically)
echo "Starting synchronizer on $SYNC_NODE..."
srun --nodes=1 --ntasks=1 --nodelist="$SYNC_NODE" \
    "${FAIRSOFT_BIN}/fairmq-ex-n-m-synchronizer" \
    --id Sync \
    --channel-config "name=sync,type=pub,method=bind,address=tcp://*:${SYNC_PORT}" \
    --rate "${RATE}" \
    --verbosity veryhigh \
    --control static &
PIDS+=($!)

echo "Starting ${NUM_RECEIVERS} receivers..."
for i in $(seq 0 $((NUM_RECEIVERS - 1))); do
    RECEIVER_ID="Receiver$((i + 1))"
    RECEIVER_PORT=$((RECEIVER_BASE_PORT + i))
    RECEIVER_NODE=${RECEIVER_NODES[$i]}

    echo " Starting $RECEIVER_ID on $RECEIVER_NODE:$RECEIVER_PORT"
    # Each receiver binds its own PULL socket and expects NUM_SENDERS peers.
    srun --nodes=1 --ntasks=1 --nodelist="$RECEIVER_NODE" \
        "${FAIRSOFT_BIN}/fairmq-ex-n-m-receiver" \
        --id "$RECEIVER_ID" \
        --channel-config "name=data,type=pull,method=bind,address=tcp://*:${RECEIVER_PORT}" \
        --num-senders "${NUM_SENDERS}" \
        --verbosity veryhigh \
        --control static &
    PIDS+=($!)
done

echo "Starting ${NUM_SENDERS} senders..."
for i in $(seq 0 $((NUM_SENDERS - 1))); do
    SENDER_ID="Sender$((i + 1))"
    SENDER_NODE=${SENDER_NODES[$i]}

    echo " Starting $SENDER_ID on $SENDER_NODE"
    # Senders subscribe to the synchronizer (sync channel) and connect a PUSH
    # sub-socket to every receiver (data channel, one address per receiver).
    srun --nodes=1 --ntasks=1 --nodelist="$SENDER_NODE" \
        "${FAIRSOFT_BIN}/fairmq-ex-n-m-sender" \
        --id "$SENDER_ID" \
        --channel-config "name=sync,type=sub,method=connect,address=tcp://${SYNC_NODE}:${SYNC_PORT}" \
                         "name=data,type=push,method=connect,${RECEIVER_ADDRESSES}" \
        --sender-index "$i" \
        --subtimeframe-size "${SUBTIMEFRAME_SIZE}" \
        --num-receivers "${NUM_RECEIVERS}" \
        --verbosity veryhigh \
        --control static &
    PIDS+=($!)
done

echo "==========================================="
echo "All devices started. Waiting for completion..."
echo "Press Ctrl+C to terminate all processes."
echo "==========================================="

# Wait for every device individually and remember the first non-zero exit
# status, so a failing device marks the whole Slurm job as FAILED.
STATUS=0
for pid in "${PIDS[@]}"; do
    wait "$pid" || STATUS=$?
done

echo "==========================================="
echo "FairMQ n-m example completed"
echo "==========================================="
exit "$STATUS"