From 6ef7f497b2b1d12804f2ffa4611677c6d1e540d8 Mon Sep 17 00:00:00 2001 From: Alexey Rybalchenko Date: Tue, 21 Oct 2025 10:15:57 +0200 Subject: [PATCH] add pure vae25+fairm n-m example --- examples/n-m/SLURM_README.md | 256 ++++++++++++++++++++++ examples/n-m/fairmq-start-ex-n-m-slurm.sh | 110 ++++++++++ 2 files changed, 366 insertions(+) create mode 100644 examples/n-m/SLURM_README.md create mode 100755 examples/n-m/fairmq-start-ex-n-m-slurm.sh diff --git a/examples/n-m/SLURM_README.md b/examples/n-m/SLURM_README.md new file mode 100644 index 000000000..a17555a36 --- /dev/null +++ b/examples/n-m/SLURM_README.md @@ -0,0 +1,256 @@ +# Running FairMQ n-m Example on Slurm + +This guide explains how to run the n-m example topology on a Slurm-managed cluster. + +## For GSI vae25 Cluster Users + +### Accessing the Cluster + +1. **Connect to the submit node:** + ```bash + ssh vae25.hpc.gsi.de + ``` + + You'll see a message indicating the container has been launched: + ``` + Slurm Cluster – Virgo 3.0 Submit Node – Manual: https://hpc.gsi.de/virgo + Container launched: /cvmfs/vae.gsi.de/vae25/containers/vae25-user_container_20250827T1311.sif + ``` + +2. **Transfer the Slurm script to the cluster:** + + From your local machine: + ```bash + scp fairmq-start-ex-n-m-slurm.sh vae25.hpc.gsi.de:~/ + ``` + + Or create it directly on the cluster using your preferred editor. + +3. **Copy the script to your lustre workspace:** + + **Important:** The home directory (`/u/username/`) may not be accessible from compute nodes within the container. Use the shared lustre filesystem instead: + + ```bash + cp fairmq-start-ex-n-m-slurm.sh /lustre/rz/$USER/ + cd /lustre/rz/$USER/ + ``` + +5. **Verify FairMQ is available:** + ```bash + ls /cvmfs/fairsoft.gsi.de/debian12/fairsoft/jan24p5/bin/fairmq-ex-n-m-* + ``` + + You should see the three executables we need: + - `fairmq-ex-n-m-synchronizer` + - `fairmq-ex-n-m-sender` + - `fairmq-ex-n-m-receiver` + +6. 
**Submit the job:** + ```bash + sbatch fairmq-start-ex-n-m-slurm.sh + ``` + + Monitor with: + ```bash + squeue -u $USER + ``` + +**Notes:** +- The vae25 container is automatically loaded on all compute nodes, so no container configuration is needed in the script. +- The script uses the `main` partition by default (8 hour time limit). Other available partitions: `debug` (30 min), `grid` (3 days), `long` (7 days), `high_mem`, `gpu`. + +**Cluster Documentation:** https://hpc.gsi.de/virgo + +## General Prerequisites + +1. FairMQ must be built and installed (or available via CVMFS as on vae25) +2. The executables must be accessible on all compute nodes +3. Access to a Slurm cluster with at least 8 nodes + +## Quick Start (General) + +### Submit the job to Slurm: +```bash +sbatch fairmq-start-ex-n-m-slurm.sh +``` + +### Check job status: +```bash +squeue -u $USER +``` + +### View output (replace JOBID with your job ID): +```bash +# Get your job ID from squeue, then: +tail -f fairmq-n-m-JOBID.out + +# Or check the latest output file: +tail -f fairmq-n-m-*.out +``` + +### Cancel the job: +```bash +scancel JOBID +``` + +## Customization + +### For GSI vae25 Cluster + +The script is pre-configured to use: +```bash +FAIRSOFT_BIN="/cvmfs/fairsoft.gsi.de/debian12/fairsoft/jan24p5/bin" +``` + +If you're using a different FairSoft version on CVMFS, update this path. 
+ +### General Configuration + +You can modify the following parameters in the script: + +#### Resource Allocation +Edit the SBATCH directives at the top of the script: + +```bash +#SBATCH --partition=main # Partition (main, debug, grid, long, high_mem, gpu) +#SBATCH --nodes=8 # Total nodes needed (1 sync + N senders + M receivers) +#SBATCH --ntasks=8 # Total tasks +#SBATCH --time=01:00:00 # Wall time limit +``` + +**GSI vae25 partition limits:** +- `debug`: 30 minutes, 8 nodes max +- `main`: 8 hours (default) +- `grid`: 3 days +- `long`: 7 days + +#### Topology Configuration +Edit the configuration variables in the script: + +```bash +NUM_SENDERS=3 # Number of sender devices +NUM_RECEIVERS=4 # Number of receiver devices +SUBTIMEFRAME_SIZE=1000000 # Size of subtimeframes in bytes +RATE=100 # Rate of synchronizer in Hz +``` + +**Important:** If you change NUM_SENDERS or NUM_RECEIVERS, you must also update the SBATCH `--nodes` and `--ntasks` parameters to match: `nodes = 1 + NUM_SENDERS + NUM_RECEIVERS` + +#### Port Configuration + +The script uses the following default ports: +- Synchronizer: 8010 +- Receivers: 8021-8024 (incremental based on NUM_RECEIVERS) + +You can modify these by editing: +```bash +SYNC_PORT=8010 +RECEIVER_BASE_PORT=8021 +``` + +## How It Works + +### Node Allocation +The script allocates nodes in the following order: +1. **Node 0**: Synchronizer +2. **Nodes 1-3**: Senders +3. **Nodes 4-7**: Receivers + +### Device Startup Order +All devices are started in parallel: +1. **Synchronizer** binds to port 8010 +2. **Receivers** bind to their respective ports (8021-8024) +3. **Senders** connect to the synchronizer and all receivers + +ZeroMQ handles the bind/connect establishment automatically, so the startup order doesn't matter. Devices can start in any order and will establish connections when both sides are ready. 
+ +### Communication Pattern + +``` +Synchronizer (PUB) + | + | sync messages + v +Sender 1, 2, 3 (SUB -> PUSH) + | + | data distribution based on message ID + v +Receiver 1, 2, 3, 4 (PULL) +``` + +- The synchronizer publishes sync messages via PUB/SUB pattern +- Each sender subscribes to sync messages +- Senders distribute data to receivers using PUSH/PULL pattern +- Data is routed to specific receivers based on the ID in the sync message + +## Running on Fewer Nodes + +If you want to run multiple devices per node (e.g., for testing on a small cluster): + +```bash +#SBATCH --nodes=4 # Use 4 nodes instead of 8 +#SBATCH --ntasks=8 # Still 8 tasks total +#SBATCH --ntasks-per-node=2 # 2 tasks per node +``` + +Note: This may have performance implications due to shared resources. + +## Troubleshooting + +### Issue: "Unable to allocate resources" +**Solution:** Reduce the number of requested nodes or check cluster availability with `sinfo` + +### Issue: Job fails immediately with "couldn't chdir" error +**Symptoms:** +``` +slurmstepd: error: couldn't chdir to `/u/username/...': No such file or directory +``` + +**Solution:** +- The home directory is not accessible from compute nodes within the container +- Copy your script to the lustre filesystem: `/lustre/rz/$USER/` +- Submit the job from there + +### Issue: Devices can't connect +**Solution:** +- Check that firewall rules allow communication between nodes +- Verify hostnames are resolvable between nodes +- Check the output log for specific error messages + +### Issue: Port already in use +**Solution:** +- Change `SYNC_PORT` and `RECEIVER_BASE_PORT` to unused ports +- Wait for previous job to fully terminate + +### Issue: Devices exit immediately +**Solution:** +- Check that FairMQ executables are in PATH on all compute nodes +- Verify the build was successful and executables exist +- Use `--control static` mode to prevent interactive state machine (already set in script) + +## Advanced: Interactive Mode + +To run 
interactively for debugging (allocates resources and gives you a shell): + +```bash +salloc --nodes=8 --ntasks=8 --time=01:00:00 +# Then run the script content manually or modify for interactive use +``` + +## Monitoring + +While the job is running: + +```bash +# Watch job status +watch -n 1 squeue -u $USER + +# Monitor output in real-time +tail -f fairmq-n-m-JOBID.out + +# Check resource usage +sstat -j JOBID + +# After completion, view accounting info +sacct -j JOBID --format=JobID,JobName,Partition,State,Elapsed,MaxRSS +``` diff --git a/examples/n-m/fairmq-start-ex-n-m-slurm.sh b/examples/n-m/fairmq-start-ex-n-m-slurm.sh new file mode 100755 index 000000000..676358d45 --- /dev/null +++ b/examples/n-m/fairmq-start-ex-n-m-slurm.sh @@ -0,0 +1,110 @@ +#!/bin/bash +#SBATCH --job-name=fairmq-n-m +#SBATCH --partition=main +#SBATCH --nodes=8 +#SBATCH --ntasks=8 +#SBATCH --ntasks-per-node=1 +#SBATCH --time=01:00:00 +#SBATCH --output=fairmq-n-m-%j.out +#SBATCH --error=fairmq-n-m-%j.err + +# FairMQ n-m Example for Slurm with vae25 container +# Topology: 1 synchronizer -> 3 senders -> 4 receivers +# Container is automatically loaded by the cluster + +# FairSoft configuration +FAIRSOFT_BIN="/cvmfs/fairsoft.gsi.de/debian12/fairsoft/jan24p5/bin" + +# Configuration +NUM_SENDERS=3 +NUM_RECEIVERS=4 +SUBTIMEFRAME_SIZE=1000000 +RATE=100 + +# Base port numbers +SYNC_PORT=8010 +RECEIVER_BASE_PORT=8021 + +# Get the list of allocated nodes +NODELIST=($(scontrol show hostname $SLURM_NODELIST)) + +# Assign nodes to devices +SYNC_NODE=${NODELIST[0]} +SENDER_NODES=(${NODELIST[1]} ${NODELIST[2]} ${NODELIST[3]}) +RECEIVER_NODES=(${NODELIST[4]} ${NODELIST[5]} ${NODELIST[6]} ${NODELIST[7]}) + +echo "===========================================" +echo "FairMQ n-m Example on Slurm" +echo "===========================================" +echo "Job ID: $SLURM_JOB_ID" +echo "Synchronizer node: $SYNC_NODE" +echo "Sender nodes: ${SENDER_NODES[@]}" +echo "Receiver nodes: ${RECEIVER_NODES[@]}" +echo 
"==========================================="

# Build the list of receiver endpoints that every sender connects to, e.g.
#   address=tcp://nodeA:8021,address=tcp://nodeB:8022,...
# One address= entry per receiver; order matters (index i -> Receiver i+1).
RECEIVER_ADDRESSES=""
for i in $(seq 0 $((NUM_RECEIVERS - 1))); do
    RECEIVER_PORT=$((RECEIVER_BASE_PORT + i))
    if [ "$i" -eq 0 ]; then
        RECEIVER_ADDRESSES="address=tcp://${RECEIVER_NODES[$i]}:${RECEIVER_PORT}"
    else
        RECEIVER_ADDRESSES="${RECEIVER_ADDRESSES},address=tcp://${RECEIVER_NODES[$i]}:${RECEIVER_PORT}"
    fi
done

# Track the PID of every background srun step so that device failures can be
# propagated to the job exit status (a bare 'wait' would always return 0 and
# the Slurm job would be marked COMPLETED even if a device crashed).
PIDS=()

# Start all devices in parallel (ZeroMQ handles bind/connect order automatically)
echo "Starting synchronizer on $SYNC_NODE..."
srun --nodes=1 --ntasks=1 --nodelist="$SYNC_NODE" \
    "${FAIRSOFT_BIN}/fairmq-ex-n-m-synchronizer" \
    --id Sync \
    --channel-config "name=sync,type=pub,method=bind,address=tcp://*:${SYNC_PORT}" \
    --rate "${RATE}" \
    --verbosity veryhigh \
    --control static &
PIDS+=($!)

echo "Starting ${NUM_RECEIVERS} receivers..."
for i in $(seq 0 $((NUM_RECEIVERS - 1))); do
    RECEIVER_ID="Receiver$((i + 1))"
    RECEIVER_PORT=$((RECEIVER_BASE_PORT + i))
    RECEIVER_NODE=${RECEIVER_NODES[$i]}

    echo " Starting $RECEIVER_ID on $RECEIVER_NODE:$RECEIVER_PORT"
    # Each receiver binds its own PULL socket and expects NUM_SENDERS peers.
    srun --nodes=1 --ntasks=1 --nodelist="$RECEIVER_NODE" \
        "${FAIRSOFT_BIN}/fairmq-ex-n-m-receiver" \
        --id "$RECEIVER_ID" \
        --channel-config "name=data,type=pull,method=bind,address=tcp://*:${RECEIVER_PORT}" \
        --num-senders "${NUM_SENDERS}" \
        --verbosity veryhigh \
        --control static &
    PIDS+=($!)
done

echo "Starting ${NUM_SENDERS} senders..."
for i in $(seq 0 $((NUM_SENDERS - 1))); do
    SENDER_ID="Sender$((i + 1))"
    SENDER_NODE=${SENDER_NODES[$i]}

    echo " Starting $SENDER_ID on $SENDER_NODE"
    # Senders subscribe to the synchronizer (sync channel) and connect a PUSH
    # sub-socket to every receiver (data channel, one address per receiver).
    srun --nodes=1 --ntasks=1 --nodelist="$SENDER_NODE" \
        "${FAIRSOFT_BIN}/fairmq-ex-n-m-sender" \
        --id "$SENDER_ID" \
        --channel-config "name=sync,type=sub,method=connect,address=tcp://${SYNC_NODE}:${SYNC_PORT}" \
                         "name=data,type=push,method=connect,${RECEIVER_ADDRESSES}" \
        --sender-index "$i" \
        --subtimeframe-size "${SUBTIMEFRAME_SIZE}" \
        --num-receivers "${NUM_RECEIVERS}" \
        --verbosity veryhigh \
        --control static &
    PIDS+=($!)
done

echo "==========================================="
echo "All devices started. Waiting for completion..."
echo "Press Ctrl+C to terminate all processes."
echo "==========================================="

# Wait for every device individually and remember the first non-zero exit
# status, so a failing device marks the whole Slurm job as FAILED.
STATUS=0
for pid in "${PIDS[@]}"; do
    wait "$pid" || STATUS=$?
done

echo "==========================================="
echo "FairMQ n-m example completed"
echo "==========================================="
exit "$STATUS"