From 3a325e7932aa969fc5a9c4509983520d39d877bf Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 4 Nov 2025 10:25:34 -0800 Subject: [PATCH 1/2] Add multi-node support via SSH --- README.md | 20 ++ cpp/tools/rrun.cpp | 564 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 532 insertions(+), 52 deletions(-) diff --git a/README.md b/README.md index 120cd7184..48a3cb755 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,26 @@ cmake --build . --target rrun ./tools/rrun -v -n 4 -g 0,1,2,3 ./benchmarks/bench_comm -C ucxx ``` +#### Multi-Node Usage (SSH) + +```bash +# Create hostfile +cat > hosts.txt << EOF +node1 slots=8 gpus=0,1,2,3,4,5,6,7 +node2 slots=8 gpus=0,1,2,3,4,5,6,7 +EOF + +# Launch across multiple nodes +./tools/rrun --hostfile hosts.txt \ + -d /shared/nfs/coord \ + ./benchmarks/bench_comm -C ucxx-bootstrap + +# With processes per node override +./tools/rrun --hostfile hosts.txt --ppn 4 \ + -d /shared/nfs/coord \ + ./benchmarks/bench_comm -C ucxx-bootstrap +``` + ## Algorithms ### Table Shuffle Service Example of a MPI program that uses the shuffler: diff --git a/cpp/tools/rrun.cpp b/cpp/tools/rrun.cpp index 93379905d..14005f70e 100644 --- a/cpp/tools/rrun.cpp +++ b/cpp/tools/rrun.cpp @@ -4,12 +4,13 @@ */ /** - * @brief Process launcher for multi-GPU applications (single-node). + * @brief Process launcher for multi-GPU applications. * * rrun is a lightweight alternative to mpirun that: - * - Launches multiple processes locally without requiring MPI + * - Launches multiple processes without requiring MPI * - Automatically assigns GPUs to ranks * - Provides file-based coordination for inter-process synchronization + * - Supports both single-node and multi-node (SSH) deployments * - Tags process output with rank numbers (--tag-output feature) */ @@ -45,18 +46,32 @@ namespace { static std::mutex output_mutex; +/** + * @brief Host information for multi-node deployment. + */ +struct HostInfo { + std::string hostname; + int slots{1}; // Number of processes to launch on this host + std::vector gpus; // GPU IDs available on this host +}; + /** * @brief Configuration for the rrun launcher. */ struct Config { int nranks{1}; // Total number of ranks + int ppn{-1}; // Processes per node (-1 = auto from hostfile) std::string app_binary; // Application binary path std::vector app_args; // Arguments to pass to application - std::vector gpus; // GPU IDs to use + std::vector gpus; // GPU IDs to use (single-node mode) + std::vector hosts; // Host list for multi-node mode + std::string hostfile; // Path to hostfile std::string coord_dir; // Coordination directory + std::string ssh_opts; // Additional SSH options std::map env_vars; // Environment variables to pass bool verbose{false}; // Verbose output bool cleanup{true}; // Cleanup coordination directory on exit + bool use_ssh{false}; // Multi-node mode via SSH bool tag_output{false}; // Tag output with rank number }; @@ -84,8 +99,8 @@ std::string generate_session_id() { /** * @brief Detect available GPUs on the system. * - * Currently using nvidia-smi to detect GPUs. This may be replaced with NVML in the - * future. + * Currently using nvidia-smi to simplify multi-node detection. This will be replaced with + * NVML in the future. * * @return Vector of monotonically increasing GPU indices, as observed in nvidia-smi. */ @@ -110,6 +125,98 @@ std::vector detect_gpus() { return gpus; } +/** + * @brief Parse hostfile in OpenMPI/Slurm format. + * + * Format examples: + * node1 slots=4 gpus=0,1,2,3 + * node2 slots=4 + * node3 + */ +std::vector parse_hostfile(std::string const& path) { + std::ifstream file(path); + if (!file) { + throw std::runtime_error("Failed to open hostfile: " + path); + } + + std::vector hosts; + std::string line; + int line_num = 0; + + while (std::getline(file, line)) { + ++line_num; + + // Remove comments + auto comment_pos = line.find('#'); + if (comment_pos != std::string::npos) { + line = line.substr(0, comment_pos); + } + + // Trim whitespace + line.erase(0, line.find_first_not_of(" \t\r\n")); + line.erase(line.find_last_not_of(" \t\r\n") + 1); + + // Skip empty lines + if (line.empty()) + continue; + + HostInfo host; + std::istringstream iss(line); + iss >> host.hostname; + + if (host.hostname.empty()) { + std::cerr << "Warning: Empty hostname on line " << line_num << std::endl; + continue; + } + + // Parse key=value pairs + std::string token; + while (iss >> token) { + auto eq_pos = token.find('='); + if (eq_pos == std::string::npos) { + std::cerr << "Warning: Invalid token '" << token << "' on line " + << line_num << std::endl; + continue; + } + + std::string key = token.substr(0, eq_pos); + std::string value = token.substr(eq_pos + 1); + + if (key == "slots") { + try { + host.slots = std::stoi(value); + } catch (...) { + throw std::runtime_error( + "Invalid slots value on line " + std::to_string(line_num) + ); + } + } else if (key == "gpus") { + // Parse comma-separated GPU list + std::istringstream gpu_stream(value); + std::string gpu_id; + while (std::getline(gpu_stream, gpu_id, ',')) { + try { + host.gpus.push_back(std::stoi(gpu_id)); + } catch (...) { + throw std::runtime_error( + "Invalid GPU ID '" + gpu_id + "' on line " + + std::to_string(line_num) + ); + } + } + } + } + + hosts.push_back(host); + } + + if (hosts.empty()) { + throw std::runtime_error("No valid hosts found in hostfile: " + path); + } + + return hosts; +} + /** * @brief Print usage information. */ @@ -121,9 +228,15 @@ void print_usage(std::string_view prog_name) { << " -n Number of ranks to launch (required)\n" << " -g Comma-separated list of GPU IDs (e.g., 0,1,2,3)\n" << " If not specified, auto-detect available GPUs\n\n" + << "Multi-Node Options:\n" + << " --hostfile Path to hostfile (enables SSH mode)\n" + << " --ppn Processes per node (default: from hostfile slots)\n" + << " --ssh-opts Additional SSH options (e.g., '-i ~/.ssh/key')\n" + << " --tag-output Tag stdout and stderr with rank number\n\n" << "Common Options:\n" - << " -d Coordination directory (default: /tmp/rrun_)\n" - << " --tag-output Tag stdout and stderr with rank number\n" + << " -d Coordination directory (default: " + "/tmp/rrun_)\n" + << " Must be on shared filesystem for multi-node\n" << " -x, --set-env \n" << " Set environment variable for all ranks\n" << " Can be specified multiple times\n" @@ -141,6 +254,17 @@ void print_usage(std::string_view prog_name) { << " # Launch with custom environment variables:\n" << " rrun -n 2 -x UCX_TLS=cuda_copy,cuda_ipc,rc,tcp -x MY_VAR=value " "./bench_comm\n\n" + << "Multi-Node Examples:\n" + << " # Launch using hostfile:\n" + << " rrun --hostfile hosts.txt ./bench_comm -C ucxx-bootstrap\n\n" + << " # Launch 2 processes per node:\n" + << " rrun --hostfile hosts.txt --ppn 2 ./bench_comm -C ucxx-bootstrap\n\n" + << " # Use specific coordination directory:\n" + << " rrun --hostfile hosts.txt -d /shared/nfs/coord ./bench_comm\n\n" + << "Hostfile Format:\n" + << " node1 slots=4 gpus=0,1,2,3\n" + << " node2 slots=4 gpus=0,1,2,3\n" + << " # Lines starting with # are comments\n" << std::endl; } @@ -188,6 +312,25 @@ Config parse_args(int argc, char* argv[]) { throw std::runtime_error("Missing argument for -g"); } cfg.gpus = parse_gpu_list(argv[++i]); + } else if (arg == "--hostfile") { + if (i + 1 >= argc) { + throw std::runtime_error("Missing argument for --hostfile"); + } + cfg.hostfile = argv[++i]; + cfg.use_ssh = true; + } else if (arg == "--ppn") { + if (i + 1 >= argc) { + throw std::runtime_error("Missing argument for --ppn"); + } + cfg.ppn = std::stoi(argv[++i]); + if (cfg.ppn <= 0) { + throw std::runtime_error("Invalid ppn: " + std::to_string(cfg.ppn)); + } + } else if (arg == "--ssh-opts") { + if (i + 1 >= argc) { + throw std::runtime_error("Missing argument for --ssh-opts"); + } + cfg.ssh_opts = argv[++i]; } else if (arg == "--tag-output") { cfg.tag_output = true; } else if (arg == "-d") { @@ -236,26 +379,57 @@ Config parse_args(int argc, char* argv[]) { throw std::runtime_error("Missing application binary"); } - // Single-node mode validation - if (cfg.nranks <= 0) { - throw std::runtime_error("Number of ranks (-n) must be specified and positive"); - } + if (cfg.use_ssh) { + // Parse hostfile if in SSH mode + if (cfg.hostfile.empty()) { + throw std::runtime_error("--hostfile required for multi-node mode"); + } + cfg.hosts = parse_hostfile(cfg.hostfile); + + // Calculate total ranks from hostfile if not specified + if (cfg.nranks == 1) { // Default value, not explicitly set + if (cfg.ppn > 0) { + // ppn specified, calculate from hosts * ppn + cfg.nranks = cfg.ppn * static_cast(cfg.hosts.size()); + } else { + // Use slots from hostfile + cfg.nranks = 0; + for (auto const& host : cfg.hosts) { + cfg.nranks += host.slots; + } + } + } - // Auto-detect GPUs if not specified - if (cfg.gpus.empty()) { - cfg.gpus = detect_gpus(); + // Validate coordination directory for multi-node + if (cfg.coord_dir.empty()) { + std::cerr << "Warning: No coordination directory specified for multi-node.\n" + << "Using /tmp/rrun_ - ensure this is on a shared filesystem!" + << std::endl; + } + } else { + // Single-node mode + if (cfg.nranks <= 0) { + throw std::runtime_error( + "Number of ranks (-n) must be specified and positive" + ); + } + + // Auto-detect GPUs if not specified if (cfg.gpus.empty()) { - std::cerr - << "Warning: No GPUs detected. CUDA_VISIBLE_DEVICES will not be set." - << std::endl; + cfg.gpus = detect_gpus(); + if (cfg.gpus.empty()) { + std::cerr + << "Warning: No GPUs detected. CUDA_VISIBLE_DEVICES will not be set." + << std::endl; + } } - } - // Validate GPU count vs rank count - if (!cfg.gpus.empty() && cfg.nranks > static_cast(cfg.gpus.size())) { - std::cerr << "Warning: Number of ranks (" << cfg.nranks - << ") exceeds number of GPUs (" << cfg.gpus.size() - << "). Multiple ranks will share GPUs." << std::endl; + // Validate GPU count vs rank count + if (!cfg.gpus.empty() && cfg.nranks > static_cast(cfg.gpus.size())) { + std::cerr << "Warning: Number of ranks (" << cfg.nranks + << ") exceeds number of GPUs (" << cfg.gpus.size() + << "). Multiple ranks will share GPUs." << std::endl; + } } // Generate coordination directory if not specified @@ -391,6 +565,115 @@ pid_t launch_rank_local( ); } +/** + * @brief Launch a single rank on a remote host via SSH. + */ +pid_t launch_rank_ssh( + Config const& cfg, + int rank, + std::string const& hostname, + int local_rank, + std::vector const& host_gpus, + int* out_fd_stdout, + int* out_fd_stderr +) { + return fork_with_piped_stdio( + out_fd_stdout, + out_fd_stderr, + /*combine_stderr*/ true, + [&cfg, rank, &hostname, local_rank, &host_gpus]() { + // Build the remote command + std::ostringstream remote_cmd; + + // Set custom environment variables first + for (auto const& env_pair : cfg.env_vars) { + remote_cmd << env_pair.first << "="; + // Quote value if it contains spaces or special characters + if (env_pair.second.find(' ') != std::string::npos + || env_pair.second.find('"') != std::string::npos + || env_pair.second.find('\'') != std::string::npos) + { + // Escape double quotes and wrap in double quotes + std::string escaped_value = env_pair.second; + size_t pos = 0; + while ((pos = escaped_value.find('"', pos)) != std::string::npos) { + escaped_value.insert(pos, "\\"); + pos += 2; + } + remote_cmd << "\"" << escaped_value << "\" "; + } else { + remote_cmd << env_pair.second << " "; + } + } + + // Set environment variables + remote_cmd << "RAPIDSMPF_RANK=" << rank << " "; + remote_cmd << "RAPIDSMPF_NRANKS=" << cfg.nranks << " "; + remote_cmd << "RAPIDSMPF_COORD_DIR=" << cfg.coord_dir << " "; + + // Set CUDA_VISIBLE_DEVICES if GPUs specified for this host + if (!host_gpus.empty()) { + int gpu_id = + host_gpus[static_cast(local_rank) % host_gpus.size()]; + remote_cmd << "CUDA_VISIBLE_DEVICES=" << gpu_id << " "; + } + + // Build inner application command with args + std::ostringstream app_cmd; + app_cmd << cfg.app_binary; + for (auto const& arg : cfg.app_args) { + if (arg.find(' ') != std::string::npos) { + app_cmd << " \"" << arg << "\""; + } else { + app_cmd << " " << arg; + } + } + + // Wrap with POSIX sh to launch app in background, write its PID, then wait + remote_cmd << "sh -c '"; + remote_cmd << "PGFILE=\"$RAPIDSMPF_COORD_DIR/pids/rank_" << rank + << ".pgid\"; "; + remote_cmd << "umask 022; mkdir -p \"$(dirname \"$PGFILE\")\"; "; + remote_cmd << "stdbuf -o0 -e0 " << app_cmd.str() << " & "; + remote_cmd << "app_pid=$!; echo \"$app_pid\" > \"$PGFILE\"; "; + remote_cmd << "wait \"$app_pid\""; + remote_cmd << "'"; + + static std::string remote_cmd_storage; + remote_cmd_storage = remote_cmd.str(); + + // Build SSH command + std::vector ssh_args; + ssh_args.push_back(const_cast("ssh")); + + // Add SSH options if provided + if (!cfg.ssh_opts.empty()) { + // Simple parsing - split on spaces + std::istringstream iss(cfg.ssh_opts); + static std::vector opts_storage; + std::string opt; + while (iss >> opt) { + opts_storage.push_back(opt); + } + for (auto const& opt : opts_storage) { + ssh_args.push_back(const_cast(opt.c_str())); + } + } + + // Add hostname and remote command + static std::string hostname_storage = hostname; + ssh_args.push_back(const_cast(hostname_storage.c_str())); + ssh_args.push_back(const_cast(remote_cmd_storage.c_str())); + ssh_args.push_back(nullptr); + + // Execute SSH + execvp("ssh", ssh_args.data()); + std::cerr << "Failed to execute ssh: " << std::strerror(errno) << std::endl; + _exit(1); + } + ); +} + /** * @brief Wait for all child processes and check their exit status. */ @@ -432,6 +715,95 @@ int wait_for_ranks(std::vector const& pids) { return overall_status; } + +/** + * @brief Issue remote kills to process groups read from pidfiles. + */ +void terminate_remote_process_groups( + Config const& cfg, std::vector const& rank_to_host, int signum +) { + std::vector ssh_pids; + ssh_pids.reserve(static_cast(cfg.nranks)); + for (int rank = 0; rank < cfg.nranks; ++rank) { + std::string const* host = nullptr; + if (!rank_to_host.empty() && static_cast(rank) < rank_to_host.size()) { + host = &rank_to_host[static_cast(rank)]; + } + if (host == nullptr || host->empty()) { + continue; + } + + std::string pgfile = + cfg.coord_dir + "/pids/rank_" + std::to_string(rank) + ".pgid"; + + // We avoid reading the file here to keep logic simple; let remote shell read it + std::ostringstream remote_kill; + remote_kill << "sh -c 'pid=$(cat \"" << pgfile + << "\" 2>/dev/null) || exit 0; kill -" << signum + << " \"$pid\" >/dev/null 2>&1 || true'"; + + // Build SSH command + std::vector ssh_args; + ssh_args.push_back(const_cast("ssh")); + + // Add SSH options if provided + std::istringstream iss(cfg.ssh_opts); + static std::vector opts_storage_kill; + opts_storage_kill.clear(); + if (!cfg.ssh_opts.empty()) { + std::string opt; + while (iss >> opt) { + opts_storage_kill.push_back(opt); + } + for (auto const& opt_str : opts_storage_kill) { + ssh_args.push_back(const_cast(opt_str.c_str())); + } + } + + static std::string host_storage; // ensure lifetime across execvp call + static std::string cmd_storage; + host_storage = *host; + cmd_storage = remote_kill.str(); + + ssh_args.push_back(const_cast(host_storage.c_str())); + ssh_args.push_back(const_cast(cmd_storage.c_str())); + ssh_args.push_back(nullptr); + + pid_t pid = fork(); + if (pid == 0) { + execvp("ssh", ssh_args.data()); + _exit(127); + } else if (pid > 0) { + ssh_pids.push_back(pid); + } + } + // Wait for all ssh kill commands to complete + for (pid_t pid : ssh_pids) { + int status; + (void)waitpid(pid, &status, 0); + } +} + +/** + * @brief Bounded retries with escalation to ensure reliable remote termination. + */ +void terminate_remote_process_groups_with_retries( + Config const& cfg, std::vector const& rank_to_host, int first_signum +) { + // Attempts: 6 total, ~0.5s spacing -> ~3s total; escalate INT->TERM->KILL + for (int attempt = 0; attempt < 6; ++attempt) { + int sig = first_signum; + if (attempt >= 2 && attempt < 4) { + sig = SIGTERM; + } else if (attempt >= 4) { + sig = SIGKILL; + } + terminate_remote_process_groups(cfg, rank_to_host, sig); + if (attempt < 5) { + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } + } +} } // namespace int main(int argc, char* argv[]) { @@ -441,20 +813,44 @@ int main(int argc, char* argv[]) { if (cfg.verbose) { std::cout << "rrun configuration:\n"; - std::cout << " Mode: Single-node\n" - << " GPUs: "; - if (cfg.gpus.empty()) { - std::cout << "(none)\n"; + if (cfg.use_ssh) { + std::cout << " Mode: Multi-node (SSH)\n" + << " Hosts: " << cfg.hosts.size() << "\n"; + for (size_t i = 0; i < cfg.hosts.size(); ++i) { + std::cout << " [" << i << "] " << cfg.hosts[i].hostname + << " (slots=" << cfg.hosts[i].slots; + if (!cfg.hosts[i].gpus.empty()) { + std::cout << ", gpus="; + for (size_t j = 0; j < cfg.hosts[i].gpus.size(); ++j) { + if (j > 0) + std::cout << ","; + std::cout << cfg.hosts[i].gpus[j]; + } + } + std::cout << ")\n"; + } + if (cfg.ppn > 0) { + std::cout << " PPN: " << cfg.ppn << "\n"; + } + if (!cfg.ssh_opts.empty()) { + std::cout << " SSH Options: " << cfg.ssh_opts << "\n"; + } + if (cfg.tag_output) { + std::cout << " Tag Output: Yes\n"; + } } else { - for (size_t i = 0; i < cfg.gpus.size(); ++i) { - if (i > 0) - std::cout << ", "; - std::cout << cfg.gpus[i]; + std::cout << " Mode: Single-node\n" + << " GPUs: "; + if (cfg.gpus.empty()) { + std::cout << "(none)\n"; + } else { + for (size_t i = 0; i < cfg.gpus.size(); ++i) { + if (i > 0) + std::cout << ", "; + std::cout << cfg.gpus[i]; + } + std::cout << "\n"; } - std::cout << "\n"; - } - if (cfg.tag_output) { - std::cout << " Tag Output: Yes\n"; } std::cout << " Ranks: " << cfg.nranks << "\n" << " Application: " << cfg.app_binary << "\n" @@ -474,10 +870,18 @@ int main(int argc, char* argv[]) { } std::filesystem::create_directories(cfg.coord_dir); + // Ensure pids subdirectory exists for remote PGID files + std::filesystem::create_directories(cfg.coord_dir + "/pids"); std::vector pids; pids.reserve(static_cast(cfg.nranks)); + // Track rank to host mapping for remote termination handling + std::vector rank_to_host; + if (cfg.use_ssh) { + rank_to_host.resize(static_cast(cfg.nranks)); + } + // Block SIGINT/SIGTERM in this thread; a dedicated thread will handle them. sigset_t signal_set; sigemptyset(&signal_set); @@ -523,28 +927,74 @@ int main(int argc, char* argv[]) { }); }; - // Single-node local mode - for (int rank = 0; rank < cfg.nranks; ++rank) { - int fd_out = -1; - int fd_err = -1; - pid_t pid = launch_rank_local(cfg, rank, &fd_out, &fd_err); - pids.push_back(pid); + if (cfg.use_ssh) { + // Multi-node SSH mode + int rank = 0; + for (auto const& host : cfg.hosts) { + int ranks_on_host = (cfg.ppn > 0) ? cfg.ppn : host.slots; - if (cfg.verbose) { - std::cout << "Launched rank " << rank << " (PID " << pid << ")"; - if (!cfg.gpus.empty()) { - std::cout << " on GPU " - << cfg.gpus[static_cast(rank) % cfg.gpus.size()]; + for (int local_rank = 0; local_rank < ranks_on_host; ++local_rank) { + if (rank >= cfg.nranks) + break; + + int fd_out = -1; + pid_t pid = launch_rank_ssh( + cfg, rank, host.hostname, local_rank, host.gpus, &fd_out, nullptr + ); + pids.push_back(pid); + if (static_cast(rank) < rank_to_host.size()) { + rank_to_host[static_cast(rank)] = host.hostname; + } + + if (cfg.verbose) { + std::cout << "Launched rank " << rank << " (PID " << pid + << ") on " << host.hostname; + if (!host.gpus.empty()) { + std::cout << " GPU " + << host.gpus + [static_cast(local_rank) + % host.gpus.size()]; + } + std::cout << std::endl; + } + // Parent-side forwarder for SSH combined stdout/stderr + start_forwarder(fd_out, rank, false); + ++rank; } - std::cout << std::endl; } - // Parent-side forwarders for local stdout and stderr - start_forwarder(fd_out, rank, false); - start_forwarder(fd_err, rank, true); + } else { + // Single-node local mode + for (int rank = 0; rank < cfg.nranks; ++rank) { + int fd_out = -1; + int fd_err = -1; + pid_t pid = launch_rank_local(cfg, rank, &fd_out, &fd_err); + pids.push_back(pid); + + if (cfg.verbose) { + std::cout << "Launched rank " << rank << " (PID " << pid << ")"; + if (!cfg.gpus.empty()) { + std::cout + << " on GPU " + << cfg.gpus[static_cast(rank) % cfg.gpus.size()]; + } + std::cout << std::endl; + } + // Parent-side forwarders for local stdout and stderr + start_forwarder(fd_out, rank, false); + start_forwarder(fd_err, rank, true); + } } - // Start a signal-waiting thread to forward signals. - std::thread([signal_set, &pids, suppress_output]() mutable { + // Start a signal-waiting thread to forward signals and handle remote termination. + auto remote_kill_done = std::make_shared>(false); + auto ack_printed = std::make_shared>(false); + std::thread([signal_set, + &pids, + &cfg, + &rank_to_host, + remote_kill_done, + ack_printed, + suppress_output]() mutable { for (;;) { int sig = 0; int rc = sigwait(&signal_set, &sig); @@ -557,6 +1007,16 @@ int main(int argc, char* argv[]) { for (pid_t pid : pids) { std::ignore = kill(pid, sig); } + // In SSH mode, terminate remote process groups once with retries + if (cfg.use_ssh && !remote_kill_done->exchange(true)) { + if (!ack_printed->exchange(true)) { + std::cerr << "Termination requested (signal " << sig + << ") - terminating remote ranks, please wait, this " + "may take a while..." + << std::endl; + } + terminate_remote_process_groups_with_retries(cfg, rank_to_host, sig); + } } }).detach(); From 0f5d97e6d8fc64877767415e8bfc021f3086b514 Mon Sep 17 00:00:00 2001 From: Peter Andreas Entschev Date: Tue, 4 Nov 2025 10:40:27 -0800 Subject: [PATCH 2/2] Update docs to ucxx communicator --- README.md | 4 ++-- cpp/tools/rrun.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 48a3cb755..41ba3c1d2 100644 --- a/README.md +++ b/README.md @@ -110,12 +110,12 @@ EOF # Launch across multiple nodes ./tools/rrun --hostfile hosts.txt \ -d /shared/nfs/coord \ - ./benchmarks/bench_comm -C ucxx-bootstrap + ./benchmarks/bench_comm -C ucxx # With processes per node override ./tools/rrun --hostfile hosts.txt --ppn 4 \ -d /shared/nfs/coord \ - ./benchmarks/bench_comm -C ucxx-bootstrap + ./benchmarks/bench_comm -C ucxx ``` ## Algorithms diff --git a/cpp/tools/rrun.cpp b/cpp/tools/rrun.cpp index 14005f70e..ed9347d9a 100644 --- a/cpp/tools/rrun.cpp +++ b/cpp/tools/rrun.cpp @@ -256,9 +256,9 @@ void print_usage(std::string_view prog_name) { "./bench_comm\n\n" << "Multi-Node Examples:\n" << " # Launch using hostfile:\n" - << " rrun --hostfile hosts.txt ./bench_comm -C ucxx-bootstrap\n\n" + << " rrun --hostfile hosts.txt ./bench_comm -C ucxx\n\n" << " # Launch 2 processes per node:\n" - << " rrun --hostfile hosts.txt --ppn 2 ./bench_comm -C ucxx-bootstrap\n\n" + << " rrun --hostfile hosts.txt --ppn 2 ./bench_comm -C ucxx\n\n" << " # Use specific coordination directory:\n" << " rrun --hostfile hosts.txt -d /shared/nfs/coord ./bench_comm\n\n" << "Hostfile Format:\n"