diff --git a/src/vt-lb/algo/driver/driver.impl.h b/src/vt-lb/algo/driver/driver.impl.h index 83b63ef..a502d84 100644 --- a/src/vt-lb/algo/driver/driver.impl.h +++ b/src/vt-lb/algo/driver/driver.impl.h @@ -57,10 +57,10 @@ void runLB(DriverAlgoEnum algo, CommT& comm, ConfigT config, std::unique_ptr>(comm, config); - lb->inputData(std::move(phase_data)); - lb->run(); + // Run TemperedLB algorithm + auto lb = std::make_unique>(comm, config); + lb->inputData(std::move(phase_data)); + lb->run(); } break; default: @@ -68,6 +68,29 @@ void runLB(DriverAlgoEnum algo, CommT& comm, ConfigT config, std::unique_ptr +std::unordered_map> +runLBAllGather(DriverAlgoEnum algo, CommT& comm, ConfigT config, std::unique_ptr phase_data) { + switch (algo) { + case DriverAlgoEnum::None: + // No load balancing + return {}; + break; + case DriverAlgoEnum::TemperedLB: + { + // Run TemperedLB algorithm + auto lb = std::make_unique>(comm, config); + lb->inputData(std::move(phase_data)); + auto local_tasks = lb->run(); + return lb->getGlobalDistribution(local_tasks); + } + break; + default: + throw std::runtime_error("Invalid load balancer algorithm"); + return {}; + } +} + } /* end namespace vt_lb */ #endif /*INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_IMPL_H*/ \ No newline at end of file diff --git a/src/vt-lb/algo/temperedlb/temperedlb.h b/src/vt-lb/algo/temperedlb/temperedlb.h index 6ba76d7..b9ed552 100644 --- a/src/vt-lb/algo/temperedlb/temperedlb.h +++ b/src/vt-lb/algo/temperedlb/temperedlb.h @@ -371,6 +371,24 @@ struct TemperedLB final : baselb::BaseLB { } } + /** + * @brief Get the global distribution of tasks after load balancing + * + * @param local_tasks The set of tasks on the this rank + */ + std::unordered_map> + getGlobalDistribution(std::unordered_set const& local_tasks) { + std::vector local_task_vec( + local_tasks.begin(), local_tasks.end() + ); + auto all_task_vecs = handle_.template allgather( + local_task_vec.data(), static_cast(local_task_vec.size()) + ); + + return all_task_vecs; + } + + Clusterer const* getClusterer() const { return clusterer_.get(); } private: @@ -402,12 +420,15 @@ struct TemperedLB final : baselb::BaseLB { double global_min = 0.0; double global_max = 0.0; double global_sum = 0.0; - // For now, do P reductions since we don't have broadcast yet - for (int p = 0; p < comm_.numRanks(); ++p) { - handle_.reduce(p, MPI_DOUBLE, MPI_MIN, &local_value, &global_min, 1); - handle_.reduce(p, MPI_DOUBLE, MPI_MAX, &local_value, &global_max, 1); - handle_.reduce(p, MPI_DOUBLE, MPI_SUM, &local_value, &global_sum, 1); - } + + handle_.reduce(0, MPI_DOUBLE, MPI_MIN, &local_value, &global_min, 1); + handle_.reduce(0, MPI_DOUBLE, MPI_MAX, &local_value, &global_max, 1); + handle_.reduce(0, MPI_DOUBLE, MPI_SUM, &local_value, &global_sum, 1); + + handle_.broadcast(0, MPI_DOUBLE, &global_min, 1); + handle_.broadcast(0, MPI_DOUBLE, &global_max, 1); + handle_.broadcast(0, MPI_DOUBLE, &global_sum, 1); + double global_avg = global_sum / static_cast(comm_.numRanks()); double I = 0; if (global_avg > 0.0) { diff --git a/src/vt-lb/model/Communication.h b/src/vt-lb/model/Communication.h index 7277b8f..262e5b0 100644 --- a/src/vt-lb/model/Communication.h +++ b/src/vt-lb/model/Communication.h @@ -56,21 +56,23 @@ namespace vt_lb::model { struct Edge { Edge() = default; Edge(TaskType from, TaskType to, BytesType volume) - : from_(from), to_(to), volume_(volume) + : from_(from), to_(to), volume_(volume), num_messages_(1) {} // New ctor with explicit endpoint ranks Edge(TaskType from, TaskType to, BytesType volume, RankType from_rank, RankType to_rank) - : from_(from), to_(to), volume_(volume), from_rank_(from_rank), to_rank_(to_rank) + : from_(from), to_(to), volume_(volume), from_rank_(from_rank), to_rank_(to_rank), num_messages_(1) {} TaskType getFrom() const { return from_; } TaskType getTo() const { return to_; } BytesType getVolume() const { return volume_; } - // New rank accessors + void setVolume(BytesType volume) { volume_ = volume; } RankType getFromRank() const { return from_rank_; } RankType getToRank() const { return to_rank_; } void setFromRank(RankType rank) { from_rank_ = rank; } void setToRank(RankType rank) { to_rank_ = rank; } + int getNumMessages() const { return num_messages_; } + void setNumMessages(int num) { num_messages_ = num; } template void serialize(Serializer& s) { @@ -79,6 +81,7 @@ struct Edge { s | volume_; s | from_rank_; s | to_rank_; + s | num_messages_; } private: @@ -87,6 +90,7 @@ struct Edge { BytesType volume_ = 0.0; RankType from_rank_ = invalid_rank; RankType to_rank_ = invalid_rank; + int num_messages_ = 0; }; /** diff --git a/src/vt-lb/model/PhaseData.h b/src/vt-lb/model/PhaseData.h index 9f36744..6a5c8db 100644 --- a/src/vt-lb/model/PhaseData.h +++ b/src/vt-lb/model/PhaseData.h @@ -69,6 +69,21 @@ struct PhaseData { } communications_.push_back(e); } + void aggregateCommunication(Edge const& e) { + for (int i = 0; i < static_cast(communications_.size()); ++i) { + auto& comm = communications_[i]; + if (comm.getFrom() == e.getFrom() && comm.getTo() == e.getTo()) { + comm.setVolume(comm.getVolume() + e.getVolume()); + comm.setNumMessages(comm.getNumMessages() + e.getNumMessages()); + task_messages_out_[e.getFrom()] += e.getNumMessages(); + task_messages_in_[e.getTo()] += e.getNumMessages(); + return; + } + } + communications_.push_back(e); + task_messages_out_[e.getFrom()] += e.getNumMessages(); + task_messages_in_[e.getTo()] += e.getNumMessages(); + } void addSharedBlock(SharedBlock const& b) { shared_blocks_.emplace(b.getId(), b); } RankType getRank() const { return rank_; } @@ -98,6 +113,25 @@ struct PhaseData { std::unordered_map const& getTasksMap() const { return tasks_; } std::vector const& getCommunications() const { return communications_; } std::vector& getCommunicationsRef() { return communications_; } + + std::size_t getCommunicationMessagesCount() const { + std::size_t count = 0; + for (const auto& comm : communications_) { + count += comm.getNumMessages(); + } + return count; + } + + std::size_t getTaskMessagesOut(TaskType id) const { + auto it = task_messages_out_.find(id); + return it != task_messages_out_.end() ? it->second : 0; + } + + std::size_t getTaskMessagesIn(TaskType id) const { + auto it = task_messages_in_.find(id); + return it != task_messages_in_.end() ? it->second : 0; + } + std::unordered_map const& getSharedBlocksMap() const { return shared_blocks_; } std::unordered_set getTaskIds() const { std::unordered_set ids; @@ -133,6 +167,8 @@ struct PhaseData { tasks_.clear(); communications_.clear(); shared_blocks_.clear(); + task_messages_out_.clear(); + task_messages_in_.clear(); rank_footprint_bytes_ = 0.0; rank_max_memory_available_ = 0.0; } @@ -157,6 +193,8 @@ struct PhaseData { s | tasks_; s | communications_; s | shared_blocks_; + s | task_messages_out_; + s | task_messages_in_; s | rank_footprint_bytes_; s | rank_max_memory_available_; } @@ -170,6 +208,8 @@ struct PhaseData { std::unordered_map tasks_; std::vector communications_; std::unordered_map shared_blocks_; + std::unordered_map task_messages_out_; + std::unordered_map task_messages_in_; BytesType rank_footprint_bytes_ = 0.0; BytesType rank_max_memory_available_ = 0.0; }; diff --git a/tests/unit/graph_helpers.h b/tests/unit/graph_helpers.h index 753f444..7157539 100644 --- a/tests/unit/graph_helpers.h +++ b/tests/unit/graph_helpers.h @@ -304,7 +304,7 @@ void generateIntraRankComm( } } double bytes = weight_per_edge_dist(gen); - pd.addCommunication(Edge{from, to, bytes, rank, rank}); + pd.aggregateCommunication(Edge{from, to, bytes, rank, rank}); } } @@ -359,7 +359,7 @@ void generateInterRankComm( while ((remote_rank = remote_rank_dist(gen)) == rank) {} TaskType to = remote_task_dist(gen); double bytes = weight_per_edge_dist(gen); - pd.addCommunication(Edge{from, to, bytes, rank, remote_rank}); + pd.aggregateCommunication(Edge{from, to, bytes, rank, remote_rank}); } for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) { TaskType to = local_endpoints[e]; @@ -367,7 +367,7 @@ void generateInterRankComm( while ((remote_rank = remote_rank_dist(gen)) == rank) {} TaskType from = remote_task_dist(gen); double bytes = weight_per_edge_dist(gen); - pd.addCommunication(Edge{from, to, bytes, remote_rank, rank}); + pd.aggregateCommunication(Edge{from, to, bytes, remote_rank, rank}); } } @@ -428,7 +428,7 @@ void generateRankComm( while ((to = remote_task_dist(gen)) == from) {} } double bytes = weight_per_edge_dist(gen); - pd.addCommunication(Edge{from, to, bytes, rank, remote_rank}); + pd.aggregateCommunication(Edge{from, to, bytes, rank, remote_rank}); } for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) { TaskType to = local_endpoints[e]; @@ -441,7 +441,7 @@ void generateRankComm( while ((from = remote_task_dist(gen)) == to) {} } double bytes = weight_per_edge_dist(gen); - pd.addCommunication(Edge{from, to, bytes, remote_rank, rank}); + pd.aggregateCommunication(Edge{from, to, bytes, remote_rank, rank}); } } diff --git a/tests/unit/helpers/test_graph_helpers.cc b/tests/unit/helpers/test_graph_helpers.cc index 7b770bb..4fd1e83 100644 --- a/tests/unit/helpers/test_graph_helpers.cc +++ b/tests/unit/helpers/test_graph_helpers.cc @@ -551,471 +551,442 @@ TYPED_TEST(TestGraphHelpers, test_generate_tasks_without_shared_blocks2) { } }; -// TYPED_TEST(TestGraphHelpers, test_generate_intra_rank_comm) { -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); - -// std::mt19937 gen(6745 * rank); - -// int min_tasks = 10; -// int max_tasks = 20; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); - -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); - -// int task_count = pd.getTasksMap().size(); - -// int min_endpoints = 3; -// int max_endpoints = 5; -// std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); - -// int min_weight = 100; -// int max_weight = 200; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); - -// generateIntraRankComm(pd, gen, ep_dist, weight_dist); - -// auto &edges = pd.getCommunications(); - -// if (task_count <= 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } - -// EXPECT_GE(edges.size(), task_count * min_endpoints / 2); -// EXPECT_LE(edges.size(), task_count * max_endpoints / 2); - -// std::vector from_task(task_count); -// std::vector to_task(task_count); - -// for (auto &e : edges) { -// EXPECT_EQ(e.getFromRank(), rank); -// EXPECT_EQ(e.getToRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); - -// auto from_task_id = e.getFrom(); -// auto to_task_id = e.getTo(); -// EXPECT_GE(from_task_id, rank * max_tasks); -// EXPECT_LT(from_task_id, (rank + 1) * max_tasks); -// EXPECT_GE(to_task_id, rank * max_tasks); -// EXPECT_LT(to_task_id, (rank + 1) * max_tasks); - -// int from_lid = from_task_id - rank * max_tasks; -// int to_lid = to_task_id - rank * max_tasks; -// if (from_lid >= 0 and from_lid < task_count) { -// ++(from_task[from_lid]); -// } -// if (to_lid >= 0 and to_lid < task_count) { -// ++(to_task[to_lid]); -// } -// } - -// for (int tlid = 0; tlid < task_count; ++tlid) { -// // in case we generated an odd number of endpoints and one was dropped -// int adjusted_min_endpoints = std::max(min_endpoints - 1, 0); -// EXPECT_GE(from_task[tlid] + to_task[tlid], adjusted_min_endpoints); -// EXPECT_LE(from_task[tlid] + to_task[tlid], max_endpoints); -// } -// }; - -// TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_out_only) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); - -// std::mt19937 gen(741 * rank); - -// int min_tasks = 5; -// int max_tasks = 7; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); +TYPED_TEST(TestGraphHelpers, test_generate_intra_rank_comm) { + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); + std::mt19937 gen(6745 * rank); -// int task_count = pd.getTasksMap().size(); + int min_tasks = 10; + int max_tasks = 20; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// int min_endpoints = 1; -// int max_endpoints = 4; -// std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// int min_weight = 1000; -// int max_weight = 2000; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); + int task_count = pd.getTasksMap().size(); -// double frac = 0.0; + int min_endpoints = 3; + int max_endpoints = 5; + std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); -// generateInterRankComm( -// pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac -// ); + int min_weight = 100; + int max_weight = 200; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// auto &edges = pd.getCommunications(); + generateIntraRankComm(pd, gen, ep_dist, weight_dist); -// if (num_ranks == 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// EXPECT_GE(edges.size(), task_count * min_endpoints); -// EXPECT_LE(edges.size(), task_count * max_endpoints); + if (task_count <= 1) { + EXPECT_EQ(message_count, 0); + return; + } -// std::vector from_task(task_count); + EXPECT_GE(message_count, task_count * min_endpoints / 2); + EXPECT_LE(message_count, task_count * max_endpoints / 2); + + for (auto &e : edges) { + EXPECT_EQ(e.getFromRank(), rank); + EXPECT_EQ(e.getToRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); + + auto from_task_id = e.getFrom(); + auto to_task_id = e.getTo(); + EXPECT_GE(from_task_id, rank * max_tasks); + EXPECT_LT(from_task_id, (rank + 1) * max_tasks); + EXPECT_GE(to_task_id, rank * max_tasks); + EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + } -// for (auto &e : edges) { -// EXPECT_EQ(e.getFromRank(), rank); -// EXPECT_NE(e.getToRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); + for (int tlid = 0; tlid < task_count; ++tlid) { + // in case we generated an odd number of endpoints and one was dropped + int adjusted_min_endpoints = std::max(min_endpoints - 1, 0); + auto task_id = rank * max_tasks + tlid; + auto per_task_msgs = pd.getTaskMessagesOut(task_id) + pd.getTaskMessagesIn(task_id); + EXPECT_GE(per_task_msgs, adjusted_min_endpoints); + EXPECT_LE(per_task_msgs, max_endpoints); + } +}; -// auto from_task_id = e.getFrom(); -// EXPECT_GE(from_task_id, rank * max_tasks); -// EXPECT_LT(from_task_id, (rank + 1) * max_tasks); +TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_out_only) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// int from_lid = from_task_id - rank * max_tasks; -// if (from_lid >= 0 and from_lid < task_count) { -// ++(from_task[from_lid]); -// } -// } + std::mt19937 gen(741 * rank); -// for (int tlid = 0; tlid < task_count; ++tlid) { -// EXPECT_GE(from_task[tlid], min_endpoints); -// EXPECT_LE(from_task[tlid], max_endpoints); -// } -// }; + int min_tasks = 5; + int max_tasks = 7; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_in_only) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// std::mt19937 gen(975 * rank); + int task_count = pd.getTasksMap().size(); -// int min_tasks = 4; -// int max_tasks = 8; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); + int min_endpoints = 1; + int max_endpoints = 4; + std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); + int min_weight = 1000; + int max_weight = 2000; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// int task_count = pd.getTasksMap().size(); + double frac = 0.0; -// int min_endpoints = 2; -// int max_endpoints = 5; -// std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); + generateInterRankComm( + pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac + ); -// int min_weight = 500; -// int max_weight = 3000; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// double frac = 1.0; + if (num_ranks == 1) { + EXPECT_EQ(message_count, 0); + return; + } -// generateInterRankComm( -// pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac -// ); + EXPECT_GE(message_count, task_count * min_endpoints); + EXPECT_LE(message_count, task_count * max_endpoints); -// auto &edges = pd.getCommunications(); + for (auto &e : edges) { + EXPECT_EQ(e.getFromRank(), rank); + EXPECT_NE(e.getToRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); + + auto from_task_id = e.getFrom(); + EXPECT_GE(from_task_id, rank * max_tasks); + EXPECT_LT(from_task_id, (rank + 1) * max_tasks); + } -// if (num_ranks == 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } + for (int tlid = 0; tlid < task_count; ++tlid) { + auto task_id = rank * max_tasks + tlid; + auto out_msgs = pd.getTaskMessagesOut(task_id); + EXPECT_GE(out_msgs, (std::size_t)min_endpoints); + EXPECT_LE(out_msgs, (std::size_t)max_endpoints); + } +}; -// EXPECT_GE(edges.size(), task_count * min_endpoints); -// EXPECT_LE(edges.size(), task_count * max_endpoints); +TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_in_only) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// std::vector to_task(task_count); + std::mt19937 gen(975 * rank); -// for (auto &e : edges) { -// EXPECT_NE(e.getFromRank(), rank); -// EXPECT_EQ(e.getToRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); + int min_tasks = 4; + int max_tasks = 8; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// auto to_task_id = e.getTo(); -// EXPECT_GE(to_task_id, rank * max_tasks); -// EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// int to_lid = to_task_id - rank * max_tasks; -// if (to_lid >= 0 and to_lid < task_count) { -// ++(to_task[to_lid]); -// } -// } + int task_count = pd.getTasksMap().size(); -// for (int tlid = 0; tlid < task_count; ++tlid) { -// EXPECT_GE(to_task[tlid], min_endpoints); -// EXPECT_LE(to_task[tlid], max_endpoints); -// } -// }; + int min_endpoints = 2; + int max_endpoints = 5; + std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); -// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); + int min_weight = 500; + int max_weight = 3000; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// std::mt19937 gen(468 * rank); + double frac = 1.0; -// int min_tasks = 3; -// int max_tasks = 8; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); + generateInterRankComm( + pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac + ); -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// int task_count = pd.getTasksMap().size(); + if (num_ranks == 1) { + EXPECT_EQ(message_count, 0); + return; + } -// int min_endpoints = 3; -// int max_endpoints = 6; -// std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); + EXPECT_GE(message_count, task_count * min_endpoints); + EXPECT_LE(message_count, task_count * max_endpoints); -// int min_weight = 10000; -// int max_weight = 20000; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); + for (auto &e : edges) { + EXPECT_NE(e.getFromRank(), rank); + EXPECT_EQ(e.getToRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); + + auto to_task_id = e.getTo(); + EXPECT_GE(to_task_id, rank * max_tasks); + EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + } -// double frac = 0.0; + for (int tlid = 0; tlid < task_count; ++tlid) { + auto task_id = rank * max_tasks + tlid; + auto in_msgs = pd.getTaskMessagesIn(task_id); + EXPECT_GE(in_msgs, (std::size_t)min_endpoints); + EXPECT_LE(in_msgs, (std::size_t)max_endpoints); + } +}; -// generateRankComm( -// pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac -// ); +TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// auto &edges = pd.getCommunications(); + std::mt19937 gen(468 * rank); -// if (num_ranks == 1 and task_count <= 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } + int min_tasks = 3; + int max_tasks = 8; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// EXPECT_GE(edges.size(), task_count * min_endpoints); -// EXPECT_LE(edges.size(), task_count * max_endpoints); + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// std::vector from_task(task_count); + int task_count = pd.getTasksMap().size(); -// for (auto &e : edges) { -// EXPECT_EQ(e.getFromRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); + int min_endpoints = 3; + int max_endpoints = 6; + std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); -// auto from_task_id = e.getFrom(); -// EXPECT_GE(from_task_id, rank * max_tasks); -// EXPECT_LT(from_task_id, (rank + 1) * max_tasks); + int min_weight = 10000; + int max_weight = 20000; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// int from_lid = from_task_id - rank * max_tasks; -// if (from_lid >= 0 and from_lid < task_count) { -// ++(from_task[from_lid]); -// } -// } + double frac = 0.0; -// for (int tlid = 0; tlid < task_count; ++tlid) { -// EXPECT_GE(from_task[tlid], min_endpoints); -// EXPECT_LE(from_task[tlid], max_endpoints); -// } -// }; + generateRankComm( + pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac + ); -// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// std::mt19937 gen(357 * rank); + if (num_ranks == 1 and task_count <= 1) { + EXPECT_EQ(edges.size(), 0); + EXPECT_EQ(message_count, 0); + return; + } -// int min_tasks = 2; -// int max_tasks = 8; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); + EXPECT_GE(message_count, task_count * min_endpoints); + EXPECT_LE(message_count, task_count * max_endpoints); -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); + for (auto &e : edges) { + EXPECT_EQ(e.getFromRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); -// int task_count = pd.getTasksMap().size(); + auto from_task_id = e.getFrom(); + EXPECT_GE(from_task_id, rank * max_tasks); + EXPECT_LT(from_task_id, (rank + 1) * max_tasks); + } -// int min_endpoints = 1; -// int max_endpoints = 7; -// std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); + for (int tlid = 0; tlid < task_count; ++tlid) { + auto task_id = rank * max_tasks + tlid; + auto out_msgs = pd.getTaskMessagesOut(task_id); + EXPECT_GE(out_msgs, (std::size_t)min_endpoints); + EXPECT_LE(out_msgs, (std::size_t)max_endpoints); + } +}; -// int min_weight = 3000; -// int max_weight = 5000; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); +TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// double frac = 1.0; + std::mt19937 gen(357 * rank); -// generateRankComm( -// pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac -// ); + int min_tasks = 2; + int max_tasks = 8; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// auto &edges = pd.getCommunications(); + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// if (num_ranks == 1 and task_count <= 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } + int task_count = pd.getTasksMap().size(); -// EXPECT_GE(edges.size(), task_count * min_endpoints); -// EXPECT_LE(edges.size(), task_count * max_endpoints); + int min_endpoints = 1; + int max_endpoints = 7; + std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints); -// std::vector to_task(task_count); + int min_weight = 3000; + int max_weight = 5000; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// for (auto &e : edges) { -// EXPECT_EQ(e.getToRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); + double frac = 1.0; -// auto to_task_id = e.getTo(); -// EXPECT_GE(to_task_id, rank * max_tasks); -// EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + generateRankComm( + pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac + ); -// int to_lid = to_task_id - rank * max_tasks; -// if (to_lid >= 0 and to_lid < task_count) { -// ++(to_task[to_lid]); -// } -// } + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// for (int tlid = 0; tlid < task_count; ++tlid) { -// EXPECT_GE(to_task[tlid], min_endpoints); -// EXPECT_LE(to_task[tlid], max_endpoints); -// } -// }; + if (num_ranks == 1 and task_count <= 1) { + EXPECT_EQ(edges.size(), 0); + EXPECT_EQ(message_count, 0); + return; + } -// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only2) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); + EXPECT_GE(message_count, task_count * min_endpoints); + EXPECT_LE(message_count, task_count * max_endpoints); -// std::mt19937 gen(147 * rank); + for (auto &e : edges) { + EXPECT_EQ(e.getToRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); -// int min_tasks = 4; -// int max_tasks = 10; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); + auto to_task_id = e.getTo(); + EXPECT_GE(to_task_id, rank * max_tasks); + EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + } -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); + for (int tlid = 0; tlid < task_count; ++tlid) { + auto task_id = rank * max_tasks + tlid; + auto in_msgs = pd.getTaskMessagesIn(task_id); + EXPECT_GE(in_msgs, (std::size_t)min_endpoints); + EXPECT_LE(in_msgs, (std::size_t)max_endpoints); + } +}; -// int task_count = pd.getTasksMap().size(); +TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only2) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// int max_endpoints = 4; + std::mt19937 gen(147 * rank); -// int min_weight = 1000; -// int max_weight = 20000; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); + int min_tasks = 4; + int max_tasks = 10; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// double frac = 0.0; + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// generateRankComm( -// pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac -// ); + int task_count = pd.getTasksMap().size(); -// auto &edges = pd.getCommunications(); + int max_endpoints = 4; -// if (num_ranks == 1 and task_count <= 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } + int min_weight = 1000; + int max_weight = 20000; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// EXPECT_LE(edges.size(), task_count * max_endpoints); + double frac = 0.0; -// std::vector from_task(task_count); + generateRankComm( + pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac + ); -// for (auto &e : edges) { -// EXPECT_EQ(e.getFromRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// auto from_task_id = e.getFrom(); -// EXPECT_GE(from_task_id, rank * max_tasks); -// EXPECT_LT(from_task_id, (rank + 1) * max_tasks); + if (num_ranks == 1 and task_count <= 1) { + EXPECT_EQ(edges.size(), 0); + EXPECT_EQ(message_count, 0); + return; + } -// int from_lid = from_task_id - rank * max_tasks; -// if (from_lid >= 0 and from_lid < task_count) { -// ++(from_task[from_lid]); -// } -// } + EXPECT_LE(message_count, task_count * max_endpoints); -// for (int tlid = 0; tlid < task_count; ++tlid) { -// EXPECT_LE(from_task[tlid], max_endpoints); -// } -// }; + for (auto &e : edges) { + EXPECT_EQ(e.getFromRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); -// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only2) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); + auto from_task_id = e.getFrom(); + EXPECT_GE(from_task_id, rank * max_tasks); + EXPECT_LT(from_task_id, (rank + 1) * max_tasks); + } -// std::mt19937 gen(258 * rank); + for (int tlid = 0; tlid < task_count; ++tlid) { + auto task_id = rank * max_tasks + tlid; + auto out_msgs = pd.getTaskMessagesOut(task_id); + EXPECT_LE(out_msgs, (std::size_t)max_endpoints); + } +}; -// int min_tasks = 5; -// int max_tasks = 9; -// std::uniform_int_distribution<> dist(min_tasks, max_tasks); +TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only2) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// generateTaskCountsPerRank(pd, gen, dist, max_tasks); + std::mt19937 gen(258 * rank); -// int task_count = pd.getTasksMap().size(); + int min_tasks = 5; + int max_tasks = 9; + std::uniform_int_distribution<> dist(min_tasks, max_tasks); -// int max_endpoints = 7; + generateTaskCountsPerRank(pd, gen, dist, max_tasks); -// int min_weight = 70; -// int max_weight = 300; -// std::uniform_int_distribution<> weight_dist(min_weight, max_weight); + int task_count = pd.getTasksMap().size(); -// double frac = 1.0; + int max_endpoints = 7; -// generateRankComm( -// pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac -// ); + int min_weight = 70; + int max_weight = 300; + std::uniform_int_distribution<> weight_dist(min_weight, max_weight); -// auto &edges = pd.getCommunications(); + double frac = 1.0; -// if (num_ranks == 1 and task_count <= 1) { -// EXPECT_EQ(edges.size(), 0); -// return; -// } + generateRankComm( + pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac + ); -// EXPECT_LE(edges.size(), task_count * max_endpoints); + auto &edges = pd.getCommunications(); + auto message_count = pd.getCommunicationMessagesCount(); -// std::vector to_task(task_count); + if (num_ranks == 1 and task_count <= 1) { + EXPECT_EQ(edges.size(), 0); + EXPECT_EQ(message_count, 0); + return; + } -// for (auto &e : edges) { -// EXPECT_EQ(e.getToRank(), rank); -// EXPECT_GE(e.getVolume(), min_weight); -// EXPECT_LE(e.getVolume(), max_weight); + EXPECT_LE(message_count, task_count * max_endpoints); -// auto to_task_id = e.getTo(); -// EXPECT_GE(to_task_id, rank * max_tasks); -// EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + for (auto &e : edges) { + EXPECT_EQ(e.getToRank(), rank); + EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages()); + EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages()); -// int to_lid = to_task_id - rank * max_tasks; -// if (to_lid >= 0 and to_lid < task_count) { -// ++(to_task[to_lid]); -// } -// } + auto to_task_id = e.getTo(); + EXPECT_GE(to_task_id, rank * max_tasks); + EXPECT_LT(to_task_id, (rank + 1) * max_tasks); + } -// for (int tlid = 0; tlid < task_count; ++tlid) { -// EXPECT_LE(to_task[tlid], max_endpoints); -// } -// }; + for (int tlid = 0; tlid < task_count; ++tlid) { + auto task_id = rank * max_tasks + tlid; + auto in_msgs = pd.getTaskMessagesIn(task_id); + EXPECT_LE(in_msgs, (std::size_t)max_endpoints); + } +}; -// TYPED_TEST(TestGraphHelpers, test_generate_scale_rel) { -// std::mt19937 gen(123456); +TYPED_TEST(TestGraphHelpers, test_generate_scale_rel) { + std::mt19937 gen(123456); -// int largest_max_allowed = 100; -// int smallest_max_allowed = 50; -// double min_as_frac_of_max = 0.3; + int largest_max_allowed = 100; + int smallest_max_allowed = 50; + double min_as_frac_of_max = 0.3; -// auto [max_chosen, min_chosen] = generateScaleRel( -// gen, largest_max_allowed, smallest_max_allowed, min_as_frac_of_max -// ); + auto [max_chosen, min_chosen] = generateScaleRel( + gen, largest_max_allowed, smallest_max_allowed, min_as_frac_of_max + ); -// EXPECT_GE(max_chosen, smallest_max_allowed); -// EXPECT_LE(max_chosen, largest_max_allowed); -// EXPECT_GE(min_chosen, static_cast(max_chosen * min_as_frac_of_max)); -// }; + EXPECT_GE(max_chosen, smallest_max_allowed); + EXPECT_LE(max_chosen, largest_max_allowed); + EXPECT_GE(min_chosen, static_cast(max_chosen * min_as_frac_of_max)); +}; -// TYPED_TEST(TestGraphHelpers, test_generate_scale_abs) { -// std::mt19937 gen(123456); +TYPED_TEST(TestGraphHelpers, test_generate_scale_abs) { + std::mt19937 gen(123456); -// int largest_max_allowed = 100; -// int smallest_max_allowed = 50; -// int min_allowed = 40; + int largest_max_allowed = 100; + int smallest_max_allowed = 50; + int min_allowed = 40; -// auto [max_chosen, min_chosen] = generateScaleAbs( -// gen, largest_max_allowed, smallest_max_allowed, min_allowed -// ); + auto [max_chosen, min_chosen] = generateScaleAbs( + gen, largest_max_allowed, smallest_max_allowed, min_allowed + ); -// EXPECT_GE(max_chosen, smallest_max_allowed); -// EXPECT_LE(max_chosen, largest_max_allowed); -// EXPECT_GE(min_chosen, min_allowed); -// }; + EXPECT_GE(max_chosen, smallest_max_allowed); + EXPECT_LE(max_chosen, largest_max_allowed); + EXPECT_GE(min_chosen, min_allowed); +}; void sanityCheckBlockMem(const vt_lb::model::PhaseData &pd) { // check block memory @@ -1149,27 +1120,27 @@ TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_no_comm) { sanityCheckEdges(pd, include_comm, num_ranks); }; -// TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_with_comm) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); +TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_with_comm) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// int seed_same_across_ranks = 13; -// int seed_diff_each_rank = 35 * rank; + int seed_same_across_ranks = 13; + int seed_diff_each_rank = 35 * rank; -// bool uniform_shared_block_count = false; -// bool uniform_task_count = false; -// bool include_comm = true; + bool uniform_shared_block_count = false; + bool uniform_task_count = false; + bool include_comm = true; -// generateGraphWithSharedBlocks( -// pd, num_ranks, uniform_shared_block_count, uniform_task_count, include_comm, -// seed_same_across_ranks, seed_diff_each_rank -// ); + generateGraphWithSharedBlocks( + pd, num_ranks, uniform_shared_block_count, uniform_task_count, include_comm, + seed_same_across_ranks, seed_diff_each_rank + ); -// sanityCheckBlocks(pd, true); -// sanityCheckTasks(pd, true, uniform_task_count); -// sanityCheckEdges(pd, include_comm, num_ranks); -// }; + sanityCheckBlocks(pd, true); + sanityCheckTasks(pd, true, uniform_task_count); + sanityCheckEdges(pd, include_comm, num_ranks); +}; TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_no_comm_unib) { auto num_ranks = this->comm.numRanks(); @@ -1215,24 +1186,24 @@ TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_no_comm_unib sanityCheckEdges(pd, include_comm, num_ranks); }; -// TYPED_TEST(TestGraphHelpers, test_generate_graph_without_shared_blocks_with_comm) { -// auto num_ranks = this->comm.numRanks(); -// auto rank = this->comm.getRank(); -// vt_lb::model::PhaseData pd(rank); +TYPED_TEST(TestGraphHelpers, test_generate_graph_without_shared_blocks_with_comm) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + vt_lb::model::PhaseData pd(rank); -// int seed_same_across_ranks = 16; -// int seed_diff_each_rank = 38 * rank; + int seed_same_across_ranks = 16; + int seed_diff_each_rank = 38 * rank; -// bool uniform_task_count = false; -// bool include_comm = true; + bool uniform_task_count = false; + bool include_comm = true; -// generateGraphWithoutSharedBlocks( -// pd, num_ranks, uniform_task_count, include_comm, seed_same_across_ranks, -// seed_diff_each_rank -// ); + generateGraphWithoutSharedBlocks( + pd, num_ranks, uniform_task_count, include_comm, seed_same_across_ranks, + seed_diff_each_rank + ); -// sanityCheckTasks(pd, true, false); -// sanityCheckEdges(pd, include_comm, num_ranks); -// }; + sanityCheckTasks(pd, true, false); + sanityCheckEdges(pd, include_comm, num_ranks); +}; }}} // end namespace vt_lb::tests::unit diff --git a/tests/unit/temperedlb/test_temperedlb.cc b/tests/unit/temperedlb/test_temperedlb.cc new file mode 100644 index 0000000..2f6e3e6 --- /dev/null +++ b/tests/unit/temperedlb/test_temperedlb.cc @@ -0,0 +1,350 @@ +/* +//@HEADER +// ***************************************************************************** +// +// test_temperedlb.cc +// DARMA/vt-lb => Virtual Transport/Load Balancers +// +// Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include + +#include +#include + +#include "test_parallel_harness.h" +#include "test_helpers.h" +#include "graph_helpers.h" + +#include + +namespace vt_lb::tests::unit { + +// Wrapper that zips a communicator type with a single integer seed +template +struct CommSeedPack { + using Comm = CommT; + static constexpr int seed = Seed; +}; + +// Helper: build TemperedLB, compute initial and final global distributions +template +struct LbRunSummary { + std::unordered_map> initial; + std::unordered_map> final; +}; + +template +LbRunSummary runTemperedLB( + CommT& comm, + vt_lb::algo::temperedlb::Configuration const& config, + vt_lb::model::PhaseData const& pd +) { + vt_lb::algo::temperedlb::TemperedLB lb(comm, config); + lb.inputData(std::make_unique(pd)); + + auto initial_global = lb.getGlobalDistribution(pd.getTaskIds()); + std::unordered_map> initial_int; + for (auto const& [r, vec] : initial_global) { + auto& out = initial_int[r]; + out.reserve(vec.size()); + for (auto tid : vec) { + out.push_back(static_cast(tid)); + } + } + + auto local_after = lb.run(); + (void)local_after; // not needed for summary + + auto final_global = lb.getGlobalDistribution(local_after); + std::unordered_map> final_int; + for (auto const& [r, vec] : final_global) { + auto& out = final_int[r]; + out.reserve(vec.size()); + for (auto tid : vec) { + out.push_back(static_cast(tid)); + } + } + + return {std::move(initial_int), std::move(final_int)}; +} + +// Typed fixture over zipped communicator+seeds; communicator type remains implicit +template +struct TestTemperedLB: TestParallelHarness {}; + +TYPED_TEST_SUITE_P(TestTemperedLB); + +TYPED_TEST_P(TestTemperedLB, test_lb_no_comm_task_counts) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + + SET_MIN_NUM_NODES_CONSTRAINT(2); + + int seed = TypeParam::seed; + vt_lb::model::PhaseData pd(rank); + + // Generate a random graph without shared blocks and without communication + bool uniform_task_count = false; + bool include_comm = false; + int seed_same_across_ranks = seed; + int seed_diff_each_rank = 9876 * rank + 1; + + generateGraphWithoutSharedBlocks( + pd, num_ranks, uniform_task_count, include_comm, + seed_same_across_ranks, seed_diff_each_rank + ); + + // Sanity: no shared blocks or communications + EXPECT_EQ(pd.getSharedBlocksMap().size(), 0); + EXPECT_EQ(pd.getCommunications().size(), 0); + + // Build LB once and get initial/final global distributions + vt_lb::algo::temperedlb::Configuration config(num_ranks); + auto summary = runTemperedLB(this->comm, config, pd); + + // Compute initial global task count + auto const& initial_global = summary.initial; + std::size_t initial_total = 0; + for (auto const& [_r, vec] : initial_global) { + initial_total += vec.size(); + } + + // Validate the global task count is preserved + auto const& final_global = summary.final; + std::size_t final_total = 0; + for (auto const& [_r, vec] : final_global) { + final_total += vec.size(); + } + EXPECT_EQ(final_total, initial_total); +}; + +TYPED_TEST_P(TestTemperedLB, test_significant_load_imbalance_reduction) { + auto num_ranks = this->comm.numRanks(); + auto rank = this->comm.getRank(); + + SET_MIN_NUM_NODES_CONSTRAINT(2); + + int seed = TypeParam::seed; + vt_lb::model::PhaseData pd(rank); + + // Generate random tasks without shared blocks and without communication + bool uniform_task_count = false; + bool include_comm = false; + int seed_same_across_ranks = seed; + int seed_diff_each_rank = 13579 * rank + 3; + + generateGraphWithoutSharedBlocks( + pd, num_ranks, uniform_task_count, include_comm, + seed_same_across_ranks, seed_diff_each_rank + ); + + // Introduce strong load imbalance: amplify loads on rank 0 + if (rank == 0) { + for (auto tid : pd.getTaskIds()) { + auto t = pd.getTask(tid); + t->setLoad(t->getLoad() * 50.0); + } + } + + // Compute local total load before balancing + double local_before = 0.0; + for (auto const& [tid, task] : pd.getTasksMap()) { + local_before += task.getLoad(); + } + + // Compute global before stats (max and avg) via communicator handle + struct CollectiveDummy { } dummy; + auto handle = this->comm.template registerInstanceCollective(&dummy); + + double global_before_max = 0.0; + handle.reduce(0, MPI_DOUBLE, MPI_MAX, &local_before, &global_before_max, 1); + handle.broadcast(0, MPI_DOUBLE, &global_before_max, 1); + + double global_before_sum = 0.0; + handle.reduce(0, MPI_DOUBLE, MPI_SUM, &local_before, &global_before_sum, 1); + handle.broadcast(0, MPI_DOUBLE, &global_before_sum, 1); + double global_before_avg = global_before_sum / static_cast(num_ranks); + + // Build LB once and get initial/final global distributions + vt_lb::algo::temperedlb::Configuration config(num_ranks); + auto summary = runTemperedLB(this->comm, config, pd); + + // Capture initial global task distribution for preservation checks + auto const& initial_global = summary.initial; + std::size_t initial_total = 0; + std::unordered_set initial_task_set; + for (auto const& [r, vec] : initial_global) { + initial_total += vec.size(); + for (auto tid : vec) initial_task_set.insert(static_cast(tid)); + } + + // Gather global mapping of tasks per rank after + auto const& final_global = summary.final; + std::size_t final_total = 0; + std::unordered_set final_task_set; + for (auto const& [r, vec] : final_global) { + final_total += vec.size(); + for (auto tid : vec) { + final_task_set.insert(static_cast(tid)); + } + } + + // Build a global mapping of task loads: allgather ids and loads via communicator + std::vector local_ids; + std::vector local_loads; + local_ids.reserve(pd.getTasksMap().size()); + local_loads.reserve(pd.getTasksMap().size()); + for (auto const& [tid, task] : pd.getTasksMap()) { + local_ids.push_back(static_cast(tid)); + local_loads.push_back(task.getLoad()); + } + + auto ids_by_rank = handle.allgather(local_ids.data(), static_cast(local_ids.size())); + auto loads_by_rank = handle.allgather(local_loads.data(), static_cast(local_loads.size())); + + // Build lookup for task id -> load + std::unordered_map id_to_load; + for (auto const& [r, vec] : ids_by_rank) { + auto const& loads_vec = loads_by_rank[r]; + for (std::size_t i = 0; i < vec.size(); ++i) { + id_to_load.emplace(vec[i], loads_vec[i]); + } + } + + // Compute local after load: sum loads for tasks assigned to this rank + double local_after = 0.0; + auto it = final_global.find(rank); + if (it != final_global.end()) { + for (auto tid : it->second) { + local_after += id_to_load[static_cast(tid)]; + } + } + + // Compute global after stats (max and avg) via communicator handle + double global_after_max = 0.0; + handle.reduce(0, MPI_DOUBLE, MPI_MAX, &local_after, &global_after_max, 1); + handle.broadcast(0, MPI_DOUBLE, &global_after_max, 1); + + double global_after_sum = 0.0; + handle.reduce(0, MPI_DOUBLE, MPI_SUM, &local_after, &global_after_sum, 1); + handle.broadcast(0, MPI_DOUBLE, &global_after_sum, 1); + double global_after_avg = global_after_sum / static_cast(num_ranks); + + // Expect improvement: global max work decreases after balancing + EXPECT_GT(global_before_max, global_before_avg); + EXPECT_LT(global_after_max, global_before_max); + + // Tasks should be preserved globally: same total and same set of task IDs + EXPECT_EQ(final_total, initial_total); + EXPECT_EQ(final_task_set.size(), initial_task_set.size()); + // Verify the sets are identical + for (auto tid : initial_task_set) { + EXPECT_TRUE(final_task_set.count(tid) == 1); + } + + // Average work should remain nearly constant (loads are redistributed) + EXPECT_NEAR(global_after_avg, global_before_avg, 1e-9); + + // Compute imbalance metric I = max/avg - 1 and check it approaches zero + double I_before = 0.0; + if (global_before_avg > 0.0) { + I_before = (global_before_max / global_before_avg) - 1.0; + } + double I_after = 0.0; + if (global_after_avg > 0.0) { + I_after = (global_after_max / global_after_avg) - 1.0; + } + EXPECT_GT(I_before, 0.0); + EXPECT_LT(I_after, I_before); + EXPECT_NEAR(I_after, 0.0, 0.02); + + // fmt::print( + // "Rank {}: before max={}, avg={}, I={:.4f}; after max={}, avg={}, I={:.4f}\n", + // rank, global_before_max, global_before_avg, I_before, + // global_after_max, global_after_avg, I_after + // ); +}; + +REGISTER_TYPED_TEST_SUITE_P( + TestTemperedLB, test_lb_no_comm_task_counts, test_significant_load_imbalance_reduction +); + +// Zip communicator type list with an integer seed sequence +template +struct ZipCommWithSeeds; + +template +struct ZipCommWithSeeds<::testing::Types, std::integer_sequence> { + template + struct PacksForComm { + using type = ::testing::Types...>; + }; + + template + struct ConcatTypes; + + template + struct ConcatTypes<::testing::Types> { + using type = ::testing::Types; + }; + + template + struct ConcatTypes<::testing::Types, ::testing::Types, Rest...> { + using type = typename ConcatTypes<::testing::Types, Rest...>::type; + }; + + using type = typename ConcatTypes::type...>::type; +}; + +using SeedSeq = std::integer_sequence; +using CommSeedTypesForTesting = typename ZipCommWithSeeds::type; + +// Name generator that appends the seed to the base communicator name +struct SeededCommNameGenerator { + template + static std::string GetName(int i) { + auto base = CommNameGenerator::template GetName(i); + return base + std::string("_seed_") + std::to_string(Pack::seed); + } +}; + +INSTANTIATE_TYPED_TEST_SUITE_P( + ZippedSeeds, TestTemperedLB, CommSeedTypesForTesting, SeededCommNameGenerator +); + +} /* end namespace vt_lb::tests::unit */