diff --git a/CMakeLists.txt b/CMakeLists.txt index 91091ee..4052741 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,7 +23,10 @@ endif() # vt includes magistrate and we must have magistrate for the MPI backend if(NOT vt_FOUND) + set(vt_backend_feature_enabled 0) find_package(magistrate REQUIRED) +else() + set(vt_backend_feature_enabled 1) endif() set(FMT_LIBRARY fmt) diff --git a/src/vt-lb/algo/baselb/baselb.h b/src/vt-lb/algo/baselb/baselb.h index 13c1e90..004b8ee 100644 --- a/src/vt-lb/algo/baselb/baselb.h +++ b/src/vt-lb/algo/baselb/baselb.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_BASELB_BASELB_H #define INCLUDED_VT_LB_ALGO_BASELB_BASELB_H +#include + #include #include diff --git a/src/vt-lb/algo/driver/driver.h b/src/vt-lb/algo/driver/driver.h index ebe92ab..45b0357 100644 --- a/src/vt-lb/algo/driver/driver.h +++ b/src/vt-lb/algo/driver/driver.h @@ -44,7 +44,9 @@ #if !defined INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_H #define INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_H -#include "vt-lb/model/PhaseData.h" +#include + +#include #include diff --git a/src/vt-lb/algo/driver/driver.impl.h b/src/vt-lb/algo/driver/driver.impl.h index 83b63ef..83913ab 100644 --- a/src/vt-lb/algo/driver/driver.impl.h +++ b/src/vt-lb/algo/driver/driver.impl.h @@ -44,8 +44,10 @@ #if !defined INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_IMPL_H #define INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_IMPL_H +#include + #include -#include "vt-lb/algo/driver/driver.h" +#include namespace vt_lb { diff --git a/src/vt-lb/algo/temperedlb/basic_transfer.h b/src/vt-lb/algo/temperedlb/basic_transfer.h index a2ec045..c36186e 100644 --- a/src/vt-lb/algo/temperedlb/basic_transfer.h +++ b/src/vt-lb/algo/temperedlb/basic_transfer.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_BASIC_TRANSFER_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_BASIC_TRANSFER_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/cluster_summarizer.h b/src/vt-lb/algo/temperedlb/cluster_summarizer.h index 
8607269..8530dae 100644 --- a/src/vt-lb/algo/temperedlb/cluster_summarizer.h +++ b/src/vt-lb/algo/temperedlb/cluster_summarizer.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h b/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h index 7cf7615..1cf2bca 100644 --- a/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h +++ b/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_IMPL_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_IMPL_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/clustering.h b/src/vt-lb/algo/temperedlb/clustering.h index dfed885..0778764 100644 --- a/src/vt-lb/algo/temperedlb/clustering.h +++ b/src/vt-lb/algo/temperedlb/clustering.h @@ -43,6 +43,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTERING_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTERING_H +#include + #include #include diff --git a/src/vt-lb/algo/temperedlb/configuration.h b/src/vt-lb/algo/temperedlb/configuration.h index d2c28cf..e9dcf8e 100644 --- a/src/vt-lb/algo/temperedlb/configuration.h +++ b/src/vt-lb/algo/temperedlb/configuration.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CONFIGURATION_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CONFIGURATION_H +#include + #include #include diff --git a/src/vt-lb/algo/temperedlb/full_graph_visualizer.h b/src/vt-lb/algo/temperedlb/full_graph_visualizer.h index d1ddb75..452a697 100644 --- a/src/vt-lb/algo/temperedlb/full_graph_visualizer.h +++ b/src/vt-lb/algo/temperedlb/full_graph_visualizer.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_FULL_GRAPH_VISUALIZER_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_FULL_GRAPH_VISUALIZER_H +#include + #include #include #include diff --git 
a/src/vt-lb/algo/temperedlb/info_propagation.h b/src/vt-lb/algo/temperedlb/info_propagation.h index 81be7b7..d294e59 100644 --- a/src/vt-lb/algo/temperedlb/info_propagation.h +++ b/src/vt-lb/algo/temperedlb/info_propagation.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_INFO_PROPAGATION_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_INFO_PROPAGATION_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h b/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h index 21cd4b9..e698fe7 100644 --- a/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h +++ b/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_RELAXED_CLUSTER_TRANSFER_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_RELAXED_CLUSTER_TRANSFER_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/statistics.h b/src/vt-lb/algo/temperedlb/statistics.h index b753f54..6be1c37 100644 --- a/src/vt-lb/algo/temperedlb/statistics.h +++ b/src/vt-lb/algo/temperedlb/statistics.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_STATISTICS_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_STATISTICS_H +#include + #include namespace vt_lb::algo::temperedlb { diff --git a/src/vt-lb/algo/temperedlb/symmetrize_comm.h b/src/vt-lb/algo/temperedlb/symmetrize_comm.h index 2e077f5..e44bd6b 100644 --- a/src/vt-lb/algo/temperedlb/symmetrize_comm.h +++ b/src/vt-lb/algo/temperedlb/symmetrize_comm.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_SYMMETRIZE_COMM_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_SYMMETRIZE_COMM_H +#include + #include #include diff --git a/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h b/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h index 6a0fbee..838c20a 100644 --- a/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h +++ b/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h @@ -44,6 +44,8 @@ #if !defined 
INCLUDED_VT_LB_ALGO_TEMPEREDLB_TASK_CLUSTER_SUMMARY_INFO_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TASK_CLUSTER_SUMMARY_INFO_H +#include + #include #include diff --git a/src/vt-lb/algo/temperedlb/temperedlb.h b/src/vt-lb/algo/temperedlb/temperedlb.h index 54a01bd..40c754b 100644 --- a/src/vt-lb/algo/temperedlb/temperedlb.h +++ b/src/vt-lb/algo/temperedlb/temperedlb.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TEMPEREDLB_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TEMPEREDLB_H +#include + #include #include diff --git a/src/vt-lb/algo/temperedlb/transfer.h b/src/vt-lb/algo/temperedlb/transfer.h index 4e4ffa3..98f925b 100644 --- a/src/vt-lb/algo/temperedlb/transfer.h +++ b/src/vt-lb/algo/temperedlb/transfer.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/transfer_util.h b/src/vt-lb/algo/temperedlb/transfer_util.h index 3b48795..b5b2ea7 100644 --- a/src/vt-lb/algo/temperedlb/transfer_util.h +++ b/src/vt-lb/algo/temperedlb/transfer_util.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_UTIL_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_UTIL_H +#include + #include #include #include diff --git a/src/vt-lb/algo/temperedlb/visualize.h b/src/vt-lb/algo/temperedlb/visualize.h index 3d2ef72..08acd20 100644 --- a/src/vt-lb/algo/temperedlb/visualize.h +++ b/src/vt-lb/algo/temperedlb/visualize.h @@ -1,8 +1,10 @@ #pragma once +#include + #include #include -#include "clustering.h" +#include #include #include diff --git a/src/vt-lb/algo/temperedlb/work_model.h b/src/vt-lb/algo/temperedlb/work_model.h index 88c6b44..a2cf9f4 100644 --- a/src/vt-lb/algo/temperedlb/work_model.h +++ b/src/vt-lb/algo/temperedlb/work_model.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_WORK_MODEL_H #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_WORK_MODEL_H +#include + #include #include #include diff 
--git a/src/vt-lb/comm/MPI/class_handle.h b/src/vt-lb/comm/MPI/class_handle.h index 3899732..d80d7a0 100644 --- a/src/vt-lb/comm/MPI/class_handle.h +++ b/src/vt-lb/comm/MPI/class_handle.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_COMM_CLASS_HANDLE_H #define INCLUDED_VT_LB_COMM_CLASS_HANDLE_H +#include + #include namespace vt_lb::comm { diff --git a/src/vt-lb/comm/MPI/class_handle.impl.h b/src/vt-lb/comm/MPI/class_handle.impl.h index fdb25c4..3d93832 100644 --- a/src/vt-lb/comm/MPI/class_handle.impl.h +++ b/src/vt-lb/comm/MPI/class_handle.impl.h @@ -44,8 +44,10 @@ #if !defined INCLUDED_VT_LB_COMM_CLASS_HANDLE_IMPL_H #define INCLUDED_VT_LB_COMM_CLASS_HANDLE_IMPL_H -#include "class_handle.h" -#include "vt-lb/comm/MPI/comm_mpi.h" +#include + +#include +#include namespace vt_lb::comm { diff --git a/src/vt-lb/comm/MPI/comm_mpi.h b/src/vt-lb/comm/MPI/comm_mpi.h index f599fb5..e46d245 100644 --- a/src/vt-lb/comm/MPI/comm_mpi.h +++ b/src/vt-lb/comm/MPI/comm_mpi.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_COMM_COMM_MPI_H #define INCLUDED_VT_LB_COMM_COMM_MPI_H +#include + #include #include #include diff --git a/src/vt-lb/comm/MPI/comm_mpi_detail.h b/src/vt-lb/comm/MPI/comm_mpi_detail.h index aeaf062..256b03c 100644 --- a/src/vt-lb/comm/MPI/comm_mpi_detail.h +++ b/src/vt-lb/comm/MPI/comm_mpi_detail.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_COMM_COMM_MPI_DETAIL_H #define INCLUDED_VT_LB_COMM_COMM_MPI_DETAIL_H +#include + #include namespace vt_lb::comm::detail { diff --git a/src/vt-lb/comm/MPI/termination.h b/src/vt-lb/comm/MPI/termination.h index 113bf66..ffa1f86 100644 --- a/src/vt-lb/comm/MPI/termination.h +++ b/src/vt-lb/comm/MPI/termination.h @@ -44,7 +44,9 @@ #if !defined INCLUDED_VT_LB_COMM_TERMINATION_H #define INCLUDED_VT_LB_COMM_TERMINATION_H -#include "vt-lb/comm/MPI/class_handle.h" +#include + +#include #include #include diff --git a/src/vt-lb/comm/comm_traits.h b/src/vt-lb/comm/comm_traits.h index 5a543ed..e0103d5 100644 --- 
a/src/vt-lb/comm/comm_traits.h +++ b/src/vt-lb/comm/comm_traits.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_COMM_COMM_TRAITS_H #define INCLUDED_VT_LB_COMM_COMM_TRAITS_H +#include + #include #include #include diff --git a/src/vt-lb/comm/vt/comm_vt.h b/src/vt-lb/comm/vt/comm_vt.h index 3e0d4eb..735c4e3 100644 --- a/src/vt-lb/comm/vt/comm_vt.h +++ b/src/vt-lb/comm/vt/comm_vt.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_COMM_COMM_VT_H #define INCLUDED_VT_LB_COMM_COMM_VT_H +#include + #if vt_backend_enabled #include diff --git a/src/vt-lb/comm/vt/comm_vt.impl.h b/src/vt-lb/comm/vt/comm_vt.impl.h index 7174571..32ceb69 100644 --- a/src/vt-lb/comm/vt/comm_vt.impl.h +++ b/src/vt-lb/comm/vt/comm_vt.impl.h @@ -44,8 +44,10 @@ #if !defined INCLUDED_VT_LB_COMM_COMM_VT_IMPL_H #define INCLUDED_VT_LB_COMM_COMM_VT_IMPL_H -#include "vt-lb/comm/vt/comm_vt.h" -#include "vt-lb/comm/vt/proxy_wrapper.h" +#include + +#include +#include #include namespace vt_lb::comm { diff --git a/src/vt-lb/comm/vt/proxy_wrapper.h b/src/vt-lb/comm/vt/proxy_wrapper.h index 31f1927..f20a022 100644 --- a/src/vt-lb/comm/vt/proxy_wrapper.h +++ b/src/vt-lb/comm/vt/proxy_wrapper.h @@ -44,6 +44,8 @@ #ifndef INCLUDED_VT_LB_COMM_PROXY_WRAPPER_H #define INCLUDED_VT_LB_COMM_PROXY_WRAPPER_H +#include + #include #include #include diff --git a/src/vt-lb/comm/vt/proxy_wrapper.impl.h b/src/vt-lb/comm/vt/proxy_wrapper.impl.h index e3547e0..85ba630 100644 --- a/src/vt-lb/comm/vt/proxy_wrapper.impl.h +++ b/src/vt-lb/comm/vt/proxy_wrapper.impl.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_COMM_PROXY_WRAPPER_IMPL_H #define INCLUDED_VT_LB_COMM_PROXY_WRAPPER_IMPL_H +#include + #include #include diff --git a/src/vt-lb/config/cmake_config.h.in b/src/vt-lb/config/cmake_config.h.in index ae5086a..2e90feb 100644 --- a/src/vt-lb/config/cmake_config.h.in +++ b/src/vt-lb/config/cmake_config.h.in @@ -41,4 +41,4 @@ //@HEADER */ -#define vt_backend_enabled @vt_backend_enabled@ \ No newline at end of file +#define 
vt_backend_enabled @vt_backend_feature_enabled@ diff --git a/src/vt-lb/model/Communication.h b/src/vt-lb/model/Communication.h index c68a012..783e958 100644 --- a/src/vt-lb/model/Communication.h +++ b/src/vt-lb/model/Communication.h @@ -44,7 +44,9 @@ #if !defined INCLUDED_VT_LB_MODEL_COMMUNICATION_H #define INCLUDED_VT_LB_MODEL_COMMUNICATION_H -#include "types.h" +#include + +#include namespace vt_lb::model { diff --git a/src/vt-lb/model/PhaseData.h b/src/vt-lb/model/PhaseData.h index 65ba7c4..a8c0e48 100644 --- a/src/vt-lb/model/PhaseData.h +++ b/src/vt-lb/model/PhaseData.h @@ -44,10 +44,12 @@ #if !defined INCLUDED_VT_LB_MODEL_PHASE_DATA_H #define INCLUDED_VT_LB_MODEL_PHASE_DATA_H -#include "types.h" -#include "SharedBlock.h" -#include "Task.h" -#include "Communication.h" +#include + +#include +#include +#include +#include #include #include diff --git a/src/vt-lb/model/SharedBlock.h b/src/vt-lb/model/SharedBlock.h index ecb71ef..5ee7ff2 100644 --- a/src/vt-lb/model/SharedBlock.h +++ b/src/vt-lb/model/SharedBlock.h @@ -44,8 +44,10 @@ #if !defined INCLUDED_VT_LB_MODEL_BLOCK_H #define INCLUDED_VT_LB_MODEL_BLOCK_H -#include "types.h" -#include "Task.h" +#include + +#include +#include namespace vt_lb::model { diff --git a/src/vt-lb/model/Task.h b/src/vt-lb/model/Task.h index 1d29691..f84df09 100644 --- a/src/vt-lb/model/Task.h +++ b/src/vt-lb/model/Task.h @@ -44,7 +44,9 @@ #if !defined INCLUDED_VT_LB_MODEL_TASK_H #define INCLUDED_VT_LB_MODEL_TASK_H -#include "types.h" +#include + +#include #include @@ -75,6 +77,10 @@ struct TaskMemory { struct Task { Task() = default; + Task(TaskType id, LoadType load) + : id_(id), load_(load) + {} + Task(TaskType id, RankType home, RankType current, bool migratable, TaskMemory const& memory, LoadType load) : id_(id), diff --git a/src/vt-lb/model/types.h b/src/vt-lb/model/types.h index 025a03c..5fe5226 100644 --- a/src/vt-lb/model/types.h +++ b/src/vt-lb/model/types.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_MODEL_TYPES_H #define 
INCLUDED_VT_LB_MODEL_TYPES_H +#include + #include #include diff --git a/src/vt-lb/util/logging.h b/src/vt-lb/util/logging.h index b260240..5015ea5 100644 --- a/src/vt-lb/util/logging.h +++ b/src/vt-lb/util/logging.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_UTIL_LOGGING_H #define INCLUDED_VT_LB_UTIL_LOGGING_H +#include + #include #include #include diff --git a/tests/unit/graph_helpers.h b/tests/unit/graph_helpers.h index b5de54f..aa25905 100644 --- a/tests/unit/graph_helpers.h +++ b/tests/unit/graph_helpers.h @@ -304,10 +304,87 @@ void generateIntraRankComm( } } double bytes = weight_per_edge_dist(gen); + bytes = std::max(bytes, 1.0); pd.addCommunication(Edge{from, to, bytes, rank, rank}); } } +/** + * \brief Generate random intra-rank communications on each rank + * in a ring pattern + * + * \note Each task connects to K successors on the sorted-ID ring: the K nearest ones, or K randomly chosen ones within the capped distance range when make_random_neighbors is set. + * Guarantees: no self-edges, at most one edge per task pair among the edges added here. + * + * @param pd The PhaseData for this rank + * @param gen The seeded generator for this rank + * @param min_neighbors Minimum number of neighbors per task (nothing is generated if <= 0) + * @param max_neighbors Maximum number of neighbors per task (nothing is generated if <= 0; capped at floor((N-1)/2) for N local tasks) + * @param weight_per_edge_dist Random edge weights distribution (each sample is clamped to at least 1.0) + * @param make_random_neighbors If true, randomly select neighbors rather than + * using the closest neighbors in the ring + */ +template +void generateIntraRankCommRing( + vt_lb::model::PhaseData& pd, + std::mt19937 &gen, + int min_neighbors, + int max_neighbors, + WeightPerEdgeDistType &weight_per_edge_dist, + bool make_random_neighbors +) { + using namespace vt_lb::model; + + int const rank = pd.getRank(); + assert(rank != invalid_node); + + auto local_ids_set = pd.getTaskIds(); + std::vector local_ids( + local_ids_set.begin(), local_ids_set.end() + ); + std::size_t const N = local_ids.size(); + if (N <= 1) return; // a ring needs at least two local tasks + + if (max_neighbors <= 0) return; + if (min_neighbors <= 0) return; // note: a non-positive minimum disables generation entirely + + assert(max_neighbors >= min_neighbors && "max_neighbors >= 
min_neighbors"); + + // Sort IDs so "distance" is by TaskType ordering, then build a ring + std::sort(local_ids.begin(), local_ids.end()); + + // To avoid duplicates when adding only forward neighbors, + // clamp K to floor((N-1)/2). This ensures each undirected pair is added once. + int const half_cap = static_cast((N - 1) / 2); + int K_min = std::min(min_neighbors, half_cap); + int K_max = std::min(max_neighbors, half_cap); + if (K_max == 0) return; // only possible when N == 2, where half_cap is 0 + + std::uniform_int_distribution<> K_dist(K_min, K_max); // needs K_min <= K_max; holds when min_neighbors <= max_neighbors, which is only asserted above + + for (std::size_t i = 0; i < N; ++i) { + int K = K_dist(gen); // number of neighbors for task i + std::vector neighbor_distances; // candidate ring distances 1..K_max, rebuilt for every task + for (int d = 1; d <= K_max; ++d) { + neighbor_distances.push_back(d); + } + if (make_random_neighbors) { + std::shuffle( + neighbor_distances.begin(), neighbor_distances.end(), gen + ); + } + // Add edges to the first K distances in the list (shuffled only if make_random_neighbors) + for (int d = 0; d < K; ++d) { + std::size_t j = (i + static_cast(neighbor_distances[d])) % N; // ring successor at the d-th selected ID distance + TaskType from = local_ids[i]; + TaskType to = local_ids[j]; + double bytes = std::max(weight_per_edge_dist(gen), 1.0); + // every distance is >= 1 and <= half_cap, which ensures no self-edge and no duplicate pairs + pd.addCommunication(Edge{from, to, bytes, rank, rank}); + } + } +} + /** * Generate random inter-rank communications on each rank * @@ -328,7 +405,7 @@ void generateInterRankComm( ) { using namespace vt_lb::model; - if (num_ranks == 1) { + if (num_ranks == 1 or min_tasks_per_rank == 0) { return; } @@ -359,6 +436,7 @@ void generateInterRankComm( while ((remote_rank = remote_rank_dist(gen)) == rank) {} TaskType to = remote_task_dist(gen); double bytes = weight_per_edge_dist(gen); + bytes = std::max(bytes, 1.0); pd.addCommunication(Edge{from, to, bytes, rank, remote_rank}); } for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) { @@ -367,6 +445,7 @@ void generateInterRankComm( while ((remote_rank = remote_rank_dist(gen)) == rank) {} TaskType from = remote_task_dist(gen); double 
bytes = weight_per_edge_dist(gen); + bytes = std::max(bytes, 1.0); pd.addCommunication(Edge{from, to, bytes, remote_rank, rank}); } } @@ -428,6 +507,7 @@ void generateRankComm( while ((to = remote_task_dist(gen)) == from) {} } double bytes = weight_per_edge_dist(gen); + bytes = std::max(bytes, 1.0); pd.addCommunication(Edge{from, to, bytes, rank, remote_rank}); } for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) { @@ -441,6 +521,7 @@ void generateRankComm( while ((from = remote_task_dist(gen)) == to) {} } double bytes = weight_per_edge_dist(gen); + bytes = std::max(bytes, 1.0); pd.addCommunication(Edge{from, to, bytes, remote_rank, rank}); } } @@ -697,7 +778,7 @@ void generateGraphWithSharedBlocks( ); if (include_comm) { - double edge_weight_lambda = 1000.0, locally_gen_in_edge_frac = 0.5; + double edge_weight_lambda = 1000000.0, locally_gen_in_edge_frac = 0.5; int max_endpoints = 4; std::exponential_distribution<> edge_weight_dist(edge_weight_lambda); diff --git a/tests/unit/test_config.h b/tests/unit/test_config.h index 6a244ec..9167be7 100644 --- a/tests/unit/test_config.h +++ b/tests/unit/test_config.h @@ -44,6 +44,8 @@ #if !defined INCLUDED_VT_LB_UNIT_TEST_CONFIG_H #define INCLUDED_VT_LB_UNIT_TEST_CONFIG_H +#include + #define DEBUG_TEST_HARNESS_PRINT 0 // Stub "value" to when not supplying a trailing macro variadic argument. diff --git a/tests/unit/work_model/test_work_model.cc b/tests/unit/work_model/test_work_model.cc new file mode 100644 index 0000000..a212536 --- /dev/null +++ b/tests/unit/work_model/test_work_model.cc @@ -0,0 +1,869 @@ +/* +//@HEADER +// ***************************************************************************** +// +// test_work_model.cc +// DARMA/vt-lb => Virtual Transport/Load Balancers +// +// Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC +// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +// Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the copyright holder nor the names of its +// contributors may be used to endorse or promote products derived from this +// software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Questions? 
Contact darma@sandia.gov +// +// ***************************************************************************** +//@HEADER +*/ + +#include + +#include "test_parallel_harness.h" +#include "test_helpers.h" +#include "graph_helpers.h" + +#include +#include +#include +#include + +namespace vt_lb { namespace tests { namespace unit { + +template +struct TestWorkModelBasic : TestParallelHarness { // fixture whose setup* helpers build a randomized problem and report the breakdown/work a test should expect + static constexpr int num_seeds = 100; // presumably the number of RNG seeds each test loops over -- its use is not visible here + + void setupRandomNonzeroWorkModel( + std::mt19937 &gen, algo::temperedlb::WorkModel &wm + ); + void setupNoMemoryInfo(algo::temperedlb::Configuration &cfg); + void setupRandomTaskMemory(std::mt19937 &gen, model::PhaseData &pd); + void setupUniformTaskMemory( + std::mt19937 &gen, model::PhaseData &pd, double working_mem, + double footprint_mem, double serialized_mem + ); + + double setupRandomUniformLoadOnlyNoMemProblem( // the setup*Problem helpers whose definitions are visible fill pd/cfg/expected_bd/wm and return the expected work + std::mt19937 &gen, model::PhaseData &pd, + algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + double setupRandomLoadOnlyNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, + algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + double setupRandomUniformSharedBlocksProblem( + std::mt19937 &gen, model::PhaseData &pd, + algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + double setupRandomSharedBlocksProblem( + std::mt19937 &gen, model::PhaseData &pd, + algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + double setupRandomLoadAndIntraCommNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, + algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + double setupRandomLoadAndInterCommNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, + 
algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + double setupRandomLoadAndMixedCommNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, + algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, + algo::temperedlb::WorkModel &wm + ); + + void verifyNoChangeUpdate( + const model::PhaseData &pd, const algo::temperedlb::Configuration &cfg, + const algo::temperedlb::WorkBreakdown &expected_bd, + const algo::temperedlb::WorkModel &wm, double expected_work + ); +}; + +template +void TestWorkModelBasic::setupRandomNonzeroWorkModel( + std::mt19937 &gen, algo::temperedlb::WorkModel &wm +) { + std::uniform_real_distribution<> uni_dist(0.0, 1.0); // samples [0, 1), so an exactly-zero coefficient is possible in principle despite the name + wm.rank_alpha = 2.0 * uni_dist(gen); // rank_alpha is drawn from [0, 2) + wm.beta = uni_dist(gen); // beta, gamma and delta are each drawn from [0, 1) + wm.gamma = uni_dist(gen); + wm.delta = uni_dist(gen); +} + +template +void TestWorkModelBasic::setupNoMemoryInfo( + algo::temperedlb::Configuration &cfg +) { + cfg.work_model_.has_memory_info = false; + // Make sure that the above always overrides the below by leaving them on + cfg.work_model_.has_shared_block_memory_info = true; + cfg.work_model_.has_task_footprint_memory_info = true; + cfg.work_model_.has_task_working_memory_info = true; + cfg.work_model_.has_task_serialized_memory_info = true; +} + +template +void TestWorkModelBasic::setupRandomTaskMemory( + std::mt19937 &gen, model::PhaseData &pd +) { + std::exponential_distribution<> expo_dist(1000.0); // NOTE(review): the argument is the rate, so the mean is 0.001 and the int casts below truncate to 0 in practice -- confirm intent + int smem = static_cast(expo_dist(gen)); + int fmem = smem + static_cast(expo_dist(gen)); // fmem is never smaller than smem + generateTaskMemory(pd, gen, expo_dist, fmem, smem); +} + +template +void TestWorkModelBasic::setupUniformTaskMemory( + std::mt19937 &gen, model::PhaseData &pd, double working_mem, + double footprint_mem, double serialized_mem +) { + std::uniform_real_distribution<> dist(working_mem, working_mem); // degenerate range: every draw is meant to equal working_mem + generateTaskMemory(pd, gen, dist, footprint_mem, serialized_mem); +} + +template +double 
TestWorkModelBasic::setupRandomUniformLoadOnlyNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero coefficients but there will only be load + setupRandomNonzeroWorkModel(gen, wm); + + // Do not consider memory even though we will define some + setupNoMemoryInfo(cfg); + + // Generate graph: task load will be uniform across all tasks on a given rank; + // there will be no shared blocks or communication + int min_tasks_per_rank = 0; + int max_tasks_per_rank = 100; + double expo_lambda = 100.0; + std::exponential_distribution<> expo_dist(expo_lambda); + double uniform_load = expo_dist(gen); // the single load value given to every task on this rank + std::uniform_real_distribution<> load_dist(uniform_load, uniform_load); // degenerate range so every draw is meant to equal uniform_load + generateTasksWithoutSharedBlocks( + pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist + ); + + // Define random task memory even though we will not use it + setupRandomTaskMemory(gen, pd); + + // Define expected breakdown and work: compute is (number of tasks) * uniform_load, every other term is zero + expected_bd.compute = pd.getTasksMap().size() * uniform_load; + expected_bd.inter_node_recv_comm = 0.0; + expected_bd.inter_node_send_comm = 0.0; + expected_bd.intra_node_recv_comm = 0.0; + expected_bd.intra_node_send_comm = 0.0; + expected_bd.shared_mem_comm = 0.0; + expected_bd.memory_breakdown = {0.0, 0.0, 0.0}; + + // Compute expected work: only the compute term contributes, weighted by rank_alpha + double expected_work = expected_bd.compute * wm.rank_alpha; + return expected_work; +} + +template +double TestWorkModelBasic::setupRandomLoadOnlyNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero coefficients but there will only be load + setupRandomNonzeroWorkModel(gen, wm); + + // Do not consider memory even though we will define some + setupNoMemoryInfo(cfg); + + // Generate graph: task load will vary randomly across 
all tasks on a given rank; + // there will be no shared blocks or communication + int min_tasks_per_rank = 0; + int max_tasks_per_rank = 100; + double expo_lambda = 1000.0; + std::exponential_distribution<> load_dist(expo_lambda); // loads come from an exponential distribution (rate 1000), presumably one draw per task + generateTasksWithoutSharedBlocks( + pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist + ); + + // Define random task memory even though we will not use it + setupRandomTaskMemory(gen, pd); + + // Define expected breakdown and work: compute is the sum of the generated task loads + double expected_compute = 0.0; + auto &taskmap = pd.getTasksMap(); + for (auto &t : taskmap) { + expected_compute += t.second.getLoad(); + } + expected_bd.compute = expected_compute; + expected_bd.inter_node_recv_comm = 0.0; + expected_bd.inter_node_send_comm = 0.0; + expected_bd.intra_node_recv_comm = 0.0; + expected_bd.intra_node_send_comm = 0.0; + expected_bd.shared_mem_comm = 0.0; + expected_bd.memory_breakdown = {0.0, 0.0, 0.0}; + + // Compute expected work: only the compute term contributes, weighted by rank_alpha + double expected_work = expected_bd.compute * wm.rank_alpha; + return expected_work; +} + +template +double TestWorkModelBasic::setupRandomUniformSharedBlocksProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero coefficients but only load and memory are generated (no communication) + setupRandomNonzeroWorkModel(gen, wm); + + // Consider all types of memory + cfg.work_model_.has_memory_info = true; + cfg.work_model_.has_shared_block_memory_info = true; + cfg.work_model_.has_task_footprint_memory_info = true; + cfg.work_model_.has_task_working_memory_info = true; + cfg.work_model_.has_task_serialized_memory_info = true; + + // Generate graph: task load will be uniform across all tasks on a given rank; + // tasks are generated together with shared blocks and there will be no communication + int min_blocks_per_rank = 0; + int max_blocks_per_rank = 100; + double expo_lambda = 1000000.0; // NOTE(review): this is a rate, so draws average 1e-6 and the int conversion of uniform_mem yields 0 in practice -- confirm intent + std::exponential_distribution<> expo_dist(expo_lambda); + int uniform_mem 
= static_cast(expo_dist(gen)); + int min_tasks_per_block = 1; + int max_tasks_per_block = 10; + double uniform_load = expo_dist(gen); // the single load value given to every task on this rank + std::uniform_real_distribution<> load_dist(uniform_load, uniform_load); + generateSharedBlocksWithTasks( + pd, gen, min_blocks_per_rank, max_blocks_per_rank, uniform_mem, uniform_mem, + min_tasks_per_block, max_tasks_per_block, load_dist + ); + + auto num_tasks = pd.getTasksMap().size(); + std::uniform_int_distribution<> uni_dist(100, 1000); + double working_mem = 0.0; + double serialized_mem = 0.0; + if (num_tasks > 0) { // with no tasks the per-task memory figures stay at 0 + working_mem = uni_dist(gen); + serialized_mem = uni_dist(gen); + } + double footprint_mem = serialized_mem * 2.0; + setupUniformTaskMemory(gen, pd, working_mem, footprint_mem, serialized_mem); + + // Define expected breakdown and work + double expected_block_mem = + static_cast(uniform_mem) * pd.getSharedBlocksMap().size(); + double expected_task_mem = + footprint_mem * num_tasks + working_mem + serialized_mem; // footprint counted per task, working/serialized counted once (presumably the model tracks their per-task maximum -- confirm) + expected_bd.compute = num_tasks * uniform_load; + expected_bd.inter_node_recv_comm = 0.0; + expected_bd.inter_node_send_comm = 0.0; + expected_bd.intra_node_recv_comm = 0.0; + expected_bd.intra_node_send_comm = 0.0; + expected_bd.shared_mem_comm = 0.0; // all at home + expected_bd.memory_breakdown = { + expected_block_mem + expected_task_mem, working_mem, serialized_mem + }; + + // Compute expected work (shared_mem_comm is 0, so only the compute term contributes) + double expected_work = + expected_bd.compute * wm.rank_alpha + + expected_bd.shared_mem_comm * wm.delta; + + return expected_work; +} + +template +double TestWorkModelBasic::setupRandomSharedBlocksProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero coefficients but only load and shared-block memory are generated + setupRandomNonzeroWorkModel(gen, wm); + + // Consider all types of memory even though only shared blocks have it + cfg.work_model_.has_memory_info = true; + 
cfg.work_model_.has_shared_block_memory_info = true; + cfg.work_model_.has_task_footprint_memory_info = true; + cfg.work_model_.has_task_working_memory_info = true; + cfg.work_model_.has_task_serialized_memory_info = true; + + // Generate graph: shared blocks with their tasks, task loads drawn from load_dist; + // there will be no communication + int min_blocks_per_rank = 0; + int max_blocks_per_rank = 100; + double expo_lambda = 10000000.0; + std::exponential_distribution<> expo_dist(expo_lambda); + int max_mem = static_cast(expo_dist(gen)); // NOTE(review): rate 1e7 gives a mean of 1e-7, so this truncates to 0 in practice -- confirm intent + int min_mem = max_mem / 2; + int min_tasks_per_block = 1; + int max_tasks_per_block = 10; + std::exponential_distribution<> load_dist(expo_lambda / 10); + generateSharedBlocksWithTasks( + pd, gen, min_blocks_per_rank, max_blocks_per_rank, min_mem, max_mem, + min_tasks_per_block, max_tasks_per_block, load_dist + ); + + // Define expected breakdown and work: sum the generated task loads and shared block sizes + double expected_compute = 0.0; + auto &taskmap = pd.getTasksMap(); + for (auto &t : taskmap) { + expected_compute += t.second.getLoad(); + } + double expected_block_mem = 0.0; + auto &blockmap = pd.getSharedBlocksMap(); + for (auto &b : blockmap) { + expected_block_mem += b.second.getSize(); + } + expected_bd.compute = expected_compute; + expected_bd.inter_node_recv_comm = 0.0; + expected_bd.inter_node_send_comm = 0.0; + expected_bd.intra_node_recv_comm = 0.0; + expected_bd.intra_node_send_comm = 0.0; + expected_bd.shared_mem_comm = 0.0; // all at home + expected_bd.memory_breakdown = {expected_block_mem, 0.0, 0.0}; + + // Compute expected work (shared_mem_comm is 0, so only the compute term contributes) + double expected_work = + expected_bd.compute * wm.rank_alpha + + expected_bd.shared_mem_comm * wm.delta; + + return expected_work; +} + +template +double TestWorkModelBasic::setupRandomLoadAndIntraCommNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero 
coefficients but there will only be load + setupRandomNonzeroWorkModel(gen, wm); + + // Do not consider memory even though we will define some + setupNoMemoryInfo(cfg); + + // Generate graph: task loads are drawn from an exponential distribution and + // the tasks come from generateTasksWithoutSharedBlocks; communication edges are + // added further below, so the expected work has a communication term in + // addition to the load term + int min_tasks_per_rank = 0; + int max_tasks_per_rank = 100; + double expo_lambda = 5000.0; + std::exponential_distribution<> load_dist(expo_lambda); + generateTasksWithoutSharedBlocks( + pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist + ); + + // Define random task memory even though we will not use it + setupRandomTaskMemory(gen, pd); + + // Add communication edges through generateIntraRankComm + std::uniform_int_distribution<> eps_per_task_dist(0, 10); + std::exponential_distribution<> weight_per_edge_dist(100.0); + generateIntraRankComm( + pd, gen, eps_per_task_dist, weight_per_edge_dist + ); + + // Define expected breakdown and work + double expected_compute = 0.0; + auto &taskmap = pd.getTasksMap(); + for (auto &t : taskmap) { + expected_compute += t.second.getLoad(); + } + double expected_intra_recv = 0.0; + double expected_intra_send = 0.0; + auto &edges = pd.getCommunications(); + // Every edge volume is counted as both a receive and a send + for (auto &e : edges) { + expected_intra_recv += e.getVolume(); + expected_intra_send += e.getVolume(); + } + expected_bd.compute = expected_compute; + expected_bd.inter_node_recv_comm = 0.0; + expected_bd.inter_node_send_comm = 0.0; + expected_bd.intra_node_recv_comm = expected_intra_recv; + expected_bd.intra_node_send_comm = expected_intra_send; + expected_bd.shared_mem_comm = 0.0; + expected_bd.memory_breakdown = {0.0, 0.0, 0.0}; + + // Compute expected work: load term plus gamma times the larger of the intra + // send/recv volumes + double expected_work = + expected_bd.compute * wm.rank_alpha + + std::max( + expected_bd.intra_node_recv_comm, expected_bd.intra_node_send_comm + ) * wm.gamma; + return expected_work; +} + +// Builds a problem with random task loads from generateTasksWithoutSharedBlocks +// plus edges from generateInterRankComm, runs CommunicationsSymmetrizer on pd, +// and turns memory info off through setupNoMemoryInfo. Fills expected_bd and +// returns the load term (compute * wm.rank_alpha) plus wm.beta times the larger +// of the inter send/recv volumes. +template +double TestWorkModelBasic::setupRandomLoadAndInterCommNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + 
algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero coefficients; load and inter-rank communication + // both contribute to the expected work + setupRandomNonzeroWorkModel(gen, wm); + + // Do not consider memory even though we will define some + setupNoMemoryInfo(cfg); + + // Generate graph: task loads are drawn uniformly from [1.0, 1000.0) and the + // tasks come from generateTasksWithoutSharedBlocks; communication edges are + // added further below + int min_tasks_per_rank = 20; + int max_tasks_per_rank = 50; + std::uniform_real_distribution<> load_dist(1.0, 1000.0); + generateTasksWithoutSharedBlocks( + pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist + ); + + // Define random task memory even though we will not use it + setupRandomTaskMemory(gen, pd); + + // Add communication edges through generateInterRankComm + std::uniform_int_distribution<> eps_per_task_dist(0, 10); + std::exponential_distribution<> weight_per_edge_dist(100.0); + generateInterRankComm( + pd, gen, eps_per_task_dist, weight_per_edge_dist, min_tasks_per_rank, + this->comm.numRanks(), 0.5 + ); + + // NOTE(review): presumably makes each edge visible on both of its endpoint + // ranks -- confirm against CommunicationsSymmetrizer + vt_lb::algo::temperedlb::CommunicationsSymmetrizer cs(this->comm, pd); + cs.run(); + + // Define expected breakdown and work + double expected_compute = 0.0; + auto &taskmap = pd.getTasksMap(); + for (auto &t : taskmap) { + expected_compute += t.second.getLoad(); + } + auto rank = this->comm.getRank(); + double expected_inter_recv = 0.0; + double expected_inter_send = 0.0; + auto &edges = pd.getCommunications(); + // Among edges whose endpoints are on different ranks, those arriving at this + // rank count as receives and all others as sends; same-rank edges are ignored + for (auto &e : edges) { + if (e.getFromRank() != e.getToRank()) { + if (e.getToRank() == rank) { + expected_inter_recv += e.getVolume(); + } else { + expected_inter_send += e.getVolume(); + } + } + } + expected_bd.compute = expected_compute; + expected_bd.inter_node_recv_comm = expected_inter_recv; + expected_bd.inter_node_send_comm = expected_inter_send; + expected_bd.intra_node_recv_comm = 0.0; + expected_bd.intra_node_send_comm = 0.0; + expected_bd.shared_mem_comm = 0.0; + expected_bd.memory_breakdown = {0.0, 0.0, 0.0}; + + // Compute expected work: load term plus beta times the larger of the inter + // send/recv volumes + double expected_work = 
+ expected_bd.compute * wm.rank_alpha + + std::max( + expected_bd.inter_node_recv_comm, expected_bd.inter_node_send_comm + ) * wm.beta; + return expected_work; +} + +template +double TestWorkModelBasic::setupRandomLoadAndMixedCommNoMemProblem( + std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg, + algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm +) { + // We're using all non-zero coefficients; load, inter-rank and intra-rank + // communication all contribute to the expected work + setupRandomNonzeroWorkModel(gen, wm); + + // Do not consider memory even though we will define some + setupNoMemoryInfo(cfg); + + // Generate graph: task loads are drawn uniformly from [0.1, 50.0) and the + // tasks come from generateTasksWithoutSharedBlocks; communication edges are + // added further below + int min_tasks_per_rank = 20; + int max_tasks_per_rank = 50; + std::uniform_real_distribution<> load_dist(0.1, 50.0); + generateTasksWithoutSharedBlocks( + pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist + ); + + // Define random task memory even though we will not use it + setupRandomTaskMemory(gen, pd); + + // Add communication edges through generateRankComm + std::uniform_int_distribution<> eps_per_task_dist(0, 10); + std::exponential_distribution<> weight_per_edge_dist(1000.0); + generateRankComm( + pd, gen, eps_per_task_dist, weight_per_edge_dist, min_tasks_per_rank, + this->comm.numRanks(), 0.5 + ); + + // NOTE(review): presumably makes each edge visible on both of its endpoint + // ranks -- confirm against CommunicationsSymmetrizer + vt_lb::algo::temperedlb::CommunicationsSymmetrizer cs(this->comm, pd); + cs.run(); + + // Define expected breakdown and work + double expected_compute = 0.0; + auto &taskmap = pd.getTasksMap(); + for (auto &t : taskmap) { + expected_compute += t.second.getLoad(); + } + auto rank = this->comm.getRank(); + double expected_intra_recv = 0.0; + double expected_intra_send = 0.0; + double expected_inter_recv = 0.0; + double expected_inter_send = 0.0; + auto &edges = pd.getCommunications(); + // Edges whose endpoints are on different ranks count as inter receives when + // they arrive at this rank and as inter sends otherwise; same-rank edges + // count as both an intra receive and an intra send + for (auto &e : edges) { + if (e.getFromRank() != e.getToRank()) { + if (e.getToRank() == rank) { + expected_inter_recv += e.getVolume(); + } else { + expected_inter_send += e.getVolume(); + 
} + } else { + expected_intra_recv += e.getVolume(); + expected_intra_send += e.getVolume(); + } + } + expected_bd.compute = expected_compute; + expected_bd.inter_node_recv_comm = expected_inter_recv; + expected_bd.inter_node_send_comm = expected_inter_send; + expected_bd.intra_node_recv_comm = expected_intra_recv; + expected_bd.intra_node_send_comm = expected_intra_send; + expected_bd.shared_mem_comm = 0.0; + expected_bd.memory_breakdown = {0.0, 0.0, 0.0}; + + // Compute expected work: load term plus the larger of send/recv for the inter + // (beta) and intra (gamma) communication volumes + double expected_work = + expected_bd.compute * wm.rank_alpha + + std::max( + expected_bd.inter_node_recv_comm, expected_bd.inter_node_send_comm + ) * wm.beta + + std::max( + expected_bd.intra_node_recv_comm, expected_bd.intra_node_send_comm + ) * wm.gamma; + return expected_work; +} + +// Recomputes the work breakdown for pd and cfg with WorkModelCalculator and +// checks it against expected_bd, checks computeWork against expected_work, and +// then checks that computeWorkUpdate called with empty add/remove lists returns +// the same work as computeWork. +template +void TestWorkModelBasic::verifyNoChangeUpdate( + const model::PhaseData &pd, const algo::temperedlb::Configuration &cfg, + const algo::temperedlb::WorkBreakdown &expected_bd, + const algo::temperedlb::WorkModel &wm, double expected_work +) { + auto bd = algo::temperedlb::WorkModelCalculator::computeWorkBreakdown(pd, cfg); + auto &mb = bd.memory_breakdown; + auto &emb = expected_bd.memory_breakdown; + + // Verify computed work breakdown + // NOTE(review): compute is compared with EXPECT_FLOAT_EQ while the other + // fields use EXPECT_DOUBLE_EQ -- presumably to tolerate summation-order + // differences; confirm + EXPECT_FLOAT_EQ(bd.compute, expected_bd.compute); + EXPECT_DOUBLE_EQ(bd.inter_node_recv_comm, expected_bd.inter_node_recv_comm); + EXPECT_DOUBLE_EQ(bd.inter_node_send_comm, expected_bd.inter_node_send_comm); + EXPECT_DOUBLE_EQ(bd.intra_node_recv_comm, expected_bd.intra_node_recv_comm); + EXPECT_DOUBLE_EQ(bd.intra_node_send_comm, expected_bd.intra_node_send_comm); + EXPECT_DOUBLE_EQ(bd.shared_mem_comm, expected_bd.shared_mem_comm); + + // Verify computed memory breakdown + EXPECT_DOUBLE_EQ(mb.current_memory_usage, emb.current_memory_usage); + EXPECT_DOUBLE_EQ( + mb.current_max_task_working_bytes, emb.current_max_task_working_bytes + ); + EXPECT_DOUBLE_EQ( + mb.current_max_task_serialized_bytes, emb.current_max_task_serialized_bytes + ); + + // Verify 
baseline work + double base = algo::temperedlb::WorkModelCalculator::computeWork(wm, bd); + EXPECT_FLOAT_EQ(base, expected_work); + + // Verify update under no changes: with empty add/remove lists the updated + // work must equal the baseline + std::vector add_tasks; + std::vector add_edges; + std::vector remove_ids; + double updated = algo::temperedlb::WorkModelCalculator::computeWorkUpdate( + pd, wm, bd, add_tasks, add_edges, remove_ids + ); + EXPECT_DOUBLE_EQ(updated, base); +} + +// Instantiate the typed suite over the types listed in CommTypesForTesting +TYPED_TEST_SUITE(TestWorkModelBasic, CommTypesForTesting, CommNameGenerator); + +// computeWork on a hand-built breakdown must match the hand-computed total +TYPED_TEST(TestWorkModelBasic, compute_work_uses_max_comm_components) { + // Build a breakdown with differing send/recv values to test max selection + algo::temperedlb::WorkBreakdown bd; + bd.compute = 10.0; + bd.inter_node_recv_comm = 5.0; + bd.inter_node_send_comm = 7.5; // inter-node max should be 7.5 + bd.intra_node_recv_comm = 1.0; + bd.intra_node_send_comm = 2.0; // intra-node max should be 2.0 + bd.shared_mem_comm = 3.0; + + algo::temperedlb::WorkModel wm; + wm.rank_alpha = 2.0; + wm.beta = 1.0; + wm.gamma = 0.5; + wm.delta = 3.0; + + double w = algo::temperedlb::WorkModelCalculator::computeWork(wm, bd); + // Expected (rank_alpha*compute + beta*inter max + gamma*intra max + + // delta*shared_mem_comm): 2*10 + 1*7.5 + 0.5*2 + 3*3 = 20 + 7.5 + 1 + 9 = 37.5 + EXPECT_DOUBLE_EQ(w, 37.5); +} + +// With every task/shared-block memory flag off, the reported memory usage must +// equal the rank footprint set on the PhaseData +TYPED_TEST(TestWorkModelBasic, compute_memory_usage_rank_only) { + auto& the_comm = this->comm; + algo::temperedlb::Configuration cfg; + // Disable all task/shared-block memory flags so only rank footprint contributes + cfg.work_model_.has_memory_info = true; + cfg.work_model_.has_task_serialized_memory_info = false; + cfg.work_model_.has_task_working_memory_info = false; + cfg.work_model_.has_task_footprint_memory_info = false; + cfg.work_model_.has_shared_block_memory_info = false; + + model::PhaseData pd(the_comm.getRank()); + pd.setRankFootprintBytes(12345.0); + pd.setRankMaxMemoryAvailable(999999.0); + + auto mb = algo::temperedlb::WorkModelCalculator::computeMemoryUsage(cfg, pd); + EXPECT_DOUBLE_EQ(mb.current_memory_usage, 12345.0); + 
EXPECT_DOUBLE_EQ(mb.current_max_task_working_bytes, 0.0); + EXPECT_DOUBLE_EQ(mb.current_max_task_serialized_bytes, 0.0); +} + +TYPED_TEST(TestWorkModelBasic, check_memory_fit_basic) { + auto& the_comm = this->comm; + algo::temperedlb::Configuration cfg; + cfg.work_model_.has_memory_info = true; // enable memory checks + + model::PhaseData pd(the_comm.getRank()); + pd.setRankMaxMemoryAvailable(1000.0); + + // Fits: usage below the available memory + EXPECT_TRUE(algo::temperedlb::WorkModelCalculator::checkMemoryFit(cfg, pd, 999.9)); + // Boundary: usage equal to the available memory is still expected to fit + EXPECT_TRUE(algo::temperedlb::WorkModelCalculator::checkMemoryFit(cfg, pd, 1000.0)); + // Exceeds: usage above the available memory must be rejected + EXPECT_FALSE(algo::temperedlb::WorkModelCalculator::checkMemoryFit(cfg, pd, 1000.1)); +} + +// computeWorkUpdate called with empty add/remove lists must return the same +// value as computeWork on a small hand-built problem with shared blocks +TYPED_TEST(TestWorkModelBasic, compute_work_update_no_changes_is_identity) { + auto& the_comm = this->comm; + + // Work model with non-zero coefficients + algo::temperedlb::WorkModel wm; + wm.rank_alpha = 0.5; + wm.beta = 0.2; + wm.gamma = 0.3; + wm.delta = 1.0; + + // Phase data with tasks referencing shared blocks; shared blocks off-home produce bytes + model::PhaseData pd(the_comm.getRank()); + // Define two shared blocks with specific homes/sizes; sbA is homed on the next + // rank, which wraps back to this rank when numRanks() is 1 + model::SharedBlock sbA{1, 100.0, (the_comm.getRank() + 1) % the_comm.numRanks()}; + model::SharedBlock sbB{2, 50.0, the_comm.getRank()}; // local-home; should not add shared_mem_comm + pd.addSharedBlock(sbA); + pd.addSharedBlock(sbB); + + // Two tasks: t1 references both blocks, t2 references only sbA + model::Task t1{10, 11.0}; + t1.addSharedBlock(sbA.getId()); + t1.addSharedBlock(sbB.getId()); + + model::Task t2{11, 0.0}; + t2.addSharedBlock(sbA.getId()); + + pd.addTask(t1); + pd.addTask(t2); + + // Current breakdown computed from PhaseData and a config with only + // shared-block memory info enabled + algo::temperedlb::Configuration cfg; + cfg.work_model_.has_memory_info = true; + cfg.work_model_.has_shared_block_memory_info = true; + cfg.work_model_.has_task_footprint_memory_info = false; + cfg.work_model_.has_task_working_memory_info = false; + 
cfg.work_model_.has_task_serialized_memory_info = false; + + auto bd = algo::temperedlb::WorkModelCalculator::computeWorkBreakdown(pd, cfg); + + // Baseline work + double base = algo::temperedlb::WorkModelCalculator::computeWork(wm, bd); + + // No changes: all three change lists stay empty + std::vector add_tasks; + std::vector add_edges; + std::vector remove_ids; + + double updated = algo::temperedlb::WorkModelCalculator::computeWorkUpdate( + pd, wm, bd, add_tasks, add_edges, remove_ids + ); + + EXPECT_DOUBLE_EQ(updated, base); +} + +// Runs verifyNoChangeUpdate on problems built by +// setupRandomUniformLoadOnlyNoMemProblem, one per seed iteration +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesUniformLoadOnlyNoMem) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + // Reseed from the rank and the iteration index before building each problem + gen.seed(rank * 89 + i * 3); + model::PhaseData pd(rank); + double expected_work = this->setupRandomUniformLoadOnlyNoMemProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +// Runs verifyNoChangeUpdate on problems built by +// setupRandomLoadOnlyNoMemProblem, one per seed iteration +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadOnlyNoMem) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + // Reseed from the rank and the iteration index before building each problem + gen.seed(rank * 21 + i * 4); + model::PhaseData pd(rank); + double expected_work = this->setupRandomLoadOnlyNoMemProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +// Runs verifyNoChangeUpdate on problems built by +// setupRandomUniformSharedBlocksProblem, one per seed iteration +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesUniformSharedBlocks) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + // Reseed from the rank and the iteration index before building each problem + gen.seed(rank * 191 + i * 5); + model::PhaseData pd(rank); + double expected_work = 
this->setupRandomUniformSharedBlocksProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesSharedBlocks) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + gen.seed(rank * 88 + i * 6); + model::PhaseData pd(rank); + double expected_work = this->setupRandomSharedBlocksProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadAndIntraComm) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + gen.seed(rank * 67 + i * 7); + model::PhaseData pd(rank); + double expected_work = this->setupRandomLoadAndIntraCommNoMemProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadAndInterComm) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + gen.seed(rank * 42 + i * 8); + model::PhaseData pd(rank); + double expected_work = this->setupRandomLoadAndInterCommNoMemProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadAndMixedComm) { + auto& the_comm = this->comm; + auto rank = the_comm.getRank(); + + algo::temperedlb::Configuration cfg; + algo::temperedlb::WorkBreakdown bd; + // cfg, bd and wm are declared once outside the seed loop and handed to the setup helper on every iteration + 
algo::temperedlb::WorkModel wm; + + std::mt19937 gen; + for (int i=0; inum_seeds; ++i) { + gen.seed(rank * 11 + i * 9); + model::PhaseData pd(rank); + double expected_work = this->setupRandomLoadAndMixedCommNoMemProblem( + gen, pd, cfg, bd, wm + ); + + this->verifyNoChangeUpdate(pd, cfg, bd, wm, expected_work); + } +} + +}}} // end namespace vt_lb::tests::unit