From 8fa0d4c2906ba407d54a37c1c534b7005da9be1c Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 12:11:40 +0100 Subject: [PATCH 1/9] update architecture --- include/osp/bsp/model/BspArchitecture.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 74872aae..aaf73d3f 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -604,7 +604,7 @@ class BspArchitecture { std::vector>> matrix(numberOfProcessors_, std::vector>(numberOfProcessors_)); for (unsigned i = 0; i < numberOfProcessors_; ++i) { for (unsigned j = 0; j < numberOfProcessors_; ++j) { - matrix[i][j] = sendCosts_[FlatIndex(i, j)]; + matrix[i][j] = sendCosts_.at(FlatIndex(i, j)); } } return matrix; From 2219a6b46740d77c2d453baa782adffe3ad9458c Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 14:19:59 +0100 Subject: [PATCH 2/9] removed some bounds checking --- include/osp/bsp/model/BspArchitecture.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index aaf73d3f..74872aae 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -604,7 +604,7 @@ class BspArchitecture { std::vector>> matrix(numberOfProcessors_, std::vector>(numberOfProcessors_)); for (unsigned i = 0; i < numberOfProcessors_; ++i) { for (unsigned j = 0; j < numberOfProcessors_; ++j) { - matrix[i][j] = sendCosts_.at(FlatIndex(i, j)); + matrix[i][j] = sendCosts_[FlatIndex(i, j)]; } } return matrix; From a9ba08678c488eba9711dcfe03925a3e8c31d978 Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 16:49:24 +0100 Subject: [PATCH 3/9] cosmetics --- include/osp/bsp/model/BspSchedule.hpp | 25 ++++++------------- .../computational_dag_vector_impl.hpp | 5 ---- 2 files changed, 8 insertions(+), 22 
deletions(-) diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index eeeaeec3..25de96cc 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -105,9 +105,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval comm_phase_empty(number_of_supersteps, true); - for (const auto &node : instance->vertices()) - for (const auto &child : instance->getComputationalDag().children(node)) - if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) + for (const auto &node : instance->vertices()) { + for (const auto &child : instance->getComputationalDag().children(node)) { + if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) { for (unsigned offset = 1; offset <= getStaleness(); ++offset) comm_phase_empty[node_to_superstep_assignment[child] - offset] = false; + } + } + } std::vector new_step_index(number_of_supersteps); unsigned current_index = 0; @@ -585,9 +586,9 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalvertices()) + for (const auto &node : instance->vertices()) { node_to_superstep_assignment[node] = new_step_index[node_to_superstep_assignment[node]]; - + } setNumberOfSupersteps(current_index); } @@ -633,7 +634,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> current_proc_transient_memory(instance->numberOfProcessors(), 0); for (const auto &node : instance->vertices()) { - const unsigned proc = node_to_processor_assignment[node]; current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node); current_proc_transient_memory[proc] = std::max( @@ -659,7 +659,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> current_proc_memory(instance->numberOfProcessors(), 0); for (const auto &node : instance->vertices()) { - const unsigned proc = node_to_processor_assignment[node]; current_proc_memory[proc] += 
instance->getComputationalDag().vertex_mem_weight(node); @@ -671,12 +670,10 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - v_memw_t memory = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { memory += instance->getComputationalDag().vertex_mem_weight(node) + @@ -701,12 +698,10 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - std::unordered_set> nodes_with_incoming_edges; v_memw_t memory = 0; @@ -714,7 +709,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalgetComputationalDag().vertex_comm_weight(node); for (const auto &parent : instance->getComputationalDag().parents(node)) { - if (node_to_superstep_assignment[parent] != step) { nodes_with_incoming_edges.insert(parent); } @@ -734,17 +728,14 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - std::unordered_set> nodes_with_incoming_edges; v_memw_t memory = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - if (is_source(node, instance->getComputationalDag())) { memory += instance->getComputationalDag().vertex_mem_weight(node); } diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp index efe1996e..0a1b676a 100644 --- a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp @@ -78,7 +78,6 @@ class computational_dag_vector_impl { explicit computational_dag_vector_impl(const vertex_idx num_vertices) : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0), num_vertex_types_(0) { - for (vertex_idx i = 0; i < num_vertices; ++i) { vertices_[i].id = i; } @@ -98,9 +97,7 @@ class computational_dag_vector_impl { */ template 
explicit computational_dag_vector_impl(const Graph_t &other) { - static_assert(is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); - constructComputationalDag(other, *this); } @@ -196,7 +193,6 @@ class computational_dag_vector_impl { */ vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight, const vertex_mem_weight_type mem_weight, const vertex_type_type vertex_type = 0) { - vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type); out_neigbors.push_back({}); in_neigbors.push_back({}); @@ -231,7 +227,6 @@ class computational_dag_vector_impl { * @return True if the edge was added, false if it already exists or vertices are invalid. */ bool add_edge(const vertex_idx source, const vertex_idx target) { - if (source >= static_cast(vertices_.size()) || target >= static_cast(vertices_.size()) || source == target) return false; From 3876a8fdb27d824b7cba0c8ea14928f440357e45 Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 16:54:06 +0100 Subject: [PATCH 4/9] to_string --- include/osp/bsp/model/BspInstance.hpp | 28 +++++++++------------------ include/osp/bsp/model/BspSchedule.hpp | 1 - 2 files changed, 9 insertions(+), 20 deletions(-) diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 7ab72fd4..6257b99d 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -32,7 +32,11 @@ enum class RETURN_STATUS { OSP_SUCCESS, TIMEOUT, ERROR }; -inline std::string to_string(const RETURN_STATUS status) { +/** + * @brief Converts the enum to a string literal. + * Returns const char* to avoid std::string allocation overhead. 
+ */ +inline const char *to_string(const RETURN_STATUS status) { switch (status) { case RETURN_STATUS::OSP_SUCCESS: return "SUCCESS"; @@ -47,25 +51,11 @@ inline std::string to_string(const RETURN_STATUS status) { } } +/** + * @brief Stream operator overload using the helper function. + */ inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { - switch (status) { - case RETURN_STATUS::OSP_SUCCESS: - os << "SUCCESS"; - break; - case RETURN_STATUS::BEST_FOUND: - os << "BEST_FOUND"; - break; - case RETURN_STATUS::TIMEOUT: - os << "TIMEOUT"; - break; - case RETURN_STATUS::ERROR: - os << "ERROR"; - break; - default: - os << "UNKNOWN"; - break; - } - return os; + return os << to_string(status); } /** diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index 25de96cc..7a02d0f3 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -741,7 +741,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalgetComputationalDag().parents(node)) { - if (node_to_superstep_assignment[parent] != step) { nodes_with_incoming_edges.insert(parent); } From f31fcf01798c9bfcacaa6fbdf12aecc140f6970e Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 16:55:34 +0100 Subject: [PATCH 5/9] noexcept --- include/osp/bsp/model/BspInstance.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 6257b99d..697488be 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -113,10 +113,10 @@ class BspInstance { } BspInstance(const BspInstance &other) = default; - BspInstance(BspInstance &&other) = default; + BspInstance(BspInstance &&other) noexcept = default; BspInstance &operator=(const BspInstance &other) = default; - BspInstance &operator=(BspInstance &&other) = default; + BspInstance &operator=(BspInstance &&other) noexcept = default; 
/** * @brief Returns a reference to the BSP architecture for the instance. From 29240622471cbe1826111de34f59525c276350a2 Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 17:32:30 +0100 Subject: [PATCH 6/9] Processor range --- include/osp/bsp/model/BspInstance.hpp | 263 ++++++++++-------- .../model/util/CompatibleProcessorRange.hpp | 101 +++++++ .../KernighanLin_v2/kl_improver.hpp | 1 + tests/bsp_instance.cpp | 1 + 4 files changed, 256 insertions(+), 110 deletions(-) create mode 100644 include/osp/bsp/model/util/CompatibleProcessorRange.hpp diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 697488be..e5a1ac3a 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -78,6 +78,20 @@ class BspInstance { // for problem instances with heterogeneity std::vector> nodeProcessorCompatibility = std::vector>({{true}}); + /** + * @brief Calculates the maximum memory bound for each processor type. + * + * @return A vector where the index corresponds to the processor type and the value is the maximum memory bound for that type. + */ + std::vector> calculateMaxMemoryPerProcessorType() const { + std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); + for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { + max_memory_per_proc_type[architecture.processorType(proc)] = + std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); + } + return max_memory_per_proc_type; + } + public: /** * @brief Default constructor for the BspInstance class. @@ -123,90 +137,86 @@ class BspInstance { * * @return A reference to the BSP architecture for the instance. */ - inline const BspArchitecture &getArchitecture() const { return architecture; } - - /** - * @brief Returns a reference to the BSP architecture for the instance. - * - * @return A reference to the BSP architecture for the instance. 
- */ - inline BspArchitecture &getArchitecture() { return architecture; } + [[nodiscard]] const BspArchitecture &getArchitecture() const { return architecture; } + [[nodiscard]] BspArchitecture &getArchitecture() { return architecture; } /** * @brief Sets the BSP architecture for the instance. * * @param architecture_ The BSP architecture for the instance. */ - inline void setArchitecture(const BspArchitecture &architechture_) { architecture = architechture_; } + void setArchitecture(const BspArchitecture &architechture_) { architecture = architechture_; } /** * @brief Returns a reference to the computational DAG for the instance. * * @return A reference to the computational DAG for the instance. */ - inline const Graph_t &getComputationalDag() const { return cdag; } + [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; } + [[nodiscard]] Graph_t &getComputationalDag() { return cdag; } /** - * @brief Returns a reference to the computational DAG for the instance. + * @brief Returns the number of vertices in the computational DAG. * - * @return A reference to the computational DAG for the instance. + * @return The number of vertices. */ - inline Graph_t &getComputationalDag() { return cdag; } + [[nodiscard]] vertex_idx_t numberOfVertices() const { return cdag.num_vertices(); } - inline vertex_idx_t numberOfVertices() const { return cdag.num_vertices(); } - - inline auto vertices() const { return cdag.vertices(); } + /** + * @brief Returns a view over the vertex indices of the computational DAG. + * @return A view over the vertex indices. + */ + [[nodiscard]] auto vertices() const { return cdag.vertices(); } - inline auto processors() const { return architecture.processors(); } + /** + * @brief Returns a view over the processor indices of the BSP architecture. + * @return A view over the processor indices. 
+ */ + [[nodiscard]] auto processors() const { return architecture.processors(); } /** * @brief Returns the number of processors in the BSP architecture. - * * @return The number of processors in the BSP architecture. */ - inline unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); } + [[nodiscard]] unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); } /** * @brief Returns the communication costs between two processors. - * * The communication costs are the send costs multiplied by the communication costs. * - * @param p1 The index of the first processor. - * @param p2 The index of the second processor. + * @param p_send The index of the sending processor. + * @param p_receive The index of the receiving processor. * * @return The communication costs between the two processors. */ - inline v_commw_t communicationCosts(unsigned int p1, unsigned int p2) const { - return architecture.communicationCosts(p1, p2); + [[nodiscard]] v_commw_t communicationCosts(const unsigned p_send, const unsigned p_receive) const { + return architecture.communicationCosts(p_send, p_receive); } /** * @brief Returns the send costs between two processors. * - * - * @param p1 The index of the first processor. - * @param p2 The index of the second processor. + * @param p_send The index of the sending processor. + * @param p_receive The index of the receiving processor. * * @return The send costs between the two processors. */ - inline v_commw_t sendCosts(unsigned int p1, unsigned int p2) const { - return architecture.sendCosts(p1, p2); + [[nodiscard]] v_commw_t sendCosts(const unsigned p_send, const unsigned p_receive) const { + return architecture.sendCosts(p_send, p_receive); } /** * @brief Returns a copy of the send costs matrix. * @return A copy of the send costs matrix. 
*/ - inline std::vector>> sendCostMatrix() const { - return architecture.sendCostMatrix(); - } + [[nodiscard]] std::vector>> sendCosts() const { return architecture.sendCosts(); } /** * @brief Returns the flattened send costs vector. * * @return The flattened send costs vector. */ - inline const std::vector> &sendCostsVector() const { + [[nodiscard]] const std::vector> &sendCostsVector() const { return architecture.sendCostsVector(); } @@ -215,29 +225,49 @@ class BspInstance { * * @return The communication costs as an unsigned integer. */ - inline v_commw_t communicationCosts() const { return architecture.communicationCosts(); } + [[nodiscard]] v_commw_t communicationCosts() const { return architecture.communicationCosts(); } /** * @brief Returns the synchronization costs of the BSP architecture. * * @return The synchronization costs as an unsigned integer. */ - inline v_commw_t synchronisationCosts() const { return architecture.synchronisationCosts(); } + [[nodiscard]] v_commw_t synchronisationCosts() const { return architecture.synchronisationCosts(); } /** * @brief Returns whether the architecture is NUMA. * * @return True if the architecture is NUMA, false otherwise. */ - inline bool isNumaInstance() const { return architecture.isNumaArchitecture(); } + [[nodiscard]] bool isNumaInstance() const { return architecture.isNumaArchitecture(); } - inline v_memw_t memoryBound(unsigned proc) const { return architecture.memoryBound(proc); } + /** + * @brief Returns the memory bound for a specific processor. + * + * @param proc The processor index. + * @return The memory bound for the processor. + */ + [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); } - v_memw_t maxMemoryBoundProcType(unsigned procType) const { + /** + * @brief Returns the maximum memory bound for a specific processor type. + * + * @param procType The processor type. + * @return The maximum memory bound for the processor type. 
+ */ + [[nodiscard]] v_memw_t maxMemoryBoundProcType(const unsigned procType) const { return architecture.maxMemoryBoundProcType(procType); } - v_memw_t maxMemoryBoundNodeType(unsigned nodeType) const { + /** + * @brief Returns the maximum memory bound for a specific node type. + * + * This considers all compatible processor types for the given node type. + * + * @param nodeType The node type. + * @return The maximum memory bound for the node type. + */ + [[nodiscard]] v_memw_t maxMemoryBoundNodeType(const unsigned nodeType) const { int max_mem = 0; for (unsigned proc = 0; proc < architecture.getNumberOfProcessorTypes(); proc++) { if (isCompatibleType(nodeType, architecture.processorType(proc))) { @@ -252,29 +282,30 @@ class BspInstance { * * @param cost The communication costs to set. */ - inline void setCommunicationCosts(const v_commw_t cost) { architecture.setCommunicationCosts(cost); } + void setCommunicationCosts(const v_commw_t cost) { architecture.setCommunicationCosts(cost); } /** * @brief Sets the synchronisation costs of the BSP architecture. * * @param cost The synchronisation costs to set. */ - inline void setSynchronisationCosts(const v_commw_t cost) { architecture.setSynchronisationCosts(cost); } + void setSynchronisationCosts(const v_commw_t cost) { architecture.setSynchronisationCosts(cost); } /** * @brief Sets the number of processors in the BSP architecture. * * @param num The number of processors to set. */ - inline void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } + void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } - bool check_memory_constraints_feasibility() const { + /** + * @brief Checks if the memory constraints are feasible for the given instance. + * + * @return True if the memory constraints are feasible, false otherwise. 
+ */ + [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { + const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType(); - std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] = - std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); - } for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); bool fits = false; @@ -294,13 +325,15 @@ class BspInstance { return true; } + /** + * @brief Adjusts the memory constraints of the architecture to ensure feasibility. + * + * If a node type requires more memory than available on any compatible processor type, + * the memory bound of compatible processors is increased. + */ void adjust_memory_constraints() { + const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType(); - std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] = - std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); - } for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); bool fits = false; @@ -330,34 +363,72 @@ class BspInstance { } } - inline v_type_t processorType(unsigned p1) const { return architecture.processorType(p1); } + /** + * @brief Returns the processor type for a given processor index. + * + * @param proc The processor index. + * @return The processor type. 
+ */ + [[nodiscard]] v_type_t processorType(const unsigned proc) const { return architecture.processorType(proc); } - inline bool isCompatible(const vertex_idx_t &node, unsigned processor_id) const { + /** + * @brief Checks if a node is compatible with a processor. + * + * @param node The node index. + * @param processor_id The processor index. + * @return True if the node is compatible with the processor, false otherwise. + */ + [[nodiscard]] bool isCompatible(const vertex_idx_t &node, const unsigned processor_id) const { return isCompatibleType(cdag.vertex_type(node), architecture.processorType(processor_id)); } - inline bool isCompatibleType(v_type_t nodeType, v_type_t processorType) const { - + /** + * @brief Checks if a node type is compatible with a processor type. + * + * @param nodeType The node type. + * @param processorType The processor type. + * @return True if the node type is compatible with the processor type, false otherwise. + */ + [[nodiscard]] bool isCompatibleType(const v_type_t nodeType, const v_type_t processorType) const { return nodeProcessorCompatibility[nodeType][processorType]; } + /** + * @brief Sets the node-processor compatibility matrix. + * + * @param compatibility_ The compatibility matrix. + */ void setNodeProcessorCompatibility(const std::vector> &compatibility_) { - nodeProcessorCompatibility = compatibility_; } - const std::vector> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } - - void setDiagonalCompatibilityMatrix(unsigned number_of_types) { + /** + * @brief Returns the node-processor compatibility matrix. + * + * @return The node-processor compatibility matrix. + */ + [[nodiscard]] const std::vector> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } + /** + * @brief Sets the compatibility matrix to be diagonal. + * + * This implies that node type `i` is only compatible with processor type `i`. + * + * @param number_of_types The number of types. 
+ */ + void setDiagonalCompatibilityMatrix(const unsigned number_of_types) { nodeProcessorCompatibility = std::vector>(number_of_types, std::vector(number_of_types, false)); for (unsigned i = 0; i < number_of_types; ++i) nodeProcessorCompatibility[i][i] = true; } + /** + * @brief Sets the compatibility matrix to all ones. + * + * This implies that all node types are compatible with all processor types. + */ void setAllOnesCompatibilityMatrix() { - unsigned number_of_node_types = cdag.num_vertex_types(); unsigned number_of_proc_types = architecture.getNumberOfProcessorTypes(); @@ -365,7 +436,12 @@ class BspInstance { std::vector>(number_of_node_types, std::vector(number_of_proc_types, true)); } - std::vector> getProcTypesCompatibleWithNodeType() const { + /** + * @brief Returns a list of compatible processor types for each node type. + * + * @return A vector where the index is the node type and the value is a vector of compatible processor types. + */ + [[nodiscard]] std::vector> getProcTypesCompatibleWithNodeType() const { unsigned numberOfNodeTypes = cdag.num_vertex_types(); unsigned numberOfProcTypes = architecture.getNumberOfProcessorTypes(); std::vector> compatibleProcTypes(numberOfNodeTypes); @@ -378,7 +454,14 @@ class BspInstance { return compatibleProcTypes; } - std::vector> getNodeNodeCompatabilityMatrix() const { + /** + * @brief Returns a compatibility matrix between node types. + * + * Two node types are compatible if they share at least one compatible processor type. + * + * @return A matrix where `[i][j]` is true if node type `i` and node type `j` are compatible. 
+ */ + [[nodiscard]] std::vector> getNodeNodeCompatabilityMatrix() const { std::vector> compMat(cdag.num_vertex_types(), std::vector(cdag.num_vertex_types(), false)); for (unsigned nodeType1 = 0; nodeType1 < cdag.num_vertex_types(); nodeType1++) { @@ -394,54 +477,14 @@ class BspInstance { return compMat; } - inline const std::vector> &getNodeProcessorCompatibilityMatrix() const { + /** + * @brief Returns the node-processor compatibility matrix. + * + * @return The node-processor compatibility matrix. + */ + [[nodiscard]] const std::vector> &getNodeProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } }; -template -class CompatibleProcessorRange { - - std::vector> type_processor_idx; - const BspInstance *instance = nullptr; - - public: - CompatibleProcessorRange() = default; - - CompatibleProcessorRange(const BspInstance &inst) { - initialize(inst); - } - - inline void initialize(const BspInstance &inst) { - - instance = &inst; - - if constexpr (has_typed_vertices_v) { - - type_processor_idx = std::vector>(inst.getComputationalDag().num_vertex_types()); - - for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { - for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) - if (inst.isCompatibleType(v_type, inst.processorType(proc))) - type_processor_idx[v_type].push_back(proc); - } - } - } - - inline const auto &compatible_processors_type(v_type_t type) const { - - assert(instance != nullptr); - - if constexpr (has_typed_vertices_v) { - return type_processor_idx[type]; - } else { - return instance->processors(); - } - } - - inline const auto &compatible_processors_vertex(vertex_idx_t vertex) const { - return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex)); - } -}; - } // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp new file mode 100644 index 
00000000..a4c5800a --- /dev/null +++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp @@ -0,0 +1,101 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include + +namespace osp { + +/** + * @class CompatibleProcessorRange + * @brief Helper class to efficiently iterate over compatible processors for a given node or node type. + * + * This class precomputes and stores the list of compatible processors for each node type. + * + * @tparam Graph_t The type of the computational DAG. + */ +template +class CompatibleProcessorRange { + + std::vector> type_processor_idx; + const BspInstance *instance = nullptr; + + public: + /** + * @brief Default constructor. + */ + CompatibleProcessorRange() = default; + + /** + * @brief Constructs a CompatibleProcessorRange for the given BspInstance. + * + * @param inst The BspInstance. + */ + CompatibleProcessorRange(const BspInstance &inst) { + initialize(inst); + } + + /** + * @brief Initializes the CompatibleProcessorRange with a BspInstance. + * + * @param inst The BspInstance. 
+ */ + void initialize(const BspInstance &inst) { + instance = &inst; + + if constexpr (has_typed_vertices_v) { + + type_processor_idx = std::vector>(inst.getComputationalDag().num_vertex_types()); + + for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { + for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) + if (inst.isCompatibleType(v_type, inst.processorType(proc))) + type_processor_idx[v_type].push_back(proc); + } + } + } + + /** + * @brief Returns a range of compatible processors for a given node type. + * + * @param type The node type. + * @return A const reference to a vector of compatible processor indices. + */ + [[nodiscard]] const auto &compatible_processors_type(v_type_t type) const { + assert(instance != nullptr); + + if constexpr (has_typed_vertices_v) { + return type_processor_idx[type]; + } else { + return instance->processors(); + } + } + + /** + * @brief Returns a range of compatible processors for a given vertex. + * + * @param vertex The vertex index. + * @return A const reference to a vector of compatible processor indices. + */ + [[nodiscard]] const auto &compatible_processors_vertex(vertex_idx_t vertex) const { + return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex)); + } +}; + +} // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp index 3657ed52..dd572710 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp @@ -30,6 +30,7 @@ limitations under the License. 
#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" #include "osp/auxiliary/misc.hpp" +#include "osp/bsp/model/util/CompatibleProcessorRange.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp index 60e95999..f45434de 100644 --- a/tests/bsp_instance.cpp +++ b/tests/bsp_instance.cpp @@ -23,6 +23,7 @@ limitations under the License. #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/model/util/CompatibleProcessorRange.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include From 4be4f9ba6dccaf3203f445c048962d2204343b10 Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Mon, 8 Dec 2025 18:23:20 +0100 Subject: [PATCH 7/9] return_status.hpp update update update, bugfix --- .../AbstractTestSuiteRunner.hpp | 35 ++- include/osp/auxiliary/return_status.hpp | 56 ++++ include/osp/bsp/model/BspArchitecture.hpp | 1 + include/osp/bsp/model/BspInstance.hpp | 248 ++++-------------- include/osp/bsp/model/BspSchedule.hpp | 20 +- .../model/util/CompatibleProcessorRange.hpp | 16 +- .../osp/bsp/model/{ => util}/SetSchedule.hpp | 2 +- .../bsp/model/{ => util}/VectorSchedule.hpp | 2 +- .../IlpSchedulers/CoptFullScheduler.hpp | 152 +++++------ .../TotalCommunicationScheduler.hpp | 3 +- .../KernighanLin/kl_current_schedule.hpp | 12 +- .../KernighanLin_v2/kl_active_schedule.hpp | 212 ++++++++------- .../LocalSearchMemoryConstraintModules.hpp | 13 +- include/osp/bsp/scheduler/Scheduler.hpp | 1 + include/osp/coarser/BspScheduleCoarser.hpp | 5 +- include/osp/coarser/MultilevelCoarser.hpp | 72 +++-- .../IsomorphicSubgraphScheduler.hpp | 9 - 
.../partitioners/partitioning_ILP.hpp | 49 ++-- 18 files changed, 377 insertions(+), 531 deletions(-) create mode 100644 include/osp/auxiliary/return_status.hpp rename include/osp/bsp/model/{ => util}/SetSchedule.hpp (99%) rename include/osp/bsp/model/{ => util}/VectorSchedule.hpp (99%) diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp index 86a9f1ea..80282f58 100644 --- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp +++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp @@ -28,15 +28,16 @@ limitations under the License. #include #include -#include -#include #include "ConfigParser.hpp" #include "StatsModules/IStatsModule.hpp" -#include "osp/bsp/model/BspInstance.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/return_status.hpp" +#include "osp/bsp/model/BspInstance.hpp" +#include +#include -//#define EIGEN_FOUND 1 +// #define EIGEN_FOUND 1 #ifdef EIGEN_FOUND #include @@ -83,7 +84,7 @@ class AbstractTestSuiteRunner { if (write_target_object_to_file) { output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory") - .get_value(); + .get_value(); if (output_target_object_dir_path.substr(0, 1) != "/") output_target_object_dir_path = executable_dir + output_target_object_dir_path; if (!output_target_object_dir_path.empty() && !std::filesystem::exists(output_target_object_dir_path)) { @@ -167,13 +168,13 @@ class AbstractTestSuiteRunner { } } - virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr& target_object, - const pt::ptree &algo_config, - long long &computation_time_ms) = 0; + virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr &target_object, + const pt::ptree &algo_config, + long long &computation_time_ms) = 0; virtual void create_and_register_statistic_modules(const std::string &module_name) = 0; - virtual 
void write_target_object_hook(const TargetObjectType&, const std::string &, const std::string &, + virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &, const std::string &) { } // default in case TargetObjectType cannot be written to file @@ -268,12 +269,12 @@ class AbstractTestSuiteRunner { SM_csc_int64 L_csc_int64{}; if constexpr (std::is_same_v || std::is_same_v) { - if (ext != "mtx"){ + if (ext != "mtx") { log_stream << "Error: Only .mtx file is accepted for SpTRSV" << std::endl; return 0; } - - if constexpr (std::is_same_v){ + + if constexpr (std::is_same_v) { graph_status = Eigen::loadMarket(L_csr_int32, filename_graph); if (!graph_status) { std::cerr << "Failed to read matrix from " << filename_graph << std::endl; @@ -297,7 +298,7 @@ class AbstractTestSuiteRunner { } } else { #endif - graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); + graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); #ifdef EIGEN_FOUND } @@ -309,22 +310,20 @@ class AbstractTestSuiteRunner { for (auto &algorithm_config_pair : parser.scheduler) { const pt::ptree &algo_config = algorithm_config_pair.second; - - std::string current_algo_name = algo_config.get_child("name").get_value(); log_stream << "Start Algorithm " + current_algo_name + "\n"; long long computation_time_ms; - std::unique_ptr target_object; - + std::unique_ptr target_object; + RETURN_STATUS exec_status = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms); if (exec_status != RETURN_STATUS::OSP_SUCCESS && exec_status != RETURN_STATUS::BEST_FOUND) { if (exec_status == RETURN_STATUS::ERROR) log_stream << "Error computing with " << current_algo_name << "." << std::endl; else if (exec_status == RETURN_STATUS::TIMEOUT) - log_stream << "Scheduler " << current_algo_name << " timed out." 
<< std::endl; + log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl; continue; } diff --git a/include/osp/auxiliary/return_status.hpp b/include/osp/auxiliary/return_status.hpp new file mode 100644 index 00000000..e5f0b870 --- /dev/null +++ b/include/osp/auxiliary/return_status.hpp @@ -0,0 +1,56 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include + +namespace osp { + +enum class RETURN_STATUS { OSP_SUCCESS, + BEST_FOUND, + TIMEOUT, + ERROR }; + +/** + * @brief Converts the enum to a string literal. + * Returns const char* to avoid std::string allocation overhead. + */ +inline const char *to_string(const RETURN_STATUS status) { + switch (status) { + case RETURN_STATUS::OSP_SUCCESS: + return "SUCCESS"; + case RETURN_STATUS::BEST_FOUND: + return "BEST FOUND"; + case RETURN_STATUS::TIMEOUT: + return "TIMEOUT"; + case RETURN_STATUS::ERROR: + return "ERROR"; + default: + return "UNKNOWN"; + } +} + +/** + * @brief Stream operator overload using the helper function. 
+ */ +inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { + return os << to_string(status); +} + +} // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 74872aae..32f37d0f 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -636,6 +636,7 @@ class BspArchitecture { /** * @brief Returns the send costs between two processors. Does not perform bounds checking. + * Does not take the communication costs into account. * * @param p1 The index of the first processor. * @param p2 The index of the second processor. diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index e5a1ac3a..914c6fdc 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -27,37 +27,6 @@ limitations under the License. namespace osp { -enum class RETURN_STATUS { OSP_SUCCESS, - BEST_FOUND, - TIMEOUT, - ERROR }; - -/** - * @brief Converts the enum to a string literal. - * Returns const char* to avoid std::string allocation overhead. - */ -inline const char *to_string(const RETURN_STATUS status) { - switch (status) { - case RETURN_STATUS::OSP_SUCCESS: - return "SUCCESS"; - case RETURN_STATUS::BEST_FOUND: - return "BEST FOUND"; - case RETURN_STATUS::TIMEOUT: - return "TIMEOUT"; - case RETURN_STATUS::ERROR: - return "ERROR"; - default: - return "UNKNOWN"; - } -} - -/** - * @brief Stream operator overload using the helper function. - */ -inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { - return os << to_string(status); -} - /** * @class BspInstance * @brief Represents an instance of the BSP (Bulk Synchronous Parallel) model. 
@@ -65,32 +34,34 @@ inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { * The BspInstance class encapsulates the computational DAG (Directed Acyclic Graph) and the BSP architecture * for a specific instance of the BSP model. It provides methods to access and modify the architecture and DAG, * as well as retrieve information about the instance such as the number of vertices and processors. + * + * The instance specifies the compatibility between node types and processor types. + * + * @tparam Graph_t The type of the computational DAG. */ template class BspInstance { - - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "BspInstance can only be used with computational DAGs."); private: + /** + * @brief The computational DAG of the instance. Holds the graph structure and the node types, work, memory, communication weights. + */ Graph_t cdag; + /** + * @brief The BSP architecture of the instance. Holds the processor types, the memory bounds, the communication and synchronization costs, and the send costs between processors. + */ BspArchitecture architecture; - // for problem instances with heterogeneity - std::vector> nodeProcessorCompatibility = std::vector>({{true}}); - /** - * @brief Calculates the maximum memory bound for each processor type. + * @brief Stores the compatibility between node types and processor types. * - * @return A vector where the index corresponds to the processor type and the value is the maximum memory bound for that type. + * The architecture defines a type for each processor, and the DAG defines a type for each node. + * This matrix stores for each node type and processor type whether they are compatible, i.e., + * if a node of the given type can be assigned to a processor of the given type in a schedule. + * @note The outer vector is indexed by node type, the inner vector is indexed by processor type. 
*/ - std::vector> calculateMaxMemoryPerProcessorType() const { - std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] = - std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); - } - return max_memory_per_proc_type; - } + std::vector> nodeProcessorCompatibility = std::vector>({{true}}); public: /** @@ -100,6 +71,7 @@ class BspInstance { /** * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture. + * Computational DAG and BSP architecture are copied! * * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. @@ -110,6 +82,7 @@ class BspInstance { /** * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture. + * Computational DAG and BSP architecture are moved! * * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. @@ -133,9 +106,7 @@ class BspInstance { BspInstance &operator=(BspInstance &&other) noexcept = default; /** - * @brief Returns a reference to the BSP architecture for the instance. - * - * @return A reference to the BSP architecture for the instance. + * @brief Returns a reference to the BSP architecture of the instance. */ [[nodiscard]] const BspArchitecture &getArchitecture() const { return architecture; } [[nodiscard]] BspArchitecture &getArchitecture() { return architecture; } @@ -148,58 +119,48 @@ class BspInstance { void setArchitecture(const BspArchitecture &architechture_) { architecture = architechture_; } /** - * @brief Returns a reference to the computational DAG for the instance. - * - * @return A reference to the computational DAG for the instance. + * @brief Returns a reference to the computational DAG of the instance. 
*/ [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; } [[nodiscard]] Graph_t &getComputationalDag() { return cdag; } /** * @brief Returns the number of vertices in the computational DAG. - * - * @return The number of vertices. */ [[nodiscard]] vertex_idx_t numberOfVertices() const { return cdag.num_vertices(); } /** * @brief Returns a view over the vertex indices of the computational DAG. - * @return A view over the vertex indices. */ [[nodiscard]] auto vertices() const { return cdag.vertices(); } /** * @brief Returns a view over the processor indices of the BSP architecture. - * @return A view over the processor indices. */ [[nodiscard]] auto processors() const { return architecture.processors(); } /** * @brief Returns the number of processors in the BSP architecture. - * @return The number of processors in the BSP architecture. */ [[nodiscard]] unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); } /** - * @brief Returns the communication costs between two processors. + * @brief Returns the communication costs between two processors. Does not perform bounds checking. * The communication costs are the send costs multiplied by the communication costs. * * @param p_send The index of the sending processor. * @param p_receive The index of the receiving processor. - * - * @return The communication costs between the two processors. */ [[nodiscard]] v_commw_t communicationCosts(const unsigned p_send, const unsigned p_receive) const { return architecture.communicationCosts(p_send, p_receive); } /** - * @brief Returns the send costs between two processors. + * @brief Returns the send costs between two processors. Does not perform bounds checking. + * Does not take the communication costs into account. * * @param p_send The index of the sending processor. * @param p_receive The index of the receiving processor. - * - * @return The send costs between the two processors. 
*/ [[nodiscard]] v_commw_t sendCosts(const unsigned p_send, const unsigned p_receive) const { return architecture.sendCosts(p_send, p_receive); @@ -207,14 +168,11 @@ class BspInstance { /** * @brief Returns a copy of the send costs matrix. - * @return A copy of the send costs matrix. */ [[nodiscard]] std::vector>> sendCosts() const { return architecture.sendCosts(); } /** * @brief Returns the flattened send costs vector. - * - * @return The flattened send costs vector. */ [[nodiscard]] const std::vector> &sendCostsVector() const { return architecture.sendCostsVector(); @@ -222,89 +180,51 @@ class BspInstance { /** * @brief Returns the communication costs of the BSP architecture. - * - * @return The communication costs as an unsigned integer. */ [[nodiscard]] v_commw_t communicationCosts() const { return architecture.communicationCosts(); } /** * @brief Returns the synchronization costs of the BSP architecture. - * - * @return The synchronization costs as an unsigned integer. */ [[nodiscard]] v_commw_t synchronisationCosts() const { return architecture.synchronisationCosts(); } - /** - * @brief Returns whether the architecture is NUMA. - * - * @return True if the architecture is NUMA, false otherwise. - */ - [[nodiscard]] bool isNumaInstance() const { return architecture.isNumaArchitecture(); } - /** * @brief Returns the memory bound for a specific processor. * * @param proc The processor index. - * @return The memory bound for the processor. */ [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); } - /** - * @brief Returns the maximum memory bound for a specific processor type. - * - * @param procType The processor type. - * @return The maximum memory bound for the processor type. - */ - [[nodiscard]] v_memw_t maxMemoryBoundProcType(const unsigned procType) const { - return architecture.maxMemoryBoundProcType(procType); - } - - /** - * @brief Returns the maximum memory bound for a specific node type. 
- * - * This considers all compatible processor types for the given node type. - * - * @param nodeType The node type. - * @return The maximum memory bound for the node type. - */ - [[nodiscard]] v_memw_t maxMemoryBoundNodeType(const unsigned nodeType) const { - int max_mem = 0; - for (unsigned proc = 0; proc < architecture.getNumberOfProcessorTypes(); proc++) { - if (isCompatibleType(nodeType, architecture.processorType(proc))) { - max_mem = std::max(max_mem, architecture.memoryBound(proc)); - } - } - return max_mem; - } - /** * @brief Sets the communication costs of the BSP architecture. - * * @param cost The communication costs to set. */ void setCommunicationCosts(const v_commw_t cost) { architecture.setCommunicationCosts(cost); } /** * @brief Sets the synchronisation costs of the BSP architecture. - * * @param cost The synchronisation costs to set. */ void setSynchronisationCosts(const v_commw_t cost) { architecture.setSynchronisationCosts(cost); } /** - * @brief Sets the number of processors in the BSP architecture. - * - * @param num The number of processors to set. + * @brief Sets the number of processors. Processor type is set to 0 for all processors. + * Resets send costs to uniform (1) and diagonal to 0. The memory bound is set to 100 for all processors. + * @param num The number of processors. Must be greater than 0. + * @throws std::invalid_argument if the number of processors is 0. */ void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } /** - * @brief Checks if the memory constraints are feasible for the given instance. - * + * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors. * @return True if the memory constraints are feasible, false otherwise. 
*/ [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { - const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType(); + std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); + for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { + max_memory_per_proc_type[architecture.processorType(proc)] = + std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); + } for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); @@ -326,53 +246,13 @@ class BspInstance { } /** - * @brief Adjusts the memory constraints of the architecture to ensure feasibility. - * - * If a node type requires more memory than available on any compatible processor type, - * the memory bound of compatible processors is increased. - */ - void adjust_memory_constraints() { - const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType(); - - for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { - v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); - bool fits = false; - - for (unsigned proc_type = 0; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { - if (isCompatibleType(vertType, proc_type)) { - fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); - if (fits) - break; - } - } - - if (!fits) { - std::cout << "Warning: Computational DAG memory weight exceeds architecture memory bound." << std::endl; - std::cout << "VertexType " << vertType << " has memory " - << " and exceeds compatible processor types memory limit." 
<< std::endl; - - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - if (isCompatibleType(vertType, architecture.processorType(proc))) { - std::cout << "Increasing memory of processor " << proc << " of type " - << architecture.processorType(proc) << " to " << max_memory_of_type << "." - << std::endl; - architecture.setMemoryBound(max_memory_of_type, proc); - } - } - } - } - } - - /** - * @brief Returns the processor type for a given processor index. - * + * @brief Returns the processor type for a given processor index. Does not perform bounds checking. * @param proc The processor index. - * @return The processor type. */ [[nodiscard]] v_type_t processorType(const unsigned proc) const { return architecture.processorType(proc); } /** - * @brief Checks if a node is compatible with a processor. + * @brief Checks if a node is compatible with a processor. Does not perform bounds checking. * * @param node The node index. * @param processor_id The processor index. @@ -383,7 +263,7 @@ class BspInstance { } /** - * @brief Checks if a node type is compatible with a processor type. + * @brief Checks if a node type is compatible with a processor type. Does not perform bounds checking. * * @param nodeType The node type. * @param processorType The processor type. @@ -394,46 +274,37 @@ class BspInstance { } /** - * @brief Sets the node-processor compatibility matrix. - * + * @brief Sets the node-processor compatibility matrix. The matrix is copied. * @param compatibility_ The compatibility matrix. + * @throw std::runtime_error if the compatibility matrix size does not match the number of node types and processor types. 
*/ void setNodeProcessorCompatibility(const std::vector> &compatibility_) { + if (compatibility_.size() < cdag.num_vertex_types() || compatibility_[0].size() < architecture.getNumberOfProcessorTypes()) { + throw std::runtime_error("Compatibility matrix size does not match the number of node types and processor types."); + } nodeProcessorCompatibility = compatibility_; } /** - * @brief Returns the node-processor compatibility matrix. - * - * @return The node-processor compatibility matrix. + * @brief Returns the node type - processor type compatibility matrix. */ [[nodiscard]] const std::vector> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } /** - * @brief Sets the compatibility matrix to be diagonal. - * - * This implies that node type `i` is only compatible with processor type `i`. - * + * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`. * @param number_of_types The number of types. */ void setDiagonalCompatibilityMatrix(const unsigned number_of_types) { - nodeProcessorCompatibility = - std::vector>(number_of_types, std::vector(number_of_types, false)); + nodeProcessorCompatibility.assign(number_of_types, std::vector(number_of_types, false)); for (unsigned i = 0; i < number_of_types; ++i) nodeProcessorCompatibility[i][i] = true; } /** - * @brief Sets the compatibility matrix to all ones. - * - * This implies that all node types are compatible with all processor types. + * @brief Sets the compatibility matrix to all ones. This implies that all node types are compatible with all processor types. 
*/ void setAllOnesCompatibilityMatrix() { - unsigned number_of_node_types = cdag.num_vertex_types(); - unsigned number_of_proc_types = architecture.getNumberOfProcessorTypes(); - - nodeProcessorCompatibility = - std::vector>(number_of_node_types, std::vector(number_of_proc_types, true)); + nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector(architecture.getNumberOfProcessorTypes(), true)); } /** @@ -454,33 +325,8 @@ class BspInstance { return compatibleProcTypes; } - /** - * @brief Returns a compatibility matrix between node types. - * - * Two node types are compatible if they share at least one compatible processor type. - * - * @return A matrix where `[i][j]` is true if node type `i` and node type `j` are compatible. - */ - [[nodiscard]] std::vector> getNodeNodeCompatabilityMatrix() const { - std::vector> compMat(cdag.num_vertex_types(), - std::vector(cdag.num_vertex_types(), false)); - for (unsigned nodeType1 = 0; nodeType1 < cdag.num_vertex_types(); nodeType1++) { - for (unsigned nodeType2 = 0; nodeType2 < cdag.num_vertex_types(); nodeType2++) { - for (unsigned procType = 0; procType < architecture.getNumberOfProcessorTypes(); procType++) { - if (isCompatibleType(nodeType1, procType) && isCompatibleType(nodeType2, procType)) { - compMat[nodeType1][nodeType2] = true; - break; - } - } - } - } - return compMat; - } - /** * @brief Returns the node-processor compatibility matrix. - * - * @return The node-processor compatibility matrix. */ [[nodiscard]] const std::vector> &getNodeProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index 7a02d0f3..9e5a5d52 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -25,8 +25,8 @@ limitations under the License. 
#include "IBspSchedule.hpp" #include "IBspScheduleEval.hpp" -#include "SetSchedule.hpp" #include "osp/bsp/model/cost/LazyCommunicationCost.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" namespace osp { @@ -213,7 +213,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval, public IBspScheduleEvalnumberOfVertices()) { node_to_superstep_assignment[node] = superstep; @@ -273,7 +273,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval, public IBspScheduleEval> getAssignedNodeVector(unsigned int processor) const { + [[nodiscard]] std::vector> getAssignedNodeVector(const unsigned processor) const { std::vector> vec; for (const auto &node : instance->vertices()) { @@ -496,7 +496,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> getAssignedNodeVector(unsigned int processor, unsigned int superstep) const { + [[nodiscard]] std::vector> getAssignedNodeVector(const unsigned processor, const unsigned superstep) const { std::vector> vec; for (const auto &node : instance->vertices()) { @@ -513,7 +513,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEvalvertices()) { diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp index a4c5800a..c4d8df30 100644 --- a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp +++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp @@ -18,6 +18,7 @@ limitations under the License. 
#pragma once +#include "osp/bsp/model/BspInstance.hpp" #include namespace osp { @@ -33,7 +34,7 @@ namespace osp { template class CompatibleProcessorRange { - std::vector> type_processor_idx; + std::vector> typeProcessorIdx; const BspInstance *instance = nullptr; public: @@ -60,13 +61,12 @@ class CompatibleProcessorRange { instance = &inst; if constexpr (has_typed_vertices_v) { - - type_processor_idx = std::vector>(inst.getComputationalDag().num_vertex_types()); + typeProcessorIdx.resize(inst.getComputationalDag().num_vertex_types()); for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) if (inst.isCompatibleType(v_type, inst.processorType(proc))) - type_processor_idx[v_type].push_back(proc); + typeProcessorIdx[v_type].push_back(proc); } } } @@ -77,11 +77,10 @@ class CompatibleProcessorRange { * @param type The node type. * @return A const reference to a vector of compatible processor indices. */ - [[nodiscard]] const auto &compatible_processors_type(v_type_t type) const { + [[nodiscard]] const auto &compatible_processors_type(const v_type_t type) const { assert(instance != nullptr); - if constexpr (has_typed_vertices_v) { - return type_processor_idx[type]; + return typeProcessorIdx[type]; } else { return instance->processors(); } @@ -93,7 +92,8 @@ class CompatibleProcessorRange { * @param vertex The vertex index. * @return A const reference to a vector of compatible processor indices. 
*/ - [[nodiscard]] const auto &compatible_processors_vertex(vertex_idx_t vertex) const { + [[nodiscard]] const auto &compatible_processors_vertex(const vertex_idx_t vertex) const { + assert(instance != nullptr); return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex)); } }; diff --git a/include/osp/bsp/model/SetSchedule.hpp b/include/osp/bsp/model/util/SetSchedule.hpp similarity index 99% rename from include/osp/bsp/model/SetSchedule.hpp rename to include/osp/bsp/model/util/SetSchedule.hpp index da851f98..61946fae 100644 --- a/include/osp/bsp/model/SetSchedule.hpp +++ b/include/osp/bsp/model/util/SetSchedule.hpp @@ -18,7 +18,7 @@ limitations under the License. #pragma once -#include "IBspSchedule.hpp" +#include "osp/bsp/model/IBspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" namespace osp { diff --git a/include/osp/bsp/model/VectorSchedule.hpp b/include/osp/bsp/model/util/VectorSchedule.hpp similarity index 99% rename from include/osp/bsp/model/VectorSchedule.hpp rename to include/osp/bsp/model/util/VectorSchedule.hpp index a81cc3e5..ea856c1b 100644 --- a/include/osp/bsp/model/VectorSchedule.hpp +++ b/include/osp/bsp/model/util/VectorSchedule.hpp @@ -18,7 +18,7 @@ limitations under the License. #pragma once -#include "IBspSchedule.hpp" +#include "osp/bsp/model/IBspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp index aa199c45..45b58ca3 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp @@ -21,14 +21,14 @@ limitations under the License. 
#include #include +#include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" #include "osp/bsp/model/BspScheduleRecomp.hpp" #include "osp/bsp/model/MaxBspSchedule.hpp" #include "osp/bsp/model/MaxBspScheduleCS.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/auxiliary/io/DotFileWriter.hpp" namespace osp { @@ -111,17 +111,19 @@ class CoptFullScheduler : public Scheduler { if (allow_recomputation_cb) { - auto sched = constructBspScheduleRecompFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + - std::to_string(counter) + "_schedule.dot", sched); + auto sched = constructBspScheduleRecompFromCallback(); + DotFileWriter sched_writer; + sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + + std::to_string(counter) + "_schedule.dot", + sched); } else { - BspSchedule sched = constructBspScheduleFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + - std::to_string(counter) + "_schedule.dot", sched); + BspSchedule sched = constructBspScheduleFromCallback(); + DotFileWriter sched_writer; + sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + + std::to_string(counter) + "_schedule.dot", + sched); } counter++; } @@ -259,7 +261,7 @@ class CoptFullScheduler : public Scheduler { } } - if(is_max_bsp && number_of_supersteps>0) // can ignore last 2 comm phases in this case + if (is_max_bsp && number_of_supersteps > 0) // can ignore last 2 comm phases in this case --number_of_supersteps; schedule.getCommunicationSchedule().clear(); @@ -268,7 +270,7 @@ class CoptFullScheduler : public Scheduler { for 
(unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { if (p_from != p_to) { - for (unsigned int step = 0; step < number_of_supersteps-1; step++) { + for (unsigned int step = 0; step < number_of_supersteps - 1; step++) { if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step] [static_cast(node)] .Get(COPT_DBLINFO_VALUE) >= .99) { @@ -302,7 +304,7 @@ class CoptFullScheduler : public Scheduler { for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) { - for (unsigned step = 0; step < number_of_supersteps-1; step++) { + for (unsigned step = 0; step < number_of_supersteps - 1; step++) { if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { schedule.assignments(node).emplace_back(processor, step); @@ -334,46 +336,35 @@ class CoptFullScheduler : public Scheduler { } } - void loadInitialSchedule(Model &model, const BspInstance &instance) { if (use_initial_schedule_recomp && (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() || - instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { + instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() || + instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } if (!use_initial_schedule_recomp & use_initial_schedule && (max_number_supersteps < initial_schedule->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() || - instance.numberOfVertices() != 
initial_schedule->getInstance().numberOfVertices())) { + instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() || + instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } - const auto& DAG = use_initial_schedule_recomp ? - initial_schedule_recomp->getInstance().getComputationalDag() : - initial_schedule->getInstance().getComputationalDag(); + const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() : initial_schedule->getInstance().getComputationalDag(); - const auto& arch = use_initial_schedule_recomp ? - initial_schedule_recomp->getInstance().getArchitecture() : - initial_schedule->getInstance().getArchitecture(); + const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture() : initial_schedule->getInstance().getArchitecture(); - const unsigned& num_processors = use_initial_schedule_recomp ? - initial_schedule_recomp->getInstance().numberOfProcessors() : - initial_schedule->getInstance().numberOfProcessors(); + const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() : initial_schedule->getInstance().numberOfProcessors(); - const unsigned& num_supersteps = use_initial_schedule_recomp ? - initial_schedule_recomp->numberOfSupersteps() : - initial_schedule->numberOfSupersteps(); + const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() : initial_schedule->numberOfSupersteps(); - const auto &cs = use_initial_schedule_recomp ? - initial_schedule_recomp->getCommunicationSchedule() : - initial_schedule->getCommunicationSchedule(); + const auto &cs = use_initial_schedule_recomp ? 
initial_schedule_recomp->getCommunicationSchedule() : initial_schedule->getCommunicationSchedule(); - assert(max_number_supersteps <= static_cast( std::numeric_limits::max()) ); + assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); for (unsigned step = 0; step < max_number_supersteps; step++) { if (step < num_supersteps) { @@ -387,28 +378,23 @@ class CoptFullScheduler : public Scheduler { // model.SetMipStart(max_comm_superstep_var[step], COPT_INFINITY); } - std::vector > > computed(DAG.num_vertices()); - for (const auto &node : DAG.vertices()) - { - if(use_initial_schedule_recomp) - for (const std::pair& assignment : initial_schedule_recomp->assignments(node)) + std::vector>> computed(DAG.num_vertices()); + for (const auto &node : DAG.vertices()) { + if (use_initial_schedule_recomp) + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) computed[node].emplace(assignment); else - computed[node].emplace(initial_schedule->assignedProcessor(node),initial_schedule->assignedSuperstep(node)); + computed[node].emplace(initial_schedule->assignedProcessor(node), initial_schedule->assignedSuperstep(node)); } - std::vector > first_at(DAG.num_vertices(), std::vector(num_processors, std::numeric_limits::max())); - for (const auto &node : DAG.vertices()) - { - if(use_initial_schedule_recomp) - { - for (const std::pair& assignment : initial_schedule_recomp->assignments(node)) + std::vector> first_at(DAG.num_vertices(), std::vector(num_processors, std::numeric_limits::max())); + for (const auto &node : DAG.vertices()) { + if (use_initial_schedule_recomp) { + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second); - } - else - { + } else { first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)], - initial_schedule->assignedSuperstep(node) ); + 
initial_schedule->assignedSuperstep(node)); } } @@ -431,7 +417,7 @@ class CoptFullScheduler : public Scheduler { comm_processor_to_processor_superstep_node_var[p1][p2][step] [static_cast(node)], 1); - first_at[node][p2] = std::min(first_at[node][p2], step+staleness); + first_at[node][p2] = std::min(first_at[node][p2], step + staleness); } else { model.SetMipStart( comm_processor_to_processor_superstep_node_var[p1][p2][step] @@ -447,14 +433,15 @@ class CoptFullScheduler : public Scheduler { for (const auto &node : DAG.vertices()) for (unsigned proc = 0; proc < num_processors; proc++) - for(unsigned step = 0; step < max_number_supersteps; step++) - { - if(step >= first_at[node][proc]) + for (unsigned step = 0; step < max_number_supersteps; step++) { + if (step >= first_at[node][proc]) model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], 1); + [static_cast(node)], + 1); else model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], 0); + [static_cast(node)], + 0); } for (const auto &node : DAG.vertices()) { @@ -478,16 +465,13 @@ class CoptFullScheduler : public Scheduler { max_number_supersteps, std::vector>(num_processors, 0)); - if(use_initial_schedule_recomp) - { + if (use_initial_schedule_recomp) { for (const auto &node : initial_schedule_recomp->getInstance().vertices()) { - for (const std::pair& assignment : initial_schedule_recomp->assignments(node)) { + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { work[assignment.second][assignment.first] += DAG.vertex_work_weight(node); } } - } - else - { + } else { for (const auto &node : initial_schedule->getInstance().vertices()) work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] += DAG.vertex_work_weight(node); @@ -544,15 +528,14 @@ class CoptFullScheduler : public Scheduler { Variables */ - assert(max_number_supersteps <= static_cast( 
std::numeric_limits::max() )); - assert(instance.numberOfProcessors() <= static_cast( std::numeric_limits::max()) ); + assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); + assert(instance.numberOfProcessors() <= static_cast(std::numeric_limits::max())); // variables indicating if superstep is used at all superstep_used_var = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_used"); VarArray superstep_has_comm, mergeable_superstep_penalty; - if(is_max_bsp) - { + if (is_max_bsp) { // variables indicating if there is any communication in superstep superstep_has_comm = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_has_comm"); // variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp @@ -676,13 +659,12 @@ class CoptFullScheduler : public Scheduler { if (step > 0) { for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { - if(!is_max_bsp || p_from == processor){ + if (!is_max_bsp || p_from == processor) { expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1] - [static_cast(node)]; - } - else if(step > 1){ + [static_cast(node)]; + } else if (step > 1) { expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 2] - [static_cast(node)]; + [static_cast(node)]; } } } @@ -700,26 +682,25 @@ class CoptFullScheduler : public Scheduler { } // synchronization cost calculation & forcing continuous schedule in maxBsp - if(is_max_bsp) - { + if (is_max_bsp) { for (unsigned int step = 0; step < max_number_supersteps; step++) { Expr expr; for (const auto &node : instance.vertices()) { for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - if(p_from != p_to) + if (p_from != p_to) expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)]; } } } 
model.AddConstr(static_cast(instance.numberOfProcessors() * instance.numberOfProcessors() * instance.numberOfVertices()) * - superstep_has_comm[static_cast(step)] >= expr); + superstep_has_comm[static_cast(step)] >= + expr); } // if step i and (i+1) has no comm, and (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize for (unsigned int step = 0; step < max_number_supersteps - 2; step++) - model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] - - superstep_has_comm[static_cast(step + 1)] <= mergeable_superstep_penalty[static_cast(step)]); + model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] - superstep_has_comm[static_cast(step + 1)] <= mergeable_superstep_penalty[static_cast(step)]); } max_comm_superstep_var = @@ -784,7 +765,7 @@ class CoptFullScheduler : public Scheduler { // vertex type restrictions for (const vertex_idx_t &node : instance.vertices()) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - if(!instance.isCompatible(node, processor)) { + if (!instance.isCompatible(node, processor)) { for (unsigned int step = 0; step < max_number_supersteps; step++) { model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast(step)] == 0); } @@ -797,20 +778,17 @@ class CoptFullScheduler : public Scheduler { */ Expr expr; - if(is_max_bsp) - { + if (is_max_bsp) { VarArray max_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_superstep"); for (unsigned int step = 0; step < max_number_supersteps; step++) { model.AddConstr(max_superstep_var[static_cast(step)] >= max_work_superstep_var[static_cast(step)]); - if(step > 0) - model.AddConstr(max_superstep_var[static_cast(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast(step-1)]); + if (step > 0) + model.AddConstr(max_superstep_var[static_cast(step)] >= instance.communicationCosts() * 
max_comm_superstep_var[static_cast(step - 1)]); expr += max_superstep_var[static_cast(step)]; expr += instance.synchronisationCosts() * superstep_has_comm[static_cast(step)]; expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast(step)]; } - } - else - { + } else { for (unsigned int step = 0; step < max_number_supersteps; step++) { expr += max_work_superstep_var[static_cast(step)] + instance.communicationCosts() * max_comm_superstep_var[static_cast(step)] + @@ -877,7 +855,7 @@ class CoptFullScheduler : public Scheduler { // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; } - CoptFullScheduler(const BspScheduleRecomp &schedule) + CoptFullScheduler(const BspScheduleRecomp &schedule) : allow_recomputation(true), use_memory_constraint(false), use_initial_schedule_recomp(true), write_solutions_found(false), initial_schedule_recomp(&schedule), max_number_supersteps(schedule.numberOfSupersteps()) { @@ -931,7 +909,6 @@ class CoptFullScheduler : public Scheduler { return run_scheduler(schedule); } - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) override { allow_recomputation = false; is_max_bsp = false; @@ -1010,7 +987,6 @@ class CoptFullScheduler : public Scheduler { model.Solve(); } - /** * @brief Sets the provided schedule as the initial solution for the ILP. 
* diff --git a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp index 5d759687..c051c8dc 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp @@ -240,7 +240,7 @@ class TotalCommunicationScheduler : public Scheduler { SetSolution((*max_work_superstep_var_ptr)[static_cast(step)], max_work); } - if (instance_ptr->isNumaInstance()) { + if (instance_ptr->getArchitecture().isNumaArchitecture()) { for (unsigned p1 = 0; p1 < instance_ptr->numberOfProcessors(); p1++) { for (unsigned p2 = 0; p2 < instance_ptr->numberOfProcessors(); p2++) { @@ -670,7 +670,6 @@ class TotalCommunicationScheduler : public Scheduler { loadInitialSchedule(); } - model.SetIntParam(COPT_INTPARAM_THREADS, 128); model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp index af5bfd19..1c544fd1 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp @@ -16,12 +16,12 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner */ -//#define KL_DEBUG +// #define KL_DEBUG #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/IBspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" @@ -73,17 +73,15 @@ class kl_current_schedule { using EdgeType = edge_desc_t; public: - kl_current_schedule(Ikl_cost_function *cost_f_) : cost_f(cost_f_) { -#ifdef KL_DEBUG +#ifdef KL_DEBUG if constexpr (use_memory_constraint) { std::cout << "KLCurrentSchedule constructor with memory constraint" << std::endl; } else { std::cout << "KLCurrentSchedule constructor without memory constraint" << std::endl; } #endif - } virtual ~kl_current_schedule() = default; @@ -358,7 +356,7 @@ class kl_current_schedule { if constexpr (use_memory_constraint) { memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } + } } virtual void initialize_current_schedule(const IBspSchedule &schedule) { diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp index 6fe460f8..862eeacc 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp @@ -16,13 +16,12 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner */ - #pragma once #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/IBspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" @@ -54,7 +53,7 @@ struct kl_move_struct { bool operator>(kl_move_struct const &rhs) const { return (gain > rhs.gain) or (gain >= rhs.gain and node < rhs.node); } - + kl_move_struct reverse_move() const { return kl_move_struct(node, -gain, to_proc, to_step, from_proc, from_step); } @@ -73,13 +72,12 @@ struct pre_move_work_data { pre_move_work_data() {} pre_move_work_data(work_weight_t from_step_max_work_, work_weight_t from_step_second_max_work_, unsigned from_step_max_work_processor_count_, - work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_, - unsigned to_step_max_work_processor_count_) + work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_, + unsigned to_step_max_work_processor_count_) : from_step_max_work(from_step_max_work_), from_step_second_max_work(from_step_second_max_work_), from_step_max_work_processor_count(from_step_max_work_processor_count_), to_step_max_work(to_step_max_work_), to_step_second_max_work(to_step_second_max_work_), - to_step_max_work_processor_count(to_step_max_work_processor_count_) {} - + to_step_max_work_processor_count(to_step_max_work_processor_count_) {} }; template @@ -87,16 +85,16 @@ struct kl_active_schedule_work_datastructures { using work_weight_t = v_workw_t; - const BspInstance *instance; + const BspInstance *instance; const SetSchedule *set_schedule; - + struct weight_proc { work_weight_t work; unsigned proc; weight_proc() : work(0), proc(0) {} weight_proc(work_weight_t _work, unsigned _proc) : work(_work), 
proc(_proc) {} - + bool operator<(weight_proc const &rhs) const { return (work > rhs.work) or (work == rhs.work and proc < rhs.proc); } @@ -106,17 +104,17 @@ struct kl_active_schedule_work_datastructures { std::vector> step_processor_position; std::vector step_max_work_processor_count; work_weight_t max_work_weight; - work_weight_t total_work_weight; + work_weight_t total_work_weight; inline work_weight_t step_max_work(unsigned step) const { return step_processor_work_[step][0].work; } inline work_weight_t step_second_max_work(unsigned step) const { return step_processor_work_[step][step_max_work_processor_count[step]].work; } inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { return step_processor_work_[step][step_processor_position[step][proc]].work; } - inline work_weight_t & step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; } + inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; } template - inline pre_move_work_data get_pre_move_work_data(kl_move_struct move) { + inline pre_move_work_data get_pre_move_work_data(kl_move_struct move) { return pre_move_work_data(step_max_work(move.from_step), step_second_max_work(move.from_step), step_max_work_processor_count[move.from_step], - step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]); + step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]); } inline void initialize(const SetSchedule &sched, const BspInstance &inst, unsigned num_steps) { @@ -140,20 +138,20 @@ struct kl_active_schedule_work_datastructures { unsigned pos = 0; const work_weight_t max_work_to = step_processor_work_[step][0].work; - for (const auto & wp : step_processor_work_[step]) { + for (const auto &wp : step_processor_work_[step]) { 
step_processor_position[step][wp.proc] = pos++; if (wp.work == max_work_to && pos < instance->numberOfProcessors()) - step_max_work_processor_count[step] = pos; + step_max_work_processor_count[step] = pos; } } template - void apply_move(kl_move_struct move, work_weight_t work_weight) { + void apply_move(kl_move_struct move, work_weight_t work_weight) { - if (work_weight == 0) + if (work_weight == 0) return; - + if (move.to_step != move.from_step) { step_proc_work(move.to_step, move.to_proc) += work_weight; step_proc_work(move.from_step, move.from_proc) -= work_weight; @@ -171,7 +169,7 @@ struct kl_active_schedule_work_datastructures { // } // unsigned to_proc_pos = step_processor_position[move.to_step][move.to_proc]; - + // while (to_proc_pos > 0 && step_processor_work_[move.to_step][to_proc_pos - 1].work < new_weight_to) { // std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - 1]); // std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]); @@ -189,15 +187,15 @@ struct kl_active_schedule_work_datastructures { // std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]); // from_proc_pos++; // } - + // if (prev_max_work_from == prev_weight_from) { - // step_max_work_processor_count[move.from_step]--; - // if (step_max_work_processor_count[move.from_step] == 0) { - // step_max_work_processor_count[move.from_step] = from_proc_pos; + // step_max_work_processor_count[move.from_step]--; + // if (step_max_work_processor_count[move.from_step] == 0) { + // step_max_work_processor_count[move.from_step] = from_proc_pos; // } - // } + // } - } else { + } else { step_proc_work(move.to_step, move.to_proc) += work_weight; 
step_proc_work(move.from_step, move.from_proc) -= work_weight; arrange_superstep_data(move.to_step); @@ -209,21 +207,21 @@ struct kl_active_schedule_work_datastructures { std::swap(step_processor_position[step1], step_processor_position[step2]); std::swap(step_max_work_processor_count[step1], step_max_work_processor_count[step2]); } - + void override_next_superstep(unsigned step) { const unsigned next_step = step + 1; for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { - step_processor_work_[next_step][i] = step_processor_work_[step][i]; - step_processor_position[next_step][i] = step_processor_position[step][i]; + step_processor_work_[next_step][i] = step_processor_work_[step][i]; + step_processor_position[next_step][i] = step_processor_position[step][i]; } step_max_work_processor_count[next_step] = step_max_work_processor_count[step]; } void reset_superstep(unsigned step) { for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { - step_processor_work_[step][i] = {0,i}; - step_processor_position[step][i] = i; + step_processor_work_[step][i] = {0, i}; + step_processor_position[step][i] = i; } step_max_work_processor_count[step] = instance->numberOfProcessors() - 1; } @@ -249,12 +247,12 @@ struct kl_active_schedule_work_datastructures { step_max_work_processor_count[step] = 1; } else if (step_processor_work_[step][proc].work == max_work && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) { step_max_work_processor_count[step]++; - } + } } std::sort(step_processor_work_[step].begin(), step_processor_work_[step].end()); unsigned pos = 0; - for (const auto & wp : step_processor_work_[step]) { + for (const auto &wp : step_processor_work_[step]) { step_processor_position[step][wp.proc] = pos++; } } @@ -287,15 +285,15 @@ struct thread_local_active_schedule_data { cost = cost_; best_cost = cost_; feasible = true; - } - + } + inline void update_cost(cost_t change_in_cost) { - cost += change_in_cost; + cost += change_in_cost; if 
(cost <= best_cost && feasible) { best_cost = cost; best_schedule_idx = static_cast(applied_moves.size()); - } + } } }; @@ -319,23 +317,23 @@ class kl_active_schedule { public: virtual ~kl_active_schedule() = default; - inline const BspInstance & getInstance() const { return *instance; } - inline const VectorSchedule & getVectorSchedule() const { return vector_schedule; } - inline VectorSchedule & getVectorSchedule() { return vector_schedule; } - inline const SetSchedule & getSetSchedule() const { return set_schedule; } + inline const BspInstance &getInstance() const { return *instance; } + inline const VectorSchedule &getVectorSchedule() const { return vector_schedule; } + inline VectorSchedule &getVectorSchedule() { return vector_schedule; } + inline const SetSchedule &getSetSchedule() const { return set_schedule; } inline cost_t get_cost() { return cost; } inline bool is_feasible() { return feasible; } inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); } inline unsigned assigned_processor(VertexType node) const { return vector_schedule.assignedProcessor(node); } inline unsigned assigned_superstep(VertexType node) const { return vector_schedule.assignedSuperstep(node); } - inline v_workw_t get_step_max_work(unsigned step) const {return work_datastructures.step_max_work(step); } - inline v_workw_t get_step_second_max_work(unsigned step) const {return work_datastructures.step_second_max_work(step); } - inline std::vector & get_step_max_work_processor_count() {return work_datastructures.step_max_work_processor_count; } - inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const {return work_datastructures.step_proc_work(step, proc); } + inline v_workw_t get_step_max_work(unsigned step) const { return work_datastructures.step_max_work(step); } + inline v_workw_t get_step_second_max_work(unsigned step) const { return work_datastructures.step_second_max_work(step); } + inline std::vector 
&get_step_max_work_processor_count() { return work_datastructures.step_max_work_processor_count; } + inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const { return work_datastructures.step_proc_work(step, proc); } inline pre_move_work_data> get_pre_move_work_data(kl_move move) { return work_datastructures.get_pre_move_work_data(move); } inline v_workw_t get_max_work_weight() { return work_datastructures.max_work_weight; } inline v_workw_t get_total_work_weight() { return work_datastructures.total_work_weight; } - inline void set_cost(cost_t cost_) { cost = cost_; } + inline void set_cost(cost_t cost_) { cost = cost_; } constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v; @@ -343,11 +341,11 @@ class kl_active_schedule { kl_active_schedule_work_datastructures work_datastructures; - inline v_workw_t get_step_total_work(unsigned step) const { - v_workw_t total_work = 0; + inline v_workw_t get_step_total_work(unsigned step) const { + v_workw_t total_work = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { total_work += get_step_processor_work(step, proc); - } + } return total_work; } @@ -357,18 +355,18 @@ class kl_active_schedule { set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - + update_violations(move.node, thread_data); thread_data.applied_moves.push_back(move); work_datastructures.apply_move(move, instance->getComputationalDag().vertex_work_weight(move.node)); if constexpr (use_memory_constraint) { memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } + } } template - void revert_to_best_schedule(unsigned start_move, unsigned insert_step, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned & end_step) { + void revert_to_best_schedule(unsigned start_move, unsigned 
insert_step, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned &end_step) { const unsigned bound = std::max(start_move, thread_data.best_schedule_idx); revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); @@ -391,7 +389,7 @@ class kl_active_schedule { } template - void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned end_step) { + void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) { revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); thread_data.current_violations.clear(); @@ -399,10 +397,9 @@ class kl_active_schedule { thread_data.cost = new_cost; } - - void compute_violations(thread_data_t & thread_data); + void compute_violations(thread_data_t &thread_data); void compute_work_memory_datastructures(unsigned start_step, unsigned end_step); - void write_schedule (BspSchedule &schedule); + void write_schedule(BspSchedule &schedule); inline void initialize(const IBspSchedule &schedule); inline void clear(); void remove_empty_step(unsigned step); @@ -412,15 +409,14 @@ class kl_active_schedule { void swap_steps(const unsigned step1, const unsigned step2); private: - template - void revert_moves(const size_t bound, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned end_step) { + void revert_moves(const size_t bound, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) { while (thread_data.applied_moves.size() > bound) { const auto move = thread_data.applied_moves.back().reverse_move(); thread_data.applied_moves.pop_back(); 
vector_schedule.setAssignedProcessor(move.node, move.to_proc); - vector_schedule.setAssignedSuperstep(move.node, move.to_step); + vector_schedule.setAssignedSuperstep(move.node, move.to_step); set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); @@ -443,16 +439,16 @@ class kl_active_schedule { const auto &child = target(edge, instance->getComputationalDag()); if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step > vector_schedule.assignedSuperstep(child)) || + if ((node_step > vector_schedule.assignedSuperstep(child)) || (node_step == vector_schedule.assignedSuperstep(child) && node_proc != vector_schedule.assignedProcessor(child))) { - thread_data.current_violations.insert(edge); - thread_data.new_violations[child] = edge; + thread_data.current_violations.insert(edge); + thread_data.new_violations[child] = edge; } } else { - if ((node_step < vector_schedule.assignedSuperstep(child)) || + if ((node_step < vector_schedule.assignedSuperstep(child)) || (node_step == vector_schedule.assignedSuperstep(child) && node_proc == vector_schedule.assignedProcessor(child))) { - thread_data.current_violations.erase(edge); - thread_data.resolved_violations.insert(edge); + thread_data.current_violations.erase(edge); + thread_data.resolved_violations.insert(edge); } } } @@ -460,17 +456,17 @@ class kl_active_schedule { for (const auto &edge : in_edges(node, instance->getComputationalDag())) { const auto &parent = source(edge, instance->getComputationalDag()); - if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step < vector_schedule.assignedSuperstep(parent)) || + if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { + if ((node_step < vector_schedule.assignedSuperstep(parent)) || (node_step == 
vector_schedule.assignedSuperstep(parent) && node_proc != vector_schedule.assignedProcessor(parent))) { - thread_data.current_violations.insert(edge); - thread_data.new_violations[parent] = edge; + thread_data.current_violations.insert(edge); + thread_data.new_violations[parent] = edge; } } else { - if ((node_step > vector_schedule.assignedSuperstep(parent)) || + if ((node_step > vector_schedule.assignedSuperstep(parent)) || (node_step == vector_schedule.assignedSuperstep(parent) && node_proc == vector_schedule.assignedProcessor(parent))) { - thread_data.current_violations.erase(edge); - thread_data.resolved_violations.insert(edge); + thread_data.current_violations.erase(edge); + thread_data.resolved_violations.insert(edge); } } } @@ -501,7 +497,6 @@ class kl_active_schedule { thread_data.feasible = true; } } - }; template @@ -515,7 +510,7 @@ void kl_active_schedule::clear() { } template -void kl_active_schedule::compute_violations(thread_data_t & thread_data) { +void kl_active_schedule::compute_violations(thread_data_t &thread_data) { thread_data.current_violations.clear(); thread_data.feasible = true; @@ -529,12 +524,12 @@ void kl_active_schedule::compute_violations const unsigned target_proc = assigned_processor(target_v); const unsigned source_step = assigned_superstep(source_v); const unsigned target_step = assigned_superstep(target_v); - + if (source_step > target_step || (source_step == target_step && source_proc != target_proc)) { thread_data.current_violations.insert(edge); thread_data.feasible = false; - } - } + } + } } template @@ -563,7 +558,7 @@ void kl_active_schedule::compute_work_memor } template -void kl_active_schedule::write_schedule (BspSchedule &schedule) { +void kl_active_schedule::write_schedule(BspSchedule &schedule) { for (const auto v : instance->vertices()) { schedule.setAssignedProcessor(v, vector_schedule.assignedProcessor(v)); schedule.setAssignedSuperstep(v, vector_schedule.assignedSuperstep(v)); @@ -572,91 +567,92 @@ void 
kl_active_schedule::write_schedule (Bs } template -void kl_active_schedule::remove_empty_step(unsigned step) { +void kl_active_schedule::remove_empty_step(unsigned step) { for (unsigned i = step; i < num_steps() - 1; i++) { - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]); - work_datastructures.swap_steps(i, i+1); + work_datastructures.swap_steps(i, i + 1); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i, i+1); + memory_constraint.swap_steps(i, i + 1); } } vector_schedule.number_of_supersteps--; } template -void kl_active_schedule::swap_empty_step_fwd(const unsigned step, const unsigned to_step) { +void kl_active_schedule::swap_empty_step_fwd(const unsigned step, const unsigned to_step) { for (unsigned i = step; i < to_step; i++) { - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]); work_datastructures.swap_steps(i, i + 1); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i, i+1); + memory_constraint.swap_steps(i, i + 1); } } } template void kl_active_schedule::insert_empty_step(unsigned step) { - unsigned i = vector_schedule.number_of_supersteps++; - + unsigned i = vector_schedule.number_of_supersteps++; + for (; i > step; i--) { - 
for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i-1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]); - work_datastructures.swap_steps(i-1, i); + work_datastructures.swap_steps(i - 1, i); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i - 1, i); + memory_constraint.swap_steps(i - 1, i); } - } + } } template void kl_active_schedule::swap_empty_step_bwd(const unsigned to_step, const unsigned empty_step) { - unsigned i = to_step; - + unsigned i = to_step; + for (; i > empty_step; i--) { - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i-1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) { vector_schedule.setAssignedSuperstep(node, i); } } std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]); - work_datastructures.swap_steps(i-1, i); + work_datastructures.swap_steps(i - 1, i); if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i - 1, i); + memory_constraint.swap_steps(i - 1, i); } - } + } } template void kl_active_schedule::swap_steps(const unsigned step1, const unsigned step2) { - if (step1 == step2) return; + if (step1 == step2) + return; - for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[step1][proc]){ + for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { + for (const auto node : set_schedule.step_processor_vertices[step1][proc]) { 
vector_schedule.setAssignedSuperstep(node, step2); } - for (const auto node : set_schedule.step_processor_vertices[step2][proc]){ + for (const auto node : set_schedule.step_processor_vertices[step2][proc]) { vector_schedule.setAssignedSuperstep(node, step1); } } std::swap(set_schedule.step_processor_vertices[step1], set_schedule.step_processor_vertices[step2]); - work_datastructures.swap_steps(step1, step2); + work_datastructures.swap_steps(step1, step2); if constexpr (use_memory_constraint) { memory_constraint.swap_steps(step1, step2); - } + } } } // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp index 6961ef92..2cee3d0f 100644 --- a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp @@ -19,8 +19,8 @@ limitations under the License. #pragma once #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" -#include "osp/bsp/model/VectorSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" namespace osp { @@ -42,7 +42,7 @@ struct is_local_search_memory_constraint< std::declval(), std::declval(), std::declval(), std::declval())), decltype(std::declval().compute_memory_datastructure(std::declval(), - std::declval())), + std::declval())), decltype(std::declval().swap_steps(std::declval(), std::declval())), decltype(std::declval().reset_superstep(std::declval())), decltype(std::declval().override_superstep(std::declval(), std::declval(), @@ -105,7 +105,7 @@ struct ls_local_memory_constraint { void swap_steps(const unsigned step1, const unsigned step2) { std::swap(step_processor_memory[step1], step_processor_memory[step2]); - } + } void compute_memory_datastructure(unsigned start_step, unsigned 
end_step) { @@ -150,7 +150,7 @@ struct ls_local_memory_constraint { } } return true; - } + } }; template @@ -378,7 +378,7 @@ struct ls_local_sources_inc_edges_memory_constraint { inline void swap_steps(const unsigned step1, const unsigned step2) { std::swap(step_processor_memory[step1], step_processor_memory[step2]); std::swap(step_processor_pred[step1], step_processor_pred[step2]); - } + } inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &vec_schedule_) { @@ -587,7 +587,6 @@ struct ls_local_sources_inc_edges_memory_constraint { } return true; - } }; diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp index a57e2e84..fa458ba9 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -18,6 +18,7 @@ limitations under the License. #pragma once +#include "osp/auxiliary/return_status.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" diff --git a/include/osp/coarser/BspScheduleCoarser.hpp b/include/osp/coarser/BspScheduleCoarser.hpp index 64684b7a..ea4cf9f9 100644 --- a/include/osp/coarser/BspScheduleCoarser.hpp +++ b/include/osp/coarser/BspScheduleCoarser.hpp @@ -18,10 +18,10 @@ limitations under the License. 
#pragma once -#include "osp/coarser/Coarser.hpp" #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/SetSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/coarser/Coarser.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" namespace osp { @@ -63,7 +63,6 @@ class BspScheduleCoarser : public CoarserGenContractionMapgetInstance().getComputationalDag()); assert(schedule->satisfiesPrecedenceConstraints()); - SetSchedule set_schedule(*schedule); std::vector reverse_vertex_map(dag_in.num_vertices(), 0); std::vector> vertex_map; diff --git a/include/osp/coarser/MultilevelCoarser.hpp b/include/osp/coarser/MultilevelCoarser.hpp index bbd090e4..f8a1434e 100644 --- a/include/osp/coarser/MultilevelCoarser.hpp +++ b/include/osp/coarser/MultilevelCoarser.hpp @@ -23,11 +23,11 @@ limitations under the License. #include #include -#include "osp/coarser/Coarser.hpp" +#include "osp/auxiliary/return_status.hpp" #include "osp/bsp/model/BspInstance.hpp" +#include "osp/coarser/Coarser.hpp" #include "osp/coarser/coarser_util.hpp" - namespace osp { template @@ -36,10 +36,12 @@ class MultilevelCoarseAndSchedule; template class MultilevelCoarser : public Coarser { friend class MultilevelCoarseAndSchedule; + private: const Graph_t *original_graph; + protected: - inline const Graph_t * getOriginalGraph() const { return original_graph; }; + inline const Graph_t *getOriginalGraph() const { return original_graph; }; std::vector> dag_history; std::vector>>> contraction_maps; @@ -49,7 +51,7 @@ class MultilevelCoarser : public Coarser { RETURN_STATUS add_contraction(const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph); RETURN_STATUS add_contraction(std::vector> &&contraction_map, Graph_t_coarse &&contracted_graph); void add_identity_contraction(); - + std::vector> getCombinedContractionMap() const; virtual RETURN_STATUS run_contractions() = 0; @@ -62,19 +64,15 @@ class 
MultilevelCoarser : public Coarser { MultilevelCoarser(const Graph_t &graph) : original_graph(&graph) {}; virtual ~MultilevelCoarser() = default; - bool coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag, - std::vector> &vertex_contraction_map) override; + std::vector> &vertex_contraction_map) override; - RETURN_STATUS run(const Graph_t &graph); RETURN_STATUS run(const BspInstance &inst); virtual std::string getCoarserName() const override = 0; }; - - template RETURN_STATUS MultilevelCoarser::run(const Graph_t &graph) { clear_computation_data(); @@ -91,7 +89,7 @@ RETURN_STATUS MultilevelCoarser::run(const Graph_t &gra } template -RETURN_STATUS MultilevelCoarser::run(const BspInstance< Graph_t > &inst) { +RETURN_STATUS MultilevelCoarser::run(const BspInstance &inst) { return run(inst.getComputationalDag()); } @@ -99,15 +97,15 @@ template void MultilevelCoarser::clear_computation_data() { dag_history.clear(); dag_history.shrink_to_fit(); - + contraction_maps.clear(); contraction_maps.shrink_to_fit(); } - template void MultilevelCoarser::compactify_dag_history() { - if (dag_history.size() < 3) return; + if (dag_history.size() < 3) + return; size_t dag_indx_first = dag_history.size() - 2; size_t map_indx_first = contraction_maps.size() - 2; @@ -115,13 +113,13 @@ void MultilevelCoarser::compactify_dag_history() { size_t dag_indx_second = dag_history.size() - 1; size_t map_indx_second = contraction_maps.size() - 1; - if ( (static_cast( dag_history[dag_indx_first-1]->num_vertices() ) / static_cast( dag_history[dag_indx_second-1]->num_vertices() )) > 1.25 ) return; - + if ((static_cast(dag_history[dag_indx_first - 1]->num_vertices()) / static_cast(dag_history[dag_indx_second - 1]->num_vertices())) > 1.25) + return; // Compute combined contraction_map - std::unique_ptr>> combi_contraction_map = std::make_unique>>( contraction_maps[map_indx_first]->size() ); + std::unique_ptr>> combi_contraction_map = std::make_unique>>(contraction_maps[map_indx_first]->size()); 
for (std::size_t vert = 0; vert < contraction_maps[map_indx_first]->size(); ++vert) { - combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at( contraction_maps[map_indx_first]->at( vert ) ); + combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at(contraction_maps[map_indx_first]->at(vert)); } // Delete ComputationalDag @@ -138,7 +136,6 @@ void MultilevelCoarser::compactify_dag_history() { contraction_maps[map_indx_first] = std::move(combi_contraction_map); } - template RETURN_STATUS MultilevelCoarser::add_contraction(const std::vector> &contraction_map) { std::unique_ptr new_graph = std::make_unique(); @@ -148,12 +145,12 @@ RETURN_STATUS MultilevelCoarser::add_contraction(const bool success = false; if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back())); } else { - success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back())); } - dag_history.emplace_back( std::move(new_graph) ); + dag_history.emplace_back(std::move(new_graph)); if (success) { compactify_dag_history(); @@ -166,19 +163,19 @@ RETURN_STATUS MultilevelCoarser::add_contraction(const template RETURN_STATUS MultilevelCoarser::add_contraction(std::vector> &&contraction_map) { std::unique_ptr new_graph = std::make_unique(); - + std::unique_ptr>> contr_map_ptr(new std::vector>(std::move(contraction_map))); contraction_maps.emplace_back(std::move(contr_map_ptr)); bool success = false; if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, 
*(contraction_maps.back())); } else { - success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back()) ); + success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back())); } - dag_history.emplace_back( std::move(new_graph) ); + dag_history.emplace_back(std::move(new_graph)); if (success) { compactify_dag_history(); @@ -188,12 +185,11 @@ RETURN_STATUS MultilevelCoarser::add_contraction(std::v } } - template RETURN_STATUS MultilevelCoarser::add_contraction(const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph) { std::unique_ptr graph_ptr(new Graph_t_coarse(contracted_graph)); dag_history.emplace_back(std::move(graph_ptr)); - + std::unique_ptr>> contr_map_ptr(new std::vector>(contraction_map)); contraction_maps.emplace_back(std::move(contr_map_ptr)); @@ -213,7 +209,6 @@ RETURN_STATUS MultilevelCoarser::add_contraction(std::v return RETURN_STATUS::OSP_SUCCESS; } - template std::vector> MultilevelCoarser::getCombinedContractionMap() const { std::vector> combinedContractionMap(original_graph->num_vertices()); @@ -221,23 +216,22 @@ std::vector> MultilevelCoarserat( combinedContractionMap[i] ); + combinedContractionMap[i] = contraction_maps[j]->at(combinedContractionMap[i]); } } return combinedContractionMap; } - - template bool MultilevelCoarser::coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag, - std::vector> &vertex_contraction_map) { + std::vector> &vertex_contraction_map) { clear_computation_data(); RETURN_STATUS status = run(dag_in); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return false; + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) + return false; assert(dag_history.size() != 0); coarsened_dag = *(dag_history.back()); @@ -251,20 +245,16 @@ template void MultilevelCoarser::add_identity_contraction() { std::size_t n_vert; if (dag_history.size() == 0) { - n_vert = 
static_cast( original_graph->num_vertices() ); + n_vert = static_cast(original_graph->num_vertices()); } else { - n_vert = static_cast( dag_history.back()->num_vertices() ); + n_vert = static_cast(dag_history.back()->num_vertices()); } - - std::vector> contraction_map( n_vert ); + + std::vector> contraction_map(n_vert); std::iota(contraction_map.begin(), contraction_map.end(), 0); add_contraction(std::move(contraction_map)); compactify_dag_history(); } - - - - } // end namespace osp \ No newline at end of file diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp index d1d61016..8d6355ad 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp @@ -435,15 +435,6 @@ class IsomorphicSubgraphScheduler { } std::cout << std::endl; std::cout << " Sync cost: " << sub_arch.synchronisationCosts() << ", Comm cost: " << sub_arch.communicationCosts() << std::endl; - std::cout << " Sub-problem compatibility matrix:" << std::endl; - const auto &sub_comp_matrix = representative_instance.getNodeNodeCompatabilityMatrix(); - for (unsigned i = 0; i < sub_comp_matrix.size(); ++i) { - std::cout << " Node Type " << i << ": [ "; - for (unsigned j = 0; j < sub_comp_matrix[i].size(); ++j) { - std::cout << (sub_comp_matrix[i][j] ? "1" : "0") << " "; - } - std::cout << "]" << std::endl; - } } scheduler_for_group_ptr->computeSchedule(bsp_schedule); diff --git a/include/osp/partitioning/partitioners/partitioning_ILP.hpp b/include/osp/partitioning/partitioners/partitioning_ILP.hpp index 0482d936..2e6c4e0e 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP.hpp @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once @@ -21,40 +21,39 @@ limitations under the License. #include #include -#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp" +#include "osp/auxiliary/return_status.hpp" #include "osp/partitioning/model/partitioning.hpp" +#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp" -namespace osp{ +namespace osp { template class HypergraphPartitioningILP : public HypergraphPartitioningILPBase { protected: - std::vector readCoptAssignment(const PartitioningProblem &instance, Model& model); + std::vector readCoptAssignment(const PartitioningProblem &instance, Model &model); - void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model& model); + void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); - void setInitialSolution(const Partitioning &partition, Model& model); + void setInitialSolution(const Partitioning &partition, Model &model); public: - virtual ~HypergraphPartitioningILP() override = default; - RETURN_STATUS computePartitioning(Partitioning& result); + RETURN_STATUS computePartitioning(Partitioning &result); virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILP"; } }; template -RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Partitioning& result) -{ +RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Partitioning &result) { Envr env; Model model = env.CreateModel("HypergraphPart"); this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model); setupExtraVariablesConstraints(result.getInstance(), model); - if(this->use_initial_solution) + if (this->use_initial_solution) setInitialSolution(result, model); this->solveILP(model); @@ -82,7 +81,7 @@ 
RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Parti } template -void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, Model& model) { +void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model) { using index_type = typename hypergraph_t::vertex_idx; @@ -104,19 +103,17 @@ void HypergraphPartitioningILP::setupExtraVariablesConstraints(con // hyperedge indicators match node variables for (unsigned part = 0; part < numberOfParts; part++) for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] >= this->node_in_partition[node][static_cast(part)]); - } // convert generic one-to-many assingment (of base class function) to one-to-one template -std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, Model& model) -{ +std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, Model &model) { using index_type = typename hypergraph_t::vertex_idx; std::vector node_to_partition(instance.getHypergraph().num_vertices(), std::numeric_limits::max()); - std::vector > assignmentsGenericForm = this->readAllCoptAssignments(instance, model); + std::vector> assignmentsGenericForm = this->readAllCoptAssignments(instance, model); for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) node_to_partition[node] = assignmentsGenericForm[node].front(); @@ -125,21 +122,19 @@ std::vector HypergraphPartitioningILP::readCoptAssignmen } template -void HypergraphPartitioningILP::setInitialSolution(const Partitioning &partition, Model& model) -{ +void HypergraphPartitioningILP::setInitialSolution(const Partitioning &partition, 
Model &model) { using index_type = typename hypergraph_t::vertex_idx; - const std::vector& assignment = partition.assignedPartitions(); - const unsigned& numPartitions = partition.getInstance().getNumberOfPartitions(); - if(assignment.size() != partition.getInstance().getHypergraph().num_vertices()) + const std::vector &assignment = partition.assignedPartitions(); + const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions(); + if (assignment.size() != partition.getInstance().getHypergraph().num_vertices()) return; - for(index_type node = 0; node < assignment.size(); ++node) - { - if(assignment[node] >= numPartitions) + for (index_type node = 0; node < assignment.size(); ++node) { + if (assignment[node] >= numPartitions) continue; - - for(unsigned part = 0; part < numPartitions; ++part) + + for (unsigned part = 0; part < numPartitions; ++part) model.SetMipStart(this->node_in_partition[node][static_cast(part)], static_cast(assignment[node] == part)); } model.LoadMipStart(); From 0dab7f21e52ff99fd829cc9eca3566886070e2e7 Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Tue, 9 Dec 2025 09:10:34 +0100 Subject: [PATCH 8/9] documentation docu updates --- .../AbstractTestSuiteRunner.hpp | 2 +- .../StringToScheduler/run_bsp_scheduler.hpp | 6 +- include/osp/bsp/model/BspArchitecture.hpp | 38 ++--- include/osp/bsp/model/BspInstance.hpp | 158 ++++++++++-------- .../osp/bsp/scheduler/CoarseAndSchedule.hpp | 8 +- .../IsomorphicSubgraphScheduler.hpp | 4 +- .../TrimmedGroupScheduler.hpp | 2 +- tests/bsp_instance.cpp | 2 +- tests/coarser.cpp | 97 +++++------ tests/trimmed_group_scheduler.cpp | 16 +- 10 files changed, 163 insertions(+), 170 deletions(-) diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp index 80282f58..f023f937 100644 --- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp +++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp @@ -251,7 +251,7 @@ class AbstractTestSuiteRunner { 
log_stream << "Start Graph: " + filename_graph + "\n"; BspInstance bsp_instance; - bsp_instance.setArchitecture(arch); + bsp_instance.getArchitecture() = arch; bool graph_status = false; std::string ext; if (filename_graph.rfind('.') != std::string::npos) diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp index 97e7e473..08209efd 100644 --- a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp @@ -57,8 +57,8 @@ limitations under the License. namespace osp { const std::set get_available_bsp_scheduler_names() { - return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom", - "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"}; + return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom", + "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"}; } template @@ -247,7 +247,7 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert if (!status) return RETURN_STATUS::ERROR; - instance_coarse.setArchitecture(instance.getArchitecture()); + instance_coarse.getArchitecture() = instance.getArchitecture(); instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); BspSchedule schedule_coarse(instance_coarse); diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 32f37d0f..5ef01b0e 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -90,11 +90,12 @@ inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { * bounds. It provides methods to set and retrieve these values. * * **Processors:** - * The architecture consists of p processors, indexed from 0 to p-1. 
+ * The architecture consists of p processors, indexed from 0 to p-1. Note that processor indices are represented using `unsigned`. * * **Processor Types:** * Processors can have different types, which are represented by non-negative integers. - * Processor types are assumed to be consecutive integers starting from 0. + * Processor types are assumed to be consecutive integers starting from 0. Note that processor types are represented using `unsigned`. + * Processor types are used to express compatibilities with node types, which can be specified in the BspInstance. * * **Communication and Synchronization Costs:** * - Communication Cost (g): The cost of communicating a unit of data between processors, i.e., the bandwidth. @@ -185,35 +186,16 @@ class BspArchitecture { } public: - /** - * @brief Default constructor. - * Initializes a BSP architecture with 2 processors, 1 processor type, - * communication costs of 1, synchronisation costs of 2, memory bounds of 100, - * and send costs of 1 between all processors. - */ - BspArchitecture() - : numberOfProcessors_(2U), numberOfProcessorTypes_(1U), communicationCosts_(1U), synchronisationCosts_(2U), - memoryBound_(numberOfProcessors_, 100U), isNuma_(false), - processorTypes_(numberOfProcessors_, 0U), sendCosts_(numberOfProcessors_ * numberOfProcessors_, 1U) { - SetSendCostDiagonalToZero(); - } - - BspArchitecture(const BspArchitecture &other) = default; - BspArchitecture(BspArchitecture &&other) noexcept = default; - BspArchitecture &operator=(const BspArchitecture &other) = default; - BspArchitecture &operator=(BspArchitecture &&other) noexcept = default; - virtual ~BspArchitecture() = default; - /** * @brief Constructs a BspArchitecture object with the specified number of processors, communication cost, and * synchronization cost. * - * @param NumberOfProcessors The number of processors in the architecture. Must be greater than 0. - * @param CommunicationCost The communication cost between processors.
- * @param SynchronisationCost The synchronization cost between processors. + * @param NumberOfProcessors The number of processors in the architecture. Must be greater than 0. Default: 2. + * @param CommunicationCost The communication cost between processors. Default: 1. + * @param SynchronisationCost The synchronization cost between processors. Default: 2. * @param MemoryBound The memory bound for each processor (default: 100). */ - BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t CommunicationCost, const v_commw_t SynchronisationCost, + BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t CommunicationCost = 1U, const v_commw_t SynchronisationCost = 2U, const v_memw_t MemoryBound = 100U) : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost), synchronisationCosts_(SynchronisationCost), @@ -225,6 +207,12 @@ class BspArchitecture { SetSendCostDiagonalToZero(); } + BspArchitecture(const BspArchitecture &other) = default; + BspArchitecture(BspArchitecture &&other) noexcept = default; + BspArchitecture &operator=(const BspArchitecture &other) = default; + BspArchitecture &operator=(BspArchitecture &&other) noexcept = default; + virtual ~BspArchitecture() = default; + /** * @brief Copy constructor from a BspArchitecture with a different graph type. * diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 914c6fdc..c5a973a7 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -29,15 +29,33 @@ namespace osp { /** * @class BspInstance - * @brief Represents an instance of the BSP (Bulk Synchronous Parallel) model. + * @brief Represents a scheduling problem instance for the Bulk Synchronous Parallel (BSP) model. * - * The BspInstance class encapsulates the computational DAG (Directed Acyclic Graph) and the BSP architecture - * for a specific instance of the BSP model. 
It provides methods to access and modify the architecture and DAG, - * as well as retrieve information about the instance such as the number of vertices and processors. + * The BspInstance class serves as a container for all the necessary information to define a + * BSP scheduling problem. It acts as the "ground" object that holds the actual implementation + * of the graph and architecture. * - * The instance specifies the compatibility between node types and processor types. + * It aggregates three main components: * - * @tparam Graph_t The type of the computational DAG. + * 1. **Computational DAG**: The directed acyclic graph representing the program to be executed. + * It defines the tasks (nodes), their dependencies (directed edges), and associated weights (work, memory, communication). + * + * 2. **BSP Architecture**: The hardware model description, including the number of processors, + * their types, memory bounds, and communication/synchronization costs. + * Note that processor indices are represented using `unsigned`. + * + * 3. **Node-Processor Compatibility**: A matrix defining which node types can be executed on which + * processor types. This enables the modeling of heterogeneous systems (e.g., CPU + GPU) where + * certain nodes are restricted to specific hardware accelerators. + * + * @warning Be careful when assigning an existing graph to a BspInstance. Depending on the + * constructor or assignment operator used, this may result in a deep copy of the graph structure, + * which can be expensive for large graphs. + * + * This class provides a unified interface to access and modify these components, facilitating + * the development of scheduling algorithms that need to query problem constraints and properties. + * + * @tparam Graph_t The type of the computational DAG, which must satisfy the `is_computational_dag` concept. 
*/ template class BspInstance { @@ -45,24 +63,37 @@ class BspInstance { private: /** - * @brief The computational DAG of the instance. Holds the graph structure and the node types, work, memory, communication weights. + * @brief The computational DAG representing the program structure. + * + * It contains the graph topology (nodes and directed edges) as well as attributes such as node types, + * work weights, memory weights, and edge communication weights. */ Graph_t cdag; /** - * @brief The BSP architecture of the instance. Holds the processor types and the memory bounds. Communication and synchronization cost. And the send cost between processors. + * @brief The BSP architecture model. + * + * It defines the hardware characteristics including processor types, memory limits, + * communication bandwidth/latency (send costs), and global synchronization costs. */ BspArchitecture architecture; /** * @brief Stores the compatibility between node types and processor types. * - * The architecture defines a type for each processor, and the dag defines a type for each node. + * The architecture defines a type for each processor, and the DAG defines a type for each node. * This matrix stores for each node type and processor type whether they are compatible, i.e., - * if a node of the can be assigned to a processor of the given type in a schedule. + * if a node of that type can be assigned to a processor of the given type in a schedule. * @note The outer vector is indexed by node type, the inner vector is indexed by processor type. */ std::vector> nodeProcessorCompatibility = std::vector>({{true}}); + /** + * @brief The type used for vertex types in the computational DAG. + * If the DAG does not support vertex types, this is `unsigned`. + */ + using vertex_type_t_or_default = std::conditional_t, v_type_t, unsigned>; + using processor_type_t = unsigned; + public: /** * @brief Default constructor for the BspInstance class.
@@ -107,19 +138,16 @@ class BspInstance { /** * @brief Returns a reference to the BSP architecture of the instance. + * Assigning the BSP architecture via the reference creates a copy of the architecture. + * The move operator may be used to transfer ownership of the architecture. */ [[nodiscard]] const BspArchitecture &getArchitecture() const { return architecture; } [[nodiscard]] BspArchitecture &getArchitecture() { return architecture; } - /** - * @brief Sets the BSP architecture for the instance. - * - * @param architecture_ The BSP architecture for the instance. - */ - void setArchitecture(const BspArchitecture &architechture_) { architecture = architechture_; } - /** * @brief Returns a reference to the computational DAG of the instance. + * Assigning the computational DAG via the reference creates a copy of the DAG. + * The move operator may be used to transfer ownership of the DAG. */ [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; } [[nodiscard]] Graph_t &getComputationalDag() { return cdag; } @@ -190,7 +218,6 @@ class BspInstance { /** * @brief Returns the memory bound for a specific processor. - * * @param proc The processor index. */ [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); } @@ -215,41 +242,11 @@ class BspInstance { */ void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } - /** - * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors. - * @return True if the memory constraints are feasible, false otherwise. 
- */ - [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { - std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] = - std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); - } - - for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) { - v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); - bool fits = false; - - for (unsigned proc_type = 0; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { - if (isCompatibleType(vertType, proc_type)) { - fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); - if (fits) - break; - } - } - - if (!fits) - return false; - } - - return true; - } - /** * @brief Returns the processor type for a given processor index. Does not perform bounds checking. * @param proc The processor index. */ - [[nodiscard]] v_type_t processorType(const unsigned proc) const { return architecture.processorType(proc); } + [[nodiscard]] vertex_type_t_or_default processorType(const unsigned proc) const { return architecture.processorType(proc); } /** * @brief Checks if a node is compatible with a processor. Does not perform bounds checking. @@ -269,7 +266,7 @@ class BspInstance { * @param processorType The processor type. * @return True if the node type is compatible with the processor type, false otherwise. */ - [[nodiscard]] bool isCompatibleType(const v_type_t nodeType, const v_type_t processorType) const { + [[nodiscard]] bool isCompatibleType(const vertex_type_t_or_default nodeType, const processor_type_t processorType) const { return nodeProcessorCompatibility[nodeType][processorType]; } @@ -285,6 +282,13 @@ class BspInstance { nodeProcessorCompatibility = compatibility_; } + /** + * @brief Returns the node-processor compatibility matrix. 
+ */ + [[nodiscard]] const std::vector> &getNodeProcessorCompatibilityMatrix() const { + return nodeProcessorCompatibility; + } + /** * @brief Returns the node type - processor type compatibility matrix. */ @@ -294,9 +298,9 @@ class BspInstance { * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`. * @param number_of_types The number of types. */ - void setDiagonalCompatibilityMatrix(const unsigned number_of_types) { + void setDiagonalCompatibilityMatrix(const vertex_type_t_or_default number_of_types) { nodeProcessorCompatibility.assign(number_of_types, std::vector(number_of_types, false)); - for (unsigned i = 0; i < number_of_types; ++i) + for (vertex_type_t_or_default i = 0; i < number_of_types; ++i) nodeProcessorCompatibility[i][i] = true; } @@ -307,30 +311,52 @@ class BspInstance { nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector(architecture.getNumberOfProcessorTypes(), true)); } + /** + * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors. + * @return True if the memory constraints are feasible, false otherwise. 
+ */ + [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { + std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); + for (unsigned proc = 0U; proc < architecture.numberOfProcessors(); proc++) { + max_memory_per_proc_type[architecture.processorType(proc)] = + std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); + } + + for (vertex_type_t_or_default vertType = 0U; vertType < cdag.num_vertex_types(); vertType++) { + v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); + bool fits = false; + + for (processor_type_t proc_type = 0U; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { + if (isCompatibleType(vertType, proc_type)) { + fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); + if (fits) + break; + } + } + + if (!fits) + return false; + } + + return true; + } + /** * @brief Returns a list of compatible processor types for each node type. - * * @return A vector where the index is the node type and the value is a vector of compatible processor types. 
*/ - [[nodiscard]] std::vector> getProcTypesCompatibleWithNodeType() const { - unsigned numberOfNodeTypes = cdag.num_vertex_types(); - unsigned numberOfProcTypes = architecture.getNumberOfProcessorTypes(); - std::vector> compatibleProcTypes(numberOfNodeTypes); + [[nodiscard]] std::vector> getProcTypesCompatibleWithNodeType() const { + vertex_type_t_or_default numberOfNodeTypes = cdag.num_vertex_types(); + processor_type_t numberOfProcTypes = architecture.getNumberOfProcessorTypes(); + std::vector> compatibleProcTypes(numberOfNodeTypes); - for (unsigned nodeType = 0; nodeType < numberOfNodeTypes; ++nodeType) - for (unsigned processorType = 0; processorType < numberOfProcTypes; ++processorType) + for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) + for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType) if (isCompatibleType(nodeType, processorType)) compatibleProcTypes[nodeType].push_back(processorType); return compatibleProcTypes; } - - /** - * @brief Returns the node-processor compatibility matrix. 
- */ - [[nodiscard]] const std::vector> &getNodeProcessorCompatibilityMatrix() const { - return nodeProcessorCompatibility; - } }; } // namespace osp \ No newline at end of file diff --git a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp index 0e9df967..2e23c22e 100644 --- a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp +++ b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp @@ -42,17 +42,17 @@ class CoarseAndSchedule : public Scheduler { const auto &instance = schedule.getInstance(); BspInstance instance_coarse; - + std::vector> reverse_vertex_map; bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), - reverse_vertex_map); + reverse_vertex_map); if (!status) { return RETURN_STATUS::ERROR; - } + } - instance_coarse.setArchitecture(instance.getArchitecture()); + instance_coarse.getArchitecture() = instance.getArchitecture(); instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); BspSchedule schedule_coarse(instance_coarse); diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp index 8d6355ad..83556089 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp @@ -302,7 +302,7 @@ class IsomorphicSubgraphScheduler { const std::vector &was_trimmed) { subgraph_scheduler_input result; - result.instance.setArchitecture(original_instance.getArchitecture()); + result.instance.getArchitecture() = original_instance.getArchitecture(); const unsigned num_proc_types = original_instance.getArchitecture().getNumberOfProcessorTypes(); result.multiplicities.resize(isomorphic_groups.size()); @@ -373,7 +373,7 @@ class IsomorphicSubgraphScheduler { BspInstance representative_instance; auto rep_global_to_local_map = 
create_induced_subgraph_map(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); - representative_instance.setArchitecture(instance.getArchitecture()); + representative_instance.getArchitecture() = instance.getArchitecture(); const auto &procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx]; std::vector> mem_weights(procs_for_group.size(), 0); for (unsigned proc_type = 0; proc_type < procs_for_group.size(); ++proc_type) { diff --git a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp index 88dcf1fa..97fa53a5 100644 --- a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp @@ -135,7 +135,7 @@ class TrimmedGroupScheduler : public Scheduler { std::sort(group_vertices.begin(), group_vertices.end()); BspInstance sub_instanc; - sub_instanc.setArchitecture(sub_arch); // Set the sub-architecture + sub_instanc.getArchitecture() = sub_arch; sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix()); // Inherit compatibility auto global_to_local_map = create_induced_subgraph_map(dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp index f45434de..101e4b2f 100644 --- a/tests/bsp_instance.cpp +++ b/tests/bsp_instance.cpp @@ -44,7 +44,7 @@ BOOST_AUTO_TEST_CASE(test_1) { BspArchitecture architecture_2(6, 3, 1); - instance.setArchitecture(architecture_2); + instance.getArchitecture() = architecture_2; BOOST_CHECK_EQUAL(instance.numberOfProcessors(), 6); BOOST_CHECK_EQUAL(instance.synchronisationCosts(), 1); diff --git a/tests/coarser.cpp b/tests/coarser.cpp index e4bd92c3..9c77703d 100644 --- a/tests/coarser.cpp +++ b/tests/coarser.cpp @@ -23,24 +23,24 @@ limitations under the License. 
#include #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/CoarseAndSchedule.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/coarser/BspScheduleCoarser.hpp" -#include "osp/coarser/coarser_util.hpp" -#include "osp/coarser/funnel/FunnelBfs.hpp" -#include "osp/coarser/hdagg/hdagg_coarser.hpp" #include "osp/coarser/Sarkar/Sarkar.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" #include "osp/coarser/SquashA/SquashA.hpp" #include "osp/coarser/SquashA/SquashAMul.hpp" +#include "osp/coarser/coarser_util.hpp" +#include "osp/coarser/funnel/FunnelBfs.hpp" +#include "osp/coarser/hdagg/hdagg_coarser.hpp" #include "osp/coarser/top_order/top_order_coarser.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; @@ -121,14 +121,15 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t = computational_dag_edge_idx_vector_impl_def_t; BspInstance 
instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -140,7 +141,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { } BspInstance coarse_instance; - coarse_instance.setArchitecture(instance.getArchitecture()); + coarse_instance.getArchitecture() = instance.getArchitecture(); std::vector> vertex_map; std::vector reverse_vertex_map; @@ -193,7 +194,8 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = computational_dag_vector_impl_def_t; @@ -201,7 +203,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -214,7 +216,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { BspInstance coarse_instance; BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.setArchitecture(architecture_t2); + coarse_instance.getArchitecture() = architecture_t2; std::vector> vertex_map; std::vector reverse_vertex_map; @@ -265,14 +267,15 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - 
std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -284,7 +287,7 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { } BspInstance coarse_instance; - coarse_instance.setArchitecture(instance.getArchitecture()); + coarse_instance.getArchitecture() = instance.getArchitecture(); std::vector> vertex_map; std::vector reverse_vertex_map; @@ -345,12 +348,13 @@ void test_coarser_same_graph(Coarser &coarser) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -362,17 +366,15 @@ void test_coarser_same_graph(Coarser &coarser) { } BspInstance coarse_instance; - coarse_instance.setArchitecture(instance.getArchitecture()); + coarse_instance.getArchitecture() = instance.getArchitecture(); std::vector> vertex_map; std::vector reverse_vertex_map; GreedyBspScheduler scheduler; - bool coarse_success = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); BOOST_CHECK(coarse_success); - vertex_map = 
coarser_util::invert_vertex_contraction_map(reverse_vertex_map); BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); @@ -446,27 +448,20 @@ BOOST_AUTO_TEST_CASE(squashA_test) { SquashA coarser(params); test_coarser_same_graph(coarser); - - + params.mode = SquashAParams::Mode::TRIANGLES; params.use_structured_poset = true; params.use_top_poset = true; coarser.setParams(params); - + test_coarser_same_graph(coarser); params.use_top_poset = false; coarser.setParams(params); - + test_coarser_same_graph(coarser); } - - - - - - BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = tiny_spaa_graphs(); @@ -484,7 +479,8 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = CSG; @@ -492,7 +488,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -505,7 +501,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { BspInstance coarse_instance; BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.setArchitecture(architecture_t2); + coarse_instance.getArchitecture() = architecture_t2; std::vector> vertex_map; std::vector reverse_vertex_map; @@ -560,7 +556,8 @@ 
BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl + << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = CSGE; @@ -568,7 +565,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { BspInstance instance; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + instance.getComputationalDag()); bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); @@ -581,7 +578,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { BspInstance coarse_instance; BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.setArchitecture(architecture_t2); + coarse_instance.getArchitecture() = architecture_t2; std::vector> vertex_map; std::vector reverse_vertex_map; @@ -619,13 +616,6 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { } } - - - - - - - BOOST_AUTO_TEST_CASE(Sarkar_test) { using graph_t = computational_dag_edge_idx_vector_impl_def_t; // using graph_t = computational_dag_vector_impl_def_t; @@ -639,58 +629,47 @@ BOOST_AUTO_TEST_CASE(Sarkar_test) { test_coarser_same_graph(coarser); - params.useTopPoset = false; coarser.setParameters(params); test_coarser_same_graph(coarser); - - + params.mode = SarkarParams::Mode::FAN_IN_FULL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_PARTIAL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_FULL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = 
SarkarParams::Mode::FAN_OUT_PARTIAL; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::LEVEL_EVEN; coarser.setParameters(params); test_coarser_same_graph(coarser); - - + params.mode = SarkarParams::Mode::LEVEL_ODD; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_BUFFER; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; coarser.setParameters(params); test_coarser_same_graph(coarser); - params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; coarser.setParameters(params); test_coarser_same_graph(coarser); } - BOOST_AUTO_TEST_CASE(SarkarML_test) { using graph_t = computational_dag_edge_idx_vector_impl_def_t; // using graph_t = computational_dag_vector_impl_def_t; @@ -723,6 +702,6 @@ BOOST_AUTO_TEST_CASE(SquashAML_test) { // using graph_t = computational_dag_vector_impl_def_t; SquashAMul coarser; - + test_coarser_same_graph(coarser); } \ No newline at end of file diff --git a/tests/trimmed_group_scheduler.cpp b/tests/trimmed_group_scheduler.cpp index 52cf4cdb..ccbfee8a 100644 --- a/tests/trimmed_group_scheduler.cpp +++ b/tests/trimmed_group_scheduler.cpp @@ -19,10 +19,10 @@ limitations under the License. 
#define BOOST_TEST_MODULE TrimmedGroupSchedulerTest #include -#include "osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; @@ -30,9 +30,9 @@ using namespace osp; using graph_t = computational_dag_vector_impl_def_t; // Mock SubScheduler for TrimmedGroupScheduler tests -template +template class MockSubScheduler : public Scheduler { -public: + public: // This mock scheduler assigns all nodes to local processor 0 and superstep 0. // This simplifies verification of the TrimmedGroupScheduler's mapping logic. RETURN_STATUS computeSchedule(BspSchedule &schedule) override { @@ -66,7 +66,7 @@ BOOST_FIXTURE_TEST_SUITE(TrimmedGroupSchedulerTestSuite, TrimmedGroupSchedulerFi BOOST_AUTO_TEST_CASE(EmptyGraphTest) { // Graph is empty by default arch.setNumberOfProcessors(4); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; TrimmedGroupScheduler scheduler(mock_sub_scheduler, 1); BspSchedule schedule(instance); @@ -87,7 +87,7 @@ BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) { // Architecture: 4 processors of type 0 arch.setProcessorsWithTypes({0, 0, 0, 0}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; // min_non_zero_procs_ = 1 (all 4 processors assigned to this single component group) TrimmedGroupScheduler scheduler(mock_sub_scheduler, 1); @@ -119,7 +119,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest) // Architecture: 4 processors of type 0 arch.setProcessorsWithTypes({0, 0, 0, 0}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; // min_non_zero_procs_ = 2 (2 component groups, each gets 2 processors) TrimmedGroupScheduler 
scheduler(mock_sub_scheduler, 2); @@ -154,7 +154,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest // Architecture: 6 processors of type 0 arch.setProcessorsWithTypes({0, 0, 0, 0, 0, 0}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; // min_non_zero_procs_ = 2 (3 components, 2 groups) // base_count = 3 / 2 = 1, remainder = 3 % 2 = 1 @@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsHeterogeneousArchitectureTest) { // Architecture: 2 processors of type 0 (global 0,1), 2 processors of type 1 (global 2,3) arch.setProcessorsWithTypes({0, 0, 1, 1}); - instance.setArchitecture(arch); + instance.getArchitecture() = arch; instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc. // min_non_zero_procs_ = 2 (2 components, 2 groups) From 8b4e1a62c32b2a4c6587eb505e8f839c5e23d810 Mon Sep 17 00:00:00 2001 From: tonibohnlein Date: Tue, 9 Dec 2025 11:48:24 +0100 Subject: [PATCH 9/9] arch constructor --- include/osp/bsp/model/BspArchitecture.hpp | 83 +++++++---------------- include/osp/bsp/model/BspInstance.hpp | 6 +- 2 files changed, 25 insertions(+), 64 deletions(-) diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 5ef01b0e..5575fad2 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -194,17 +194,37 @@ class BspArchitecture { * @param CommunicationCost The communication cost between processors. Default: 1. * @param SynchronisationCost The synchronization cost between processors. Default: 2. * @param MemoryBound The memory bound for each processor (default: 100). + * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. Default: empty (uniform costs). 
*/ BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t CommunicationCost = 1U, const v_commw_t SynchronisationCost = 2U, - const v_memw_t MemoryBound = 100U) + const v_memw_t MemoryBound = 100U, const std::vector>> &SendCosts = {}) : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost), synchronisationCosts_(SynchronisationCost), memoryBound_(NumberOfProcessors, MemoryBound), isNuma_(false), - processorTypes_(NumberOfProcessors, 0U), sendCosts_(NumberOfProcessors * NumberOfProcessors, 1U) { + processorTypes_(NumberOfProcessors, 0U) { if (NumberOfProcessors == 0U) { throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0."); } - SetSendCostDiagonalToZero(); + + if (SendCosts.empty()) { + InitializeUniformSendCosts(); + } else { + if (NumberOfProcessors != SendCosts.size()) { + throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); + } + if (std::any_of(SendCosts.begin(), SendCosts.end(), + [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) { + throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); + } + + sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors); + for (const auto &row : SendCosts) { + sendCosts_.insert(sendCosts_.end(), row.begin(), row.end()); + } + + SetSendCostDiagonalToZero(); + isNuma_ = AreSendCostsNuma(); + } } BspArchitecture(const BspArchitecture &other) = default; @@ -246,62 +266,7 @@ class BspArchitecture { */ BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t CommunicationCost, const v_commw_t SynchronisationCost, const std::vector>> &SendCosts) - : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost), - synchronisationCosts_(SynchronisationCost), memoryBound_(NumberOfProcessors, 100U), - processorTypes_(NumberOfProcessors, 0U) { - if 
(NumberOfProcessors == 0U) { - throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0."); - } - if (NumberOfProcessors != SendCosts.size()) { - throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); - } - if (std::any_of(SendCosts.begin(), SendCosts.end(), - [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) { - throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); - } - - sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors); - for (const auto &row : SendCosts) { - sendCosts_.insert(sendCosts_.end(), row.begin(), row.end()); - } - - SetSendCostDiagonalToZero(); - isNuma_ = AreSendCostsNuma(); - } - - /** - * @brief Constructs a BspArchitecture object with custom send costs and memory bound. - * - * @param NumberOfProcessors The number of processors. Must be greater than 0. - * @param CommunicationCost The communication cost. - * @param SynchronisationCost The synchronization cost. - * @param MemoryBound The memory bound for each processor. - * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. 
- */ - BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t CommunicationCost, const v_commw_t SynchronisationCost, - const v_memw_t MemoryBound, const std::vector>> &SendCosts) - : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost), - synchronisationCosts_(SynchronisationCost), memoryBound_(NumberOfProcessors, MemoryBound), - processorTypes_(NumberOfProcessors, 0U) { - if (NumberOfProcessors == 0U) { - throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0."); - } - if (NumberOfProcessors != SendCosts.size()) { - throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); - } - if (std::any_of(SendCosts.begin(), SendCosts.end(), - [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) { - throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); - } - - sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors); - for (const auto &row : SendCosts) { - sendCosts_.insert(sendCosts_.end(), row.begin(), row.end()); - } - - SetSendCostDiagonalToZero(); - isNuma_ = AreSendCostsNuma(); - } + : BspArchitecture(NumberOfProcessors, CommunicationCost, SynchronisationCost, 100U, SendCosts) {} /** * @brief Sets the uniform send cost for each pair of processors. diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index c5a973a7..bed4fd40 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -271,14 +271,10 @@ class BspInstance { } /** - * @brief Sets the node-processor compatibility matrix. The matrix is copied. + * @brief Sets the node-processor compatibility matrix. The matrix is copied. Dimensions are not checked. * @param compatibility_ The compatibility matrix. - * @throw std::runtime_error if the compatibility matrix size does not match the number of node types and processor types. 
*/ void setNodeProcessorCompatibility(const std::vector> &compatibility_) { - if (compatibility_.size() < cdag.num_vertex_types() || compatibility_[0].size() < architecture.getNumberOfProcessorTypes()) { - throw std::runtime_error("Compatibility matrix size does not match the number of node types and processor types."); - } nodeProcessorCompatibility = compatibility_; }