From 8fa0d4c2906ba407d54a37c1c534b7005da9be1c Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 12:11:40 +0100
Subject: [PATCH 1/9] BspArchitecture: read sendCosts_ via bounds-checked at()

---
 include/osp/bsp/model/BspArchitecture.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
index 74872aae..aaf73d3f 100644
--- a/include/osp/bsp/model/BspArchitecture.hpp
+++ b/include/osp/bsp/model/BspArchitecture.hpp
@@ -604,7 +604,7 @@ class BspArchitecture {
         std::vector<std::vector<v_commw_t<Graph_t>>> matrix(numberOfProcessors_, std::vector<v_commw_t<Graph_t>>(numberOfProcessors_));
         for (unsigned i = 0; i < numberOfProcessors_; ++i) {
             for (unsigned j = 0; j < numberOfProcessors_; ++j) {
-                matrix[i][j] = sendCosts_[FlatIndex(i, j)];
+                matrix[i][j] = sendCosts_.at(FlatIndex(i, j));
             }
         }
         return matrix;

From 2219a6b46740d77c2d453baa782adffe3ad9458c Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 14:19:59 +0100
Subject: [PATCH 2/9] BspArchitecture: revert sendCosts_ read to unchecked operator[]

---
 include/osp/bsp/model/BspArchitecture.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
index aaf73d3f..74872aae 100644
--- a/include/osp/bsp/model/BspArchitecture.hpp
+++ b/include/osp/bsp/model/BspArchitecture.hpp
@@ -604,7 +604,7 @@ class BspArchitecture {
         std::vector<std::vector<v_commw_t<Graph_t>>> matrix(numberOfProcessors_, std::vector<v_commw_t<Graph_t>>(numberOfProcessors_));
         for (unsigned i = 0; i < numberOfProcessors_; ++i) {
             for (unsigned j = 0; j < numberOfProcessors_; ++j) {
-                matrix[i][j] = sendCosts_.at(FlatIndex(i, j));
+                matrix[i][j] = sendCosts_[FlatIndex(i, j)];
             }
         }
         return matrix;

From a9ba08678c488eba9711dcfe03925a3e8c31d978 Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 16:49:24 +0100
Subject: [PATCH 3/9] cosmetics: remove stray blank lines, add braces in BspSchedule

---
 include/osp/bsp/model/BspSchedule.hpp         | 25 ++++++-------------
 .../computational_dag_vector_impl.hpp         |  5 ----
 2 files changed, 8 insertions(+), 22 deletions(-)

diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp
index eeeaeec3..25de96cc 100644
--- a/include/osp/bsp/model/BspSchedule.hpp
+++ b/include/osp/bsp/model/BspSchedule.hpp
@@ -105,9 +105,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()),
           node_to_processor_assignment(schedule.getInstance().numberOfVertices()),
           node_to_superstep_assignment(schedule.getInstance().numberOfVertices()) {
-
         for (const auto &v : schedule.getInstance().getComputationalDag().vertices()) {
-
             node_to_processor_assignment[v] = schedule.assignedProcessor(v);
             node_to_superstep_assignment[v] = schedule.assignedSuperstep(v);
         }
@@ -572,11 +570,14 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      */
     virtual void shrinkByMergingSupersteps() {
         std::vector<bool> comm_phase_empty(number_of_supersteps, true);
-        for (const auto &node : instance->vertices())
-            for (const auto &child : instance->getComputationalDag().children(node))
-                if (node_to_processor_assignment[node] != node_to_processor_assignment[child])
+        for (const auto &node : instance->vertices()) {
+            for (const auto &child : instance->getComputationalDag().children(node)) {
+                if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) {
                     for (unsigned offset = 1; offset <= getStaleness(); ++offset)
                         comm_phase_empty[node_to_superstep_assignment[child] - offset] = false;
+                }
+            }
+        }
 
         std::vector<unsigned> new_step_index(number_of_supersteps);
         unsigned current_index = 0;
@@ -585,9 +586,9 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
             if (!comm_phase_empty[step])
                 current_index++;
         }
-        for (const auto &node : instance->vertices())
+        for (const auto &node : instance->vertices()) {
             node_to_superstep_assignment[node] = new_step_index[node_to_superstep_assignment[node]];
-
+        }
         setNumberOfSupersteps(current_index);
     }
 
@@ -633,7 +634,6 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         std::vector<v_memw_t<Graph_t>> current_proc_transient_memory(instance->numberOfProcessors(), 0);
 
         for (const auto &node : instance->vertices()) {
-
             const unsigned proc = node_to_processor_assignment[node];
             current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node);
             current_proc_transient_memory[proc] = std::max(
@@ -659,7 +659,6 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
         std::vector<v_memw_t<Graph_t>> current_proc_memory(instance->numberOfProcessors(), 0);
 
         for (const auto &node : instance->vertices()) {
-
             const unsigned proc = node_to_processor_assignment[node];
             current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node);
 
@@ -671,12 +670,10 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
     }
 
     bool satisfiesLocalInOutMemoryConstraints() const {
-
         SetSchedule set_schedule = SetSchedule(*this);
 
         for (unsigned step = 0; step < number_of_supersteps; step++) {
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
                 v_memw_t<Graph_t> memory = 0;
                 for (const auto &node : set_schedule.step_processor_vertices[step][proc]) {
                     memory += instance->getComputationalDag().vertex_mem_weight(node) +
@@ -701,12 +698,10 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
     }
 
     bool satisfiesLocalIncEdgesMemoryConstraints() const {
-
         SetSchedule set_schedule = SetSchedule(*this);
 
         for (unsigned step = 0; step < number_of_supersteps; step++) {
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
                 std::unordered_set<vertex_idx_t<Graph_t>> nodes_with_incoming_edges;
 
                 v_memw_t<Graph_t> memory = 0;
@@ -714,7 +709,6 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
                     memory += instance->getComputationalDag().vertex_comm_weight(node);
 
                     for (const auto &parent : instance->getComputationalDag().parents(node)) {
-
                         if (node_to_superstep_assignment[parent] != step) {
                             nodes_with_incoming_edges.insert(parent);
                         }
@@ -734,17 +728,14 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
     }
 
     bool satisfiesLocalSourcesIncEdgesMemoryConstraints() const {
-
         SetSchedule set_schedule = SetSchedule(*this);
 
         for (unsigned step = 0; step < number_of_supersteps; step++) {
             for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-
                 std::unordered_set<vertex_idx_t<Graph_t>> nodes_with_incoming_edges;
 
                 v_memw_t<Graph_t> memory = 0;
                 for (const auto &node : set_schedule.step_processor_vertices[step][proc]) {
-
                     if (is_source(node, instance->getComputationalDag())) {
                         memory += instance->getComputationalDag().vertex_mem_weight(node);
                     }
diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp
index efe1996e..0a1b676a 100644
--- a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp
+++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp
@@ -78,7 +78,6 @@ class computational_dag_vector_impl {
     explicit computational_dag_vector_impl(const vertex_idx num_vertices)
         : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0),
           num_vertex_types_(0) {
-
         for (vertex_idx i = 0; i < num_vertices; ++i) {
             vertices_[i].id = i;
         }
@@ -98,9 +97,7 @@ class computational_dag_vector_impl {
      */
     template<typename Graph_t>
     explicit computational_dag_vector_impl(const Graph_t &other) {
-
         static_assert(is_computational_dag_v<Graph_t>, "Graph_t must satisfy the is_computation_dag concept");
-
         constructComputationalDag(other, *this);
     }
 
@@ -196,7 +193,6 @@ class computational_dag_vector_impl {
      */
     vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight,
                           const vertex_mem_weight_type mem_weight, const vertex_type_type vertex_type = 0) {
-
         vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type);
         out_neigbors.push_back({});
         in_neigbors.push_back({});
@@ -231,7 +227,6 @@ class computational_dag_vector_impl {
      * @return True if the edge was added, false if it already exists or vertices are invalid.
      */
     bool add_edge(const vertex_idx source, const vertex_idx target) {
-
         if (source >= static_cast<vertex_idx>(vertices_.size()) || target >= static_cast<vertex_idx>(vertices_.size()) || source == target)
             return false;
 

From 3876a8fdb27d824b7cba0c8ea14928f440357e45 Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 16:54:06 +0100
Subject: [PATCH 4/9] RETURN_STATUS: to_string returns const char*, operator<< reuses it

---
 include/osp/bsp/model/BspInstance.hpp | 28 +++++++++------------------
 include/osp/bsp/model/BspSchedule.hpp |  1 -
 2 files changed, 9 insertions(+), 20 deletions(-)

diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index 7ab72fd4..6257b99d 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -32,7 +32,11 @@ enum class RETURN_STATUS { OSP_SUCCESS,
                            TIMEOUT,
                            ERROR };
 
-inline std::string to_string(const RETURN_STATUS status) {
+/**
+ * @brief Converts the enum to a string literal.
+ * Returns const char* to avoid std::string allocation overhead.
+ */
+inline const char *to_string(const RETURN_STATUS status) {
     switch (status) {
     case RETURN_STATUS::OSP_SUCCESS:
         return "SUCCESS";
@@ -47,25 +51,11 @@ inline std::string to_string(const RETURN_STATUS status) {
     }
 }
 
+/**
+ * @brief Stream operator overload using the helper function.
+ */
 inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) {
-    switch (status) {
-    case RETURN_STATUS::OSP_SUCCESS:
-        os << "SUCCESS";
-        break;
-    case RETURN_STATUS::BEST_FOUND:
-        os << "BEST_FOUND";
-        break;
-    case RETURN_STATUS::TIMEOUT:
-        os << "TIMEOUT";
-        break;
-    case RETURN_STATUS::ERROR:
-        os << "ERROR";
-        break;
-    default:
-        os << "UNKNOWN";
-        break;
-    }
-    return os;
+    return os << to_string(status);
 }
 
 /**
diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp
index 25de96cc..7a02d0f3 100644
--- a/include/osp/bsp/model/BspSchedule.hpp
+++ b/include/osp/bsp/model/BspSchedule.hpp
@@ -741,7 +741,6 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
                     }
 
                     for (const auto &parent : instance->getComputationalDag().parents(node)) {
-
                         if (node_to_superstep_assignment[parent] != step) {
                             nodes_with_incoming_edges.insert(parent);
                         }

From f31fcf01798c9bfcacaa6fbdf12aecc140f6970e Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 16:55:34 +0100
Subject: [PATCH 5/9] BspInstance: mark move constructor and move assignment noexcept

---
 include/osp/bsp/model/BspInstance.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index 6257b99d..697488be 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -113,10 +113,10 @@ class BspInstance {
     }
 
     BspInstance(const BspInstance<Graph_t> &other) = default;
-    BspInstance(BspInstance<Graph_t> &&other) = default;
+    BspInstance(BspInstance<Graph_t> &&other) noexcept = default;
 
     BspInstance<Graph_t> &operator=(const BspInstance<Graph_t> &other) = default;
-    BspInstance<Graph_t> &operator=(BspInstance<Graph_t> &&other) = default;
+    BspInstance<Graph_t> &operator=(BspInstance<Graph_t> &&other) noexcept = default;
 
     /**
      * @brief Returns a reference to the BSP architecture for the instance.

From 29240622471cbe1826111de34f59525c276350a2 Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 17:32:30 +0100
Subject: [PATCH 6/9] Move CompatibleProcessorRange to its own header, document BspInstance

---
 include/osp/bsp/model/BspInstance.hpp         | 263 ++++++++++--------
 .../model/util/CompatibleProcessorRange.hpp   | 101 +++++++
 .../KernighanLin_v2/kl_improver.hpp           |   1 +
 tests/bsp_instance.cpp                        |   1 +
 4 files changed, 256 insertions(+), 110 deletions(-)
 create mode 100644 include/osp/bsp/model/util/CompatibleProcessorRange.hpp

diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index 697488be..e5a1ac3a 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -78,6 +78,20 @@ class BspInstance {
     // for problem instances with heterogeneity
     std::vector<std::vector<bool>> nodeProcessorCompatibility = std::vector<std::vector<bool>>({{true}});
 
+    /**
+     * @brief Calculates the maximum memory bound for each processor type.
+     *
+     * @return A vector where the index corresponds to the processor type and the value is the maximum memory bound for that type.
+     */
+    std::vector<v_memw_t<Graph_t>> calculateMaxMemoryPerProcessorType() const {
+        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
+        for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
+            max_memory_per_proc_type[architecture.processorType(proc)] =
+                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
+        }
+        return max_memory_per_proc_type;
+    }
+
   public:
     /**
      * @brief Default constructor for the BspInstance class.
@@ -123,90 +137,86 @@ class BspInstance {
      *
      * @return A reference to the BSP architecture for the instance.
      */
-    inline const BspArchitecture<Graph_t> &getArchitecture() const { return architecture; }
-
-    /**
-     * @brief Returns a reference to the BSP architecture for the instance.
-     *
-     * @return A reference to the BSP architecture for the instance.
-     */
-    inline BspArchitecture<Graph_t> &getArchitecture() { return architecture; }
+    [[nodiscard]] const BspArchitecture<Graph_t> &getArchitecture() const { return architecture; }
+    [[nodiscard]] BspArchitecture<Graph_t> &getArchitecture() { return architecture; }
 
     /**
      * @brief Sets the BSP architecture for the instance.
      *
      * @param architecture_ The BSP architecture for the instance.
      */
-    inline void setArchitecture(const BspArchitecture<Graph_t> &architechture_) { architecture = architechture_; }
+    void setArchitecture(const BspArchitecture<Graph_t> &architechture_) { architecture = architechture_; }
 
     /**
      * @brief Returns a reference to the computational DAG for the instance.
      *
      * @return A reference to the computational DAG for the instance.
      */
-    inline const Graph_t &getComputationalDag() const { return cdag; }
+    [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; }
+    [[nodiscard]] Graph_t &getComputationalDag() { return cdag; }
 
     /**
-     * @brief Returns a reference to the computational DAG for the instance.
+     * @brief Returns the number of vertices in the computational DAG.
      *
-     * @return A reference to the computational DAG for the instance.
+     * @return The number of vertices.
      */
-    inline Graph_t &getComputationalDag() { return cdag; }
+    [[nodiscard]] vertex_idx_t<Graph_t> numberOfVertices() const { return cdag.num_vertices(); }
 
-    inline vertex_idx_t<Graph_t> numberOfVertices() const { return cdag.num_vertices(); }
-
-    inline auto vertices() const { return cdag.vertices(); }
+    /**
+     * @brief Returns a view over the vertex indices of the computational DAG.
+     * @return A view over the vertex indices.
+     */
+    [[nodiscard]] auto vertices() const { return cdag.vertices(); }
 
-    inline auto processors() const { return architecture.processors(); }
+    /**
+     * @brief Returns a view over the processor indices of the BSP architecture.
+     * @return A view over the processor indices.
+     */
+    [[nodiscard]] auto processors() const { return architecture.processors(); }
 
     /**
      * @brief Returns the number of processors in the BSP architecture.
-     *
      * @return The number of processors in the BSP architecture.
      */
-    inline unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); }
+    [[nodiscard]] unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); }
 
     /**
      * @brief Returns the communication costs between two processors.
-     *
      * The communication costs are the send costs multiplied by the communication costs.
      *
-     * @param p1 The index of the first processor.
-     * @param p2 The index of the second processor.
+     * @param p_send The index of the sending processor.
+     * @param p_receive The index of the receiving processor.
      *
      * @return The communication costs between the two processors.
      */
-    inline v_commw_t<Graph_t> communicationCosts(unsigned int p1, unsigned int p2) const {
-        return architecture.communicationCosts(p1, p2);
+    [[nodiscard]] v_commw_t<Graph_t> communicationCosts(const unsigned p_send, const unsigned p_receive) const {
+        return architecture.communicationCosts(p_send, p_receive);
     }
 
     /**
      * @brief Returns the send costs between two processors.
      *
-     *
-     * @param p1 The index of the first processor.
-     * @param p2 The index of the second processor.
+     * @param p_send The index of the sending processor.
+     * @param p_receive The index of the receiving processor.
      *
      * @return The send costs between the two processors.
      */
-    inline v_commw_t<Graph_t> sendCosts(unsigned int p1, unsigned int p2) const {
-        return architecture.sendCosts(p1, p2);
+    [[nodiscard]] v_commw_t<Graph_t> sendCosts(const unsigned p_send, const unsigned p_receive) const {
+        return architecture.sendCosts(p_send, p_receive);
     }
 
     /**
      * @brief Returns a copy of the send costs matrix.
      * @return A copy of the send costs matrix.
      */
-    inline std::vector<std::vector<v_commw_t<Graph_t>>> sendCostMatrix() const {
-        return architecture.sendCostMatrix();
-    }
+    [[nodiscard]] std::vector<std::vector<v_commw_t<Graph_t>>> sendCosts() const { return architecture.sendCosts(); }
 
     /**
      * @brief Returns the flattened send costs vector.
      *
      * @return The flattened send costs vector.
      */
-    inline const std::vector<v_commw_t<Graph_t>> &sendCostsVector() const {
+    [[nodiscard]] const std::vector<v_commw_t<Graph_t>> &sendCostsVector() const {
         return architecture.sendCostsVector();
     }
 
@@ -215,29 +225,49 @@ class BspInstance {
      *
      * @return The communication costs as an unsigned integer.
      */
-    inline v_commw_t<Graph_t> communicationCosts() const { return architecture.communicationCosts(); }
+    [[nodiscard]] v_commw_t<Graph_t> communicationCosts() const { return architecture.communicationCosts(); }
 
     /**
      * @brief Returns the synchronization costs of the BSP architecture.
      *
      * @return The synchronization costs as an unsigned integer.
      */
-    inline v_commw_t<Graph_t> synchronisationCosts() const { return architecture.synchronisationCosts(); }
+    [[nodiscard]] v_commw_t<Graph_t> synchronisationCosts() const { return architecture.synchronisationCosts(); }
 
     /**
      * @brief Returns whether the architecture is NUMA.
      *
      * @return True if the architecture is NUMA, false otherwise.
      */
-    inline bool isNumaInstance() const { return architecture.isNumaArchitecture(); }
+    [[nodiscard]] bool isNumaInstance() const { return architecture.isNumaArchitecture(); }
 
-    inline v_memw_t<Graph_t> memoryBound(unsigned proc) const { return architecture.memoryBound(proc); }
+    /**
+     * @brief Returns the memory bound for a specific processor.
+     *
+     * @param proc The processor index.
+     * @return The memory bound for the processor.
+     */
+    [[nodiscard]] v_memw_t<Graph_t> memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); }
 
-    v_memw_t<Graph_t> maxMemoryBoundProcType(unsigned procType) const {
+    /**
+     * @brief Returns the maximum memory bound for a specific processor type.
+     *
+     * @param procType The processor type.
+     * @return The maximum memory bound for the processor type.
+     */
+    [[nodiscard]] v_memw_t<Graph_t> maxMemoryBoundProcType(const unsigned procType) const {
         return architecture.maxMemoryBoundProcType(procType);
     }
 
-    v_memw_t<Graph_t> maxMemoryBoundNodeType(unsigned nodeType) const {
+    /**
+     * @brief Returns the maximum memory bound for a specific node type.
+     *
+     * This considers all compatible processor types for the given node type.
+     *
+     * @param nodeType The node type.
+     * @return The maximum memory bound for the node type.
+     */
+    [[nodiscard]] v_memw_t<Graph_t> maxMemoryBoundNodeType(const unsigned nodeType) const {
         int max_mem = 0;
         for (unsigned proc = 0; proc < architecture.getNumberOfProcessorTypes(); proc++) {
             if (isCompatibleType(nodeType, architecture.processorType(proc))) {
@@ -252,29 +282,30 @@ class BspInstance {
      *
      * @param cost The communication costs to set.
      */
-    inline void setCommunicationCosts(const v_commw_t<Graph_t> cost) { architecture.setCommunicationCosts(cost); }
+    void setCommunicationCosts(const v_commw_t<Graph_t> cost) { architecture.setCommunicationCosts(cost); }
 
     /**
      * @brief Sets the synchronisation costs of the BSP architecture.
      *
      * @param cost The synchronisation costs to set.
      */
-    inline void setSynchronisationCosts(const v_commw_t<Graph_t> cost) { architecture.setSynchronisationCosts(cost); }
+    void setSynchronisationCosts(const v_commw_t<Graph_t> cost) { architecture.setSynchronisationCosts(cost); }
 
     /**
      * @brief Sets the number of processors in the BSP architecture.
      *
      * @param num The number of processors to set.
      */
-    inline void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); }
+    void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); }
 
-    bool check_memory_constraints_feasibility() const {
+    /**
+     * @brief Checks if the memory constraints are feasible for the given instance.
+     *
+     * @return True if the memory constraints are feasible, false otherwise.
+     */
+    [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const {
+        const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType();
 
-        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
-            max_memory_per_proc_type[architecture.processorType(proc)] =
-                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
-        }
         for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) {
             v_memw_t<Graph_t> max_memory_of_type = max_memory_weight(vertType, cdag);
             bool fits = false;
@@ -294,13 +325,15 @@ class BspInstance {
         return true;
     }
 
+    /**
+     * @brief Adjusts the memory constraints of the architecture to ensure feasibility.
+     *
+     * If a node type requires more memory than available on any compatible processor type,
+     * the memory bound of compatible processors is increased.
+     */
     void adjust_memory_constraints() {
+        const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType();
 
-        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
-            max_memory_per_proc_type[architecture.processorType(proc)] =
-                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
-        }
         for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) {
             v_memw_t<Graph_t> max_memory_of_type = max_memory_weight(vertType, cdag);
             bool fits = false;
@@ -330,34 +363,72 @@ class BspInstance {
         }
     }
 
-    inline v_type_t<Graph_t> processorType(unsigned p1) const { return architecture.processorType(p1); }
+    /**
+     * @brief Returns the processor type for a given processor index.
+     *
+     * @param proc The processor index.
+     * @return The processor type.
+     */
+    [[nodiscard]] v_type_t<Graph_t> processorType(const unsigned proc) const { return architecture.processorType(proc); }
 
-    inline bool isCompatible(const vertex_idx_t<Graph_t> &node, unsigned processor_id) const {
+    /**
+     * @brief Checks if a node is compatible with a processor.
+     *
+     * @param node The node index.
+     * @param processor_id The processor index.
+     * @return True if the node is compatible with the processor, false otherwise.
+     */
+    [[nodiscard]] bool isCompatible(const vertex_idx_t<Graph_t> &node, const unsigned processor_id) const {
         return isCompatibleType(cdag.vertex_type(node), architecture.processorType(processor_id));
     }
 
-    inline bool isCompatibleType(v_type_t<Graph_t> nodeType, v_type_t<Graph_t> processorType) const {
-
+    /**
+     * @brief Checks if a node type is compatible with a processor type.
+     *
+     * @param nodeType The node type.
+     * @param processorType The processor type.
+     * @return True if the node type is compatible with the processor type, false otherwise.
+     */
+    [[nodiscard]] bool isCompatibleType(const v_type_t<Graph_t> nodeType, const v_type_t<Graph_t> processorType) const {
         return nodeProcessorCompatibility[nodeType][processorType];
     }
 
+    /**
+     * @brief Sets the node-processor compatibility matrix.
+     *
+     * @param compatibility_ The compatibility matrix.
+     */
     void setNodeProcessorCompatibility(const std::vector<std::vector<bool>> &compatibility_) {
-
         nodeProcessorCompatibility = compatibility_;
     }
 
-    const std::vector<std::vector<bool>> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; }
-
-    void setDiagonalCompatibilityMatrix(unsigned number_of_types) {
+    /**
+     * @brief Returns the node-processor compatibility matrix.
+     *
+     * @return The node-processor compatibility matrix.
+     */
+    [[nodiscard]] const std::vector<std::vector<bool>> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; }
 
+    /**
+     * @brief Sets the compatibility matrix to be diagonal.
+     *
+     * This implies that node type `i` is only compatible with processor type `i`.
+     *
+     * @param number_of_types The number of types.
+     */
+    void setDiagonalCompatibilityMatrix(const unsigned number_of_types) {
         nodeProcessorCompatibility =
             std::vector<std::vector<bool>>(number_of_types, std::vector<bool>(number_of_types, false));
         for (unsigned i = 0; i < number_of_types; ++i)
             nodeProcessorCompatibility[i][i] = true;
     }
 
+    /**
+     * @brief Sets the compatibility matrix to all ones.
+     *
+     * This implies that all node types are compatible with all processor types.
+     */
     void setAllOnesCompatibilityMatrix() {
-
         unsigned number_of_node_types = cdag.num_vertex_types();
         unsigned number_of_proc_types = architecture.getNumberOfProcessorTypes();
 
@@ -365,7 +436,12 @@ class BspInstance {
             std::vector<std::vector<bool>>(number_of_node_types, std::vector<bool>(number_of_proc_types, true));
     }
 
-    std::vector<std::vector<unsigned>> getProcTypesCompatibleWithNodeType() const {
+    /**
+     * @brief Returns a list of compatible processor types for each node type.
+     *
+     * @return A vector where the index is the node type and the value is a vector of compatible processor types.
+     */
+    [[nodiscard]] std::vector<std::vector<unsigned>> getProcTypesCompatibleWithNodeType() const {
         unsigned numberOfNodeTypes = cdag.num_vertex_types();
         unsigned numberOfProcTypes = architecture.getNumberOfProcessorTypes();
         std::vector<std::vector<unsigned>> compatibleProcTypes(numberOfNodeTypes);
@@ -378,7 +454,14 @@ class BspInstance {
         return compatibleProcTypes;
     }
 
-    std::vector<std::vector<bool>> getNodeNodeCompatabilityMatrix() const {
+    /**
+     * @brief Returns a compatibility matrix between node types.
+     *
+     * Two node types are compatible if they share at least one compatible processor type.
+     *
+     * @return A matrix where `[i][j]` is true if node type `i` and node type `j` are compatible.
+     */
+    [[nodiscard]] std::vector<std::vector<bool>> getNodeNodeCompatabilityMatrix() const {
         std::vector<std::vector<bool>> compMat(cdag.num_vertex_types(),
                                                std::vector<bool>(cdag.num_vertex_types(), false));
         for (unsigned nodeType1 = 0; nodeType1 < cdag.num_vertex_types(); nodeType1++) {
@@ -394,54 +477,14 @@ class BspInstance {
         return compMat;
     }
 
-    inline const std::vector<std::vector<bool>> &getNodeProcessorCompatibilityMatrix() const {
+    /**
+     * @brief Returns the node-processor compatibility matrix.
+     *
+     * @return The node-processor compatibility matrix.
+     */
+    [[nodiscard]] const std::vector<std::vector<bool>> &getNodeProcessorCompatibilityMatrix() const {
         return nodeProcessorCompatibility;
     }
 };
 
-template<typename Graph_t>
-class CompatibleProcessorRange {
-
-    std::vector<std::vector<unsigned>> type_processor_idx;
-    const BspInstance<Graph_t> *instance = nullptr;
-
-  public:
-    CompatibleProcessorRange() = default;
-
-    CompatibleProcessorRange(const BspInstance<Graph_t> &inst) {
-        initialize(inst);
-    }
-
-    inline void initialize(const BspInstance<Graph_t> &inst) {
-
-        instance = &inst;
-
-        if constexpr (has_typed_vertices_v<Graph_t>) {
-
-            type_processor_idx = std::vector<std::vector<unsigned>>(inst.getComputationalDag().num_vertex_types());
-
-            for (v_type_t<Graph_t> v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) {
-                for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++)
-                    if (inst.isCompatibleType(v_type, inst.processorType(proc)))
-                        type_processor_idx[v_type].push_back(proc);
-            }
-        }
-    }
-
-    inline const auto &compatible_processors_type(v_type_t<Graph_t> type) const {
-
-        assert(instance != nullptr);
-
-        if constexpr (has_typed_vertices_v<Graph_t>) {
-            return type_processor_idx[type];
-        } else {
-            return instance->processors();
-        }
-    }
-
-    inline const auto &compatible_processors_vertex(vertex_idx_t<Graph_t> vertex) const {
-        return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex));
-    }
-};
-
 } // namespace osp
\ No newline at end of file
diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
new file mode 100644
index 00000000..a4c5800a
--- /dev/null
+++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
@@ -0,0 +1,101 @@
+/*
+Copyright 2024 Huawei Technologies Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
+*/
+
+#pragma once
+
+#include <vector>
+
+namespace osp {
+
+/**
+ * @class CompatibleProcessorRange
+ * @brief Helper class to efficiently iterate over compatible processors for a given node or node type.
+ *
+ * This class precomputes and stores the list of compatible processors for each node type.
+ *
+ * @tparam Graph_t The type of the computational DAG.
+ */
+template<typename Graph_t>
+class CompatibleProcessorRange {
+
+    std::vector<std::vector<unsigned>> type_processor_idx;
+    const BspInstance<Graph_t> *instance = nullptr;
+
+  public:
+    /**
+     * @brief Default constructor.
+     */
+    CompatibleProcessorRange() = default;
+
+    /**
+     * @brief Constructs a CompatibleProcessorRange for the given BspInstance.
+     *
+     * @param inst The BspInstance.
+     */
+    CompatibleProcessorRange(const BspInstance<Graph_t> &inst) {
+        initialize(inst);
+    }
+
+    /**
+     * @brief Initializes the CompatibleProcessorRange with a BspInstance.
+     *
+     * @param inst The BspInstance.
+     */
+    void initialize(const BspInstance<Graph_t> &inst) {
+        instance = &inst;
+
+        if constexpr (has_typed_vertices_v<Graph_t>) {
+
+            type_processor_idx = std::vector<std::vector<unsigned>>(inst.getComputationalDag().num_vertex_types());
+
+            for (v_type_t<Graph_t> v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) {
+                for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++)
+                    if (inst.isCompatibleType(v_type, inst.processorType(proc)))
+                        type_processor_idx[v_type].push_back(proc);
+            }
+        }
+    }
+
+    /**
+     * @brief Returns a range of compatible processors for a given node type.
+     *
+     * @param type The node type.
+     * @return A const reference to a vector of compatible processor indices.
+     */
+    [[nodiscard]] const auto &compatible_processors_type(v_type_t<Graph_t> type) const {
+        assert(instance != nullptr);
+
+        if constexpr (has_typed_vertices_v<Graph_t>) {
+            return type_processor_idx[type];
+        } else {
+            return instance->processors();
+        }
+    }
+
+    /**
+     * @brief Returns a range of compatible processors for a given vertex.
+     *
+     * @param vertex The vertex index.
+     * @return A const reference to a vector of compatible processor indices.
+     */
+    [[nodiscard]] const auto &compatible_processors_vertex(vertex_idx_t<Graph_t> vertex) const {
+        return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex));
+    }
+};
+
+} // namespace osp
\ No newline at end of file
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
index 3657ed52..dd572710 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp
@@ -30,6 +30,7 @@ limitations under the License.
 
 #include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp"
 #include "osp/auxiliary/misc.hpp"
+#include "osp/bsp/model/util/CompatibleProcessorRange.hpp"
 #include "osp/bsp/scheduler/ImprovementScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp
index 60e95999..f45434de 100644
--- a/tests/bsp_instance.cpp
+++ b/tests/bsp_instance.cpp
@@ -23,6 +23,7 @@ limitations under the License.
 #include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
+#include "osp/bsp/model/util/CompatibleProcessorRange.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include <filesystem>

From 4be4f9ba6dccaf3203f445c048962d2204343b10 Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Mon, 8 Dec 2025 18:23:20 +0100
Subject: [PATCH 7/9] return_status.hpp

update

update

update, bugfix
---
 .../AbstractTestSuiteRunner.hpp               |  35 ++-
 include/osp/auxiliary/return_status.hpp       |  56 ++++
 include/osp/bsp/model/BspArchitecture.hpp     |   1 +
 include/osp/bsp/model/BspInstance.hpp         | 248 ++++--------------
 include/osp/bsp/model/BspSchedule.hpp         |  20 +-
 .../model/util/CompatibleProcessorRange.hpp   |  16 +-
 .../osp/bsp/model/{ => util}/SetSchedule.hpp  |   2 +-
 .../bsp/model/{ => util}/VectorSchedule.hpp   |   2 +-
 .../IlpSchedulers/CoptFullScheduler.hpp       | 152 +++++------
 .../TotalCommunicationScheduler.hpp           |   3 +-
 .../KernighanLin/kl_current_schedule.hpp      |  12 +-
 .../KernighanLin_v2/kl_active_schedule.hpp    | 212 ++++++++-------
 .../LocalSearchMemoryConstraintModules.hpp    |  13 +-
 include/osp/bsp/scheduler/Scheduler.hpp       |   1 +
 include/osp/coarser/BspScheduleCoarser.hpp    |   5 +-
 include/osp/coarser/MultilevelCoarser.hpp     |  72 +++--
 .../IsomorphicSubgraphScheduler.hpp           |   9 -
 .../partitioners/partitioning_ILP.hpp         |  49 ++--
 18 files changed, 377 insertions(+), 531 deletions(-)
 create mode 100644 include/osp/auxiliary/return_status.hpp
 rename include/osp/bsp/model/{ => util}/SetSchedule.hpp (99%)
 rename include/osp/bsp/model/{ => util}/VectorSchedule.hpp (99%)

diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
index 86a9f1ea..80282f58 100644
--- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
+++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
@@ -28,15 +28,16 @@ limitations under the License.
 #include <string>
 #include <vector>
 
-#include <boost/property_tree/json_parser.hpp>
-#include <boost/property_tree/ptree.hpp>
 #include "ConfigParser.hpp"
 #include "StatsModules/IStatsModule.hpp"
-#include "osp/bsp/model/BspInstance.hpp"
 #include "osp/auxiliary/io/arch_file_reader.hpp"
 #include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/return_status.hpp"
+#include "osp/bsp/model/BspInstance.hpp"
+#include <boost/property_tree/json_parser.hpp>
+#include <boost/property_tree/ptree.hpp>
 
-//#define EIGEN_FOUND 1
+// #define EIGEN_FOUND 1
 
 #ifdef EIGEN_FOUND
 #include <Eigen/Sparse>
@@ -83,7 +84,7 @@ class AbstractTestSuiteRunner {
 
             if (write_target_object_to_file) {
                 output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory")
-                                                    .get_value<std::string>(); 
+                                                    .get_value<std::string>();
                 if (output_target_object_dir_path.substr(0, 1) != "/")
                     output_target_object_dir_path = executable_dir + output_target_object_dir_path;
                 if (!output_target_object_dir_path.empty() && !std::filesystem::exists(output_target_object_dir_path)) {
@@ -167,13 +168,13 @@ class AbstractTestSuiteRunner {
         }
     }
 
-    virtual RETURN_STATUS compute_target_object_impl(const BspInstance<GraphType> &instance, std::unique_ptr<TargetObjectType>& target_object,
-                                                        const pt::ptree &algo_config,  
-                                                        long long &computation_time_ms) = 0;
+    virtual RETURN_STATUS compute_target_object_impl(const BspInstance<GraphType> &instance, std::unique_ptr<TargetObjectType> &target_object,
+                                                     const pt::ptree &algo_config,
+                                                     long long &computation_time_ms) = 0;
 
     virtual void create_and_register_statistic_modules(const std::string &module_name) = 0;
 
-    virtual void write_target_object_hook(const TargetObjectType&, const std::string &, const std::string &,
+    virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &,
                                           const std::string &) {
     } // default in case TargetObjectType cannot be written to file
 
@@ -268,12 +269,12 @@ class AbstractTestSuiteRunner {
                 SM_csc_int64 L_csc_int64{};
 
                 if constexpr (std::is_same_v<GraphType, sparse_matrix_graph_int32_t> || std::is_same_v<GraphType, sparse_matrix_graph_int64_t>) {
-                    if (ext != "mtx"){
+                    if (ext != "mtx") {
                         log_stream << "Error: Only .mtx file is accepted for SpTRSV" << std::endl;
                         return 0;
                     }
-                    
-                    if constexpr (std::is_same_v<GraphType, sparse_matrix_graph_int32_t>){
+
+                    if constexpr (std::is_same_v<GraphType, sparse_matrix_graph_int32_t>) {
                         graph_status = Eigen::loadMarket(L_csr_int32, filename_graph);
                         if (!graph_status) {
                             std::cerr << "Failed to read matrix from " << filename_graph << std::endl;
@@ -297,7 +298,7 @@ class AbstractTestSuiteRunner {
                     }
                 } else {
 #endif
-                graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag());
+                    graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag());
 
 #ifdef EIGEN_FOUND
                 }
@@ -309,22 +310,20 @@ class AbstractTestSuiteRunner {
 
                 for (auto &algorithm_config_pair : parser.scheduler) {
                     const pt::ptree &algo_config = algorithm_config_pair.second;
-                    
-        
 
                     std::string current_algo_name = algo_config.get_child("name").get_value<std::string>();
                     log_stream << "Start Algorithm " + current_algo_name + "\n";
 
                     long long computation_time_ms;
-                    std::unique_ptr<TargetObjectType> target_object; 
-                    
+                    std::unique_ptr<TargetObjectType> target_object;
+
                     RETURN_STATUS exec_status = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms);
 
                     if (exec_status != RETURN_STATUS::OSP_SUCCESS && exec_status != RETURN_STATUS::BEST_FOUND) {
                         if (exec_status == RETURN_STATUS::ERROR)
                             log_stream << "Error computing with " << current_algo_name << "." << std::endl;
                         else if (exec_status == RETURN_STATUS::TIMEOUT)
-                            log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl;                           
+                            log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl;
                         continue;
                     }
 
diff --git a/include/osp/auxiliary/return_status.hpp b/include/osp/auxiliary/return_status.hpp
new file mode 100644
index 00000000..e5f0b870
--- /dev/null
+++ b/include/osp/auxiliary/return_status.hpp
@@ -0,0 +1,56 @@
+/*
+Copyright 2024 Huawei Technologies Co., Ltd.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
+*/
+
+#pragma once
+
+#include <iostream>
+
+namespace osp {
+
+enum class RETURN_STATUS { OSP_SUCCESS,
+                           BEST_FOUND,
+                           TIMEOUT,
+                           ERROR };
+
+/**
+ * @brief Converts the enum to a string literal.
+ * Returns const char* to avoid std::string allocation overhead.
+ */
+inline const char *to_string(const RETURN_STATUS status) {
+    switch (status) {
+    case RETURN_STATUS::OSP_SUCCESS:
+        return "SUCCESS";
+    case RETURN_STATUS::BEST_FOUND:
+        return "BEST FOUND";
+    case RETURN_STATUS::TIMEOUT:
+        return "TIMEOUT";
+    case RETURN_STATUS::ERROR:
+        return "ERROR";
+    default:
+        return "UNKNOWN";
+    }
+}
+
+/**
+ * @brief Stream operator overload using the helper function.
+ */
+inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) {
+    return os << to_string(status);
+}
+
+} // namespace osp
\ No newline at end of file
diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
index 74872aae..32f37d0f 100644
--- a/include/osp/bsp/model/BspArchitecture.hpp
+++ b/include/osp/bsp/model/BspArchitecture.hpp
@@ -636,6 +636,7 @@ class BspArchitecture {
 
     /**
      * @brief Returns the send costs between two processors. Does not perform bounds checking.
+     * Does not take the communication costs into account.
      *
      * @param p1 The index of the first processor.
      * @param p2 The index of the second processor.
diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index e5a1ac3a..914c6fdc 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -27,37 +27,6 @@ limitations under the License.
 
 namespace osp {
 
-enum class RETURN_STATUS { OSP_SUCCESS,
-                           BEST_FOUND,
-                           TIMEOUT,
-                           ERROR };
-
-/**
- * @brief Converts the enum to a string literal.
- * Returns const char* to avoid std::string allocation overhead.
- */
-inline const char *to_string(const RETURN_STATUS status) {
-    switch (status) {
-    case RETURN_STATUS::OSP_SUCCESS:
-        return "SUCCESS";
-    case RETURN_STATUS::BEST_FOUND:
-        return "BEST FOUND";
-    case RETURN_STATUS::TIMEOUT:
-        return "TIMEOUT";
-    case RETURN_STATUS::ERROR:
-        return "ERROR";
-    default:
-        return "UNKNOWN";
-    }
-}
-
-/**
- * @brief Stream operator overload using the helper function.
- */
-inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) {
-    return os << to_string(status);
-}
-
 /**
  * @class BspInstance
  * @brief Represents an instance of the BSP (Bulk Synchronous Parallel) model.
@@ -65,32 +34,34 @@ inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) {
  * The BspInstance class encapsulates the computational DAG (Directed Acyclic Graph) and the BSP architecture
  * for a specific instance of the BSP model. It provides methods to access and modify the architecture and DAG,
  * as well as retrieve information about the instance such as the number of vertices and processors.
+ *
+ * The instance specifies the compatibility between node types and processor types.
+ *
+ * @tparam Graph_t The type of the computational DAG.
  */
 template<typename Graph_t>
 class BspInstance {
-
-    static_assert(is_computational_dag_v<Graph_t>, "BspSchedule can only be used with computational DAGs.");
+    static_assert(is_computational_dag_v<Graph_t>, "BspInstance can only be used with computational DAGs.");
 
   private:
+    /**
+     * @brief The computational DAG of the instance. Holds the graph structure and the node types, work, memory, and communication weights.
+     */
     Graph_t cdag;
+    /**
+     * @brief The BSP architecture of the instance. Holds the processor types, the memory bounds, the communication and synchronization costs, and the send costs between processors.
+     */
     BspArchitecture<Graph_t> architecture;
 
-    // for problem instances with heterogeneity
-    std::vector<std::vector<bool>> nodeProcessorCompatibility = std::vector<std::vector<bool>>({{true}});
-
     /**
-     * @brief Calculates the maximum memory bound for each processor type.
+     * @brief Stores the compatibility between node types and processor types.
      *
-     * @return A vector where the index corresponds to the processor type and the value is the maximum memory bound for that type.
+     * The architecture defines a type for each processor, and the dag defines a type for each node.
+     * This matrix stores for each node type and processor type whether they are compatible, i.e.,
+     * if a node of the given type can be assigned to a processor of the given type in a schedule.
+     * @note The outer vector is indexed by node type, the inner vector is indexed by processor type.
      */
-    std::vector<v_memw_t<Graph_t>> calculateMaxMemoryPerProcessorType() const {
-        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
-            max_memory_per_proc_type[architecture.processorType(proc)] =
-                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
-        }
-        return max_memory_per_proc_type;
-    }
+    std::vector<std::vector<bool>> nodeProcessorCompatibility = std::vector<std::vector<bool>>({{true}});
 
   public:
     /**
@@ -100,6 +71,7 @@ class BspInstance {
 
     /**
      * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture.
+     * Computational DAG and BSP architecture are copied!
      *
      * @param cdag The computational DAG for the instance.
      * @param architecture The BSP architecture for the instance.
@@ -110,6 +82,7 @@ class BspInstance {
 
     /**
      * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture.
+     * Computational DAG and BSP architecture are moved!
      *
      * @param cdag The computational DAG for the instance.
      * @param architecture The BSP architecture for the instance.
@@ -133,9 +106,7 @@ class BspInstance {
     BspInstance<Graph_t> &operator=(BspInstance<Graph_t> &&other) noexcept = default;
 
     /**
-     * @brief Returns a reference to the BSP architecture for the instance.
-     *
-     * @return A reference to the BSP architecture for the instance.
+     * @brief Returns a reference to the BSP architecture of the instance.
      */
     [[nodiscard]] const BspArchitecture<Graph_t> &getArchitecture() const { return architecture; }
     [[nodiscard]] BspArchitecture<Graph_t> &getArchitecture() { return architecture; }
@@ -148,58 +119,48 @@ class BspInstance {
     void setArchitecture(const BspArchitecture<Graph_t> &architechture_) { architecture = architechture_; }
 
     /**
-     * @brief Returns a reference to the computational DAG for the instance.
-     *
-     * @return A reference to the computational DAG for the instance.
+     * @brief Returns a reference to the computational DAG of the instance.
      */
     [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; }
     [[nodiscard]] Graph_t &getComputationalDag() { return cdag; }
 
     /**
      * @brief Returns the number of vertices in the computational DAG.
-     *
-     * @return The number of vertices.
      */
     [[nodiscard]] vertex_idx_t<Graph_t> numberOfVertices() const { return cdag.num_vertices(); }
 
     /**
      * @brief Returns a view over the vertex indices of the computational DAG.
-     * @return A view over the vertex indices.
      */
     [[nodiscard]] auto vertices() const { return cdag.vertices(); }
 
     /**
      * @brief Returns a view over the processor indices of the BSP architecture.
-     * @return A view over the processor indices.
      */
     [[nodiscard]] auto processors() const { return architecture.processors(); }
 
     /**
      * @brief Returns the number of processors in the BSP architecture.
-     * @return The number of processors in the BSP architecture.
      */
     [[nodiscard]] unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); }
 
     /**
-     * @brief Returns the communication costs between two processors.
+     * @brief Returns the communication costs between two processors. Does not perform bounds checking.
      * The communication costs are the send costs multiplied by the communication costs.
      *
      * @param p_send The index of the sending processor.
      * @param p_receive The index of the receiving processor.
-     *
-     * @return The communication costs between the two processors.
      */
     [[nodiscard]] v_commw_t<Graph_t> communicationCosts(const unsigned p_send, const unsigned p_receive) const {
         return architecture.communicationCosts(p_send, p_receive);
     }
 
     /**
-     * @brief Returns the send costs between two processors.
+     * @brief Returns the send costs between two processors. Does not perform bounds checking.
+     * Does not take the communication costs into account.
      *
      * @param p_send The index of the sending processor.
      * @param p_receive The index of the receiving processor.
-     *
-     * @return The send costs between the two processors.
      */
     [[nodiscard]] v_commw_t<Graph_t> sendCosts(const unsigned p_send, const unsigned p_receive) const {
         return architecture.sendCosts(p_send, p_receive);
@@ -207,14 +168,11 @@ class BspInstance {
 
     /**
      * @brief Returns a copy of the send costs matrix.
-     * @return A copy of the send costs matrix.
      */
     [[nodiscard]] std::vector<std::vector<v_commw_t<Graph_t>>> sendCosts() const { return architecture.sendCosts(); }
 
     /**
      * @brief Returns the flattened send costs vector.
-     *
-     * @return The flattened send costs vector.
      */
     [[nodiscard]] const std::vector<v_commw_t<Graph_t>> &sendCostsVector() const {
         return architecture.sendCostsVector();
@@ -222,89 +180,51 @@ class BspInstance {
 
     /**
      * @brief Returns the communication costs of the BSP architecture.
-     *
-     * @return The communication costs as an unsigned integer.
      */
     [[nodiscard]] v_commw_t<Graph_t> communicationCosts() const { return architecture.communicationCosts(); }
 
     /**
      * @brief Returns the synchronization costs of the BSP architecture.
-     *
-     * @return The synchronization costs as an unsigned integer.
      */
     [[nodiscard]] v_commw_t<Graph_t> synchronisationCosts() const { return architecture.synchronisationCosts(); }
 
-    /**
-     * @brief Returns whether the architecture is NUMA.
-     *
-     * @return True if the architecture is NUMA, false otherwise.
-     */
-    [[nodiscard]] bool isNumaInstance() const { return architecture.isNumaArchitecture(); }
-
     /**
      * @brief Returns the memory bound for a specific processor.
      *
      * @param proc The processor index.
-     * @return The memory bound for the processor.
      */
     [[nodiscard]] v_memw_t<Graph_t> memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); }
 
-    /**
-     * @brief Returns the maximum memory bound for a specific processor type.
-     *
-     * @param procType The processor type.
-     * @return The maximum memory bound for the processor type.
-     */
-    [[nodiscard]] v_memw_t<Graph_t> maxMemoryBoundProcType(const unsigned procType) const {
-        return architecture.maxMemoryBoundProcType(procType);
-    }
-
-    /**
-     * @brief Returns the maximum memory bound for a specific node type.
-     *
-     * This considers all compatible processor types for the given node type.
-     *
-     * @param nodeType The node type.
-     * @return The maximum memory bound for the node type.
-     */
-    [[nodiscard]] v_memw_t<Graph_t> maxMemoryBoundNodeType(const unsigned nodeType) const {
-        int max_mem = 0;
-        for (unsigned proc = 0; proc < architecture.getNumberOfProcessorTypes(); proc++) {
-            if (isCompatibleType(nodeType, architecture.processorType(proc))) {
-                max_mem = std::max(max_mem, architecture.memoryBound(proc));
-            }
-        }
-        return max_mem;
-    }
-
     /**
      * @brief Sets the communication costs of the BSP architecture.
-     *
      * @param cost The communication costs to set.
      */
     void setCommunicationCosts(const v_commw_t<Graph_t> cost) { architecture.setCommunicationCosts(cost); }
 
     /**
      * @brief Sets the synchronisation costs of the BSP architecture.
-     *
      * @param cost The synchronisation costs to set.
      */
     void setSynchronisationCosts(const v_commw_t<Graph_t> cost) { architecture.setSynchronisationCosts(cost); }
 
     /**
-     * @brief Sets the number of processors in the BSP architecture.
-     *
-     * @param num The number of processors to set.
+     * @brief Sets the number of processors. Processor type is set to 0 for all processors.
+     * Resets send costs to uniform (1) and diagonal to 0. The memory bound is set to 100 for all processors.
+     * @param num The number of processors. Must be greater than 0.
+     * @throws std::invalid_argument if the number of processors is 0.
      */
     void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); }
 
     /**
-     * @brief Checks if the memory constraints are feasible for the given instance.
-     *
+     * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors.
      * @return True if the memory constraints are feasible, false otherwise.
      */
     [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const {
-        const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType();
+        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
+        for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
+            max_memory_per_proc_type[architecture.processorType(proc)] =
+                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
+        }
 
         for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) {
             v_memw_t<Graph_t> max_memory_of_type = max_memory_weight(vertType, cdag);
@@ -326,53 +246,13 @@ class BspInstance {
     }
 
     /**
-     * @brief Adjusts the memory constraints of the architecture to ensure feasibility.
-     *
-     * If a node type requires more memory than available on any compatible processor type,
-     * the memory bound of compatible processors is increased.
-     */
-    void adjust_memory_constraints() {
-        const auto max_memory_per_proc_type = calculateMaxMemoryPerProcessorType();
-
-        for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) {
-            v_memw_t<Graph_t> max_memory_of_type = max_memory_weight(vertType, cdag);
-            bool fits = false;
-
-            for (unsigned proc_type = 0; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) {
-                if (isCompatibleType(vertType, proc_type)) {
-                    fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]);
-                    if (fits)
-                        break;
-                }
-            }
-
-            if (!fits) {
-                std::cout << "Warning: Computational DAG memory weight exceeds architecture memory bound." << std::endl;
-                std::cout << "VertexType " << vertType << " has memory "
-                          << " and exceeds compatible processor types memory limit." << std::endl;
-
-                for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
-                    if (isCompatibleType(vertType, architecture.processorType(proc))) {
-                        std::cout << "Increasing memory of processor " << proc << " of type "
-                                  << architecture.processorType(proc) << " to " << max_memory_of_type << "."
-                                  << std::endl;
-                        architecture.setMemoryBound(max_memory_of_type, proc);
-                    }
-                }
-            }
-        }
-    }
-
-    /**
-     * @brief Returns the processor type for a given processor index.
-     *
+     * @brief Returns the processor type for a given processor index. Does not perform bounds checking.
      * @param proc The processor index.
-     * @return The processor type.
      */
     [[nodiscard]] v_type_t<Graph_t> processorType(const unsigned proc) const { return architecture.processorType(proc); }
 
     /**
-     * @brief Checks if a node is compatible with a processor.
+     * @brief Checks if a node is compatible with a processor. Does not perform bounds checking.
      *
      * @param node The node index.
      * @param processor_id The processor index.
@@ -383,7 +263,7 @@ class BspInstance {
     }
 
     /**
-     * @brief Checks if a node type is compatible with a processor type.
+     * @brief Checks if a node type is compatible with a processor type. Does not perform bounds checking.
      *
      * @param nodeType The node type.
      * @param processorType The processor type.
@@ -394,46 +274,37 @@ class BspInstance {
     }
 
     /**
-     * @brief Sets the node-processor compatibility matrix.
-     *
+     * @brief Sets the node-processor compatibility matrix. The matrix is copied.
      * @param compatibility_ The compatibility matrix.
+     * @throw std::runtime_error if the matrix has fewer rows than node types, or its first row has fewer columns than processor types.
      */
     void setNodeProcessorCompatibility(const std::vector<std::vector<bool>> &compatibility_) {
+        if (compatibility_.size() < cdag.num_vertex_types() || (!compatibility_.empty() && compatibility_[0].size() < architecture.getNumberOfProcessorTypes())) {
+            throw std::runtime_error("Compatibility matrix size does not match the number of node types and processor types.");
+        }
         nodeProcessorCompatibility = compatibility_;
     }
 
     /**
-     * @brief Returns the node-processor compatibility matrix.
-     *
-     * @return The node-processor compatibility matrix.
+     * @brief Returns the node type - processor type compatibility matrix.
      */
     [[nodiscard]] const std::vector<std::vector<bool>> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; }
 
     /**
-     * @brief Sets the compatibility matrix to be diagonal.
-     *
-     * This implies that node type `i` is only compatible with processor type `i`.
-     *
+     * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`.
      * @param number_of_types The number of types.
      */
     void setDiagonalCompatibilityMatrix(const unsigned number_of_types) {
-        nodeProcessorCompatibility =
-            std::vector<std::vector<bool>>(number_of_types, std::vector<bool>(number_of_types, false));
+        nodeProcessorCompatibility.assign(number_of_types, std::vector<bool>(number_of_types, false));
         for (unsigned i = 0; i < number_of_types; ++i)
             nodeProcessorCompatibility[i][i] = true;
     }
 
     /**
-     * @brief Sets the compatibility matrix to all ones.
-     *
-     * This implies that all node types are compatible with all processor types.
+     * @brief Sets the compatibility matrix to all ones. This implies that all node types are compatible with all processor types.
      */
     void setAllOnesCompatibilityMatrix() {
-        unsigned number_of_node_types = cdag.num_vertex_types();
-        unsigned number_of_proc_types = architecture.getNumberOfProcessorTypes();
-
-        nodeProcessorCompatibility =
-            std::vector<std::vector<bool>>(number_of_node_types, std::vector<bool>(number_of_proc_types, true));
+        nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector<bool>(architecture.getNumberOfProcessorTypes(), true));
     }
 
     /**
@@ -454,33 +325,8 @@ class BspInstance {
         return compatibleProcTypes;
     }
 
-    /**
-     * @brief Returns a compatibility matrix between node types.
-     *
-     * Two node types are compatible if they share at least one compatible processor type.
-     *
-     * @return A matrix where `[i][j]` is true if node type `i` and node type `j` are compatible.
-     */
-    [[nodiscard]] std::vector<std::vector<bool>> getNodeNodeCompatabilityMatrix() const {
-        std::vector<std::vector<bool>> compMat(cdag.num_vertex_types(),
-                                               std::vector<bool>(cdag.num_vertex_types(), false));
-        for (unsigned nodeType1 = 0; nodeType1 < cdag.num_vertex_types(); nodeType1++) {
-            for (unsigned nodeType2 = 0; nodeType2 < cdag.num_vertex_types(); nodeType2++) {
-                for (unsigned procType = 0; procType < architecture.getNumberOfProcessorTypes(); procType++) {
-                    if (isCompatibleType(nodeType1, procType) && isCompatibleType(nodeType2, procType)) {
-                        compMat[nodeType1][nodeType2] = true;
-                        break;
-                    }
-                }
-            }
-        }
-        return compMat;
-    }
-
     /**
      * @brief Returns the node-processor compatibility matrix.
-     *
-     * @return The node-processor compatibility matrix.
      */
     [[nodiscard]] const std::vector<std::vector<bool>> &getNodeProcessorCompatibilityMatrix() const {
         return nodeProcessorCompatibility;
diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp
index 7a02d0f3..9e5a5d52 100644
--- a/include/osp/bsp/model/BspSchedule.hpp
+++ b/include/osp/bsp/model/BspSchedule.hpp
@@ -25,8 +25,8 @@ limitations under the License.
 
 #include "IBspSchedule.hpp"
 #include "IBspScheduleEval.hpp"
-#include "SetSchedule.hpp"
 #include "osp/bsp/model/cost/LazyCommunicationCost.hpp"
+#include "osp/bsp/model/util/SetSchedule.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 
 namespace osp {
@@ -213,7 +213,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param node The node for which to return the assigned superstep.
      * @return The superstep assigned to the specified node.
      */
-    [[nodiscard]] unsigned assignedSuperstep(vertex_idx node) const override { return node_to_superstep_assignment[node]; }
+    [[nodiscard]] unsigned assignedSuperstep(const vertex_idx node) const override { return node_to_superstep_assignment[node]; }
 
     /**
      * @brief Returns the processor assigned to the specified node.
@@ -221,7 +221,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param node The node for which to return the assigned processor.
      * @return The processor assigned to the specified node.
      */
-    [[nodiscard]] unsigned assignedProcessor(vertex_idx node) const override { return node_to_processor_assignment[node]; }
+    [[nodiscard]] unsigned assignedProcessor(const vertex_idx node) const override { return node_to_processor_assignment[node]; }
 
     /**
      * @brief Returns the superstep assignment for the schedule.
@@ -254,7 +254,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param node The node for which to set the assigned superstep.
      * @param superstep The superstep to assign to the node.
      */
-    void setAssignedSuperstep(vertex_idx node, unsigned superstep) {
+    void setAssignedSuperstep(const vertex_idx node, const unsigned superstep) {
         if (node < instance->numberOfVertices()) {
             node_to_superstep_assignment[node] = superstep;
 
@@ -273,7 +273,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param node The node for which to set the assigned superstep.
      * @param superstep The superstep to assign to the node.
      */
-    void setAssignedSuperstepNoUpdateNumSuperstep(vertex_idx node, unsigned superstep) {
+    void setAssignedSuperstepNoUpdateNumSuperstep(const vertex_idx node, const unsigned superstep) {
         node_to_superstep_assignment.at(node) = superstep;
     }
 
@@ -283,7 +283,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param node The node for which to set the assigned processor.
      * @param processor The processor to assign to the node.
      */
-    void setAssignedProcessor(vertex_idx node, unsigned processor) {
+    void setAssignedProcessor(const vertex_idx node, const unsigned processor) {
         node_to_processor_assignment.at(node) = processor;
     }
 
@@ -477,7 +477,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param processor The processor index.
      * @return A vector of nodes assigned to the specified processor.
      */
-    [[nodiscard]] std::vector<vertex_idx_t<Graph_t>> getAssignedNodeVector(unsigned int processor) const {
+    [[nodiscard]] std::vector<vertex_idx_t<Graph_t>> getAssignedNodeVector(const unsigned processor) const {
         std::vector<vertex_idx_t<Graph_t>> vec;
 
         for (const auto &node : instance->vertices()) {
@@ -496,7 +496,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param superstep The superstep index.
      * @return A vector of nodes assigned to the specified processor and superstep.
      */
-    [[nodiscard]] std::vector<vertex_idx_t<Graph_t>> getAssignedNodeVector(unsigned int processor, unsigned int superstep) const {
+    [[nodiscard]] std::vector<vertex_idx_t<Graph_t>> getAssignedNodeVector(const unsigned processor, const unsigned superstep) const {
         std::vector<vertex_idx_t<Graph_t>> vec;
 
         for (const auto &node : instance->vertices()) {
@@ -513,7 +513,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      *
      * @param number_of_supersteps_ The number of supersteps.
      */
-    void setNumberOfSupersteps(unsigned int number_of_supersteps_) {
+    void setNumberOfSupersteps(const unsigned number_of_supersteps_) {
         number_of_supersteps = number_of_supersteps_;
     }
 
@@ -523,7 +523,7 @@ class BspSchedule : public IBspSchedule<Graph_t>, public IBspScheduleEval<Graph_
      * @param processor The processor index.
      * @return The number of nodes assigned to the specified processor.
      */
-    [[nodiscard]] unsigned numAssignedNodes(unsigned processor) const {
+    [[nodiscard]] unsigned numAssignedNodes(const unsigned processor) const {
         unsigned num = 0;
 
         for (const auto &node : instance->vertices()) {
diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
index a4c5800a..c4d8df30 100644
--- a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
+++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp
@@ -18,6 +18,7 @@ limitations under the License.
 
 #pragma once
 
+#include "osp/bsp/model/BspInstance.hpp"
 #include <vector>
 
 namespace osp {
@@ -33,7 +34,7 @@ namespace osp {
 template<typename Graph_t>
 class CompatibleProcessorRange {
 
-    std::vector<std::vector<unsigned>> type_processor_idx;
+    std::vector<std::vector<unsigned>> typeProcessorIdx;
     const BspInstance<Graph_t> *instance = nullptr;
 
   public:
@@ -60,13 +61,12 @@ class CompatibleProcessorRange {
         instance = &inst;
 
         if constexpr (has_typed_vertices_v<Graph_t>) {
-
-            type_processor_idx = std::vector<std::vector<unsigned>>(inst.getComputationalDag().num_vertex_types());
+            typeProcessorIdx.resize(inst.getComputationalDag().num_vertex_types());
 
             for (v_type_t<Graph_t> v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) {
                 for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++)
                     if (inst.isCompatibleType(v_type, inst.processorType(proc)))
-                        type_processor_idx[v_type].push_back(proc);
+                        typeProcessorIdx[v_type].push_back(proc);
             }
         }
     }
@@ -77,11 +77,10 @@ class CompatibleProcessorRange {
      * @param type The node type.
      * @return A const reference to a vector of compatible processor indices.
      */
-    [[nodiscard]] const auto &compatible_processors_type(v_type_t<Graph_t> type) const {
+    [[nodiscard]] const auto &compatible_processors_type(const v_type_t<Graph_t> type) const {
         assert(instance != nullptr);
-
         if constexpr (has_typed_vertices_v<Graph_t>) {
-            return type_processor_idx[type];
+            return typeProcessorIdx[type];
         } else {
             return instance->processors();
         }
@@ -93,7 +92,8 @@ class CompatibleProcessorRange {
      * @param vertex The vertex index.
      * @return A const reference to a vector of compatible processor indices.
      */
-    [[nodiscard]] const auto &compatible_processors_vertex(vertex_idx_t<Graph_t> vertex) const {
+    [[nodiscard]] const auto &compatible_processors_vertex(const vertex_idx_t<Graph_t> vertex) const {
+        assert(instance != nullptr);
         return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex));
     }
 };
diff --git a/include/osp/bsp/model/SetSchedule.hpp b/include/osp/bsp/model/util/SetSchedule.hpp
similarity index 99%
rename from include/osp/bsp/model/SetSchedule.hpp
rename to include/osp/bsp/model/util/SetSchedule.hpp
index da851f98..61946fae 100644
--- a/include/osp/bsp/model/SetSchedule.hpp
+++ b/include/osp/bsp/model/util/SetSchedule.hpp
@@ -18,7 +18,7 @@ limitations under the License.
 
 #pragma once
 
-#include "IBspSchedule.hpp"
+#include "osp/bsp/model/IBspSchedule.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 
 namespace osp {
diff --git a/include/osp/bsp/model/VectorSchedule.hpp b/include/osp/bsp/model/util/VectorSchedule.hpp
similarity index 99%
rename from include/osp/bsp/model/VectorSchedule.hpp
rename to include/osp/bsp/model/util/VectorSchedule.hpp
index a81cc3e5..ea856c1b 100644
--- a/include/osp/bsp/model/VectorSchedule.hpp
+++ b/include/osp/bsp/model/util/VectorSchedule.hpp
@@ -18,7 +18,7 @@ limitations under the License.
 
 #pragma once
 
-#include "IBspSchedule.hpp"
+#include "osp/bsp/model/IBspSchedule.hpp"
 #include "osp/concepts/computational_dag_concept.hpp"
 #include <vector>
 
diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp
index aa199c45..45b58ca3 100644
--- a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp
+++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp
@@ -21,14 +21,14 @@ limitations under the License.
 #include <callbackbase.h>
 #include <coptcpp_pch.h>
 
+#include "osp/auxiliary/io/DotFileWriter.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/BspScheduleCS.hpp"
 #include "osp/bsp/model/BspScheduleRecomp.hpp"
 #include "osp/bsp/model/MaxBspSchedule.hpp"
 #include "osp/bsp/model/MaxBspScheduleCS.hpp"
-#include "osp/bsp/model/VectorSchedule.hpp"
+#include "osp/bsp/model/util/VectorSchedule.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
-#include "osp/auxiliary/io/DotFileWriter.hpp"
 
 namespace osp {
 
@@ -111,17 +111,19 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
                         if (allow_recomputation_cb) {
 
-                        auto sched = constructBspScheduleRecompFromCallback();
-                        DotFileWriter sched_writer;
-                        sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" +
-                                            std::to_string(counter) + "_schedule.dot", sched);
+                            auto sched = constructBspScheduleRecompFromCallback();
+                            DotFileWriter sched_writer;
+                            sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" +
+                                                                   std::to_string(counter) + "_schedule.dot",
+                                                               sched);
 
                         } else {
 
-                        BspSchedule<Graph_t> sched = constructBspScheduleFromCallback();
-                        DotFileWriter sched_writer;
-                        sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" +
-                                                   std::to_string(counter) + "_schedule.dot", sched);
+                            BspSchedule<Graph_t> sched = constructBspScheduleFromCallback();
+                            DotFileWriter sched_writer;
+                            sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" +
+                                                            std::to_string(counter) + "_schedule.dot",
+                                                        sched);
                         }
                         counter++;
                     }
@@ -259,7 +261,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             }
         }
 
-        if(is_max_bsp && number_of_supersteps>0) // can ignore last 2 comm phases in this case
+        if (is_max_bsp && number_of_supersteps > 0) // can ignore last 2 comm phases in this case
             --number_of_supersteps;
 
         schedule.getCommunicationSchedule().clear();
@@ -268,7 +270,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
                 for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
                     if (p_from != p_to) {
-                        for (unsigned int step = 0; step < number_of_supersteps-1; step++) {
+                        for (unsigned int step = 0; step < number_of_supersteps - 1; step++) {
                             if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step]
                                                                               [static_cast<int>(node)]
                                                                                   .Get(COPT_DBLINFO_VALUE) >= .99) {
@@ -302,7 +304,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
             for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) {
 
-                for (unsigned step = 0; step < number_of_supersteps-1; step++) {
+                for (unsigned step = 0; step < number_of_supersteps - 1; step++) {
 
                     if (node_to_processor_superstep_var[node][processor][static_cast<int>(step)].Get(COPT_DBLINFO_VALUE) >= .99) {
                         schedule.assignments(node).emplace_back(processor, step);
@@ -334,46 +336,35 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         }
     }
 
-
     void loadInitialSchedule(Model &model, const BspInstance<Graph_t> &instance) {
 
         if (use_initial_schedule_recomp &&
             (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() ||
-            instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() ||
-            instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) {
+             instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() ||
+             instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) {
             throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not "
                                         "agree with those of the initial schedule's instance!");
         }
 
         if (!use_initial_schedule_recomp & use_initial_schedule &&
             (max_number_supersteps < initial_schedule->numberOfSupersteps() ||
-            instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
-            instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
+             instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() ||
+             instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) {
             throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not "
                                         "agree with those of the initial schedule's instance!");
         }
 
-        const auto& DAG = use_initial_schedule_recomp ?
-                        initial_schedule_recomp->getInstance().getComputationalDag() :
-                        initial_schedule->getInstance().getComputationalDag();
+        const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() : initial_schedule->getInstance().getComputationalDag();
 
-        const auto& arch = use_initial_schedule_recomp ?
-                        initial_schedule_recomp->getInstance().getArchitecture() :
-                        initial_schedule->getInstance().getArchitecture();
+        const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture() : initial_schedule->getInstance().getArchitecture();
 
-        const unsigned& num_processors = use_initial_schedule_recomp ?
-                        initial_schedule_recomp->getInstance().numberOfProcessors() :
-                        initial_schedule->getInstance().numberOfProcessors();
+        const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() : initial_schedule->getInstance().numberOfProcessors();
 
-        const unsigned& num_supersteps = use_initial_schedule_recomp ?
-                        initial_schedule_recomp->numberOfSupersteps() :
-                        initial_schedule->numberOfSupersteps();
+        const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() : initial_schedule->numberOfSupersteps();
 
-        const auto &cs = use_initial_schedule_recomp ?
-                        initial_schedule_recomp->getCommunicationSchedule() :
-                        initial_schedule->getCommunicationSchedule();
+        const auto &cs = use_initial_schedule_recomp ? initial_schedule_recomp->getCommunicationSchedule() : initial_schedule->getCommunicationSchedule();
 
-        assert(max_number_supersteps <= static_cast<unsigned>( std::numeric_limits<int>::max()) );
+        assert(max_number_supersteps <= static_cast<unsigned>(std::numeric_limits<int>::max()));
         for (unsigned step = 0; step < max_number_supersteps; step++) {
 
             if (step < num_supersteps) {
@@ -387,28 +378,23 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             // model.SetMipStart(max_comm_superstep_var[step], COPT_INFINITY);
         }
 
-        std::vector<std::set<std::pair<unsigned, unsigned> > > computed(DAG.num_vertices());
-        for (const auto &node : DAG.vertices())
-        {
-            if(use_initial_schedule_recomp)
-                for (const std::pair<unsigned, unsigned>& assignment : initial_schedule_recomp->assignments(node))
+        std::vector<std::set<std::pair<unsigned, unsigned>>> computed(DAG.num_vertices());
+        for (const auto &node : DAG.vertices()) {
+            if (use_initial_schedule_recomp)
+                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node))
                     computed[node].emplace(assignment);
             else
-                computed[node].emplace(initial_schedule->assignedProcessor(node),initial_schedule->assignedSuperstep(node));
+                computed[node].emplace(initial_schedule->assignedProcessor(node), initial_schedule->assignedSuperstep(node));
         }
 
-        std::vector<std::vector<unsigned> > first_at(DAG.num_vertices(), std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
-        for (const auto &node : DAG.vertices())
-        {
-            if(use_initial_schedule_recomp)
-            {
-                for (const std::pair<unsigned, unsigned>& assignment : initial_schedule_recomp->assignments(node))
+        std::vector<std::vector<unsigned>> first_at(DAG.num_vertices(), std::vector<unsigned>(num_processors, std::numeric_limits<unsigned>::max()));
+        for (const auto &node : DAG.vertices()) {
+            if (use_initial_schedule_recomp) {
+                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node))
                     first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second);
-            }
-            else
-            {
+            } else {
                 first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)],
-                                                                                    initial_schedule->assignedSuperstep(node) );
+                                                                                     initial_schedule->assignedSuperstep(node));
             }
         }
 
@@ -431,7 +417,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
                                         comm_processor_to_processor_superstep_node_var[p1][p2][step]
                                                                                       [static_cast<int>(node)],
                                         1);
-                                        first_at[node][p2] = std::min(first_at[node][p2], step+staleness);
+                                    first_at[node][p2] = std::min(first_at[node][p2], step + staleness);
                                 } else {
                                     model.SetMipStart(
                                         comm_processor_to_processor_superstep_node_var[p1][p2][step]
@@ -447,14 +433,15 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
 
         for (const auto &node : DAG.vertices())
             for (unsigned proc = 0; proc < num_processors; proc++)
-                for(unsigned step = 0; step < max_number_supersteps; step++)
-                {
-                    if(step >= first_at[node][proc])
+                for (unsigned step = 0; step < max_number_supersteps; step++) {
+                    if (step >= first_at[node][proc])
                         model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
-                                                                                        [static_cast<int>(node)], 1);
+                                                                                        [static_cast<int>(node)],
+                                          1);
                     else
                         model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step]
-                                                                                        [static_cast<int>(node)], 0);
+                                                                                        [static_cast<int>(node)],
+                                          0);
                 }
 
         for (const auto &node : DAG.vertices()) {
@@ -478,16 +465,13 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
             max_number_supersteps,
             std::vector<v_workw_t<Graph_t>>(num_processors, 0));
 
-        if(use_initial_schedule_recomp)
-        {
+        if (use_initial_schedule_recomp) {
             for (const auto &node : initial_schedule_recomp->getInstance().vertices()) {
-                for (const std::pair<unsigned, unsigned>& assignment : initial_schedule_recomp->assignments(node)) {
+                for (const std::pair<unsigned, unsigned> &assignment : initial_schedule_recomp->assignments(node)) {
                     work[assignment.second][assignment.first] += DAG.vertex_work_weight(node);
                 }
             }
-        }
-        else
-        {
+        } else {
             for (const auto &node : initial_schedule->getInstance().vertices())
                 work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] +=
                     DAG.vertex_work_weight(node);
@@ -544,15 +528,14 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
        Variables
        */
 
-        assert(max_number_supersteps <= static_cast<unsigned>( std::numeric_limits<int>::max() ));
-        assert(instance.numberOfProcessors() <= static_cast<unsigned>( std::numeric_limits<int>::max()) );
+        assert(max_number_supersteps <= static_cast<unsigned>(std::numeric_limits<int>::max()));
+        assert(instance.numberOfProcessors() <= static_cast<unsigned>(std::numeric_limits<int>::max()));
 
         // variables indicating if superstep is used at all
         superstep_used_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_used");
 
         VarArray superstep_has_comm, mergeable_superstep_penalty;
-        if(is_max_bsp)
-        {
+        if (is_max_bsp) {
             // variables indicating if there is any communication in superstep
             superstep_has_comm = model.AddVars(static_cast<int>(max_number_supersteps), COPT_BINARY, "superstep_has_comm");
             // variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp
@@ -676,13 +659,12 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
                     if (step > 0) {
 
                         for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
-                            if(!is_max_bsp || p_from == processor){
+                            if (!is_max_bsp || p_from == processor) {
                                 expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1]
-                                                                                   [static_cast<int>(node)];
-                            }
-                            else if(step > 1){
+                                                                                       [static_cast<int>(node)];
+                            } else if (step > 1) {
                                 expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 2]
-                                                                                   [static_cast<int>(node)];
+                                                                                       [static_cast<int>(node)];
                             }
                         }
                     }
@@ -700,26 +682,25 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         }
 
         // synchronization cost calculation & forcing continuous schedule in maxBsp
-        if(is_max_bsp)
-        {
+        if (is_max_bsp) {
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
                 Expr expr;
                 for (const auto &node : instance.vertices()) {
                     for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) {
                         for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) {
-                            if(p_from != p_to)
+                            if (p_from != p_to)
                                 expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast<int>(node)];
                         }
                     }
                 }
                 model.AddConstr(static_cast<unsigned>(instance.numberOfProcessors() * instance.numberOfProcessors() * instance.numberOfVertices()) *
-                                superstep_has_comm[static_cast<int>(step)] >= expr);
+                                    superstep_has_comm[static_cast<int>(step)] >=
+                                expr);
             }
 
             // if step i and (i+1) has no comm, and (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize
             for (unsigned int step = 0; step < max_number_supersteps - 2; step++)
-                model.AddConstr(superstep_used_var[static_cast<int>(step + 2)] - superstep_has_comm[static_cast<int>(step)]
-                                - superstep_has_comm[static_cast<int>(step + 1)] <= mergeable_superstep_penalty[static_cast<int>(step)]);
+                model.AddConstr(superstep_used_var[static_cast<int>(step + 2)] - superstep_has_comm[static_cast<int>(step)] - superstep_has_comm[static_cast<int>(step + 1)] <= mergeable_superstep_penalty[static_cast<int>(step)]);
         }
 
         max_comm_superstep_var =
@@ -784,7 +765,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         // vertex type restrictions
         for (const vertex_idx_t<Graph_t> &node : instance.vertices()) {
             for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) {
-                if(!instance.isCompatible(node, processor)) {
+                if (!instance.isCompatible(node, processor)) {
                     for (unsigned int step = 0; step < max_number_supersteps; step++) {
                         model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast<int>(step)] == 0);
                     }
@@ -797,20 +778,17 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
           */
         Expr expr;
 
-        if(is_max_bsp)
-        {
+        if (is_max_bsp) {
             VarArray max_superstep_var = model.AddVars(static_cast<int>(max_number_supersteps), COPT_INTEGER, "max_superstep");
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
                 model.AddConstr(max_superstep_var[static_cast<int>(step)] >= max_work_superstep_var[static_cast<int>(step)]);
-                if(step > 0)
-                    model.AddConstr(max_superstep_var[static_cast<int>(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step-1)]);
+                if (step > 0)
+                    model.AddConstr(max_superstep_var[static_cast<int>(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step - 1)]);
                 expr += max_superstep_var[static_cast<int>(step)];
                 expr += instance.synchronisationCosts() * superstep_has_comm[static_cast<int>(step)];
                 expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast<int>(step)];
             }
-        }
-        else
-        {
+        } else {
             for (unsigned int step = 0; step < max_number_supersteps; step++) {
                 expr += max_work_superstep_var[static_cast<int>(step)] +
                         instance.communicationCosts() * max_comm_superstep_var[static_cast<int>(step)] +
@@ -877,7 +855,7 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var;
     }
 
-        CoptFullScheduler(const BspScheduleRecomp<Graph_t> &schedule)
+    CoptFullScheduler(const BspScheduleRecomp<Graph_t> &schedule)
         : allow_recomputation(true), use_memory_constraint(false), use_initial_schedule_recomp(true),
           write_solutions_found(false), initial_schedule_recomp(&schedule),
           max_number_supersteps(schedule.numberOfSupersteps()) {
@@ -931,7 +909,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         return run_scheduler(schedule);
     }
 
-
     virtual RETURN_STATUS computeScheduleCS(BspScheduleCS<Graph_t> &schedule) override {
         allow_recomputation = false;
         is_max_bsp = false;
@@ -1010,7 +987,6 @@ class CoptFullScheduler : public Scheduler<Graph_t> {
         model.Solve();
     }
 
-
     /**
      * @brief Sets the provided schedule as the initial solution for the ILP.
      *
diff --git a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp
index 5d759687..c051c8dc 100644
--- a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp
+++ b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp
@@ -240,7 +240,7 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
                 SetSolution((*max_work_superstep_var_ptr)[static_cast<int>(step)], max_work);
             }
 
-            if (instance_ptr->isNumaInstance()) {
+            if (instance_ptr->getArchitecture().isNumaArchitecture()) {
 
                 for (unsigned p1 = 0; p1 < instance_ptr->numberOfProcessors(); p1++) {
                     for (unsigned p2 = 0; p2 < instance_ptr->numberOfProcessors(); p2++) {
@@ -670,7 +670,6 @@ class TotalCommunicationScheduler : public Scheduler<Graph_t> {
             loadInitialSchedule();
         }
 
-
         model.SetIntParam(COPT_INTPARAM_THREADS, 128);
         model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1);
         model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1);
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp
index af5bfd19..1c544fd1 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp
@@ -16,12 +16,12 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-//#define KL_DEBUG
+// #define KL_DEBUG
 
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/IBspSchedule.hpp"
-#include "osp/bsp/model/SetSchedule.hpp"
-#include "osp/bsp/model/VectorSchedule.hpp"
+#include "osp/bsp/model/util/SetSchedule.hpp"
+#include "osp/bsp/model/util/VectorSchedule.hpp"
 #include "osp/bsp/scheduler/ImprovementScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
@@ -73,17 +73,15 @@ class kl_current_schedule {
     using EdgeType = edge_desc_t<Graph_t>;
 
   public:
-
     kl_current_schedule(Ikl_cost_function *cost_f_) : cost_f(cost_f_) {
 
-#ifdef KL_DEBUG        
+#ifdef KL_DEBUG
         if constexpr (use_memory_constraint) {
             std::cout << "KLCurrentSchedule constructor with memory constraint" << std::endl;
         } else {
             std::cout << "KLCurrentSchedule constructor without memory constraint" << std::endl;
         }
 #endif
-
     }
 
     virtual ~kl_current_schedule() = default;
@@ -358,7 +356,7 @@ class kl_current_schedule {
         if constexpr (use_memory_constraint) {
 
             memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step);
-        }  
+        }
     }
 
     virtual void initialize_current_schedule(const IBspSchedule<Graph_t> &schedule) {
diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp
index 6fe460f8..862eeacc 100644
--- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp
@@ -16,13 +16,12 @@ limitations under the License.
 @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
-
 #pragma once
 
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/IBspSchedule.hpp"
-#include "osp/bsp/model/SetSchedule.hpp"
-#include "osp/bsp/model/VectorSchedule.hpp"
+#include "osp/bsp/model/util/SetSchedule.hpp"
+#include "osp/bsp/model/util/VectorSchedule.hpp"
 #include "osp/bsp/scheduler/ImprovementScheduler.hpp"
 #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
@@ -54,7 +53,7 @@ struct kl_move_struct {
     bool operator>(kl_move_struct<cost_t, vertex_idx_t> const &rhs) const {
         return (gain > rhs.gain) or (gain >= rhs.gain and node < rhs.node);
     }
-    
+
     kl_move_struct<cost_t, vertex_idx_t> reverse_move() const {
         return kl_move_struct(node, -gain, to_proc, to_step, from_proc, from_step);
     }
@@ -73,13 +72,12 @@ struct pre_move_work_data {
 
     pre_move_work_data() {}
     pre_move_work_data(work_weight_t from_step_max_work_, work_weight_t from_step_second_max_work_, unsigned from_step_max_work_processor_count_,
-                 work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_,
-                 unsigned to_step_max_work_processor_count_)
+                       work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_,
+                       unsigned to_step_max_work_processor_count_)
         : from_step_max_work(from_step_max_work_), from_step_second_max_work(from_step_second_max_work_),
           from_step_max_work_processor_count(from_step_max_work_processor_count_),
           to_step_max_work(to_step_max_work_), to_step_second_max_work(to_step_second_max_work_),
-          to_step_max_work_processor_count(to_step_max_work_processor_count_) {}          
-
+          to_step_max_work_processor_count(to_step_max_work_processor_count_) {}
 };
 
 template<typename Graph_t>
@@ -87,16 +85,16 @@ struct kl_active_schedule_work_datastructures {
 
     using work_weight_t = v_workw_t<Graph_t>;
 
-    const BspInstance<Graph_t> *instance;   
+    const BspInstance<Graph_t> *instance;
     const SetSchedule<Graph_t> *set_schedule;
-   
+
     struct weight_proc {
         work_weight_t work;
         unsigned proc;
 
         weight_proc() : work(0), proc(0) {}
         weight_proc(work_weight_t _work, unsigned _proc) : work(_work), proc(_proc) {}
-    
+
         bool operator<(weight_proc const &rhs) const {
             return (work > rhs.work) or (work == rhs.work and proc < rhs.proc);
         }
@@ -106,17 +104,17 @@ struct kl_active_schedule_work_datastructures {
     std::vector<std::vector<unsigned>> step_processor_position;
     std::vector<unsigned> step_max_work_processor_count;
     work_weight_t max_work_weight;
-    work_weight_t total_work_weight;     
+    work_weight_t total_work_weight;
 
     inline work_weight_t step_max_work(unsigned step) const { return step_processor_work_[step][0].work; }
     inline work_weight_t step_second_max_work(unsigned step) const { return step_processor_work_[step][step_max_work_processor_count[step]].work; }
     inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { return step_processor_work_[step][step_processor_position[step][proc]].work; }
-    inline work_weight_t & step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; }
+    inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; }
 
     template<typename cost_t, typename vertex_idx_t>
-    inline pre_move_work_data<work_weight_t> get_pre_move_work_data(kl_move_struct<cost_t, vertex_idx_t> move) { 
+    inline pre_move_work_data<work_weight_t> get_pre_move_work_data(kl_move_struct<cost_t, vertex_idx_t> move) {
         return pre_move_work_data<work_weight_t>(step_max_work(move.from_step), step_second_max_work(move.from_step), step_max_work_processor_count[move.from_step],
-                                                        step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]); 
+                                                 step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]);
     }
 
     inline void initialize(const SetSchedule<Graph_t> &sched, const BspInstance<Graph_t> &inst, unsigned num_steps) {
@@ -140,20 +138,20 @@ struct kl_active_schedule_work_datastructures {
         unsigned pos = 0;
         const work_weight_t max_work_to = step_processor_work_[step][0].work;
 
-        for (const auto & wp : step_processor_work_[step]) {
+        for (const auto &wp : step_processor_work_[step]) {
             step_processor_position[step][wp.proc] = pos++;
 
             if (wp.work == max_work_to && pos < instance->numberOfProcessors())
-                step_max_work_processor_count[step] = pos; 
+                step_max_work_processor_count[step] = pos;
         }
     }
 
     template<typename cost_t, typename vertex_idx_t>
-    void apply_move(kl_move_struct<cost_t, vertex_idx_t> move, work_weight_t work_weight) {      
+    void apply_move(kl_move_struct<cost_t, vertex_idx_t> move, work_weight_t work_weight) {
 
-        if (work_weight == 0) 
+        if (work_weight == 0)
             return;
-        
+
         if (move.to_step != move.from_step) {
             step_proc_work(move.to_step, move.to_proc) += work_weight;
             step_proc_work(move.from_step, move.from_proc) -= work_weight;
@@ -171,7 +169,7 @@ struct kl_active_schedule_work_datastructures {
             // }
 
             // unsigned to_proc_pos = step_processor_position[move.to_step][move.to_proc];
-            
+
             // while (to_proc_pos > 0 && step_processor_work_[move.to_step][to_proc_pos - 1].work < new_weight_to) {
             //     std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - 1]);
             //     std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]);
@@ -189,15 +187,15 @@ struct kl_active_schedule_work_datastructures {
             //     std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]);
             //     from_proc_pos++;
             // }
-                
+
             // if (prev_max_work_from == prev_weight_from) {
-            //     step_max_work_processor_count[move.from_step]--;        
-            //     if (step_max_work_processor_count[move.from_step] == 0) {  
-            //         step_max_work_processor_count[move.from_step] = from_proc_pos; 
+            //     step_max_work_processor_count[move.from_step]--;
+            //     if (step_max_work_processor_count[move.from_step] == 0) {
+            //         step_max_work_processor_count[move.from_step] = from_proc_pos;
             //     }
-            // }    
+            // }
 
-        } else {            
+        } else {
             step_proc_work(move.to_step, move.to_proc) += work_weight;
             step_proc_work(move.from_step, move.from_proc) -= work_weight;
             arrange_superstep_data(move.to_step);
@@ -209,21 +207,21 @@ struct kl_active_schedule_work_datastructures {
         std::swap(step_processor_position[step1], step_processor_position[step2]);
         std::swap(step_max_work_processor_count[step1], step_max_work_processor_count[step2]);
     }
-    
+
     void override_next_superstep(unsigned step) {
 
         const unsigned next_step = step + 1;
         for (unsigned i = 0; i < instance->numberOfProcessors(); i++) {
-            step_processor_work_[next_step][i] = step_processor_work_[step][i]; 
-            step_processor_position[next_step][i] = step_processor_position[step][i];            
+            step_processor_work_[next_step][i] = step_processor_work_[step][i];
+            step_processor_position[next_step][i] = step_processor_position[step][i];
         }
         step_max_work_processor_count[next_step] = step_max_work_processor_count[step];
     }
 
     void reset_superstep(unsigned step) {
         for (unsigned i = 0; i < instance->numberOfProcessors(); i++) {
-            step_processor_work_[step][i] = {0,i}; 
-            step_processor_position[step][i] = i;            
+            step_processor_work_[step][i] = {0, i};
+            step_processor_position[step][i] = i;
         }
         step_max_work_processor_count[step] = instance->numberOfProcessors() - 1;
     }
@@ -249,12 +247,12 @@ struct kl_active_schedule_work_datastructures {
                     step_max_work_processor_count[step] = 1;
                 } else if (step_processor_work_[step][proc].work == max_work && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) {
                     step_max_work_processor_count[step]++;
-                } 
+                }
             }
 
             std::sort(step_processor_work_[step].begin(), step_processor_work_[step].end());
             unsigned pos = 0;
-            for (const auto & wp : step_processor_work_[step]) {
+            for (const auto &wp : step_processor_work_[step]) {
                 step_processor_position[step][wp.proc] = pos++;
             }
         }
@@ -287,15 +285,15 @@ struct thread_local_active_schedule_data {
         cost = cost_;
         best_cost = cost_;
         feasible = true;
-    }  
-   
+    }
+
     inline void update_cost(cost_t change_in_cost) {
-        cost += change_in_cost;        
+        cost += change_in_cost;
 
         if (cost <= best_cost && feasible) {
             best_cost = cost;
             best_schedule_idx = static_cast<unsigned>(applied_moves.size());
-        }    
+        }
     }
 };
 
@@ -319,23 +317,23 @@ class kl_active_schedule {
   public:
     virtual ~kl_active_schedule() = default;
 
-    inline const BspInstance<Graph_t> & getInstance() const { return *instance; }
-    inline const VectorSchedule<Graph_t> & getVectorSchedule() const { return vector_schedule; }
-    inline VectorSchedule<Graph_t> & getVectorSchedule() { return vector_schedule; }
-    inline const SetSchedule<Graph_t> & getSetSchedule() const { return set_schedule; }
+    inline const BspInstance<Graph_t> &getInstance() const { return *instance; }
+    inline const VectorSchedule<Graph_t> &getVectorSchedule() const { return vector_schedule; }
+    inline VectorSchedule<Graph_t> &getVectorSchedule() { return vector_schedule; }
+    inline const SetSchedule<Graph_t> &getSetSchedule() const { return set_schedule; }
     inline cost_t get_cost() { return cost; }
     inline bool is_feasible() { return feasible; }
     inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); }
     inline unsigned assigned_processor(VertexType node) const { return vector_schedule.assignedProcessor(node); }
     inline unsigned assigned_superstep(VertexType node) const { return vector_schedule.assignedSuperstep(node); }
-    inline v_workw_t<Graph_t> get_step_max_work(unsigned step) const {return work_datastructures.step_max_work(step); }
-    inline v_workw_t<Graph_t> get_step_second_max_work(unsigned step) const {return work_datastructures.step_second_max_work(step); }
-    inline std::vector<unsigned> & get_step_max_work_processor_count() {return work_datastructures.step_max_work_processor_count; }    
-    inline v_workw_t<Graph_t> get_step_processor_work(unsigned step, unsigned proc) const {return work_datastructures.step_proc_work(step, proc); }
+    inline v_workw_t<Graph_t> get_step_max_work(unsigned step) const { return work_datastructures.step_max_work(step); }
+    inline v_workw_t<Graph_t> get_step_second_max_work(unsigned step) const { return work_datastructures.step_second_max_work(step); }
+    inline std::vector<unsigned> &get_step_max_work_processor_count() { return work_datastructures.step_max_work_processor_count; }
+    inline v_workw_t<Graph_t> get_step_processor_work(unsigned step, unsigned proc) const { return work_datastructures.step_proc_work(step, proc); }
     inline pre_move_work_data<v_workw_t<Graph_t>> get_pre_move_work_data(kl_move move) { return work_datastructures.get_pre_move_work_data(move); }
     inline v_workw_t<Graph_t> get_max_work_weight() { return work_datastructures.max_work_weight; }
     inline v_workw_t<Graph_t> get_total_work_weight() { return work_datastructures.total_work_weight; }
-    inline void set_cost(cost_t cost_) { cost = cost_; }  
+    inline void set_cost(cost_t cost_) { cost = cost_; }
 
     constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v<MemoryConstraint_t>;
 
@@ -343,11 +341,11 @@ class kl_active_schedule {
 
     kl_active_schedule_work_datastructures<Graph_t> work_datastructures;
 
-    inline v_workw_t<Graph_t> get_step_total_work(unsigned step) const {        
-        v_workw_t<Graph_t> total_work = 0;        
+    inline v_workw_t<Graph_t> get_step_total_work(unsigned step) const {
+        v_workw_t<Graph_t> total_work = 0;
         for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
             total_work += get_step_processor_work(step, proc);
-        }       
+        }
         return total_work;
     }
 
@@ -357,18 +355,18 @@ class kl_active_schedule {
 
         set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node);
         set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node);
-       
+
         update_violations(move.node, thread_data);
         thread_data.applied_moves.push_back(move);
 
         work_datastructures.apply_move(move, instance->getComputationalDag().vertex_work_weight(move.node));
         if constexpr (use_memory_constraint) {
             memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step);
-        }       
+        }
     }
 
     template<typename comm_datastructures_t>
-    void revert_to_best_schedule(unsigned start_move, unsigned insert_step, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned & end_step) {
+    void revert_to_best_schedule(unsigned start_move, unsigned insert_step, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned &end_step) {
         const unsigned bound = std::max(start_move, thread_data.best_schedule_idx);
         revert_moves(bound, comm_datastructures, thread_data, start_step, end_step);
 
@@ -391,7 +389,7 @@ class kl_active_schedule {
     }
 
     template<typename comm_datastructures_t>
-    void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned end_step) {
+    void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) {
         revert_moves(bound, comm_datastructures, thread_data, start_step, end_step);
 
         thread_data.current_violations.clear();
@@ -399,10 +397,9 @@ class kl_active_schedule {
         thread_data.cost = new_cost;
     }
 
-
-    void compute_violations(thread_data_t & thread_data);
+    void compute_violations(thread_data_t &thread_data);
     void compute_work_memory_datastructures(unsigned start_step, unsigned end_step);
-    void write_schedule (BspSchedule<Graph_t> &schedule);
+    void write_schedule(BspSchedule<Graph_t> &schedule);
     inline void initialize(const IBspSchedule<Graph_t> &schedule);
     inline void clear();
     void remove_empty_step(unsigned step);
@@ -412,15 +409,14 @@ class kl_active_schedule {
     void swap_steps(const unsigned step1, const unsigned step2);
 
   private:
-
     template<typename comm_datastructures_t>
-    void revert_moves(const size_t bound, comm_datastructures_t & comm_datastructures, thread_data_t & thread_data, unsigned start_step, unsigned end_step) {
+    void revert_moves(const size_t bound, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) {
         while (thread_data.applied_moves.size() > bound) {
             const auto move = thread_data.applied_moves.back().reverse_move();
             thread_data.applied_moves.pop_back();
 
             vector_schedule.setAssignedProcessor(move.node, move.to_proc);
-            vector_schedule.setAssignedSuperstep(move.node, move.to_step);  
+            vector_schedule.setAssignedSuperstep(move.node, move.to_step);
 
             set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node);
             set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node);
@@ -443,16 +439,16 @@ class kl_active_schedule {
             const auto &child = target(edge, instance->getComputationalDag());
 
             if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) {
-                if ((node_step > vector_schedule.assignedSuperstep(child)) || 
+                if ((node_step > vector_schedule.assignedSuperstep(child)) ||
                     (node_step == vector_schedule.assignedSuperstep(child) && node_proc != vector_schedule.assignedProcessor(child))) {
-                        thread_data.current_violations.insert(edge);
-                        thread_data.new_violations[child] = edge;                    
+                    thread_data.current_violations.insert(edge);
+                    thread_data.new_violations[child] = edge;
                 }
             } else {
-                if ((node_step < vector_schedule.assignedSuperstep(child)) || 
+                if ((node_step < vector_schedule.assignedSuperstep(child)) ||
                     (node_step == vector_schedule.assignedSuperstep(child) && node_proc == vector_schedule.assignedProcessor(child))) {
-                        thread_data.current_violations.erase(edge);
-                        thread_data.resolved_violations.insert(edge);                    
+                    thread_data.current_violations.erase(edge);
+                    thread_data.resolved_violations.insert(edge);
                 }
             }
         }
@@ -460,17 +456,17 @@ class kl_active_schedule {
         for (const auto &edge : in_edges(node, instance->getComputationalDag())) {
             const auto &parent = source(edge, instance->getComputationalDag());
 
-            if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) {   
-                if ((node_step < vector_schedule.assignedSuperstep(parent)) || 
+            if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) {
+                if ((node_step < vector_schedule.assignedSuperstep(parent)) ||
                     (node_step == vector_schedule.assignedSuperstep(parent) && node_proc != vector_schedule.assignedProcessor(parent))) {
-                        thread_data.current_violations.insert(edge);
-                        thread_data.new_violations[parent] = edge;                    
+                    thread_data.current_violations.insert(edge);
+                    thread_data.new_violations[parent] = edge;
                 }
             } else {
-                if ((node_step > vector_schedule.assignedSuperstep(parent)) || 
+                if ((node_step > vector_schedule.assignedSuperstep(parent)) ||
                     (node_step == vector_schedule.assignedSuperstep(parent) && node_proc == vector_schedule.assignedProcessor(parent))) {
-                        thread_data.current_violations.erase(edge);
-                        thread_data.resolved_violations.insert(edge);
+                    thread_data.current_violations.erase(edge);
+                    thread_data.resolved_violations.insert(edge);
                 }
             }
         }
@@ -501,7 +497,6 @@ class kl_active_schedule {
             thread_data.feasible = true;
         }
     }
-
 };
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
@@ -515,7 +510,7 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::clear() {
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
-void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_violations(thread_data_t & thread_data) {
+void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_violations(thread_data_t &thread_data) {
 
     thread_data.current_violations.clear();
     thread_data.feasible = true;
@@ -529,12 +524,12 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_violations
         const unsigned target_proc = assigned_processor(target_v);
         const unsigned source_step = assigned_superstep(source_v);
         const unsigned target_step = assigned_superstep(target_v);
-    
+
         if (source_step > target_step || (source_step == target_step && source_proc != target_proc)) {
             thread_data.current_violations.insert(edge);
             thread_data.feasible = false;
-        } 
-    }    
+        }
+    }
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
@@ -563,7 +558,7 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::compute_work_memor
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
-void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::write_schedule (BspSchedule<Graph_t> &schedule) {
+void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::write_schedule(BspSchedule<Graph_t> &schedule) {
     for (const auto v : instance->vertices()) {
         schedule.setAssignedProcessor(v, vector_schedule.assignedProcessor(v));
         schedule.setAssignedSuperstep(v, vector_schedule.assignedSuperstep(v));
@@ -572,91 +567,92 @@ void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::write_schedule (Bs
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
-void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::remove_empty_step(unsigned step) {    
+void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::remove_empty_step(unsigned step) {
     for (unsigned i = step; i < num_steps() - 1; i++) {
-        for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-            for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]){
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
+            for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) {
                 vector_schedule.setAssignedSuperstep(node, i);
             }
         }
         std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]);
-        work_datastructures.swap_steps(i, i+1);
+        work_datastructures.swap_steps(i, i + 1);
         if constexpr (use_memory_constraint) {
-           memory_constraint.swap_steps(i, i+1);
+            memory_constraint.swap_steps(i, i + 1);
         }
     }
     vector_schedule.number_of_supersteps--;
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
-void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_fwd(const unsigned step, const unsigned to_step) {    
+void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_fwd(const unsigned step, const unsigned to_step) {
     for (unsigned i = step; i < to_step; i++) {
-        for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-            for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]){
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
+            for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) {
                 vector_schedule.setAssignedSuperstep(node, i);
             }
         }
         std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]);
         work_datastructures.swap_steps(i, i + 1);
         if constexpr (use_memory_constraint) {
-           memory_constraint.swap_steps(i, i+1);
+            memory_constraint.swap_steps(i, i + 1);
         }
     }
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::insert_empty_step(unsigned step) {
-    unsigned i = vector_schedule.number_of_supersteps++;  
- 
+    unsigned i = vector_schedule.number_of_supersteps++;
+
     for (; i > step; i--) {
-        for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-            for (const auto node : set_schedule.step_processor_vertices[i-1][proc]){
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
+            for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) {
                 vector_schedule.setAssignedSuperstep(node, i);
             }
         }
         std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]);
-        work_datastructures.swap_steps(i-1, i);
+        work_datastructures.swap_steps(i - 1, i);
         if constexpr (use_memory_constraint) {
-           memory_constraint.swap_steps(i - 1, i);
+            memory_constraint.swap_steps(i - 1, i);
         }
-    } 
+    }
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_empty_step_bwd(const unsigned to_step, const unsigned empty_step) {
-    unsigned i = to_step;  
- 
+    unsigned i = to_step;
+
     for (; i > empty_step; i--) {
-        for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-            for (const auto node : set_schedule.step_processor_vertices[i-1][proc]){
+        for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
+            for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) {
                 vector_schedule.setAssignedSuperstep(node, i);
             }
         }
         std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]);
-        work_datastructures.swap_steps(i-1, i);
+        work_datastructures.swap_steps(i - 1, i);
         if constexpr (use_memory_constraint) {
-           memory_constraint.swap_steps(i - 1, i);
+            memory_constraint.swap_steps(i - 1, i);
         }
-    }     
+    }
 }
 
 template<typename Graph_t, typename cost_t, typename MemoryConstraint_t>
 void kl_active_schedule<Graph_t, cost_t, MemoryConstraint_t>::swap_steps(const unsigned step1, const unsigned step2) {
-    if (step1 == step2) return;
+    if (step1 == step2)
+        return;
 
-    for(unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
-        for (const auto node : set_schedule.step_processor_vertices[step1][proc]){
+    for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) {
+        for (const auto node : set_schedule.step_processor_vertices[step1][proc]) {
             vector_schedule.setAssignedSuperstep(node, step2);
         }
-        for (const auto node : set_schedule.step_processor_vertices[step2][proc]){
+        for (const auto node : set_schedule.step_processor_vertices[step2][proc]) {
             vector_schedule.setAssignedSuperstep(node, step1);
         }
     }
     std::swap(set_schedule.step_processor_vertices[step1], set_schedule.step_processor_vertices[step2]);
-    work_datastructures.swap_steps(step1, step2);  
+    work_datastructures.swap_steps(step1, step2);
     if constexpr (use_memory_constraint) {
         memory_constraint.swap_steps(step1, step2);
-    }   
+    }
 }
 
 } // namespace osp
diff --git a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp
index 6961ef92..2cee3d0f 100644
--- a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp
+++ b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp
@@ -19,8 +19,8 @@ limitations under the License.
 #pragma once
 
 #include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/bsp/model/SetSchedule.hpp"
-#include "osp/bsp/model/VectorSchedule.hpp"
+#include "osp/bsp/model/util/SetSchedule.hpp"
+#include "osp/bsp/model/util/VectorSchedule.hpp"
 #include "osp/graph_algorithms/directed_graph_util.hpp"
 
 namespace osp {
@@ -42,7 +42,7 @@ struct is_local_search_memory_constraint<
                                                          std::declval<unsigned>(), std::declval<unsigned>(),
                                                          std::declval<unsigned>(), std::declval<unsigned>())),
                    decltype(std::declval<T>().compute_memory_datastructure(std::declval<unsigned>(),
-                                                                             std::declval<unsigned>())),
+                                                                           std::declval<unsigned>())),
                    decltype(std::declval<T>().swap_steps(std::declval<unsigned>(), std::declval<unsigned>())),
                    decltype(std::declval<T>().reset_superstep(std::declval<unsigned>())),
                    decltype(std::declval<T>().override_superstep(std::declval<unsigned>(), std::declval<unsigned>(),
@@ -105,7 +105,7 @@ struct ls_local_memory_constraint {
 
     void swap_steps(const unsigned step1, const unsigned step2) {
         std::swap(step_processor_memory[step1], step_processor_memory[step2]);
-    } 
+    }
 
     void compute_memory_datastructure(unsigned start_step, unsigned end_step) {
 
@@ -150,7 +150,7 @@ struct ls_local_memory_constraint {
             }
         }
         return true;
-    }  
+    }
 };
 
 template<typename Graph_t>
@@ -378,7 +378,7 @@ struct ls_local_sources_inc_edges_memory_constraint {
     inline void swap_steps(const unsigned step1, const unsigned step2) {
         std::swap(step_processor_memory[step1], step_processor_memory[step2]);
         std::swap(step_processor_pred[step1], step_processor_pred[step2]);
-    }    
+    }
 
     inline void initialize(const SetSchedule<Graph_t> &set_schedule_, const VectorSchedule<Graph_t> &vec_schedule_) {
 
@@ -587,7 +587,6 @@ struct ls_local_sources_inc_edges_memory_constraint {
         }
 
         return true;
-    
     }
 };
 
diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp
index a57e2e84..fa458ba9 100644
--- a/include/osp/bsp/scheduler/Scheduler.hpp
+++ b/include/osp/bsp/scheduler/Scheduler.hpp
@@ -18,6 +18,7 @@ limitations under the License.
 
 #pragma once
 
+#include "osp/auxiliary/return_status.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/model/BspScheduleCS.hpp"
diff --git a/include/osp/coarser/BspScheduleCoarser.hpp b/include/osp/coarser/BspScheduleCoarser.hpp
index 64684b7a..ea4cf9f9 100644
--- a/include/osp/coarser/BspScheduleCoarser.hpp
+++ b/include/osp/coarser/BspScheduleCoarser.hpp
@@ -18,10 +18,10 @@ limitations under the License.
 
 #pragma once
 
-#include "osp/coarser/Coarser.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
-#include "osp/bsp/model/SetSchedule.hpp"
+#include "osp/bsp/model/util/SetSchedule.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/coarser/Coarser.hpp"
 #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp"
 
 namespace osp {
@@ -63,7 +63,6 @@ class BspScheduleCoarser : public CoarserGenContractionMap<Graph_t_in, Graph_t_o
         assert(&dag_in == &schedule->getInstance().getComputationalDag());
         assert(schedule->satisfiesPrecedenceConstraints());
 
-
         SetSchedule<Graph_t_in> set_schedule(*schedule);
         std::vector<VertexType_out> reverse_vertex_map(dag_in.num_vertices(), 0);
         std::vector<std::vector<VertexType_in>> vertex_map;
diff --git a/include/osp/coarser/MultilevelCoarser.hpp b/include/osp/coarser/MultilevelCoarser.hpp
index bbd090e4..f8a1434e 100644
--- a/include/osp/coarser/MultilevelCoarser.hpp
+++ b/include/osp/coarser/MultilevelCoarser.hpp
@@ -23,11 +23,11 @@ limitations under the License.
 #include <set>
 #include <vector>
 
-#include "osp/coarser/Coarser.hpp"
+#include "osp/auxiliary/return_status.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
+#include "osp/coarser/Coarser.hpp"
 #include "osp/coarser/coarser_util.hpp"
 
-
 namespace osp {
 
 template<typename Graph_t, typename Graph_t_coarse>
@@ -36,10 +36,12 @@ class MultilevelCoarseAndSchedule;
 template<typename Graph_t, typename Graph_t_coarse>
 class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
     friend class MultilevelCoarseAndSchedule<Graph_t, Graph_t_coarse>;
+
   private:
     const Graph_t *original_graph;
+
   protected:
-    inline const Graph_t * getOriginalGraph() const { return original_graph; };
+    inline const Graph_t *getOriginalGraph() const { return original_graph; };
 
     std::vector<std::unique_ptr<Graph_t_coarse>> dag_history;
     std::vector<std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>>> contraction_maps;
@@ -49,7 +51,7 @@ class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
     RETURN_STATUS add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map, const Graph_t_coarse &contracted_graph);
     RETURN_STATUS add_contraction(std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map, Graph_t_coarse &&contracted_graph);
     void add_identity_contraction();
-    
+
     std::vector<vertex_idx_t<Graph_t_coarse>> getCombinedContractionMap() const;
 
     virtual RETURN_STATUS run_contractions() = 0;
@@ -62,19 +64,15 @@ class MultilevelCoarser : public Coarser<Graph_t, Graph_t_coarse> {
     MultilevelCoarser(const Graph_t &graph) : original_graph(&graph) {};
     virtual ~MultilevelCoarser() = default;
 
-
     bool coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag,
-                            std::vector<vertex_idx_t<Graph_t_coarse>> &vertex_contraction_map) override;
+                    std::vector<vertex_idx_t<Graph_t_coarse>> &vertex_contraction_map) override;
 
-    
     RETURN_STATUS run(const Graph_t &graph);
     RETURN_STATUS run(const BspInstance<Graph_t> &inst);
 
     virtual std::string getCoarserName() const override = 0;
 };
 
-
-
 template<typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const Graph_t &graph) {
     clear_computation_data();
@@ -91,7 +89,7 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const Graph_t &gra
 }
 
 template<typename Graph_t, typename Graph_t_coarse>
-RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const BspInstance< Graph_t > &inst) {
+RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::run(const BspInstance<Graph_t> &inst) {
     return run(inst.getComputationalDag());
 }
 
@@ -99,15 +97,15 @@ template<typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarser<Graph_t, Graph_t_coarse>::clear_computation_data() {
     dag_history.clear();
     dag_history.shrink_to_fit();
-    
+
     contraction_maps.clear();
     contraction_maps.shrink_to_fit();
 }
 
-
 template<typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarser<Graph_t, Graph_t_coarse>::compactify_dag_history() {
-    if (dag_history.size() < 3) return;
+    if (dag_history.size() < 3)
+        return;
 
     size_t dag_indx_first = dag_history.size() - 2;
     size_t map_indx_first = contraction_maps.size() - 2;
@@ -115,13 +113,13 @@ void MultilevelCoarser<Graph_t, Graph_t_coarse>::compactify_dag_history() {
     size_t dag_indx_second = dag_history.size() - 1;
     size_t map_indx_second = contraction_maps.size() - 1;
 
-    if ( (static_cast<double>( dag_history[dag_indx_first-1]->num_vertices() ) / static_cast<double>( dag_history[dag_indx_second-1]->num_vertices() )) > 1.25 ) return;
-    
+    if ((static_cast<double>(dag_history[dag_indx_first - 1]->num_vertices()) / static_cast<double>(dag_history[dag_indx_second - 1]->num_vertices())) > 1.25)
+        return;
 
     // Compute combined contraction_map
-    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> combi_contraction_map = std::make_unique<std::vector<vertex_idx_t<Graph_t_coarse>>>( contraction_maps[map_indx_first]->size() );
+    std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> combi_contraction_map = std::make_unique<std::vector<vertex_idx_t<Graph_t_coarse>>>(contraction_maps[map_indx_first]->size());
     for (std::size_t vert = 0; vert < contraction_maps[map_indx_first]->size(); ++vert) {
-        combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at( contraction_maps[map_indx_first]->at( vert ) );
+        combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at(contraction_maps[map_indx_first]->at(vert));
     }
 
     // Delete ComputationalDag
@@ -138,7 +136,6 @@ void MultilevelCoarser<Graph_t, Graph_t_coarse>::compactify_dag_history() {
     contraction_maps[map_indx_first] = std::move(combi_contraction_map);
 }
 
-
 template<typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map) {
     std::unique_ptr<Graph_t_coarse> new_graph = std::make_unique<Graph_t_coarse>();
@@ -148,12 +145,12 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const
     bool success = false;
 
     if (dag_history.size() == 0) {
-        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()) );
+        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()));
     } else {
-        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(*(dag_history.back()), *new_graph, *(contraction_maps.back()) );
+        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(*(dag_history.back()), *new_graph, *(contraction_maps.back()));
     }
 
-    dag_history.emplace_back( std::move(new_graph) );
+    dag_history.emplace_back(std::move(new_graph));
 
     if (success) {
         compactify_dag_history();
@@ -166,19 +163,19 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const
 template<typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::vector<vertex_idx_t<Graph_t_coarse>> &&contraction_map) {
     std::unique_ptr<Graph_t_coarse> new_graph = std::make_unique<Graph_t_coarse>();
-    
+
     std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(new std::vector<vertex_idx_t<Graph_t_coarse>>(std::move(contraction_map)));
     contraction_maps.emplace_back(std::move(contr_map_ptr));
 
     bool success = false;
 
     if (dag_history.size() == 0) {
-        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()) );
+        success = coarser_util::construct_coarse_dag<Graph_t, Graph_t_coarse>(*(getOriginalGraph()), *new_graph, *(contraction_maps.back()));
     } else {
-        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(*(dag_history.back()), *new_graph, *(contraction_maps.back()) );
+        success = coarser_util::construct_coarse_dag<Graph_t_coarse, Graph_t_coarse>(*(dag_history.back()), *new_graph, *(contraction_maps.back()));
     }
 
-    dag_history.emplace_back( std::move(new_graph) );
+    dag_history.emplace_back(std::move(new_graph));
 
     if (success) {
         compactify_dag_history();
@@ -188,12 +185,11 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::v
     }
 }
 
-
 template<typename Graph_t, typename Graph_t_coarse>
 RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(const std::vector<vertex_idx_t<Graph_t_coarse>> &contraction_map, const Graph_t_coarse &contracted_graph) {
     std::unique_ptr<Graph_t_coarse> graph_ptr(new Graph_t_coarse(contracted_graph));
     dag_history.emplace_back(std::move(graph_ptr));
-    
+
     std::unique_ptr<std::vector<vertex_idx_t<Graph_t_coarse>>> contr_map_ptr(new std::vector<vertex_idx_t<Graph_t_coarse>>(contraction_map));
     contraction_maps.emplace_back(std::move(contr_map_ptr));
 
@@ -213,7 +209,6 @@ RETURN_STATUS MultilevelCoarser<Graph_t, Graph_t_coarse>::add_contraction(std::v
     return RETURN_STATUS::OSP_SUCCESS;
 }
 
-
 template<typename Graph_t, typename Graph_t_coarse>
 std::vector<vertex_idx_t<Graph_t_coarse>> MultilevelCoarser<Graph_t, Graph_t_coarse>::getCombinedContractionMap() const {
     std::vector<vertex_idx_t<Graph_t_coarse>> combinedContractionMap(original_graph->num_vertices());
@@ -221,23 +216,22 @@ std::vector<vertex_idx_t<Graph_t_coarse>> MultilevelCoarser<Graph_t, Graph_t_coa
 
     for (std::size_t j = 0; j < contraction_maps.size(); ++j) {
         for (std::size_t i = 0; i < combinedContractionMap.size(); ++i) {
-            combinedContractionMap[i] = contraction_maps[j]->at( combinedContractionMap[i] );
+            combinedContractionMap[i] = contraction_maps[j]->at(combinedContractionMap[i]);
         }
     }
 
     return combinedContractionMap;
 }
 
-
-
 template<typename Graph_t, typename Graph_t_coarse>
 bool MultilevelCoarser<Graph_t, Graph_t_coarse>::coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag,
-                                                                    std::vector<vertex_idx_t<Graph_t_coarse>> &vertex_contraction_map) {
+                                                            std::vector<vertex_idx_t<Graph_t_coarse>> &vertex_contraction_map) {
     clear_computation_data();
 
     RETURN_STATUS status = run(dag_in);
 
-    if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return false;
+    if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND)
+        return false;
 
     assert(dag_history.size() != 0);
     coarsened_dag = *(dag_history.back());
@@ -251,20 +245,16 @@ template<typename Graph_t, typename Graph_t_coarse>
 void MultilevelCoarser<Graph_t, Graph_t_coarse>::add_identity_contraction() {
     std::size_t n_vert;
     if (dag_history.size() == 0) {
-        n_vert = static_cast<std::size_t>( original_graph->num_vertices() );
+        n_vert = static_cast<std::size_t>(original_graph->num_vertices());
     } else {
-        n_vert = static_cast<std::size_t>( dag_history.back()->num_vertices() );
+        n_vert = static_cast<std::size_t>(dag_history.back()->num_vertices());
     }
-    
-    std::vector<vertex_idx_t<Graph_t_coarse>> contraction_map( n_vert );
+
+    std::vector<vertex_idx_t<Graph_t_coarse>> contraction_map(n_vert);
     std::iota(contraction_map.begin(), contraction_map.end(), 0);
 
     add_contraction(std::move(contraction_map));
     compactify_dag_history();
 }
 
-
-
-
-
 } // end namespace osp
\ No newline at end of file
diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
index d1d61016..8d6355ad 100644
--- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
@@ -435,15 +435,6 @@ class IsomorphicSubgraphScheduler {
                 }
                 std::cout << std::endl;
                 std::cout << "    Sync cost: " << sub_arch.synchronisationCosts() << ", Comm cost: " << sub_arch.communicationCosts() << std::endl;
-                std::cout << "    Sub-problem compatibility matrix:" << std::endl;
-                const auto &sub_comp_matrix = representative_instance.getNodeNodeCompatabilityMatrix();
-                for (unsigned i = 0; i < sub_comp_matrix.size(); ++i) {
-                    std::cout << "      Node Type " << i << ": [ ";
-                    for (unsigned j = 0; j < sub_comp_matrix[i].size(); ++j) {
-                        std::cout << (sub_comp_matrix[i][j] ? "1" : "0") << " ";
-                    }
-                    std::cout << "]" << std::endl;
-                }
             }
 
             scheduler_for_group_ptr->computeSchedule(bsp_schedule);
diff --git a/include/osp/partitioning/partitioners/partitioning_ILP.hpp b/include/osp/partitioning/partitioners/partitioning_ILP.hpp
index 0482d936..2e6c4e0e 100644
--- a/include/osp/partitioning/partitioners/partitioning_ILP.hpp
+++ b/include/osp/partitioning/partitioners/partitioning_ILP.hpp
@@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 
-@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner   
+@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner
 */
 
 #pragma once
@@ -21,40 +21,39 @@ limitations under the License.
 #include <callbackbase.h>
 #include <coptcpp_pch.h>
 
-#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp"
+#include "osp/auxiliary/return_status.hpp"
 #include "osp/partitioning/model/partitioning.hpp"
+#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp"
 
-namespace osp{
+namespace osp {
 
 template<typename hypergraph_t>
 class HypergraphPartitioningILP : public HypergraphPartitioningILPBase<hypergraph_t> {
 
   protected:
-    std::vector<unsigned> readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance, Model& model);
+    std::vector<unsigned> readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance, Model &model);
 
-    void setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model& model);
+    void setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model &model);
 
-    void setInitialSolution(const Partitioning<hypergraph_t> &partition, Model& model);
+    void setInitialSolution(const Partitioning<hypergraph_t> &partition, Model &model);
 
   public:
-
     virtual ~HypergraphPartitioningILP() override = default;
 
-    RETURN_STATUS computePartitioning(Partitioning<hypergraph_t>& result);
+    RETURN_STATUS computePartitioning(Partitioning<hypergraph_t> &result);
 
     virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILP"; }
 };
 
 template<typename hypergraph_t>
-RETURN_STATUS HypergraphPartitioningILP<hypergraph_t>::computePartitioning(Partitioning<hypergraph_t>& result)
-{
+RETURN_STATUS HypergraphPartitioningILP<hypergraph_t>::computePartitioning(Partitioning<hypergraph_t> &result) {
     Envr env;
     Model model = env.CreateModel("HypergraphPart");
 
     this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model);
     setupExtraVariablesConstraints(result.getInstance(), model);
 
-    if(this->use_initial_solution)
+    if (this->use_initial_solution)
         setInitialSolution(result, model);
 
     this->solveILP(model);
@@ -82,7 +81,7 @@ RETURN_STATUS HypergraphPartitioningILP<hypergraph_t>::computePartitioning(Parti
 }
 
 template<typename hypergraph_t>
-void HypergraphPartitioningILP<hypergraph_t>::setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model& model) {
+void HypergraphPartitioningILP<hypergraph_t>::setupExtraVariablesConstraints(const PartitioningProblem<hypergraph_t> &instance, Model &model) {
 
     using index_type = typename hypergraph_t::vertex_idx;
 
@@ -104,19 +103,17 @@ void HypergraphPartitioningILP<hypergraph_t>::setupExtraVariablesConstraints(con
     // hyperedge indicators match node variables
     for (unsigned part = 0; part < numberOfParts; part++)
         for (index_type node = 0; node < numberOfVertices; node++)
-            for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
+            for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node))
                 model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast<int>(part)] >= this->node_in_partition[node][static_cast<int>(part)]);
-             
 }
 
 // convert generic one-to-many assingment (of base class function) to one-to-one
 template<typename hypergraph_t>
-std::vector<unsigned> HypergraphPartitioningILP<hypergraph_t>::readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance, Model& model)
-{
+std::vector<unsigned> HypergraphPartitioningILP<hypergraph_t>::readCoptAssignment(const PartitioningProblem<hypergraph_t> &instance, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
     std::vector<unsigned> node_to_partition(instance.getHypergraph().num_vertices(), std::numeric_limits<unsigned>::max());
-    std::vector<std::vector<unsigned> > assignmentsGenericForm = this->readAllCoptAssignments(instance, model);
+    std::vector<std::vector<unsigned>> assignmentsGenericForm = this->readAllCoptAssignments(instance, model);
 
     for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++)
         node_to_partition[node] = assignmentsGenericForm[node].front();
@@ -125,21 +122,19 @@ std::vector<unsigned> HypergraphPartitioningILP<hypergraph_t>::readCoptAssignmen
 }
 
 template<typename hypergraph_t>
-void HypergraphPartitioningILP<hypergraph_t>::setInitialSolution(const Partitioning<hypergraph_t> &partition,  Model& model)
-{
+void HypergraphPartitioningILP<hypergraph_t>::setInitialSolution(const Partitioning<hypergraph_t> &partition, Model &model) {
     using index_type = typename hypergraph_t::vertex_idx;
 
-    const std::vector<unsigned>& assignment = partition.assignedPartitions();
-    const unsigned& numPartitions = partition.getInstance().getNumberOfPartitions();
-    if(assignment.size() != partition.getInstance().getHypergraph().num_vertices())
+    const std::vector<unsigned> &assignment = partition.assignedPartitions();
+    const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions();
+    if (assignment.size() != partition.getInstance().getHypergraph().num_vertices())
         return;
 
-    for(index_type node = 0; node < assignment.size(); ++node)
-    {
-        if(assignment[node] >= numPartitions)
+    for (index_type node = 0; node < assignment.size(); ++node) {
+        if (assignment[node] >= numPartitions)
             continue;
-        
-        for(unsigned part = 0; part < numPartitions; ++part)
+
+        for (unsigned part = 0; part < numPartitions; ++part)
             model.SetMipStart(this->node_in_partition[node][static_cast<int>(part)], static_cast<int>(assignment[node] == part));
     }
     model.LoadMipStart();

From 0dab7f21e52ff99fd829cc9eca3566886070e2e7 Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Tue, 9 Dec 2025 09:10:34 +0100
Subject: [PATCH 8/9] documentation

docu

updates
---
 .../AbstractTestSuiteRunner.hpp               |   2 +-
 .../StringToScheduler/run_bsp_scheduler.hpp   |   6 +-
 include/osp/bsp/model/BspArchitecture.hpp     |  38 ++---
 include/osp/bsp/model/BspInstance.hpp         | 158 ++++++++++--------
 .../osp/bsp/scheduler/CoarseAndSchedule.hpp   |   8 +-
 .../IsomorphicSubgraphScheduler.hpp           |   4 +-
 .../TrimmedGroupScheduler.hpp                 |   2 +-
 tests/bsp_instance.cpp                        |   2 +-
 tests/coarser.cpp                             |  97 +++++------
 tests/trimmed_group_scheduler.cpp             |  16 +-
 10 files changed, 163 insertions(+), 170 deletions(-)

diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
index 80282f58..f023f937 100644
--- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
+++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp
@@ -251,7 +251,7 @@ class AbstractTestSuiteRunner {
                 log_stream << "Start Graph: " + filename_graph + "\n";
 
                 BspInstance<GraphType> bsp_instance;
-                bsp_instance.setArchitecture(arch);
+                bsp_instance.getArchitecture() = arch;
                 bool graph_status = false;
                 std::string ext;
                 if (filename_graph.rfind('.') != std::string::npos)
diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp
index 97e7e473..08209efd 100644
--- a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp
+++ b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp
@@ -57,8 +57,8 @@ limitations under the License.
 namespace osp {
 
 const std::set<std::string> get_available_bsp_scheduler_names() {
-    return {"Serial",         "GreedyBsp", "GrowLocal", "BspLocking",  "Cilk",    "Etf",     "GreedyRandom",
-            "GreedyChildren", "Variance",  "MultiHC",   "LocalSearch", "Coarser", "FullILP", "MultiLevel"};
+    return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom",
+            "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"};
 }
 
 template<typename Graph_t>
@@ -247,7 +247,7 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert
         if (!status)
             return RETURN_STATUS::ERROR;
 
-        instance_coarse.setArchitecture(instance.getArchitecture());
+        instance_coarse.getArchitecture() = instance.getArchitecture();
         instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix());
         BspSchedule<boost_graph_t> schedule_coarse(instance_coarse);
 
diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
index 32f37d0f..5ef01b0e 100644
--- a/include/osp/bsp/model/BspArchitecture.hpp
+++ b/include/osp/bsp/model/BspArchitecture.hpp
@@ -90,11 +90,12 @@ inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) {
  * bounds. It provides methods to set and retrieve these values.
  *
  * **Processors:**
- * The architecture consists of p processors, indexed from 0 to p-1.
+ * The architecture consists of p processors, indexed from 0 to p-1. Note that processor indices are represented using `unsigned`.
  *
  * **Processor Types:**
  * Processors can have different types, which are represented by non-negative integers.
- * Processor types are assumed to be consecutive integers starting from 0.
+ * Processor types are assumed to be consecutive integers starting from 0. Note that processor types are represented using `unsigned`.
+ * Processor types are used to express compatibilities with node types, which can be specified in the BspInstance.
  *
  * **Communication and Synchronization Costs:**
  * - Communication Cost (g): The cost of communicating a unit of data between processors, i.e., the bandwidth.
@@ -185,35 +186,16 @@ class BspArchitecture {
     }
 
   public:
-    /**
-     * @brief Default constructor.
-     * Initializes a BSP architecture with 2 processors, 1 processor type,
-     * communication costs of 1, synchronisation costs of 2, memory bounds of 100,
-     * and send costs of 1 between all processors.
-     */
-    BspArchitecture()
-        : numberOfProcessors_(2U), numberOfProcessorTypes_(1U), communicationCosts_(1U), synchronisationCosts_(2U),
-          memoryBound_(numberOfProcessors_, 100U), isNuma_(false),
-          processorTypes_(numberOfProcessors_, 0U), sendCosts_(numberOfProcessors_ * numberOfProcessors_, 1U) {
-        SetSendCostDiagonalToZero();
-    }
-
-    BspArchitecture(const BspArchitecture &other) = default;
-    BspArchitecture(BspArchitecture &&other) noexcept = default;
-    BspArchitecture &operator=(const BspArchitecture &other) = default;
-    BspArchitecture &operator=(BspArchitecture &&other) noexcept = default;
-    virtual ~BspArchitecture() = default;
-
     /**
      * @brief Constructs a BspArchitecture object with the specified number of processors, communication cost, and
      * synchronization cost.
      *
-     * @param NumberOfProcessors The number of processors in the architecture. Must be greater than 0.
-     * @param CommunicationCost The communication cost between processors.
-     * @param SynchronisationCost The synchronization cost between processors.
+     * @param NumberOfProcessors The number of processors in the architecture. Must be greater than 0. Default: 2.
+     * @param CommunicationCost The communication cost between processors. Default: 1.
+     * @param SynchronisationCost The synchronization cost between processors. Default: 2.
      * @param MemoryBound The memory bound for each processor (default: 100).
      */
-    BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t<Graph_t> CommunicationCost, const v_commw_t<Graph_t> SynchronisationCost,
+    BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t<Graph_t> CommunicationCost = 1U, const v_commw_t<Graph_t> SynchronisationCost = 2U,
                     const v_memw_t<Graph_t> MemoryBound = 100U)
         : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost),
           synchronisationCosts_(SynchronisationCost),
@@ -225,6 +207,12 @@ class BspArchitecture {
         SetSendCostDiagonalToZero();
     }
 
+    BspArchitecture(const BspArchitecture &other) = default;
+    BspArchitecture(BspArchitecture &&other) noexcept = default;
+    BspArchitecture &operator=(const BspArchitecture &other) = default;
+    BspArchitecture &operator=(BspArchitecture &&other) noexcept = default;
+    virtual ~BspArchitecture() = default;
+
     /**
      * @brief Copy constructor from a BspArchitecture with a different graph type.
      *
diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index 914c6fdc..c5a973a7 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -29,15 +29,33 @@ namespace osp {
 
 /**
  * @class BspInstance
- * @brief Represents an instance of the BSP (Bulk Synchronous Parallel) model.
+ * @brief Represents a scheduling problem instance for the Bulk Synchronous Parallel (BSP) model.
  *
- * The BspInstance class encapsulates the computational DAG (Directed Acyclic Graph) and the BSP architecture
- * for a specific instance of the BSP model. It provides methods to access and modify the architecture and DAG,
- * as well as retrieve information about the instance such as the number of vertices and processors.
+ * The BspInstance class serves as a container for all the necessary information to define a
+ * BSP scheduling problem. It acts as the "ground" object that holds the actual implementation
+ * of the graph and architecture.
  *
- * The instance specifies the compatibility between node types and processor types.
+ * It aggregates three main components:
  *
- * @tparam Graph_t The type of the computational DAG.
+ * 1. **Computational DAG**: The directed acyclic graph representing the program to be executed.
+ *    It defines the tasks (nodes), their dependencies (directed edges), and associated weights (work, memory, communication).
+ *
+ * 2. **BSP Architecture**: The hardware model description, including the number of processors,
+ *    their types, memory bounds, and communication/synchronization costs.
+ *    Note that processor indices are represented using `unsigned`.
+ *
+ * 3. **Node-Processor Compatibility**: A matrix defining which node types can be executed on which
+ *    processor types. This enables the modeling of heterogeneous systems (e.g., CPU + GPU) where
+ *    certain nodes are restricted to specific hardware accelerators.
+ *
+ * @warning Be careful when assigning an existing graph to a BspInstance. Depending on the
+ * constructor or assignment operator used, this may result in a deep copy of the graph structure,
+ * which can be expensive for large graphs.
+ *
+ * This class provides a unified interface to access and modify these components, facilitating
+ * the development of scheduling algorithms that need to query problem constraints and properties.
+ *
+ * @tparam Graph_t The type of the computational DAG, which must satisfy the `is_computational_dag` concept.
  */
 template<typename Graph_t>
 class BspInstance {
@@ -45,24 +63,37 @@ class BspInstance {
 
   private:
     /**
-     * @brief  The computational DAG of the instance. Holds the graph structure and the node types, work, memory, communication weights.
+     * @brief The computational DAG representing the program structure.
+     *
+     * It contains the graph topology (nodes and directed edges) as well as attributes such as node types,
+     * work weights, memory weights, and edge communication weights.
      */
     Graph_t cdag;
     /**
-     * @brief The BSP architecture of the instance. Holds the processor types and the memory bounds. Communication and synchronization cost. And the send cost between processors.
+     * @brief The BSP architecture model.
+     *
+     * It defines the hardware characteristics including processor types, memory limits,
+     * communication bandwidth/latency (send costs), and global synchronization costs.
      */
     BspArchitecture<Graph_t> architecture;
 
     /**
      * @brief Stores the compatibility between node types and processor types.
      *
-     * The architecture defines a type for each processor, and the dag defines a type for each node.
+     * The architecture defines a type for each processor, and the DAG defines a type for each node.
      * This matrix stores for each node type and processor type whether they are compatible, i.e.,
-     * if a node of the can be assigned to a processor of the given type in a schedule.
+     * if a node of that type can be assigned to a processor of the given type in a schedule.
      * @note The outer vector is indexed by node type, the inner vector is indexed by processor type.
      */
     std::vector<std::vector<bool>> nodeProcessorCompatibility = std::vector<std::vector<bool>>({{true}});
 
+    /**
+     * @brief The type used for vertex types in the computational DAG.
+     * If the DAG does not support vertex types, this is `unsigned`.
+     */
+    using vertex_type_t_or_default = std::conditional_t<is_computational_dag_typed_vertices_v<Graph_t>, v_type_t<Graph_t>, unsigned>;
+    using processor_type_t = unsigned;
+
   public:
     /**
      * @brief Default constructor for the BspInstance class.
@@ -107,19 +138,16 @@ class BspInstance {
 
     /**
      * @brief Returns a reference to the BSP architecture of the instance.
+     * Assigning to the returned non-const reference copies the assigned architecture into the instance.
+     * Move assignment (e.g. `getArchitecture() = std::move(arch)`) may be used to avoid that copy.
      */
     [[nodiscard]] const BspArchitecture<Graph_t> &getArchitecture() const { return architecture; }
     [[nodiscard]] BspArchitecture<Graph_t> &getArchitecture() { return architecture; }
 
-    /**
-     * @brief Sets the BSP architecture for the instance.
-     *
-     * @param architecture_ The BSP architecture for the instance.
-     */
-    void setArchitecture(const BspArchitecture<Graph_t> &architechture_) { architecture = architechture_; }
-
     /**
      * @brief Returns a reference to the computational DAG of the instance.
+     * Assigning to the returned non-const reference copies the assigned DAG into the instance.
+     * Move assignment (e.g. `getComputationalDag() = std::move(dag)`) may be used to avoid that copy.
      */
     [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; }
     [[nodiscard]] Graph_t &getComputationalDag() { return cdag; }
@@ -190,7 +218,6 @@ class BspInstance {
 
     /**
      * @brief Returns the memory bound for a specific processor.
-     *
      * @param proc The processor index.
      */
     [[nodiscard]] v_memw_t<Graph_t> memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); }
@@ -215,41 +242,11 @@ class BspInstance {
      */
     void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); }
 
-    /**
-     * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors.
-     * @return True if the memory constraints are feasible, false otherwise.
-     */
-    [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const {
-        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
-        for (unsigned proc = 0; proc < architecture.numberOfProcessors(); proc++) {
-            max_memory_per_proc_type[architecture.processorType(proc)] =
-                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
-        }
-
-        for (unsigned vertType = 0; vertType < cdag.num_vertex_types(); vertType++) {
-            v_memw_t<Graph_t> max_memory_of_type = max_memory_weight(vertType, cdag);
-            bool fits = false;
-
-            for (unsigned proc_type = 0; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) {
-                if (isCompatibleType(vertType, proc_type)) {
-                    fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]);
-                    if (fits)
-                        break;
-                }
-            }
-
-            if (!fits)
-                return false;
-        }
-
-        return true;
-    }
-
     /**
      * @brief Returns the processor type for a given processor index. Does not perform bounds checking.
      * @param proc The processor index.
      */
-    [[nodiscard]] v_type_t<Graph_t> processorType(const unsigned proc) const { return architecture.processorType(proc); }
+    [[nodiscard]] processor_type_t processorType(const unsigned proc) const { return architecture.processorType(proc); }
 
     /**
      * @brief Checks if a node is compatible with a processor. Does not perform bounds checking.
@@ -269,7 +266,7 @@ class BspInstance {
      * @param processorType The processor type.
      * @return True if the node type is compatible with the processor type, false otherwise.
      */
-    [[nodiscard]] bool isCompatibleType(const v_type_t<Graph_t> nodeType, const v_type_t<Graph_t> processorType) const {
+    [[nodiscard]] bool isCompatibleType(const vertex_type_t_or_default nodeType, const processor_type_t processorType) const {
         return nodeProcessorCompatibility[nodeType][processorType];
     }
 
@@ -285,6 +282,13 @@ class BspInstance {
         nodeProcessorCompatibility = compatibility_;
     }
 
+    /**
+     * @brief Returns the node-processor compatibility matrix.
+     */
+    [[nodiscard]] const std::vector<std::vector<bool>> &getNodeProcessorCompatibilityMatrix() const {
+        return nodeProcessorCompatibility;
+    }
+
     /**
      * @brief Returns the node type - processor type compatibility matrix.
      */
@@ -294,9 +298,9 @@ class BspInstance {
      * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`.
      * @param number_of_types The number of types.
      */
-    void setDiagonalCompatibilityMatrix(const unsigned number_of_types) {
+    void setDiagonalCompatibilityMatrix(const vertex_type_t_or_default number_of_types) {
         nodeProcessorCompatibility.assign(number_of_types, std::vector<bool>(number_of_types, false));
-        for (unsigned i = 0; i < number_of_types; ++i)
+        for (vertex_type_t_or_default i = 0; i < number_of_types; ++i)
             nodeProcessorCompatibility[i][i] = true;
     }
 
@@ -307,30 +311,52 @@ class BspInstance {
         nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector<bool>(architecture.getNumberOfProcessorTypes(), true));
     }
 
+    /**
+     * @brief Returns false if there is a node whose weight does not fit on any of its compatible processors.
+     * @return True if the memory constraints are feasible, false otherwise.
+     */
+    [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const {
+        std::vector<v_memw_t<Graph_t>> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0);
+        for (unsigned proc = 0U; proc < architecture.numberOfProcessors(); proc++) {
+            max_memory_per_proc_type[architecture.processorType(proc)] =
+                std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc));
+        }
+
+        for (vertex_type_t_or_default vertType = 0U; vertType < cdag.num_vertex_types(); vertType++) {
+            v_memw_t<Graph_t> max_memory_of_type = max_memory_weight(vertType, cdag);
+            bool fits = false;
+
+            for (processor_type_t proc_type = 0U; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) {
+                if (isCompatibleType(vertType, proc_type)) {
+                    fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]);
+                    if (fits)
+                        break;
+                }
+            }
+
+            if (!fits)
+                return false;
+        }
+
+        return true;
+    }
+
     /**
      * @brief Returns a list of compatible processor types for each node type.
-     *
      * @return A vector where the index is the node type and the value is a vector of compatible processor types.
      */
-    [[nodiscard]] std::vector<std::vector<unsigned>> getProcTypesCompatibleWithNodeType() const {
-        unsigned numberOfNodeTypes = cdag.num_vertex_types();
-        unsigned numberOfProcTypes = architecture.getNumberOfProcessorTypes();
-        std::vector<std::vector<unsigned>> compatibleProcTypes(numberOfNodeTypes);
+    [[nodiscard]] std::vector<std::vector<processor_type_t>> getProcTypesCompatibleWithNodeType() const {
+        vertex_type_t_or_default numberOfNodeTypes = cdag.num_vertex_types();
+        processor_type_t numberOfProcTypes = architecture.getNumberOfProcessorTypes();
+        std::vector<std::vector<processor_type_t>> compatibleProcTypes(numberOfNodeTypes);
 
-        for (unsigned nodeType = 0; nodeType < numberOfNodeTypes; ++nodeType)
-            for (unsigned processorType = 0; processorType < numberOfProcTypes; ++processorType)
+        for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType)
+            for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType)
                 if (isCompatibleType(nodeType, processorType))
                     compatibleProcTypes[nodeType].push_back(processorType);
 
         return compatibleProcTypes;
     }
-
-    /**
-     * @brief Returns the node-processor compatibility matrix.
-     */
-    [[nodiscard]] const std::vector<std::vector<bool>> &getNodeProcessorCompatibilityMatrix() const {
-        return nodeProcessorCompatibility;
-    }
 };
 
 } // namespace osp
\ No newline at end of file
diff --git a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp
index 0e9df967..2e23c22e 100644
--- a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp
+++ b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp
@@ -42,17 +42,17 @@ class CoarseAndSchedule : public Scheduler<Graph_t> {
         const auto &instance = schedule.getInstance();
 
         BspInstance<Graph_t_coarse> instance_coarse;
-        
+
         std::vector<vertex_idx_t<Graph_t_coarse>> reverse_vertex_map;
 
         bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(),
-                                        reverse_vertex_map);
+                                         reverse_vertex_map);
 
         if (!status) {
             return RETURN_STATUS::ERROR;
-        }  
+        }
 
-        instance_coarse.setArchitecture(instance.getArchitecture());
+        instance_coarse.getArchitecture() = instance.getArchitecture();
         instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix());
 
         BspSchedule<Graph_t_coarse> schedule_coarse(instance_coarse);
diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
index 8d6355ad..83556089 100644
--- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp
@@ -302,7 +302,7 @@ class IsomorphicSubgraphScheduler {
         const std::vector<bool> &was_trimmed) {
 
         subgraph_scheduler_input<Graph_t, Constr_Graph_t> result;
-        result.instance.setArchitecture(original_instance.getArchitecture());
+        result.instance.getArchitecture() = original_instance.getArchitecture();
         const unsigned num_proc_types = original_instance.getArchitecture().getNumberOfProcessorTypes();
 
         result.multiplicities.resize(isomorphic_groups.size());
@@ -373,7 +373,7 @@ class IsomorphicSubgraphScheduler {
             BspInstance<Constr_Graph_t> representative_instance;
             auto rep_global_to_local_map = create_induced_subgraph_map(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted);
 
-            representative_instance.setArchitecture(instance.getArchitecture());
+            representative_instance.getArchitecture() = instance.getArchitecture();
             const auto &procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx];
             std::vector<v_memw_t<Constr_Graph_t>> mem_weights(procs_for_group.size(), 0);
             for (unsigned proc_type = 0; proc_type < procs_for_group.size(); ++proc_type) {
diff --git a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp
index 88dcf1fa..97fa53a5 100644
--- a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp
+++ b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp
@@ -135,7 +135,7 @@ class TrimmedGroupScheduler : public Scheduler<Constr_Graph_t> {
             std::sort(group_vertices.begin(), group_vertices.end());
 
             BspInstance<Constr_Graph_t> sub_instanc;
-            sub_instanc.setArchitecture(sub_arch);                                                                          // Set the sub-architecture
+            sub_instanc.getArchitecture() = sub_arch;
             sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix());                      // Inherit compatibility
             auto global_to_local_map = create_induced_subgraph_map(dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph
 
diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp
index f45434de..101e4b2f 100644
--- a/tests/bsp_instance.cpp
+++ b/tests/bsp_instance.cpp
@@ -44,7 +44,7 @@ BOOST_AUTO_TEST_CASE(test_1) {
 
     BspArchitecture<computational_dag_vector_impl_def_t> architecture_2(6, 3, 1);
 
-    instance.setArchitecture(architecture_2);
+    instance.getArchitecture() = architecture_2;
 
     BOOST_CHECK_EQUAL(instance.numberOfProcessors(), 6);
     BOOST_CHECK_EQUAL(instance.synchronisationCosts(), 1);
diff --git a/tests/coarser.cpp b/tests/coarser.cpp
index e4bd92c3..9c77703d 100644
--- a/tests/coarser.cpp
+++ b/tests/coarser.cpp
@@ -23,24 +23,24 @@ limitations under the License.
 #include <filesystem>
 #include <iostream>
 
+#include "osp/auxiliary/io/arch_file_reader.hpp"
+#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
 #include "osp/bsp/scheduler/CoarseAndSchedule.hpp"
 #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp"
 #include "osp/coarser/BspScheduleCoarser.hpp"
-#include "osp/coarser/coarser_util.hpp"
-#include "osp/coarser/funnel/FunnelBfs.hpp"
-#include "osp/coarser/hdagg/hdagg_coarser.hpp"
 #include "osp/coarser/Sarkar/Sarkar.hpp"
 #include "osp/coarser/Sarkar/SarkarMul.hpp"
 #include "osp/coarser/SquashA/SquashA.hpp"
 #include "osp/coarser/SquashA/SquashAMul.hpp"
+#include "osp/coarser/coarser_util.hpp"
+#include "osp/coarser/funnel/FunnelBfs.hpp"
+#include "osp/coarser/hdagg/hdagg_coarser.hpp"
 #include "osp/coarser/top_order/top_order_coarser.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
-#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp"
 #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp"
-#include "osp/auxiliary/io/arch_file_reader.hpp"
-#include "osp/auxiliary/io/hdag_graph_file_reader.hpp"
-#include "osp/auxiliary/io/general_file_reader.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp"
+#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 #include "test_graphs.hpp"
 
 using namespace osp;
@@ -121,14 +121,15 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) {
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl
+                  << "Graph: " << name_graph << std::endl;
 
         using graph_t = computational_dag_edge_idx_vector_impl_def_t;
 
         BspInstance<graph_t> instance;
 
         bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+                                                   instance.getComputationalDag());
 
         bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
                                                                     instance.getArchitecture());
@@ -140,7 +141,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) {
         }
 
         BspInstance<graph_t> coarse_instance;
-        coarse_instance.setArchitecture(instance.getArchitecture());
+        coarse_instance.getArchitecture() = instance.getArchitecture();
         std::vector<std::vector<VertexType>> vertex_map;
         std::vector<VertexType> reverse_vertex_map;
 
@@ -193,7 +194,8 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) {
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl
+                  << "Graph: " << name_graph << std::endl;
 
         using graph_t1 = computational_dag_edge_idx_vector_impl_def_t;
         using graph_t2 = computational_dag_vector_impl_def_t;
@@ -201,7 +203,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) {
         BspInstance<graph_t1> instance;
 
         bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+                                                   instance.getComputationalDag());
 
         bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
                                                                     instance.getArchitecture());
@@ -214,7 +216,7 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) {
 
         BspInstance<graph_t2> coarse_instance;
         BspArchitecture<graph_t2> architecture_t2(instance.getArchitecture());
-        coarse_instance.setArchitecture(architecture_t2);
+        coarse_instance.getArchitecture() = architecture_t2;
         std::vector<std::vector<VertexType>> vertex_map;
         std::vector<VertexType> reverse_vertex_map;
 
@@ -265,14 +267,15 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) {
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl
+                  << "Graph: " << name_graph << std::endl;
 
         using graph_t = computational_dag_edge_idx_vector_impl_def_t;
 
         BspInstance<graph_t> instance;
 
         bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+                                                   instance.getComputationalDag());
 
         bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
                                                                     instance.getArchitecture());
@@ -284,7 +287,7 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) {
         }
 
         BspInstance<graph_t> coarse_instance;
-        coarse_instance.setArchitecture(instance.getArchitecture());
+        coarse_instance.getArchitecture() = instance.getArchitecture();
         std::vector<std::vector<VertexType>> vertex_map;
         std::vector<VertexType> reverse_vertex_map;
 
@@ -345,12 +348,13 @@ void test_coarser_same_graph(Coarser<graph_t, graph_t> &coarser) {
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl
+                  << "Graph: " << name_graph << std::endl;
 
         BspInstance<graph_t> instance;
 
         bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+                                                   instance.getComputationalDag());
 
         bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
                                                                     instance.getArchitecture());
@@ -362,17 +366,15 @@ void test_coarser_same_graph(Coarser<graph_t, graph_t> &coarser) {
         }
 
         BspInstance<graph_t> coarse_instance;
-        coarse_instance.setArchitecture(instance.getArchitecture());
+        coarse_instance.getArchitecture() = instance.getArchitecture();
         std::vector<std::vector<VertexType>> vertex_map;
         std::vector<VertexType> reverse_vertex_map;
 
         GreedyBspScheduler<graph_t> scheduler;
 
-
         bool coarse_success = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map);
         BOOST_CHECK(coarse_success);
 
-
         vertex_map = coarser_util::invert_vertex_contraction_map<graph_t, graph_t>(reverse_vertex_map);
 
         BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices()));
@@ -446,27 +448,20 @@ BOOST_AUTO_TEST_CASE(squashA_test) {
     SquashA<graph_t, graph_t> coarser(params);
 
     test_coarser_same_graph<graph_t>(coarser);
-    
-    
+
     params.mode = SquashAParams::Mode::TRIANGLES;
     params.use_structured_poset = true;
     params.use_top_poset = true;
     coarser.setParams(params);
-    
+
     test_coarser_same_graph<graph_t>(coarser);
 
     params.use_top_poset = false;
     coarser.setParams(params);
-    
+
     test_coarser_same_graph<graph_t>(coarser);
 }
 
-
-
-
-
-
-
 BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) {
     // static_assert(std::is_base_of<Scheduler, T>::value, "Class is not a scheduler!");
     std::vector<std::string> filenames_graph = tiny_spaa_graphs();
@@ -484,7 +479,8 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) {
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl
+                  << "Graph: " << name_graph << std::endl;
 
         using graph_t1 = computational_dag_edge_idx_vector_impl_def_t;
         using graph_t2 = CSG;
@@ -492,7 +488,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) {
         BspInstance<graph_t1> instance;
 
         bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+                                                   instance.getComputationalDag());
 
         bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
                                                                     instance.getArchitecture());
@@ -505,7 +501,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) {
 
         BspInstance<graph_t2> coarse_instance;
         BspArchitecture<graph_t2> architecture_t2(instance.getArchitecture());
-        coarse_instance.setArchitecture(architecture_t2);
+        coarse_instance.getArchitecture() = architecture_t2;
         std::vector<std::vector<VertexType>> vertex_map;
         std::vector<VertexType> reverse_vertex_map;
 
@@ -560,7 +556,8 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) {
         std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1);
         name_graph = name_graph.substr(0, name_graph.find_last_of("."));
 
-        std::cout << std::endl << "Graph: " << name_graph << std::endl;
+        std::cout << std::endl
+                  << "Graph: " << name_graph << std::endl;
 
         using graph_t1 = computational_dag_edge_idx_vector_impl_def_t;
         using graph_t2 = CSGE;
@@ -568,7 +565,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) {
         BspInstance<graph_t1> instance;
 
         bool status_graph = file_reader::readGraph((cwd / filename_graph).string(),
-                                                                            instance.getComputationalDag());
+                                                   instance.getComputationalDag());
 
         bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(),
                                                                     instance.getArchitecture());
@@ -581,7 +578,7 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) {
 
         BspInstance<graph_t2> coarse_instance;
         BspArchitecture<graph_t2> architecture_t2(instance.getArchitecture());
-        coarse_instance.setArchitecture(architecture_t2);
+        coarse_instance.getArchitecture() = architecture_t2;
         std::vector<std::vector<VertexType>> vertex_map;
         std::vector<VertexType> reverse_vertex_map;
 
@@ -619,13 +616,6 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) {
     }
 }
 
-
-
-
-
-
-
-
 BOOST_AUTO_TEST_CASE(Sarkar_test) {
     using graph_t = computational_dag_edge_idx_vector_impl_def_t;
     // using graph_t = computational_dag_vector_impl_def_t;
@@ -639,58 +629,47 @@ BOOST_AUTO_TEST_CASE(Sarkar_test) {
 
     test_coarser_same_graph<graph_t>(coarser);
 
-    
     params.useTopPoset = false;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
-    
-    
+
     params.mode = SarkarParams::Mode::FAN_IN_FULL;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-    
     params.mode = SarkarParams::Mode::FAN_IN_PARTIAL;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-    
     params.mode = SarkarParams::Mode::FAN_OUT_FULL;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-
     params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-
     params.mode = SarkarParams::Mode::LEVEL_EVEN;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
-    
-    
+
     params.mode = SarkarParams::Mode::LEVEL_ODD;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-
     params.mode = SarkarParams::Mode::FAN_IN_BUFFER;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-
     params.mode = SarkarParams::Mode::FAN_OUT_BUFFER;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 
-
     params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER;
     coarser.setParameters(params);
     test_coarser_same_graph<graph_t>(coarser);
 }
 
-
 BOOST_AUTO_TEST_CASE(SarkarML_test) {
     using graph_t = computational_dag_edge_idx_vector_impl_def_t;
     // using graph_t = computational_dag_vector_impl_def_t;
@@ -723,6 +702,6 @@ BOOST_AUTO_TEST_CASE(SquashAML_test) {
     // using graph_t = computational_dag_vector_impl_def_t;
 
     SquashAMul<graph_t, graph_t> coarser;
-    
+
     test_coarser_same_graph<graph_t>(coarser);
 }
\ No newline at end of file
diff --git a/tests/trimmed_group_scheduler.cpp b/tests/trimmed_group_scheduler.cpp
index 52cf4cdb..ccbfee8a 100644
--- a/tests/trimmed_group_scheduler.cpp
+++ b/tests/trimmed_group_scheduler.cpp
@@ -19,10 +19,10 @@ limitations under the License.
 #define BOOST_TEST_MODULE TrimmedGroupSchedulerTest
 #include <boost/test/unit_test.hpp>
 
-#include "osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp"
 #include "osp/bsp/model/BspInstance.hpp"
 #include "osp/bsp/model/BspSchedule.hpp"
 #include "osp/bsp/scheduler/Scheduler.hpp"
+#include "osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp"
 #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp"
 
 using namespace osp;
@@ -30,9 +30,9 @@ using namespace osp;
 using graph_t = computational_dag_vector_impl_def_t;
 
 // Mock SubScheduler for TrimmedGroupScheduler tests
-template <typename Constr_Graph_t>
+template<typename Constr_Graph_t>
 class MockSubScheduler : public Scheduler<Constr_Graph_t> {
-public:
+  public:
     // This mock scheduler assigns all nodes to local processor 0 and superstep 0.
     // This simplifies verification of the TrimmedGroupScheduler's mapping logic.
     RETURN_STATUS computeSchedule(BspSchedule<Constr_Graph_t> &schedule) override {
@@ -66,7 +66,7 @@ BOOST_FIXTURE_TEST_SUITE(TrimmedGroupSchedulerTestSuite, TrimmedGroupSchedulerFi
 BOOST_AUTO_TEST_CASE(EmptyGraphTest) {
     // Graph is empty by default
     arch.setNumberOfProcessors(4);
-    instance.setArchitecture(arch);
+    instance.getArchitecture() = arch;
 
     TrimmedGroupScheduler<graph_t> scheduler(mock_sub_scheduler, 1);
     BspSchedule<graph_t> schedule(instance);
@@ -87,7 +87,7 @@ BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) {
 
     // Architecture: 4 processors of type 0
     arch.setProcessorsWithTypes({0, 0, 0, 0});
-    instance.setArchitecture(arch);
+    instance.getArchitecture() = arch;
 
     // min_non_zero_procs_ = 1 (all 4 processors assigned to this single component group)
     TrimmedGroupScheduler<graph_t> scheduler(mock_sub_scheduler, 1);
@@ -119,7 +119,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest)
 
     // Architecture: 4 processors of type 0
     arch.setProcessorsWithTypes({0, 0, 0, 0});
-    instance.setArchitecture(arch);
+    instance.getArchitecture() = arch;
 
     // min_non_zero_procs_ = 2 (2 component groups, each gets 2 processors)
     TrimmedGroupScheduler<graph_t> scheduler(mock_sub_scheduler, 2);
@@ -154,7 +154,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest
 
     // Architecture: 6 processors of type 0
     arch.setProcessorsWithTypes({0, 0, 0, 0, 0, 0});
-    instance.setArchitecture(arch);
+    instance.getArchitecture() = arch;
 
     // min_non_zero_procs_ = 2 (3 components, 2 groups)
     // base_count = 3 / 2 = 1, remainder = 3 % 2 = 1
@@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsHeterogeneousArchitectureTest) {
 
     // Architecture: 2 processors of type 0 (global 0,1), 2 processors of type 1 (global 2,3)
     arch.setProcessorsWithTypes({0, 0, 1, 1});
-    instance.setArchitecture(arch);
+    instance.getArchitecture() = arch;
     instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc.
 
     // min_non_zero_procs_ = 2 (2 components, 2 groups)

From 8b4e1a62c32b2a4c6587eb505e8f839c5e23d810 Mon Sep 17 00:00:00 2001
From: tonibohnlein <toni.boehnlein18@gmail.com>
Date: Tue, 9 Dec 2025 11:48:24 +0100
Subject: [PATCH 9/9] arch constructor

---
 include/osp/bsp/model/BspArchitecture.hpp | 83 +++++++----------------
 include/osp/bsp/model/BspInstance.hpp     |  6 +-
 2 files changed, 25 insertions(+), 64 deletions(-)

diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp
index 5ef01b0e..5575fad2 100644
--- a/include/osp/bsp/model/BspArchitecture.hpp
+++ b/include/osp/bsp/model/BspArchitecture.hpp
@@ -194,17 +194,37 @@ class BspArchitecture {
      * @param CommunicationCost The communication cost between processors. Default: 1.
      * @param SynchronisationCost The synchronization cost between processors. Default: 2.
      * @param MemoryBound The memory bound for each processor (default: 100).
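+     * @param SendCosts The matrix of send costs between processors. If empty (the default), uniform send costs are used; otherwise it must be a NumberOfProcessors x NumberOfProcessors matrix (std::invalid_argument is thrown if it is not) and its diagonal entries are forced to zero.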
      */
     BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t<Graph_t> CommunicationCost = 1U, const v_commw_t<Graph_t> SynchronisationCost = 2U,
-                    const v_memw_t<Graph_t> MemoryBound = 100U)
+                    const v_memw_t<Graph_t> MemoryBound = 100U, const std::vector<std::vector<v_commw_t<Graph_t>>> &SendCosts = {})
         : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost),
           synchronisationCosts_(SynchronisationCost),
           memoryBound_(NumberOfProcessors, MemoryBound), isNuma_(false),
-          processorTypes_(NumberOfProcessors, 0U), sendCosts_(NumberOfProcessors * NumberOfProcessors, 1U) {
+          processorTypes_(NumberOfProcessors, 0U) {
         if (NumberOfProcessors == 0U) {
             throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0.");
         }
-        SetSendCostDiagonalToZero();
+
+        if (SendCosts.empty()) {
+            InitializeUniformSendCosts();
+        } else {
+            if (NumberOfProcessors != SendCosts.size()) {
+                throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
+            }
+            if (std::any_of(SendCosts.begin(), SendCosts.end(),
+                            [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) {
+                throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
+            }
+
+            sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors);
+            for (const auto &row : SendCosts) {
+                sendCosts_.insert(sendCosts_.end(), row.begin(), row.end());
+            }
+
+            SetSendCostDiagonalToZero();
+            isNuma_ = AreSendCostsNuma();
+        }
     }
 
     BspArchitecture(const BspArchitecture &other) = default;
@@ -246,62 +266,7 @@ class BspArchitecture {
      */
     BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t<Graph_t> CommunicationCost, const v_commw_t<Graph_t> SynchronisationCost,
                     const std::vector<std::vector<v_commw_t<Graph_t>>> &SendCosts)
-        : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost),
-          synchronisationCosts_(SynchronisationCost), memoryBound_(NumberOfProcessors, 100U),
-          processorTypes_(NumberOfProcessors, 0U) {
-        if (NumberOfProcessors == 0U) {
-            throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0.");
-        }
-        if (NumberOfProcessors != SendCosts.size()) {
-            throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
-        }
-        if (std::any_of(SendCosts.begin(), SendCosts.end(),
-                        [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) {
-            throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
-        }
-
-        sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors);
-        for (const auto &row : SendCosts) {
-            sendCosts_.insert(sendCosts_.end(), row.begin(), row.end());
-        }
-
-        SetSendCostDiagonalToZero();
-        isNuma_ = AreSendCostsNuma();
-    }
-
-    /**
-     * @brief Constructs a BspArchitecture object with custom send costs and memory bound.
-     *
-     * @param NumberOfProcessors The number of processors. Must be greater than 0.
-     * @param CommunicationCost The communication cost.
-     * @param SynchronisationCost The synchronization cost.
-     * @param MemoryBound The memory bound for each processor.
-     * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero.
-     */
-    BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t<Graph_t> CommunicationCost, const v_commw_t<Graph_t> SynchronisationCost,
-                    const v_memw_t<Graph_t> MemoryBound, const std::vector<std::vector<v_commw_t<Graph_t>>> &SendCosts)
-        : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost),
-          synchronisationCosts_(SynchronisationCost), memoryBound_(NumberOfProcessors, MemoryBound),
-          processorTypes_(NumberOfProcessors, 0U) {
-        if (NumberOfProcessors == 0U) {
-            throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0.");
-        }
-        if (NumberOfProcessors != SendCosts.size()) {
-            throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
-        }
-        if (std::any_of(SendCosts.begin(), SendCosts.end(),
-                        [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) {
-            throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n");
-        }
-
-        sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors);
-        for (const auto &row : SendCosts) {
-            sendCosts_.insert(sendCosts_.end(), row.begin(), row.end());
-        }
-
-        SetSendCostDiagonalToZero();
-        isNuma_ = AreSendCostsNuma();
-    }
+        : BspArchitecture(NumberOfProcessors, CommunicationCost, SynchronisationCost, 100U, SendCosts) {}
 
     /**
      * @brief Sets the uniform send cost for each pair of processors.
diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp
index c5a973a7..bed4fd40 100644
--- a/include/osp/bsp/model/BspInstance.hpp
+++ b/include/osp/bsp/model/BspInstance.hpp
@@ -271,14 +271,10 @@ class BspInstance {
     }
 
     /**
-     * @brief Sets the node-processor compatibility matrix. The matrix is copied.
+     * @brief Sets the node-processor compatibility matrix. The matrix is copied. Its dimensions are not validated; the caller must ensure it covers all node types and processor types.
      * @param compatibility_ The compatibility matrix.
-     * @throw std::runtime_error if the compatibility matrix size does not match the number of node types and processor types.
      */
     void setNodeProcessorCompatibility(const std::vector<std::vector<bool>> &compatibility_) {
-        if (compatibility_.size() < cdag.num_vertex_types() || compatibility_[0].size() < architecture.getNumberOfProcessorTypes()) {
-            throw std::runtime_error("Compatibility matrix size does not match the number of node types and processor types.");
-        }
         nodeProcessorCompatibility = compatibility_;
     }