diff --git a/src/vt-lb/algo/driver/driver.impl.h b/src/vt-lb/algo/driver/driver.impl.h
index 83b63ef..a502d84 100644
--- a/src/vt-lb/algo/driver/driver.impl.h
+++ b/src/vt-lb/algo/driver/driver.impl.h
@@ -57,10 +57,10 @@ void runLB(DriverAlgoEnum algo, CommT& comm, ConfigT config, std::unique_ptr<mod
     break;
   case DriverAlgoEnum::TemperedLB:
     {
-    // Run TemperedLB algorithm
-    auto lb = std::make_unique<algo::temperedlb::TemperedLB<CommT>>(comm, config);
-    lb->inputData(std::move(phase_data));
-    lb->run();
+      // Run TemperedLB algorithm
+      auto lb = std::make_unique<algo::temperedlb::TemperedLB<CommT>>(comm, config);
+      lb->inputData(std::move(phase_data));
+      lb->run();
     }
     break;
   default:
@@ -68,6 +68,29 @@ void runLB(DriverAlgoEnum algo, CommT& comm, ConfigT config, std::unique_ptr<mod
   }
 }
 
+template <typename CommT, typename ConfigT>
+std::unordered_map<model::RankType, std::vector<model::TaskType>>
+runLBAllGather(DriverAlgoEnum algo, CommT& comm, ConfigT config, std::unique_ptr<model::PhaseData> phase_data) {
+  switch (algo) {
+  case DriverAlgoEnum::None:
+    // No load balancing
+    return {};
+    break;
+  case DriverAlgoEnum::TemperedLB:
+    {
+      // Run TemperedLB algorithm
+      auto lb = std::make_unique<algo::temperedlb::TemperedLB<CommT>>(comm, config);
+      lb->inputData(std::move(phase_data));
+      auto local_tasks = lb->run();
+      return lb->getGlobalDistribution(local_tasks);
+    }
+    break;
+  default:
+    throw std::runtime_error("Invalid load balancer algorithm");
+    return {};
+  }
+}
+
 } /* end namespace vt_lb */
 
 #endif /*INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_IMPL_H*/
\ No newline at end of file
diff --git a/src/vt-lb/algo/temperedlb/temperedlb.h b/src/vt-lb/algo/temperedlb/temperedlb.h
index 6ba76d7..b9ed552 100644
--- a/src/vt-lb/algo/temperedlb/temperedlb.h
+++ b/src/vt-lb/algo/temperedlb/temperedlb.h
@@ -371,6 +371,24 @@ struct TemperedLB final : baselb::BaseLB {
     }
   }
 
+  /**
+   * @brief Get the global distribution of tasks after load balancing
+   *
+   * @param local_tasks The set of tasks on the this rank
+   */
+  std::unordered_map<model::RankType, std::vector<model::TaskType>>
+  getGlobalDistribution(std::unordered_set<model::TaskType> const& local_tasks) {
+    std::vector<model::TaskType> local_task_vec(
+      local_tasks.begin(), local_tasks.end()
+    );
+    auto all_task_vecs = handle_.template allgather<model::TaskType>(
+      local_task_vec.data(), static_cast<int>(local_task_vec.size())
+    );
+
+    return all_task_vecs;
+  }
+
+
   Clusterer const* getClusterer() const { return clusterer_.get(); }
 
 private:
@@ -402,12 +420,15 @@ struct TemperedLB final : baselb::BaseLB {
     double global_min = 0.0;
     double global_max = 0.0;
     double global_sum = 0.0;
-    // For now, do P reductions since we don't have broadcast yet
-    for (int p = 0; p < comm_.numRanks(); ++p) {
-      handle_.reduce(p, MPI_DOUBLE, MPI_MIN, &local_value, &global_min, 1);
-      handle_.reduce(p, MPI_DOUBLE, MPI_MAX, &local_value, &global_max, 1);
-      handle_.reduce(p, MPI_DOUBLE, MPI_SUM, &local_value, &global_sum, 1);
-    }
+
+    handle_.reduce(0, MPI_DOUBLE, MPI_MIN, &local_value, &global_min, 1);
+    handle_.reduce(0, MPI_DOUBLE, MPI_MAX, &local_value, &global_max, 1);
+    handle_.reduce(0, MPI_DOUBLE, MPI_SUM, &local_value, &global_sum, 1);
+
+    handle_.broadcast(0, MPI_DOUBLE, &global_min, 1);
+    handle_.broadcast(0, MPI_DOUBLE, &global_max, 1);
+    handle_.broadcast(0, MPI_DOUBLE, &global_sum, 1);
+
     double global_avg = global_sum / static_cast<double>(comm_.numRanks());
     double I = 0;
     if (global_avg > 0.0) {
diff --git a/src/vt-lb/model/Communication.h b/src/vt-lb/model/Communication.h
index 7277b8f..262e5b0 100644
--- a/src/vt-lb/model/Communication.h
+++ b/src/vt-lb/model/Communication.h
@@ -56,21 +56,23 @@ namespace vt_lb::model {
 struct Edge {
   Edge() = default;
   Edge(TaskType from, TaskType to, BytesType volume)
-    : from_(from), to_(to), volume_(volume)
+    : from_(from), to_(to), volume_(volume), num_messages_(1)
   {}
   // New ctor with explicit endpoint ranks
   Edge(TaskType from, TaskType to, BytesType volume, RankType from_rank, RankType to_rank)
-    : from_(from), to_(to), volume_(volume), from_rank_(from_rank), to_rank_(to_rank)
+    : from_(from), to_(to), volume_(volume), from_rank_(from_rank), to_rank_(to_rank), num_messages_(1)
   {}
 
   TaskType getFrom() const { return from_; }
   TaskType getTo() const { return to_; }
   BytesType getVolume() const { return volume_; }
-  // New rank accessors
+  void setVolume(BytesType volume) { volume_ = volume; }
   RankType getFromRank() const { return from_rank_; }
   RankType getToRank() const { return to_rank_; }
   void setFromRank(RankType rank) { from_rank_ = rank; }
   void setToRank(RankType rank) { to_rank_ = rank; }
+  int getNumMessages() const { return num_messages_; }
+  void setNumMessages(int num) { num_messages_ = num; }
 
   template <typename Serializer>
   void serialize(Serializer& s) {
@@ -79,6 +81,7 @@ struct Edge {
     s | volume_;
     s | from_rank_;
     s | to_rank_;
+    s | num_messages_;
   }
 
 private:
@@ -87,6 +90,7 @@ struct Edge {
   BytesType volume_ = 0.0;
   RankType from_rank_ = invalid_rank;
   RankType to_rank_ = invalid_rank;
+  int num_messages_ = 0;
 };
 
 /**
diff --git a/src/vt-lb/model/PhaseData.h b/src/vt-lb/model/PhaseData.h
index 9f36744..6a5c8db 100644
--- a/src/vt-lb/model/PhaseData.h
+++ b/src/vt-lb/model/PhaseData.h
@@ -69,6 +69,21 @@ struct PhaseData {
     }
     communications_.push_back(e);
   }
+  void aggregateCommunication(Edge const& e) {
+    for (int i = 0; i < static_cast<int>(communications_.size()); ++i) {
+      auto& comm = communications_[i];
+      if (comm.getFrom() == e.getFrom() && comm.getTo() == e.getTo()) {
+        comm.setVolume(comm.getVolume() + e.getVolume());
+        comm.setNumMessages(comm.getNumMessages() + e.getNumMessages());
+        task_messages_out_[e.getFrom()] += e.getNumMessages();
+        task_messages_in_[e.getTo()] += e.getNumMessages();
+        return;
+      }
+    }
+    communications_.push_back(e);
+    task_messages_out_[e.getFrom()] += e.getNumMessages();
+    task_messages_in_[e.getTo()] += e.getNumMessages();
+  }
   void addSharedBlock(SharedBlock const& b) { shared_blocks_.emplace(b.getId(), b); }
 
   RankType getRank() const { return rank_; }
@@ -98,6 +113,25 @@ struct PhaseData {
   std::unordered_map<TaskType, Task> const& getTasksMap() const { return tasks_; }
   std::vector<Edge> const& getCommunications() const { return communications_; }
   std::vector<Edge>& getCommunicationsRef() { return communications_; }
+
+  std::size_t getCommunicationMessagesCount() const {
+    std::size_t count = 0;
+    for (const auto& comm : communications_) {
+      count += comm.getNumMessages();
+    }
+    return count;
+  }
+
+  std::size_t getTaskMessagesOut(TaskType id) const {
+    auto it = task_messages_out_.find(id);
+    return it != task_messages_out_.end() ? it->second : 0;
+  }
+
+  std::size_t getTaskMessagesIn(TaskType id) const {
+    auto it = task_messages_in_.find(id);
+    return it != task_messages_in_.end() ? it->second : 0;
+  }
+
   std::unordered_map<SharedBlockType, SharedBlock> const& getSharedBlocksMap() const { return shared_blocks_; }
   std::unordered_set<TaskType> getTaskIds() const {
     std::unordered_set<TaskType> ids;
@@ -133,6 +167,8 @@ struct PhaseData {
     tasks_.clear();
     communications_.clear();
     shared_blocks_.clear();
+    task_messages_out_.clear();
+    task_messages_in_.clear();
     rank_footprint_bytes_ = 0.0;
     rank_max_memory_available_ = 0.0;
   }
@@ -157,6 +193,8 @@ struct PhaseData {
     s | tasks_;
     s | communications_;
     s | shared_blocks_;
+    s | task_messages_out_;
+    s | task_messages_in_;
     s | rank_footprint_bytes_;
     s | rank_max_memory_available_;
   }
@@ -170,6 +208,8 @@ struct PhaseData {
   std::unordered_map<TaskType, Task> tasks_;
   std::vector<Edge> communications_;
   std::unordered_map<SharedBlockType, SharedBlock> shared_blocks_;
+  std::unordered_map<TaskType, std::size_t> task_messages_out_;
+  std::unordered_map<TaskType, std::size_t> task_messages_in_;
   BytesType rank_footprint_bytes_ = 0.0;
   BytesType rank_max_memory_available_ = 0.0;
 };
diff --git a/tests/unit/graph_helpers.h b/tests/unit/graph_helpers.h
index 753f444..7157539 100644
--- a/tests/unit/graph_helpers.h
+++ b/tests/unit/graph_helpers.h
@@ -304,7 +304,7 @@ void generateIntraRankComm(
       }
     }
     double bytes = weight_per_edge_dist(gen);
-    pd.addCommunication(Edge{from, to, bytes, rank, rank});
+    pd.aggregateCommunication(Edge{from, to, bytes, rank, rank});
   }
 }
 
@@ -359,7 +359,7 @@ void generateInterRankComm(
     while ((remote_rank = remote_rank_dist(gen)) == rank) {}
     TaskType to = remote_task_dist(gen);
     double bytes = weight_per_edge_dist(gen);
-    pd.addCommunication(Edge{from, to, bytes, rank, remote_rank});
+    pd.aggregateCommunication(Edge{from, to, bytes, rank, remote_rank});
   }
   for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) {
     TaskType to = local_endpoints[e];
@@ -367,7 +367,7 @@ void generateInterRankComm(
     while ((remote_rank = remote_rank_dist(gen)) == rank) {}
     TaskType from = remote_task_dist(gen);
     double bytes = weight_per_edge_dist(gen);
-    pd.addCommunication(Edge{from, to, bytes, remote_rank, rank});
+    pd.aggregateCommunication(Edge{from, to, bytes, remote_rank, rank});
   }
 }
 
@@ -428,7 +428,7 @@ void generateRankComm(
       while ((to = remote_task_dist(gen)) == from) {}
     }
     double bytes = weight_per_edge_dist(gen);
-    pd.addCommunication(Edge{from, to, bytes, rank, remote_rank});
+    pd.aggregateCommunication(Edge{from, to, bytes, rank, remote_rank});
   }
   for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) {
     TaskType to = local_endpoints[e];
@@ -441,7 +441,7 @@ void generateRankComm(
       while ((from = remote_task_dist(gen)) == to) {}
     }
     double bytes = weight_per_edge_dist(gen);
-    pd.addCommunication(Edge{from, to, bytes, remote_rank, rank});
+    pd.aggregateCommunication(Edge{from, to, bytes, remote_rank, rank});
   }
 }
 
diff --git a/tests/unit/helpers/test_graph_helpers.cc b/tests/unit/helpers/test_graph_helpers.cc
index 7b770bb..4fd1e83 100644
--- a/tests/unit/helpers/test_graph_helpers.cc
+++ b/tests/unit/helpers/test_graph_helpers.cc
@@ -551,471 +551,442 @@ TYPED_TEST(TestGraphHelpers, test_generate_tasks_without_shared_blocks2) {
   }
 };
 
-// TYPED_TEST(TestGraphHelpers, test_generate_intra_rank_comm) {
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
-
-//   std::mt19937 gen(6745 * rank);
-
-//   int min_tasks = 10;
-//   int max_tasks = 20;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
-
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
-
-//   int task_count = pd.getTasksMap().size();
-
-//   int min_endpoints = 3;
-//   int max_endpoints = 5;
-//   std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
-
-//   int min_weight = 100;
-//   int max_weight = 200;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
-
-//   generateIntraRankComm(pd, gen, ep_dist, weight_dist);
-
-//   auto &edges = pd.getCommunications();
-
-//   if (task_count <= 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
-
-//   EXPECT_GE(edges.size(), task_count * min_endpoints / 2);
-//   EXPECT_LE(edges.size(), task_count * max_endpoints / 2);
-
-//   std::vector<int> from_task(task_count);
-//   std::vector<int> to_task(task_count);
-
-//   for (auto &e : edges) {
-//     EXPECT_EQ(e.getFromRank(), rank);
-//     EXPECT_EQ(e.getToRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
-
-//     auto from_task_id = e.getFrom();
-//     auto to_task_id = e.getTo();
-//     EXPECT_GE(from_task_id, rank * max_tasks);
-//     EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
-//     EXPECT_GE(to_task_id, rank * max_tasks);
-//     EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
-
-//     int from_lid = from_task_id - rank * max_tasks;
-//     int to_lid = to_task_id - rank * max_tasks;
-//     if (from_lid >= 0 and from_lid < task_count) {
-//       ++(from_task[from_lid]);
-//     }
-//     if (to_lid >= 0 and to_lid < task_count) {
-//       ++(to_task[to_lid]);
-//     }
-//   }
-
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     // in case we generated an odd number of endpoints and one was dropped
-//     int adjusted_min_endpoints = std::max(min_endpoints - 1, 0);
-//     EXPECT_GE(from_task[tlid] + to_task[tlid], adjusted_min_endpoints);
-//     EXPECT_LE(from_task[tlid] + to_task[tlid], max_endpoints);
-//   }
-// };
-
-// TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_out_only) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
-
-//   std::mt19937 gen(741 * rank);
-
-//   int min_tasks = 5;
-//   int max_tasks = 7;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
+TYPED_TEST(TestGraphHelpers, test_generate_intra_rank_comm) {
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
+  std::mt19937 gen(6745 * rank);
 
-//   int task_count = pd.getTasksMap().size();
+  int min_tasks = 10;
+  int max_tasks = 20;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-//   int min_endpoints = 1;
-//   int max_endpoints = 4;
-//   std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//   int min_weight = 1000;
-//   int max_weight = 2000;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
+  int task_count = pd.getTasksMap().size();
 
-//   double frac = 0.0;
+  int min_endpoints = 3;
+  int max_endpoints = 5;
+  std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
 
-//   generateInterRankComm(
-//     pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
-//   );
+  int min_weight = 100;
+  int max_weight = 200;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//   auto &edges = pd.getCommunications();
+  generateIntraRankComm(pd, gen, ep_dist, weight_dist);
 
-//   if (num_ranks == 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//   EXPECT_GE(edges.size(), task_count * min_endpoints);
-//   EXPECT_LE(edges.size(), task_count * max_endpoints);
+  if (task_count <= 1) {
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-//   std::vector<int> from_task(task_count);
+  EXPECT_GE(message_count, task_count * min_endpoints / 2);
+  EXPECT_LE(message_count, task_count * max_endpoints / 2);
+
+    for (auto &e : edges) {
+      EXPECT_EQ(e.getFromRank(), rank);
+      EXPECT_EQ(e.getToRank(), rank);
+      EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+      EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
+
+      auto from_task_id = e.getFrom();
+      auto to_task_id = e.getTo();
+      EXPECT_GE(from_task_id, rank * max_tasks);
+      EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+      EXPECT_GE(to_task_id, rank * max_tasks);
+      EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+    }
 
-//   for (auto &e : edges) {
-//     EXPECT_EQ(e.getFromRank(), rank);
-//     EXPECT_NE(e.getToRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
+    for (int tlid = 0; tlid < task_count; ++tlid) {
+      // in case we generated an odd number of endpoints and one was dropped
+      int adjusted_min_endpoints = std::max(min_endpoints - 1, 0);
+      auto task_id = rank * max_tasks + tlid;
+      auto per_task_msgs = pd.getTaskMessagesOut(task_id) + pd.getTaskMessagesIn(task_id);
+      EXPECT_GE(per_task_msgs, adjusted_min_endpoints);
+      EXPECT_LE(per_task_msgs, max_endpoints);
+    }
+};
 
-//     auto from_task_id = e.getFrom();
-//     EXPECT_GE(from_task_id, rank * max_tasks);
-//     EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_out_only) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//     int from_lid = from_task_id - rank * max_tasks;
-//     if (from_lid >= 0 and from_lid < task_count) {
-//       ++(from_task[from_lid]);
-//     }
-//   }
+  std::mt19937 gen(741 * rank);
 
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     EXPECT_GE(from_task[tlid], min_endpoints);
-//     EXPECT_LE(from_task[tlid], max_endpoints);
-//   }
-// };
+  int min_tasks = 5;
+  int max_tasks = 7;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-// TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_in_only) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//   std::mt19937 gen(975 * rank);
+  int task_count = pd.getTasksMap().size();
 
-//   int min_tasks = 4;
-//   int max_tasks = 8;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
+  int min_endpoints = 1;
+  int max_endpoints = 4;
+  std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
 
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
+  int min_weight = 1000;
+  int max_weight = 2000;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//   int task_count = pd.getTasksMap().size();
+  double frac = 0.0;
 
-//   int min_endpoints = 2;
-//   int max_endpoints = 5;
-//   std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
+  generateInterRankComm(
+    pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
+  );
 
-//   int min_weight = 500;
-//   int max_weight = 3000;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//   double frac = 1.0;
+  if (num_ranks == 1) {
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-//   generateInterRankComm(
-//     pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
-//   );
+  EXPECT_GE(message_count, task_count * min_endpoints);
+  EXPECT_LE(message_count, task_count * max_endpoints);
 
-//   auto &edges = pd.getCommunications();
+  for (auto &e : edges) {
+    EXPECT_EQ(e.getFromRank(), rank);
+    EXPECT_NE(e.getToRank(), rank);
+    EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+    EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
+
+    auto from_task_id = e.getFrom();
+    EXPECT_GE(from_task_id, rank * max_tasks);
+    EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+  }
 
-//   if (num_ranks == 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
+  for (int tlid = 0; tlid < task_count; ++tlid) {
+    auto task_id = rank * max_tasks + tlid;
+    auto out_msgs = pd.getTaskMessagesOut(task_id);
+    EXPECT_GE(out_msgs, (std::size_t)min_endpoints);
+    EXPECT_LE(out_msgs, (std::size_t)max_endpoints);
+  }
+};
 
-//   EXPECT_GE(edges.size(), task_count * min_endpoints);
-//   EXPECT_LE(edges.size(), task_count * max_endpoints);
+TYPED_TEST(TestGraphHelpers, test_generate_inter_rank_comm_in_only) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   std::vector<int> to_task(task_count);
+  std::mt19937 gen(975 * rank);
 
-//   for (auto &e : edges) {
-//     EXPECT_NE(e.getFromRank(), rank);
-//     EXPECT_EQ(e.getToRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
+  int min_tasks = 4;
+  int max_tasks = 8;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-//     auto to_task_id = e.getTo();
-//     EXPECT_GE(to_task_id, rank * max_tasks);
-//     EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//     int to_lid = to_task_id - rank * max_tasks;
-//     if (to_lid >= 0 and to_lid < task_count) {
-//       ++(to_task[to_lid]);
-//     }
-//   }
+  int task_count = pd.getTasksMap().size();
 
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     EXPECT_GE(to_task[tlid], min_endpoints);
-//     EXPECT_LE(to_task[tlid], max_endpoints);
-//   }
-// };
+  int min_endpoints = 2;
+  int max_endpoints = 5;
+  std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
 
-// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+  int min_weight = 500;
+  int max_weight = 3000;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//   std::mt19937 gen(468 * rank);
+  double frac = 1.0;
 
-//   int min_tasks = 3;
-//   int max_tasks = 8;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
+  generateInterRankComm(
+    pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
+  );
 
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//   int task_count = pd.getTasksMap().size();
+  if (num_ranks == 1) {
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-//   int min_endpoints = 3;
-//   int max_endpoints = 6;
-//   std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
+  EXPECT_GE(message_count, task_count * min_endpoints);
+  EXPECT_LE(message_count, task_count * max_endpoints);
 
-//   int min_weight = 10000;
-//   int max_weight = 20000;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
+  for (auto &e : edges) {
+    EXPECT_NE(e.getFromRank(), rank);
+    EXPECT_EQ(e.getToRank(), rank);
+    EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+    EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
+
+    auto to_task_id = e.getTo();
+    EXPECT_GE(to_task_id, rank * max_tasks);
+    EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+  }
 
-//   double frac = 0.0;
+  for (int tlid = 0; tlid < task_count; ++tlid) {
+    auto task_id = rank * max_tasks + tlid;
+    auto in_msgs = pd.getTaskMessagesIn(task_id);
+    EXPECT_GE(in_msgs, (std::size_t)min_endpoints);
+    EXPECT_LE(in_msgs, (std::size_t)max_endpoints);
+  }
+};
 
-//   generateRankComm(
-//     pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
-//   );
+TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   auto &edges = pd.getCommunications();
+  std::mt19937 gen(468 * rank);
 
-//   if (num_ranks == 1 and task_count <= 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
+  int min_tasks = 3;
+  int max_tasks = 8;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-//   EXPECT_GE(edges.size(), task_count * min_endpoints);
-//   EXPECT_LE(edges.size(), task_count * max_endpoints);
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//   std::vector<int> from_task(task_count);
+  int task_count = pd.getTasksMap().size();
 
-//   for (auto &e : edges) {
-//     EXPECT_EQ(e.getFromRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
+  int min_endpoints = 3;
+  int max_endpoints = 6;
+  std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
 
-//     auto from_task_id = e.getFrom();
-//     EXPECT_GE(from_task_id, rank * max_tasks);
-//     EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+  int min_weight = 10000;
+  int max_weight = 20000;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//     int from_lid = from_task_id - rank * max_tasks;
-//     if (from_lid >= 0 and from_lid < task_count) {
-//       ++(from_task[from_lid]);
-//     }
-//   }
+  double frac = 0.0;
 
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     EXPECT_GE(from_task[tlid], min_endpoints);
-//     EXPECT_LE(from_task[tlid], max_endpoints);
-//   }
-// };
+  generateRankComm(
+    pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
+  );
 
-// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//   std::mt19937 gen(357 * rank);
+  if (num_ranks == 1 and task_count <= 1) {
+    EXPECT_EQ(edges.size(), 0);
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-//   int min_tasks = 2;
-//   int max_tasks = 8;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
+  EXPECT_GE(message_count, task_count * min_endpoints);
+  EXPECT_LE(message_count, task_count * max_endpoints);
 
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
+  for (auto &e : edges) {
+    EXPECT_EQ(e.getFromRank(), rank);
+    EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+    EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
 
-//   int task_count = pd.getTasksMap().size();
+    auto from_task_id = e.getFrom();
+    EXPECT_GE(from_task_id, rank * max_tasks);
+    EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+  }
 
-//   int min_endpoints = 1;
-//   int max_endpoints = 7;
-//   std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
+  for (int tlid = 0; tlid < task_count; ++tlid) {
+    auto task_id = rank * max_tasks + tlid;
+    auto out_msgs = pd.getTaskMessagesOut(task_id);
+    EXPECT_GE(out_msgs, (std::size_t)min_endpoints);
+    EXPECT_LE(out_msgs, (std::size_t)max_endpoints);
+  }
+};
 
-//   int min_weight = 3000;
-//   int max_weight = 5000;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
+TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   double frac = 1.0;
+  std::mt19937 gen(357 * rank);
 
-//   generateRankComm(
-//     pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
-//   );
+  int min_tasks = 2;
+  int max_tasks = 8;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-//   auto &edges = pd.getCommunications();
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//   if (num_ranks == 1 and task_count <= 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
+  int task_count = pd.getTasksMap().size();
 
-//   EXPECT_GE(edges.size(), task_count * min_endpoints);
-//   EXPECT_LE(edges.size(), task_count * max_endpoints);
+  int min_endpoints = 1;
+  int max_endpoints = 7;
+  std::uniform_int_distribution<> ep_dist(min_endpoints, max_endpoints);
 
-//   std::vector<int> to_task(task_count);
+  int min_weight = 3000;
+  int max_weight = 5000;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//   for (auto &e : edges) {
-//     EXPECT_EQ(e.getToRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
+  double frac = 1.0;
 
-//     auto to_task_id = e.getTo();
-//     EXPECT_GE(to_task_id, rank * max_tasks);
-//     EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+  generateRankComm(
+    pd, gen, ep_dist, weight_dist, min_tasks, num_ranks, frac
+  );
 
-//     int to_lid = to_task_id - rank * max_tasks;
-//     if (to_lid >= 0 and to_lid < task_count) {
-//       ++(to_task[to_lid]);
-//     }
-//   }
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     EXPECT_GE(to_task[tlid], min_endpoints);
-//     EXPECT_LE(to_task[tlid], max_endpoints);
-//   }
-// };
+  if (num_ranks == 1 and task_count <= 1) {
+    EXPECT_EQ(edges.size(), 0);
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only2) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+  EXPECT_GE(message_count, task_count * min_endpoints);
+  EXPECT_LE(message_count, task_count * max_endpoints);
 
-//   std::mt19937 gen(147 * rank);
+  for (auto &e : edges) {
+    EXPECT_EQ(e.getToRank(), rank);
+    EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+    EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
 
-//   int min_tasks = 4;
-//   int max_tasks = 10;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
+    auto to_task_id = e.getTo();
+    EXPECT_GE(to_task_id, rank * max_tasks);
+    EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+  }
 
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
+  for (int tlid = 0; tlid < task_count; ++tlid) {
+    auto task_id = rank * max_tasks + tlid;
+    auto in_msgs = pd.getTaskMessagesIn(task_id);
+    EXPECT_GE(in_msgs, (std::size_t)min_endpoints);
+    EXPECT_LE(in_msgs, (std::size_t)max_endpoints);
+  }
+};
 
-//   int task_count = pd.getTasksMap().size();
+TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_out_only2) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   int max_endpoints = 4;
+  std::mt19937 gen(147 * rank);
 
-//   int min_weight = 1000;
-//   int max_weight = 20000;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
+  int min_tasks = 4;
+  int max_tasks = 10;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-//   double frac = 0.0;
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//   generateRankComm(
-//     pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac
-//   );
+  int task_count = pd.getTasksMap().size();
 
-//   auto &edges = pd.getCommunications();
+  int max_endpoints = 4;
 
-//   if (num_ranks == 1 and task_count <= 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
+  int min_weight = 1000;
+  int max_weight = 20000;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//   EXPECT_LE(edges.size(), task_count * max_endpoints);
+  double frac = 0.0;
 
-//   std::vector<int> from_task(task_count);
+  generateRankComm(
+    pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac
+  );
 
-//   for (auto &e : edges) {
-//     EXPECT_EQ(e.getFromRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//     auto from_task_id = e.getFrom();
-//     EXPECT_GE(from_task_id, rank * max_tasks);
-//     EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+  if (num_ranks == 1 and task_count <= 1) {
+    EXPECT_EQ(edges.size(), 0);
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-//     int from_lid = from_task_id - rank * max_tasks;
-//     if (from_lid >= 0 and from_lid < task_count) {
-//       ++(from_task[from_lid]);
-//     }
-//   }
+  EXPECT_LE(message_count, task_count * max_endpoints);
 
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     EXPECT_LE(from_task[tlid], max_endpoints);
-//   }
-// };
+  for (auto &e : edges) {
+    EXPECT_EQ(e.getFromRank(), rank);
+    EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+    EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
 
-// TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only2) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+    auto from_task_id = e.getFrom();
+    EXPECT_GE(from_task_id, rank * max_tasks);
+    EXPECT_LT(from_task_id, (rank + 1) * max_tasks);
+  }
 
-//   std::mt19937 gen(258 * rank);
+  for (int tlid = 0; tlid < task_count; ++tlid) {
+    auto task_id = rank * max_tasks + tlid;
+    auto out_msgs = pd.getTaskMessagesOut(task_id);
+    EXPECT_LE(out_msgs, (std::size_t)max_endpoints);
+  }
+};
 
-//   int min_tasks = 5;
-//   int max_tasks = 9;
-//   std::uniform_int_distribution<> dist(min_tasks, max_tasks);
+TYPED_TEST(TestGraphHelpers, test_generate_rank_comm_in_only2) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   generateTaskCountsPerRank(pd, gen, dist, max_tasks);
+  std::mt19937 gen(258 * rank);
 
-//   int task_count = pd.getTasksMap().size();
+  int min_tasks = 5;
+  int max_tasks = 9;
+  std::uniform_int_distribution<> dist(min_tasks, max_tasks);
 
-//   int max_endpoints = 7;
+  generateTaskCountsPerRank(pd, gen, dist, max_tasks);
 
-//   int min_weight = 70;
-//   int max_weight = 300;
-//   std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
+  int task_count = pd.getTasksMap().size();
 
-//   double frac = 1.0;
+  int max_endpoints = 7;
 
-//   generateRankComm(
-//     pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac
-//   );
+  int min_weight = 70;
+  int max_weight = 300;
+  std::uniform_int_distribution<> weight_dist(min_weight, max_weight);
 
-//   auto &edges = pd.getCommunications();
+  double frac = 1.0;
 
-//   if (num_ranks == 1 and task_count <= 1) {
-//     EXPECT_EQ(edges.size(), 0);
-//     return;
-//   }
+  generateRankComm(
+    pd, gen, max_endpoints, weight_dist, min_tasks, num_ranks, frac
+  );
 
-//   EXPECT_LE(edges.size(), task_count * max_endpoints);
+  auto &edges = pd.getCommunications();
+  auto message_count = pd.getCommunicationMessagesCount();
 
-//   std::vector<int> to_task(task_count);
+  if (num_ranks == 1 and task_count <= 1) {
+    EXPECT_EQ(edges.size(), 0);
+    EXPECT_EQ(message_count, 0);
+    return;
+  }
 
-//   for (auto &e : edges) {
-//     EXPECT_EQ(e.getToRank(), rank);
-//     EXPECT_GE(e.getVolume(), min_weight);
-//     EXPECT_LE(e.getVolume(), max_weight);
+  EXPECT_LE(message_count, task_count * max_endpoints);
 
-//     auto to_task_id = e.getTo();
-//     EXPECT_GE(to_task_id, rank * max_tasks);
-//     EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+  for (auto &e : edges) {
+    EXPECT_EQ(e.getToRank(), rank);
+    EXPECT_GE(e.getVolume(), min_weight * e.getNumMessages());
+    EXPECT_LE(e.getVolume(), max_weight * e.getNumMessages());
 
-//     int to_lid = to_task_id - rank * max_tasks;
-//     if (to_lid >= 0 and to_lid < task_count) {
-//       ++(to_task[to_lid]);
-//     }
-//   }
+    auto to_task_id = e.getTo();
+    EXPECT_GE(to_task_id, rank * max_tasks);
+    EXPECT_LT(to_task_id, (rank + 1) * max_tasks);
+  }
 
-//   for (int tlid = 0; tlid < task_count; ++tlid) {
-//     EXPECT_LE(to_task[tlid], max_endpoints);
-//   }
-// };
+  for (int tlid = 0; tlid < task_count; ++tlid) {
+    auto task_id = rank * max_tasks + tlid;
+    auto in_msgs = pd.getTaskMessagesIn(task_id);
+    EXPECT_LE(in_msgs, (std::size_t)max_endpoints);
+  }
+};
 
-// TYPED_TEST(TestGraphHelpers, test_generate_scale_rel) {
-//   std::mt19937 gen(123456);
+TYPED_TEST(TestGraphHelpers, test_generate_scale_rel) {
+  std::mt19937 gen(123456);
 
-//   int largest_max_allowed = 100;
-//   int smallest_max_allowed = 50;
-//   double min_as_frac_of_max = 0.3;
+  int largest_max_allowed = 100;
+  int smallest_max_allowed = 50;
+  double min_as_frac_of_max = 0.3;
 
-//   auto [max_chosen, min_chosen] = generateScaleRel(
-//     gen, largest_max_allowed, smallest_max_allowed, min_as_frac_of_max
-//   );
+  auto [max_chosen, min_chosen] = generateScaleRel(
+    gen, largest_max_allowed, smallest_max_allowed, min_as_frac_of_max
+  );
 
-//   EXPECT_GE(max_chosen, smallest_max_allowed);
-//   EXPECT_LE(max_chosen, largest_max_allowed);
-//   EXPECT_GE(min_chosen, static_cast<int>(max_chosen * min_as_frac_of_max));
-// };
+  EXPECT_GE(max_chosen, smallest_max_allowed);
+  EXPECT_LE(max_chosen, largest_max_allowed);
+  EXPECT_GE(min_chosen, static_cast<int>(max_chosen * min_as_frac_of_max));
+};
 
-// TYPED_TEST(TestGraphHelpers, test_generate_scale_abs) {
-//   std::mt19937 gen(123456);
+TYPED_TEST(TestGraphHelpers, test_generate_scale_abs) {
+  std::mt19937 gen(123456);
 
-//   int largest_max_allowed = 100;
-//   int smallest_max_allowed = 50;
-//   int min_allowed = 40;
+  int largest_max_allowed = 100;
+  int smallest_max_allowed = 50;
+  int min_allowed = 40;
 
-//   auto [max_chosen, min_chosen] = generateScaleAbs(
-//     gen, largest_max_allowed, smallest_max_allowed, min_allowed
-//   );
+  auto [max_chosen, min_chosen] = generateScaleAbs(
+    gen, largest_max_allowed, smallest_max_allowed, min_allowed
+  );
 
-//   EXPECT_GE(max_chosen, smallest_max_allowed);
-//   EXPECT_LE(max_chosen, largest_max_allowed);
-//   EXPECT_GE(min_chosen, min_allowed);
-// };
+  EXPECT_GE(max_chosen, smallest_max_allowed);
+  EXPECT_LE(max_chosen, largest_max_allowed);
+  EXPECT_GE(min_chosen, min_allowed);
+};
 
 void sanityCheckBlockMem(const vt_lb::model::PhaseData &pd) {
   // check block memory
@@ -1149,27 +1120,27 @@ TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_no_comm) {
   sanityCheckEdges(pd, include_comm, num_ranks);
 };
 
-// TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_with_comm) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_with_comm) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   int seed_same_across_ranks = 13;
-//   int seed_diff_each_rank = 35 * rank;
+  int seed_same_across_ranks = 13;
+  int seed_diff_each_rank = 35 * rank;
 
-//   bool uniform_shared_block_count = false;
-//   bool uniform_task_count = false;
-//   bool include_comm = true;
+  bool uniform_shared_block_count = false;
+  bool uniform_task_count = false;
+  bool include_comm = true;
 
-//   generateGraphWithSharedBlocks(
-//     pd, num_ranks, uniform_shared_block_count, uniform_task_count, include_comm,
-//     seed_same_across_ranks, seed_diff_each_rank
-//   );
+  generateGraphWithSharedBlocks(
+    pd, num_ranks, uniform_shared_block_count, uniform_task_count, include_comm,
+    seed_same_across_ranks, seed_diff_each_rank
+  );
 
-//   sanityCheckBlocks(pd, true);
-//   sanityCheckTasks(pd, true, uniform_task_count);
-//   sanityCheckEdges(pd, include_comm, num_ranks);
-// };
+  sanityCheckBlocks(pd, true);
+  sanityCheckTasks(pd, true, uniform_task_count);
+  sanityCheckEdges(pd, include_comm, num_ranks);
+};
 
 TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_no_comm_unib) {
   auto num_ranks = this->comm.numRanks();
@@ -1215,24 +1186,24 @@ TYPED_TEST(TestGraphHelpers, test_generate_graph_with_shared_blocks_no_comm_unib
   sanityCheckEdges(pd, include_comm, num_ranks);
 };
 
-// TYPED_TEST(TestGraphHelpers, test_generate_graph_without_shared_blocks_with_comm) {
-//   auto num_ranks = this->comm.numRanks();
-//   auto rank = this->comm.getRank();
-//   vt_lb::model::PhaseData pd(rank);
+TYPED_TEST(TestGraphHelpers, test_generate_graph_without_shared_blocks_with_comm) {
+  auto num_ranks = this->comm.numRanks();
+  auto rank = this->comm.getRank();
+  vt_lb::model::PhaseData pd(rank);
 
-//   int seed_same_across_ranks = 16;
-//   int seed_diff_each_rank = 38 * rank;
+  int seed_same_across_ranks = 16;
+  int seed_diff_each_rank = 38 * rank;
 
-//   bool uniform_task_count = false;
-//   bool include_comm = true;
+  bool uniform_task_count = false;
+  bool include_comm = true;
 
-//   generateGraphWithoutSharedBlocks(
-//     pd, num_ranks, uniform_task_count, include_comm, seed_same_across_ranks,
-//     seed_diff_each_rank
-//   );
+  generateGraphWithoutSharedBlocks(
+    pd, num_ranks, uniform_task_count, include_comm, seed_same_across_ranks,
+    seed_diff_each_rank
+  );
 
-//   sanityCheckTasks(pd, true, false);
-//   sanityCheckEdges(pd, include_comm, num_ranks);
-// };
+  sanityCheckTasks(pd, true, false);
+  sanityCheckEdges(pd, include_comm, num_ranks);
+};
 
 }}} // end namespace vt_lb::tests::unit
diff --git a/tests/unit/temperedlb/test_temperedlb.cc b/tests/unit/temperedlb/test_temperedlb.cc
new file mode 100644
index 0000000..2f6e3e6
--- /dev/null
+++ b/tests/unit/temperedlb/test_temperedlb.cc
@@ -0,0 +1,350 @@
+/*
+//@HEADER
+// *****************************************************************************
+//
+//                               test_temperedlb.cc
+//                 DARMA/vt-lb => Virtual Transport/Load Balancers
+//
+// Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC
+// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// * Neither the name of the copyright holder nor the names of its
+//   contributors may be used to endorse or promote products derived from this
+//   software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact darma@sandia.gov
+//
+// *****************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+#include <utility>
+#include <string>
+
+#include "test_parallel_harness.h"
+#include "test_helpers.h"
+#include "graph_helpers.h"
+
+#include <vt-lb/algo/temperedlb/temperedlb.h>
+
+namespace vt_lb::tests::unit {
+
+// Wrapper that zips a communicator type with a single integer seed
+template <typename CommT, int Seed>
+struct CommSeedPack {
+  using Comm = CommT;
+  static constexpr int seed = Seed;
+};
+
+// Helper: build TemperedLB, compute initial and final global distributions
+template <typename CommT>
+struct LbRunSummary {
+  std::unordered_map<int, std::vector<int>> initial;
+  std::unordered_map<int, std::vector<int>> final;
+};
+
+template <typename CommT>
+LbRunSummary<CommT> runTemperedLB(
+  CommT& comm,
+  vt_lb::algo::temperedlb::Configuration const& config,
+  vt_lb::model::PhaseData const& pd
+) {
+  vt_lb::algo::temperedlb::TemperedLB<CommT> lb(comm, config);
+  lb.inputData(std::make_unique<vt_lb::model::PhaseData>(pd));
+
+  auto initial_global = lb.getGlobalDistribution(pd.getTaskIds());
+  std::unordered_map<int, std::vector<int>> initial_int;
+  for (auto const& [r, vec] : initial_global) {
+    auto& out = initial_int[r];
+    out.reserve(vec.size());
+    for (auto tid : vec) {
+      out.push_back(static_cast<int>(tid));
+    }
+  }
+
+  auto local_after = lb.run();
+  (void)local_after; // not needed for summary
+
+  auto final_global = lb.getGlobalDistribution(local_after);
+  std::unordered_map<int, std::vector<int>> final_int;
+  for (auto const& [r, vec] : final_global) {
+    auto& out = final_int[r];
+    out.reserve(vec.size());
+    for (auto tid : vec) {
+      out.push_back(static_cast<int>(tid));
+    }
+  }
+
+  return {std::move(initial_int), std::move(final_int)};
+}
+
+// Typed fixture over zipped communicator+seeds; communicator type remains implicit
+template <typename Pack>
+struct TestTemperedLB: TestParallelHarness<typename Pack::Comm> {};
+
+TYPED_TEST_SUITE_P(TestTemperedLB);
+
+TYPED_TEST_P(TestTemperedLB, test_lb_no_comm_task_counts) {
+	auto num_ranks = this->comm.numRanks();
+	auto rank = this->comm.getRank();
+
+  SET_MIN_NUM_NODES_CONSTRAINT(2);
+
+  int seed = TypeParam::seed;
+  vt_lb::model::PhaseData pd(rank);
+
+  // Generate a random graph without shared blocks and without communication
+  bool uniform_task_count = false;
+  bool include_comm = false;
+  int seed_same_across_ranks = seed;
+  int seed_diff_each_rank = 9876 * rank + 1;
+
+  generateGraphWithoutSharedBlocks(
+    pd, num_ranks, uniform_task_count, include_comm,
+    seed_same_across_ranks, seed_diff_each_rank
+  );
+
+  // Sanity: no shared blocks or communications
+  EXPECT_EQ(pd.getSharedBlocksMap().size(), 0);
+  EXPECT_EQ(pd.getCommunications().size(), 0);
+
+  // Build LB once and get initial/final global distributions
+  vt_lb::algo::temperedlb::Configuration config(num_ranks);
+  auto summary = runTemperedLB(this->comm, config, pd);
+
+  // Compute initial global task count
+  auto const& initial_global = summary.initial;
+  std::size_t initial_total = 0;
+  for (auto const& [_r, vec] : initial_global) {
+    initial_total += vec.size();
+  }
+
+  // Validate the global task count is preserved
+  auto const& final_global = summary.final;
+  std::size_t final_total = 0;
+  for (auto const& [_r, vec] : final_global) {
+    final_total += vec.size();
+  }
+  EXPECT_EQ(final_total, initial_total);
+};
+
+TYPED_TEST_P(TestTemperedLB, test_significant_load_imbalance_reduction) {
+	auto num_ranks = this->comm.numRanks();
+	auto rank = this->comm.getRank();
+
+  SET_MIN_NUM_NODES_CONSTRAINT(2);
+
+  int seed = TypeParam::seed;
+  vt_lb::model::PhaseData pd(rank);
+
+  // Generate random tasks without shared blocks and without communication
+  bool uniform_task_count = false;
+  bool include_comm = false;
+  int seed_same_across_ranks = seed;
+  int seed_diff_each_rank = 13579 * rank + 3;
+
+  generateGraphWithoutSharedBlocks(
+    pd, num_ranks, uniform_task_count, include_comm,
+    seed_same_across_ranks, seed_diff_each_rank
+  );
+
+  // Introduce strong load imbalance: amplify loads on rank 0
+  if (rank == 0) {
+    for (auto tid : pd.getTaskIds()) {
+      auto t = pd.getTask(tid);
+      t->setLoad(t->getLoad() * 50.0);
+    }
+  }
+
+  // Compute local total load before balancing
+  double local_before = 0.0;
+  for (auto const& [tid, task] : pd.getTasksMap()) {
+    local_before += task.getLoad();
+  }
+
+  // Compute global before stats (max and avg) via communicator handle
+  struct CollectiveDummy { } dummy;
+  auto handle = this->comm.template registerInstanceCollective<CollectiveDummy>(&dummy);
+
+  double global_before_max = 0.0;
+  handle.reduce(0, MPI_DOUBLE, MPI_MAX, &local_before, &global_before_max, 1);
+  handle.broadcast(0, MPI_DOUBLE, &global_before_max, 1);
+
+  double global_before_sum = 0.0;
+  handle.reduce(0, MPI_DOUBLE, MPI_SUM, &local_before, &global_before_sum, 1);
+  handle.broadcast(0, MPI_DOUBLE, &global_before_sum, 1);
+  double global_before_avg = global_before_sum / static_cast<double>(num_ranks);
+
+  // Build LB once and get initial/final global distributions
+  vt_lb::algo::temperedlb::Configuration config(num_ranks);
+  auto summary = runTemperedLB(this->comm, config, pd);
+
+  // Capture initial global task distribution for preservation checks
+  auto const& initial_global = summary.initial;
+  std::size_t initial_total = 0;
+  std::unordered_set<int> initial_task_set;
+  for (auto const& [r, vec] : initial_global) {
+    initial_total += vec.size();
+    for (auto tid : vec) initial_task_set.insert(static_cast<int>(tid));
+  }
+
+  // Gather global mapping of tasks per rank after
+  auto const& final_global = summary.final;
+  std::size_t final_total = 0;
+  std::unordered_set<int> final_task_set;
+  for (auto const& [r, vec] : final_global) {
+    final_total += vec.size();
+    for (auto tid : vec) {
+      final_task_set.insert(static_cast<int>(tid));
+    }
+  }
+
+  // Build a global mapping of task loads: allgather ids and loads via communicator
+  std::vector<int> local_ids;
+  std::vector<double> local_loads;
+  local_ids.reserve(pd.getTasksMap().size());
+  local_loads.reserve(pd.getTasksMap().size());
+  for (auto const& [tid, task] : pd.getTasksMap()) {
+    local_ids.push_back(static_cast<int>(tid));
+    local_loads.push_back(task.getLoad());
+  }
+
+  auto ids_by_rank = handle.allgather(local_ids.data(), static_cast<int>(local_ids.size()));
+  auto loads_by_rank = handle.allgather(local_loads.data(), static_cast<int>(local_loads.size()));
+
+  // Build lookup for task id -> load
+  std::unordered_map<int, double> id_to_load;
+  for (auto const& [r, vec] : ids_by_rank) {
+    auto const& loads_vec = loads_by_rank[r];
+    for (std::size_t i = 0; i < vec.size(); ++i) {
+      id_to_load.emplace(vec[i], loads_vec[i]);
+    }
+  }
+
+  // Compute local after load: sum loads for tasks assigned to this rank
+  double local_after = 0.0;
+  auto it = final_global.find(rank);
+  if (it != final_global.end()) {
+    for (auto tid : it->second) {
+      local_after += id_to_load[static_cast<int>(tid)];
+    }
+  }
+
+  // Compute global after stats (max and avg) via communicator handle
+  double global_after_max = 0.0;
+  handle.reduce(0, MPI_DOUBLE, MPI_MAX, &local_after, &global_after_max, 1);
+  handle.broadcast(0, MPI_DOUBLE, &global_after_max, 1);
+
+  double global_after_sum = 0.0;
+  handle.reduce(0, MPI_DOUBLE, MPI_SUM, &local_after, &global_after_sum, 1);
+  handle.broadcast(0, MPI_DOUBLE, &global_after_sum, 1);
+  double global_after_avg = global_after_sum / static_cast<double>(num_ranks);
+
+  // Expect improvement: global max work decreases after balancing
+  EXPECT_GT(global_before_max, global_before_avg);
+  EXPECT_LT(global_after_max, global_before_max);
+
+  // Tasks should be preserved globally: same total and same set of task IDs
+  EXPECT_EQ(final_total, initial_total);
+  EXPECT_EQ(final_task_set.size(), initial_task_set.size());
+  // Verify the sets are identical
+  for (auto tid : initial_task_set) {
+    EXPECT_TRUE(final_task_set.count(tid) == 1);
+  }
+
+  // Average work should remain nearly constant (loads are redistributed)
+  EXPECT_NEAR(global_after_avg, global_before_avg, 1e-9);
+
+  // Compute imbalance metric I = max/avg - 1 and check it approaches zero
+  double I_before = 0.0;
+  if (global_before_avg > 0.0) {
+    I_before = (global_before_max / global_before_avg) - 1.0;
+  }
+  double I_after = 0.0;
+  if (global_after_avg > 0.0) {
+    I_after = (global_after_max / global_after_avg) - 1.0;
+  }
+  EXPECT_GT(I_before, 0.0);
+  EXPECT_LT(I_after, I_before);
+  EXPECT_NEAR(I_after, 0.0, 0.02);
+
+  // fmt::print(
+  //   "Rank {}: before max={}, avg={}, I={:.4f}; after max={}, avg={}, I={:.4f}\n",
+  //   rank, global_before_max, global_before_avg, I_before,
+  //   global_after_max, global_after_avg, I_after
+  // );
+};
+
+REGISTER_TYPED_TEST_SUITE_P(
+  TestTemperedLB, test_lb_no_comm_task_counts, test_significant_load_imbalance_reduction
+);
+
+// Zip communicator type list with an integer seed sequence
+template <typename TypesList, typename IntSeq>
+struct ZipCommWithSeeds;
+
+template <typename... CommTs, int... Seeds>
+struct ZipCommWithSeeds<::testing::Types<CommTs...>, std::integer_sequence<int, Seeds...>> {
+  template <typename CommT>
+  struct PacksForComm {
+    using type = ::testing::Types<CommSeedPack<CommT, Seeds>...>;
+  };
+
+  template <typename... Lists>
+  struct ConcatTypes;
+
+  template <typename... Ts>
+  struct ConcatTypes<::testing::Types<Ts...>> {
+    using type = ::testing::Types<Ts...>;
+  };
+
+  template <typename... Ts, typename... Us, typename... Rest>
+  struct ConcatTypes<::testing::Types<Ts...>, ::testing::Types<Us...>, Rest...> {
+    using type = typename ConcatTypes<::testing::Types<Ts..., Us...>, Rest...>::type;
+  };
+
+  using type = typename ConcatTypes<typename PacksForComm<CommTs>::type...>::type;
+};
+
+using SeedSeq = std::integer_sequence<int, 1, 17, 12345, 24680, 808017424>;
+using CommSeedTypesForTesting = typename ZipCommWithSeeds<CommTypesForTesting, SeedSeq>::type;
+
+// Name generator that appends the seed to the base communicator name
+struct SeededCommNameGenerator {
+  template <typename Pack>
+  static std::string GetName(int i) {
+    auto base = CommNameGenerator::template GetName<typename Pack::Comm>(i);
+    return base + std::string("_seed_") + std::to_string(Pack::seed);
+  }
+};
+
+INSTANTIATE_TYPED_TEST_SUITE_P(
+  ZippedSeeds, TestTemperedLB, CommSeedTypesForTesting, SeededCommNameGenerator
+);
+
+} /* end namespace vt_lb::tests::unit */