diff --git a/CMakeLists.txt b/CMakeLists.txt
index 91091ee..4052741 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,7 +23,10 @@ endif()
 
 # vt includes magistrate and we must have magistrate for the MPI backend
 if(NOT vt_FOUND)
+  set(vt_backend_feature_enabled 0) # no vt: cmake_config.h.in turns this into vt_backend_enabled 0
   find_package(magistrate REQUIRED)
+else()
+  set(vt_backend_feature_enabled 1) # vt found: code under `#if vt_backend_enabled` is compiled
 endif()
 
 set(FMT_LIBRARY fmt)
diff --git a/src/vt-lb/algo/baselb/baselb.h b/src/vt-lb/algo/baselb/baselb.h
index 13c1e90..004b8ee 100644
--- a/src/vt-lb/algo/baselb/baselb.h
+++ b/src/vt-lb/algo/baselb/baselb.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_BASELB_BASELB_H
 #define INCLUDED_VT_LB_ALGO_BASELB_BASELB_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <memory>
 #include <vt-lb/model/PhaseData.h>
 
diff --git a/src/vt-lb/algo/driver/driver.h b/src/vt-lb/algo/driver/driver.h
index ebe92ab..45b0357 100644
--- a/src/vt-lb/algo/driver/driver.h
+++ b/src/vt-lb/algo/driver/driver.h
@@ -44,7 +44,9 @@
 #if !defined INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_H
 #define INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_H
 
-#include "vt-lb/model/PhaseData.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/model/PhaseData.h>
 
 #include <memory>
 
diff --git a/src/vt-lb/algo/driver/driver.impl.h b/src/vt-lb/algo/driver/driver.impl.h
index 83b63ef..83913ab 100644
--- a/src/vt-lb/algo/driver/driver.impl.h
+++ b/src/vt-lb/algo/driver/driver.impl.h
@@ -44,8 +44,10 @@
 #if !defined INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_IMPL_H
 #define INCLUDED_VT_LB_ALGO_DRIVER_DRIVER_IMPL_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/algo/temperedlb/temperedlb.h>
-#include "vt-lb/algo/driver/driver.h"
+#include <vt-lb/algo/driver/driver.h>
 
 namespace vt_lb {
 
diff --git a/src/vt-lb/algo/temperedlb/basic_transfer.h b/src/vt-lb/algo/temperedlb/basic_transfer.h
index a2ec045..c36186e 100644
--- a/src/vt-lb/algo/temperedlb/basic_transfer.h
+++ b/src/vt-lb/algo/temperedlb/basic_transfer.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_BASIC_TRANSFER_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_BASIC_TRANSFER_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/types.h>
 #include <vt-lb/comm/comm_traits.h>
 #include <vt-lb/util/logging.h>
diff --git a/src/vt-lb/algo/temperedlb/cluster_summarizer.h b/src/vt-lb/algo/temperedlb/cluster_summarizer.h
index 8607269..8530dae 100644
--- a/src/vt-lb/algo/temperedlb/cluster_summarizer.h
+++ b/src/vt-lb/algo/temperedlb/cluster_summarizer.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Communication.h>
 #include <vt-lb/algo/temperedlb/configuration.h>
diff --git a/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h b/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h
index 7cf7615..1cf2bca 100644
--- a/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h
+++ b/src/vt-lb/algo/temperedlb/cluster_summarizer.impl.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_IMPL_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTER_SUMMARIZER_IMPL_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Communication.h>
 #include <vt-lb/algo/temperedlb/cluster_summarizer.h>
diff --git a/src/vt-lb/algo/temperedlb/clustering.h b/src/vt-lb/algo/temperedlb/clustering.h
index dfed885..0778764 100644
--- a/src/vt-lb/algo/temperedlb/clustering.h
+++ b/src/vt-lb/algo/temperedlb/clustering.h
@@ -43,6 +43,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTERING_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CLUSTERING_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/util/logging.h>
 
diff --git a/src/vt-lb/algo/temperedlb/configuration.h b/src/vt-lb/algo/temperedlb/configuration.h
index d2c28cf..e9dcf8e 100644
--- a/src/vt-lb/algo/temperedlb/configuration.h
+++ b/src/vt-lb/algo/temperedlb/configuration.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_CONFIGURATION_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_CONFIGURATION_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/algo/temperedlb/work_model.h>
 #include <vt-lb/algo/temperedlb/transfer_util.h>
 
diff --git a/src/vt-lb/algo/temperedlb/full_graph_visualizer.h b/src/vt-lb/algo/temperedlb/full_graph_visualizer.h
index d1ddb75..452a697 100644
--- a/src/vt-lb/algo/temperedlb/full_graph_visualizer.h
+++ b/src/vt-lb/algo/temperedlb/full_graph_visualizer.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_FULL_GRAPH_VISUALIZER_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_FULL_GRAPH_VISUALIZER_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Communication.h>
 #include <vt-lb/algo/temperedlb/clustering.h>
diff --git a/src/vt-lb/algo/temperedlb/info_propagation.h b/src/vt-lb/algo/temperedlb/info_propagation.h
index 81be7b7..d294e59 100644
--- a/src/vt-lb/algo/temperedlb/info_propagation.h
+++ b/src/vt-lb/algo/temperedlb/info_propagation.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_INFO_PROPAGATION_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_INFO_PROPAGATION_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <random>
 #include <unordered_map>
 #include <unordered_set>
diff --git a/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h b/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h
index 21cd4b9..e698fe7 100644
--- a/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h
+++ b/src/vt-lb/algo/temperedlb/relaxed_cluster_transfer.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_RELAXED_CLUSTER_TRANSFER_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_RELAXED_CLUSTER_TRANSFER_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/algo/temperedlb/transfer.h>
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/algo/temperedlb/statistics.h>
diff --git a/src/vt-lb/algo/temperedlb/statistics.h b/src/vt-lb/algo/temperedlb/statistics.h
index b753f54..6be1c37 100644
--- a/src/vt-lb/algo/temperedlb/statistics.h
+++ b/src/vt-lb/algo/temperedlb/statistics.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_STATISTICS_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_STATISTICS_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <limits>
 
 namespace vt_lb::algo::temperedlb {
diff --git a/src/vt-lb/algo/temperedlb/symmetrize_comm.h b/src/vt-lb/algo/temperedlb/symmetrize_comm.h
index 2e077f5..e44bd6b 100644
--- a/src/vt-lb/algo/temperedlb/symmetrize_comm.h
+++ b/src/vt-lb/algo/temperedlb/symmetrize_comm.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_SYMMETRIZE_COMM_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_SYMMETRIZE_COMM_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Communication.h>
 
diff --git a/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h b/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h
index 6a0fbee..838c20a 100644
--- a/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h
+++ b/src/vt-lb/algo/temperedlb/task_cluster_summary_info.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TASK_CLUSTER_SUMMARY_INFO_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TASK_CLUSTER_SUMMARY_INFO_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Communication.h>
 
diff --git a/src/vt-lb/algo/temperedlb/temperedlb.h b/src/vt-lb/algo/temperedlb/temperedlb.h
index 54a01bd..40c754b 100644
--- a/src/vt-lb/algo/temperedlb/temperedlb.h
+++ b/src/vt-lb/algo/temperedlb/temperedlb.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TEMPEREDLB_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TEMPEREDLB_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/comm/comm_traits.h>
 #include <vt-lb/algo/baselb/baselb.h>
 
diff --git a/src/vt-lb/algo/temperedlb/transfer.h b/src/vt-lb/algo/temperedlb/transfer.h
index 4e4ffa3..98f925b 100644
--- a/src/vt-lb/algo/temperedlb/transfer.h
+++ b/src/vt-lb/algo/temperedlb/transfer.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/types.h>
 #include <vt-lb/comm/comm_traits.h>
 #include <vt-lb/util/logging.h>
diff --git a/src/vt-lb/algo/temperedlb/transfer_util.h b/src/vt-lb/algo/temperedlb/transfer_util.h
index 3b48795..b5b2ea7 100644
--- a/src/vt-lb/algo/temperedlb/transfer_util.h
+++ b/src/vt-lb/algo/temperedlb/transfer_util.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_UTIL_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_TRANSFER_UTIL_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/types.h>
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/algo/temperedlb/task_cluster_summary_info.h>
diff --git a/src/vt-lb/algo/temperedlb/visualize.h b/src/vt-lb/algo/temperedlb/visualize.h
index 3d2ef72..08acd20 100644
--- a/src/vt-lb/algo/temperedlb/visualize.h
+++ b/src/vt-lb/algo/temperedlb/visualize.h
@@ -1,8 +1,10 @@
 #pragma once
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Communication.h>
-#include "clustering.h"
+#include <vt-lb/algo/temperedlb/clustering.h>
 
 #include <sstream>
 #include <iomanip>
diff --git a/src/vt-lb/algo/temperedlb/work_model.h b/src/vt-lb/algo/temperedlb/work_model.h
index 88c6b44..a2cf9f4 100644
--- a/src/vt-lb/algo/temperedlb/work_model.h
+++ b/src/vt-lb/algo/temperedlb/work_model.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_ALGO_TEMPEREDLB_WORK_MODEL_H
 #define INCLUDED_VT_LB_ALGO_TEMPEREDLB_WORK_MODEL_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt-lb/model/PhaseData.h>
 #include <vt-lb/model/Task.h>
 #include <vt-lb/algo/temperedlb/task_cluster_summary_info.h>
diff --git a/src/vt-lb/comm/MPI/class_handle.h b/src/vt-lb/comm/MPI/class_handle.h
index 3899732..d80d7a0 100644
--- a/src/vt-lb/comm/MPI/class_handle.h
+++ b/src/vt-lb/comm/MPI/class_handle.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_COMM_CLASS_HANDLE_H
 #define INCLUDED_VT_LB_COMM_CLASS_HANDLE_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <mpi.h>
 
 namespace vt_lb::comm {
diff --git a/src/vt-lb/comm/MPI/class_handle.impl.h b/src/vt-lb/comm/MPI/class_handle.impl.h
index fdb25c4..3d93832 100644
--- a/src/vt-lb/comm/MPI/class_handle.impl.h
+++ b/src/vt-lb/comm/MPI/class_handle.impl.h
@@ -44,8 +44,10 @@
 #if !defined INCLUDED_VT_LB_COMM_CLASS_HANDLE_IMPL_H
 #define INCLUDED_VT_LB_COMM_CLASS_HANDLE_IMPL_H
 
-#include "class_handle.h"
-#include "vt-lb/comm/MPI/comm_mpi.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/comm/MPI/class_handle.h>
+#include <vt-lb/comm/MPI/comm_mpi.h>
 
 namespace vt_lb::comm {
 
diff --git a/src/vt-lb/comm/MPI/comm_mpi.h b/src/vt-lb/comm/MPI/comm_mpi.h
index f599fb5..e46d245 100644
--- a/src/vt-lb/comm/MPI/comm_mpi.h
+++ b/src/vt-lb/comm/MPI/comm_mpi.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_COMM_COMM_MPI_H
 #define INCLUDED_VT_LB_COMM_COMM_MPI_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <cstdint>
 #include <memory>
 #include <tuple>
diff --git a/src/vt-lb/comm/MPI/comm_mpi_detail.h b/src/vt-lb/comm/MPI/comm_mpi_detail.h
index aeaf062..256b03c 100644
--- a/src/vt-lb/comm/MPI/comm_mpi_detail.h
+++ b/src/vt-lb/comm/MPI/comm_mpi_detail.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_COMM_COMM_MPI_DETAIL_H
 #define INCLUDED_VT_LB_COMM_COMM_MPI_DETAIL_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vector>
 
 namespace vt_lb::comm::detail {
diff --git a/src/vt-lb/comm/MPI/termination.h b/src/vt-lb/comm/MPI/termination.h
index 113bf66..ffa1f86 100644
--- a/src/vt-lb/comm/MPI/termination.h
+++ b/src/vt-lb/comm/MPI/termination.h
@@ -44,7 +44,9 @@
 #if !defined INCLUDED_VT_LB_COMM_TERMINATION_H
 #define INCLUDED_VT_LB_COMM_TERMINATION_H
 
-#include "vt-lb/comm/MPI/class_handle.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/comm/MPI/class_handle.h>
 
 #include <cstdint>
 #include <memory>
diff --git a/src/vt-lb/comm/comm_traits.h b/src/vt-lb/comm/comm_traits.h
index 5a543ed..e0103d5 100644
--- a/src/vt-lb/comm/comm_traits.h
+++ b/src/vt-lb/comm/comm_traits.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_COMM_COMM_TRAITS_H
 #define INCLUDED_VT_LB_COMM_COMM_TRAITS_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <cstddef>
 #include <type_traits>
 #include <utility>
diff --git a/src/vt-lb/comm/vt/comm_vt.h b/src/vt-lb/comm/vt/comm_vt.h
index 3e0d4eb..735c4e3 100644
--- a/src/vt-lb/comm/vt/comm_vt.h
+++ b/src/vt-lb/comm/vt/comm_vt.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_COMM_COMM_VT_H
 #define INCLUDED_VT_LB_COMM_COMM_VT_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #if vt_backend_enabled
 
 #include <vt/configs/types/types_type.h>
diff --git a/src/vt-lb/comm/vt/comm_vt.impl.h b/src/vt-lb/comm/vt/comm_vt.impl.h
index 7174571..32ceb69 100644
--- a/src/vt-lb/comm/vt/comm_vt.impl.h
+++ b/src/vt-lb/comm/vt/comm_vt.impl.h
@@ -44,8 +44,10 @@
 #if !defined INCLUDED_VT_LB_COMM_COMM_VT_IMPL_H
 #define INCLUDED_VT_LB_COMM_COMM_VT_IMPL_H
 
-#include "vt-lb/comm/vt/comm_vt.h"
-#include "vt-lb/comm/vt/proxy_wrapper.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/comm/vt/comm_vt.h>
+#include <vt-lb/comm/vt/proxy_wrapper.h>
 #include <vt/transport.h>
 
 namespace vt_lb::comm {
diff --git a/src/vt-lb/comm/vt/proxy_wrapper.h b/src/vt-lb/comm/vt/proxy_wrapper.h
index 31f1927..f20a022 100644
--- a/src/vt-lb/comm/vt/proxy_wrapper.h
+++ b/src/vt-lb/comm/vt/proxy_wrapper.h
@@ -44,6 +44,8 @@
 #ifndef INCLUDED_VT_LB_COMM_PROXY_WRAPPER_H
 #define INCLUDED_VT_LB_COMM_PROXY_WRAPPER_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <atomic>
 #include <vt/transport.h>
 #include <mpi.h>
diff --git a/src/vt-lb/comm/vt/proxy_wrapper.impl.h b/src/vt-lb/comm/vt/proxy_wrapper.impl.h
index e3547e0..85ba630 100644
--- a/src/vt-lb/comm/vt/proxy_wrapper.impl.h
+++ b/src/vt-lb/comm/vt/proxy_wrapper.impl.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_COMM_PROXY_WRAPPER_IMPL_H
 #define INCLUDED_VT_LB_COMM_PROXY_WRAPPER_IMPL_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <vt/transport.h>
 
 #include <cstring>
diff --git a/src/vt-lb/config/cmake_config.h.in b/src/vt-lb/config/cmake_config.h.in
index ae5086a..2e90feb 100644
--- a/src/vt-lb/config/cmake_config.h.in
+++ b/src/vt-lb/config/cmake_config.h.in
@@ -41,4 +41,4 @@
 //@HEADER
 */
 
-#define vt_backend_enabled                   @vt_backend_enabled@
\ No newline at end of file
+#define vt_backend_enabled                   @vt_backend_feature_enabled@
diff --git a/src/vt-lb/model/Communication.h b/src/vt-lb/model/Communication.h
index c68a012..783e958 100644
--- a/src/vt-lb/model/Communication.h
+++ b/src/vt-lb/model/Communication.h
@@ -44,7 +44,9 @@
 #if !defined INCLUDED_VT_LB_MODEL_COMMUNICATION_H
 #define INCLUDED_VT_LB_MODEL_COMMUNICATION_H
 
-#include "types.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/model/types.h>
 
 namespace vt_lb::model {
 
diff --git a/src/vt-lb/model/PhaseData.h b/src/vt-lb/model/PhaseData.h
index 65ba7c4..a8c0e48 100644
--- a/src/vt-lb/model/PhaseData.h
+++ b/src/vt-lb/model/PhaseData.h
@@ -44,10 +44,12 @@
 #if !defined INCLUDED_VT_LB_MODEL_PHASE_DATA_H
 #define INCLUDED_VT_LB_MODEL_PHASE_DATA_H
 
-#include "types.h"
-#include "SharedBlock.h"
-#include "Task.h"
-#include "Communication.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/model/types.h>
+#include <vt-lb/model/SharedBlock.h>
+#include <vt-lb/model/Task.h>
+#include <vt-lb/model/Communication.h>
 
 #include <unordered_map>
 #include <vector>
diff --git a/src/vt-lb/model/SharedBlock.h b/src/vt-lb/model/SharedBlock.h
index ecb71ef..5ee7ff2 100644
--- a/src/vt-lb/model/SharedBlock.h
+++ b/src/vt-lb/model/SharedBlock.h
@@ -44,8 +44,10 @@
 #if !defined INCLUDED_VT_LB_MODEL_BLOCK_H
 #define INCLUDED_VT_LB_MODEL_BLOCK_H
 
-#include "types.h"
-#include "Task.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/model/types.h>
+#include <vt-lb/model/Task.h>
 
 namespace vt_lb::model {
 
diff --git a/src/vt-lb/model/Task.h b/src/vt-lb/model/Task.h
index 1d29691..f84df09 100644
--- a/src/vt-lb/model/Task.h
+++ b/src/vt-lb/model/Task.h
@@ -44,7 +44,9 @@
 #if !defined INCLUDED_VT_LB_MODEL_TASK_H
 #define INCLUDED_VT_LB_MODEL_TASK_H
 
-#include "types.h"
+#include <vt-lb/config/cmake_config.h>
+
+#include <vt-lb/model/types.h>
 
 #include <unordered_set>
 
@@ -75,6 +77,10 @@ struct TaskMemory {
 
 struct Task {
   Task() = default;
+  Task(TaskType id, LoadType load) // lightweight ctor: only the task ID and its load are supplied
+    : id_(id), load_(load) // NOTE(review): remaining members presumably keep their in-class defaults - confirm
+  {}
+
   Task(TaskType id, RankType home, RankType current, bool migratable,
         TaskMemory const& memory, LoadType load)
     : id_(id),
diff --git a/src/vt-lb/model/types.h b/src/vt-lb/model/types.h
index 025a03c..5fe5226 100644
--- a/src/vt-lb/model/types.h
+++ b/src/vt-lb/model/types.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_MODEL_TYPES_H
 #define INCLUDED_VT_LB_MODEL_TYPES_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <cstdint>
 #include <functional>
 
diff --git a/src/vt-lb/util/logging.h b/src/vt-lb/util/logging.h
index b260240..5015ea5 100644
--- a/src/vt-lb/util/logging.h
+++ b/src/vt-lb/util/logging.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_UTIL_LOGGING_H
 #define INCLUDED_VT_LB_UTIL_LOGGING_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #include <fmt-lb/format.h>
 #include <string_view>
 #include <utility>
diff --git a/tests/unit/graph_helpers.h b/tests/unit/graph_helpers.h
index b5de54f..aa25905 100644
--- a/tests/unit/graph_helpers.h
+++ b/tests/unit/graph_helpers.h
@@ -304,10 +304,87 @@ void generateIntraRankComm(
       }
     }
     double bytes = weight_per_edge_dist(gen);
+    bytes = std::max(bytes, 1.0);
     pd.addCommunication(Edge{from, to, bytes, rank, rank});
   }
 }
 
+/**
+ * \brief Generate intra-rank communications on this rank by linking each
+ * task to successors on a ring formed from the sorted local task IDs
+ *
+ * \note Edges only go forward, at ring distances within 1..K_max where K_max
+ * is capped at floor((N-1)/2). Guarantees: no self-edges, at most one edge per
+ * task pair. Nothing is added if N <= 1 or either neighbor bound is <= 0.
+ *
+ * @param pd The PhaseData for this rank
+ * @param gen The seeded generator for this rank
+ * @param min_neighbors Minimum number of neighbors per task
+ * @param max_neighbors Maximum number of neighbors per task
+ * @param weight_per_edge_dist Random edge weights distribution (floored at 1)
+ * @param make_random_neighbors If true, randomly select neighbors rather than
+ *        using the closest neighbors in the ring
+ */
+template <typename WeightPerEdgeDistType>
+void generateIntraRankCommRing(
+  vt_lb::model::PhaseData& pd,
+  std::mt19937 &gen,
+  int min_neighbors,
+  int max_neighbors,
+  WeightPerEdgeDistType &weight_per_edge_dist,
+  bool make_random_neighbors
+) {
+  using namespace vt_lb::model;
+
+  int const rank = pd.getRank();
+  assert(rank != invalid_node);
+
+  auto local_ids_set = pd.getTaskIds();
+  std::vector<TaskType> local_ids(
+    local_ids_set.begin(), local_ids_set.end()
+  );
+  std::size_t const N = local_ids.size();
+  if (N <= 1 || max_neighbors <= 0 || min_neighbors <= 0) return;
+
+  assert(max_neighbors >= min_neighbors && "max_neighbors >= min_neighbors");
+
+  // Sort IDs so "distance" is by TaskType ordering, then build a ring
+  std::sort(local_ids.begin(), local_ids.end());
+
+  // To avoid duplicates when adding only forward neighbors,
+  // clamp K to floor((N-1)/2). This ensures each undirected pair is added once.
+  int const half_cap = static_cast<int>((N - 1) / 2);
+  int const K_max = std::min(max_neighbors, half_cap);
+  if (K_max == 0) return;
+
+  // Bounding K_min by K_max equals clamping it to half_cap when min <= max,
+  // and keeps the distribution's a <= b precondition if the assert is off.
+  int const K_min = std::min(min_neighbors, K_max);
+  std::uniform_int_distribution<> K_dist(K_min, K_max);
+
+  // The candidate distances 1..K_max do not depend on the task: build once
+  std::vector<int> ring_distances;
+  for (int d = 1; d <= K_max; ++d) {
+    ring_distances.push_back(d);
+  }
+  std::vector<int> neighbor_distances;
+  for (std::size_t i = 0; i < N; ++i) {
+    int const K = K_dist(gen);
+    neighbor_distances = ring_distances; // start from the ordered list
+    if (make_random_neighbors) {
+      std::shuffle(neighbor_distances.begin(), neighbor_distances.end(), gen);
+    }
+    // Every distance is >= 1 (no self-edge); clamped K_max (no duplicate pair)
+    for (int k = 0; k < K; ++k) {
+      auto const dist = static_cast<std::size_t>(neighbor_distances[k]);
+      TaskType from = local_ids[i];
+      TaskType to   = local_ids[(i + dist) % N];
+      double bytes = std::max(weight_per_edge_dist(gen), 1.0);
+      pd.addCommunication(Edge{from, to, bytes, rank, rank});
+    }
+  }
+}
+
 /**
  * Generate random inter-rank communications on each rank
  *
@@ -328,7 +405,7 @@ void generateInterRankComm(
 ) {
   using namespace vt_lb::model;
 
-  if (num_ranks == 1) {
+  if (num_ranks == 1 or min_tasks_per_rank == 0) {
     return;
   }
 
@@ -359,6 +436,7 @@ void generateInterRankComm(
     while ((remote_rank = remote_rank_dist(gen)) == rank) {}
     TaskType to = remote_task_dist(gen);
     double bytes = weight_per_edge_dist(gen);
+    bytes = std::max(bytes, 1.0);
     pd.addCommunication(Edge{from, to, bytes, rank, remote_rank});
   }
   for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) {
@@ -367,6 +445,7 @@ void generateInterRankComm(
     while ((remote_rank = remote_rank_dist(gen)) == rank) {}
     TaskType from = remote_task_dist(gen);
     double bytes = weight_per_edge_dist(gen);
+    bytes = std::max(bytes, 1.0);
     pd.addCommunication(Edge{from, to, bytes, remote_rank, rank});
   }
 }
@@ -428,6 +507,7 @@ void generateRankComm(
       while ((to = remote_task_dist(gen)) == from) {}
     }
     double bytes = weight_per_edge_dist(gen);
+    bytes = std::max(bytes, 1.0);
     pd.addCommunication(Edge{from, to, bytes, rank, remote_rank});
   }
   for (std::size_t e = from_edge_count; e < local_endpoints.size(); ++e) {
@@ -441,6 +521,7 @@ void generateRankComm(
       while ((from = remote_task_dist(gen)) == to) {}
     }
     double bytes = weight_per_edge_dist(gen);
+    bytes = std::max(bytes, 1.0);
     pd.addCommunication(Edge{from, to, bytes, remote_rank, rank});
   }
 }
@@ -697,7 +778,7 @@ void generateGraphWithSharedBlocks(
   );
 
   if (include_comm) {
-    double edge_weight_lambda = 1000.0, locally_gen_in_edge_frac = 0.5;
+    double edge_weight_lambda = 1000000.0, locally_gen_in_edge_frac = 0.5;
     int max_endpoints = 4;
 
     std::exponential_distribution<> edge_weight_dist(edge_weight_lambda);
diff --git a/tests/unit/test_config.h b/tests/unit/test_config.h
index 6a244ec..9167be7 100644
--- a/tests/unit/test_config.h
+++ b/tests/unit/test_config.h
@@ -44,6 +44,8 @@
 #if !defined INCLUDED_VT_LB_UNIT_TEST_CONFIG_H
 #define INCLUDED_VT_LB_UNIT_TEST_CONFIG_H
 
+#include <vt-lb/config/cmake_config.h>
+
 #define DEBUG_TEST_HARNESS_PRINT 0
 
 // Stub "value" to when not supplying a trailing macro variadic argument.
diff --git a/tests/unit/work_model/test_work_model.cc b/tests/unit/work_model/test_work_model.cc
new file mode 100644
index 0000000..a212536
--- /dev/null
+++ b/tests/unit/work_model/test_work_model.cc
@@ -0,0 +1,869 @@
+/*
+//@HEADER
+// *****************************************************************************
+//
+//                                 test_work_model.cc
+//                 DARMA/vt-lb => Virtual Transport/Load Balancers
+//
+// Copyright 2019-2024 National Technology & Engineering Solutions of Sandia, LLC
+// (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S.
+// Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+//   this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// * Neither the name of the copyright holder nor the names of its
+//   contributors may be used to endorse or promote products derived from this
+//   software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact darma@sandia.gov
+//
+// *****************************************************************************
+//@HEADER
+*/
+
+#include <gtest/gtest.h>
+
+#include "test_parallel_harness.h"
+#include "test_helpers.h"
+#include "graph_helpers.h"
+
+#include <vt-lb/algo/temperedlb/work_model.h>
+#include <vt-lb/algo/temperedlb/configuration.h>
+#include <vt-lb/model/PhaseData.h>
+#include <vt-lb/algo/temperedlb/symmetrize_comm.h>
+
+namespace vt_lb { namespace tests { namespace unit {
+
+template <comm::Communicator CommType>
+struct TestWorkModelBasic : TestParallelHarness<CommType> {
+  static constexpr int num_seeds = 100; // NOTE(review): presumably seeds tried per test - confirm
+  // Fixture helpers for the WorkModel tests; this first group fills wm/cfg/pd
+  void setupRandomNonzeroWorkModel(
+    std::mt19937 &gen, algo::temperedlb::WorkModel &wm
+  );
+  void setupNoMemoryInfo(algo::temperedlb::Configuration &cfg);
+  void setupRandomTaskMemory(std::mt19937 &gen, model::PhaseData &pd);
+  void setupUniformTaskMemory(
+    std::mt19937 &gen, model::PhaseData &pd, double working_mem,
+    double footprint_mem, double serialized_mem
+  );
+  // Problem builders; the ones defined in view return the expected total work
+  double setupRandomUniformLoadOnlyNoMemProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  double setupRandomLoadOnlyNoMemProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  double setupRandomUniformSharedBlocksProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  double setupRandomSharedBlocksProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  double setupRandomLoadAndIntraCommNoMemProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  double setupRandomLoadAndInterCommNoMemProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  double setupRandomLoadAndMixedCommNoMemProblem(
+    std::mt19937 &gen, model::PhaseData &pd,
+    algo::temperedlb::Configuration &cfg,
+    algo::temperedlb::WorkBreakdown &expected_bd,
+    algo::temperedlb::WorkModel &wm
+  );
+  // Checker taking a built problem; NOTE(review): definition not in view here
+  void verifyNoChangeUpdate(
+    const model::PhaseData &pd, const algo::temperedlb::Configuration &cfg,
+    const algo::temperedlb::WorkBreakdown &expected_bd,
+    const algo::temperedlb::WorkModel &wm, double expected_work
+  );
+};
+
+template <comm::Communicator CommType>
+void TestWorkModelBasic<CommType>::setupRandomNonzeroWorkModel(
+  std::mt19937 &gen, algo::temperedlb::WorkModel &wm
+) {
+  std::uniform_real_distribution<double> unit_draw{0.0, 1.0};
+  wm.rank_alpha = unit_draw(gen) * 2.0; // first draw, doubled
+  wm.beta = unit_draw(gen);             // second draw
+  wm.gamma = unit_draw(gen);            // third draw
+  wm.delta = unit_draw(gen);            // fourth draw
+}
+
+template <comm::Communicator CommType>
+void TestWorkModelBasic<CommType>::setupNoMemoryInfo(
+  algo::temperedlb::Configuration &cfg
+) {
+  auto &model_flags = cfg.work_model_;
+  model_flags.has_memory_info = false; // the global memory switch is turned off
+  model_flags.has_shared_block_memory_info = true; // left on: the switch above must override these
+  model_flags.has_task_footprint_memory_info = true;
+  model_flags.has_task_working_memory_info = true;
+  model_flags.has_task_serialized_memory_info = true;
+}
+
+template <comm::Communicator CommType>
+void TestWorkModelBasic<CommType>::setupRandomTaskMemory(
+  std::mt19937 &gen, model::PhaseData &pd
+) {
+  std::exponential_distribution<> expo_dist(1.0 / 1000.0); // argument is the rate: mean draw is 1000, so the int casts below are not always 0
+  int smem = static_cast<int>(expo_dist(gen)); // serialized size (5th generateTaskMemory argument)
+  int fmem = smem + static_cast<int>(expo_dist(gen)); // footprint, never below the serialized size
+  generateTaskMemory(pd, gen, expo_dist, fmem, smem); // expo_dist is also handed over as the working-memory distribution
+}
+
+template <comm::Communicator CommType>
+void TestWorkModelBasic<CommType>::setupUniformTaskMemory(
+  std::mt19937 &gen, model::PhaseData &pd,
+  double working_mem, double footprint_mem, double serialized_mem
+) {
+  std::uniform_real_distribution<double> wmem_dist{working_mem, working_mem};
+  generateTaskMemory(pd, gen, wmem_dist, footprint_mem, serialized_mem); // wmem_dist spans the degenerate range [w, w]
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomUniformLoadOnlyNoMemProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are drawn at random, but the problem built below has
+  // only task load, so compute is the single term that can contribute
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Memory info is switched off in cfg although tasks get memory further down
+  setupNoMemoryInfo(cfg);
+
+  // Tasks only (no shared blocks, no communication): one load value is drawn
+  // for this rank and then assigned to every one of its tasks
+  int fewest_tasks = 0;
+  int most_tasks = 100;
+  std::exponential_distribution<> load_picker(100.0);
+  double const rank_load = load_picker(gen);
+  std::uniform_real_distribution<> constant_load(rank_load, rank_load);
+  generateTasksWithoutSharedBlocks(
+    pd, gen, fewest_tasks, most_tasks, constant_load
+  );
+
+  // Attach random task memory; with memory info off it must not matter
+  setupRandomTaskMemory(gen, pd);
+
+  // Expected breakdown: (number of tasks) x (shared load), all else zero
+  auto const num_tasks = pd.getTasksMap().size();
+  expected_bd.compute = num_tasks * rank_load;
+  expected_bd.intra_node_send_comm = 0.0;
+  expected_bd.intra_node_recv_comm = 0.0;
+  expected_bd.inter_node_send_comm = 0.0;
+  expected_bd.inter_node_recv_comm = 0.0;
+  expected_bd.shared_mem_comm = 0.0;
+  expected_bd.memory_breakdown = {0.0, 0.0, 0.0};
+
+  // Expected work: the compute term scaled by the rank alpha coefficient
+  return expected_bd.compute * wm.rank_alpha;
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomLoadOnlyNoMemProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are drawn at random, but the problem built below has
+  // only task load, so compute is the single term that can contribute
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Memory info is switched off in cfg although tasks get memory further down
+  setupNoMemoryInfo(cfg);
+
+  // Tasks only (no shared blocks, no communication): the generator receives
+  // an exponential load distribution (rate 1000) instead of a constant load
+  int fewest_tasks = 0;
+  int most_tasks = 100;
+  std::exponential_distribution<> task_load_dist(1000.0);
+  generateTasksWithoutSharedBlocks(
+    pd, gen, fewest_tasks, most_tasks, task_load_dist
+  );
+
+  // Attach random task memory; with memory info off it must not matter
+  setupRandomTaskMemory(gen, pd);
+
+  // Expected breakdown: compute is the sum of the loads actually generated,
+  // read back from the tasks stored in pd; every other component is zero
+  double load_sum = 0.0;
+  for (auto &entry : pd.getTasksMap()) {
+    load_sum += entry.second.getLoad();
+  }
+  expected_bd.compute = load_sum;
+  expected_bd.intra_node_send_comm = 0.0;
+  expected_bd.intra_node_recv_comm = 0.0;
+  expected_bd.inter_node_send_comm = 0.0;
+  expected_bd.inter_node_recv_comm = 0.0;
+  expected_bd.shared_mem_comm = 0.0;
+  expected_bd.memory_breakdown = {0.0, 0.0, 0.0};
+
+  // Expected work: the compute term scaled by the rank alpha coefficient;
+  // nothing else is added because the remaining components are all zero
+  return expected_bd.compute * wm.rank_alpha;
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomUniformSharedBlocksProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are non-zero; this problem has load and memory but no communication
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Consider all types of memory
+  cfg.work_model_.has_memory_info = true;
+  cfg.work_model_.has_shared_block_memory_info = true;
+  cfg.work_model_.has_task_footprint_memory_info = true;
+  cfg.work_model_.has_task_working_memory_info = true;
+  cfg.work_model_.has_task_serialized_memory_info = true;
+
+  // Generate graph: shared blocks with min == max size and a degenerate load distribution
+  // (every task gets uniform_load); there will be no communication
+  int min_blocks_per_rank = 0;
+  int max_blocks_per_rank = 100;
+  double expo_lambda = 1000000.0;  // rate of expo_dist, i.e. its mean is 1/expo_lambda
+  std::exponential_distribution<> expo_dist(expo_lambda);
+  int uniform_mem = static_cast<int>(expo_dist(gen));  // NOTE(review): likely truncates to 0 - confirm
+  int min_tasks_per_block = 1;
+  int max_tasks_per_block = 10;
+  double uniform_load = expo_dist(gen);
+  std::uniform_real_distribution<> load_dist(uniform_load, uniform_load);
+  generateSharedBlocksWithTasks(
+    pd, gen, min_blocks_per_rank, max_blocks_per_rank, uniform_mem, uniform_mem,
+    min_tasks_per_block, max_tasks_per_block, load_dist
+  );
+
+  auto num_tasks = pd.getTasksMap().size();
+  std::uniform_int_distribution<> uni_dist(100, 1000);
+  double working_mem = 0.0;
+  double serialized_mem = 0.0;
+  if (num_tasks > 0) {  // task memory sizes stay 0 when there are no tasks
+    working_mem = uni_dist(gen);
+    serialized_mem = uni_dist(gen);
+  }
+  double footprint_mem = serialized_mem * 2.0;
+  setupUniformTaskMemory(gen, pd, working_mem, footprint_mem, serialized_mem);
+
+  // Expected memory: block bytes + per-task footprints + one working + one serialized size
+  double expected_block_mem =
+    static_cast<double>(uniform_mem) * pd.getSharedBlocksMap().size();
+  double expected_task_mem =
+    footprint_mem * num_tasks + working_mem + serialized_mem;
+  expected_bd.compute = num_tasks * uniform_load;
+  expected_bd.inter_node_recv_comm = 0.0;
+  expected_bd.inter_node_send_comm = 0.0;
+  expected_bd.intra_node_recv_comm = 0.0;
+  expected_bd.intra_node_send_comm = 0.0;
+  expected_bd.shared_mem_comm = 0.0;  // all at home
+  expected_bd.memory_breakdown = {
+    expected_block_mem + expected_task_mem, working_mem, serialized_mem
+  };
+
+  // Expected work: compute term plus the shared-memory term (which is zero here)
+  double expected_work =
+    expected_bd.compute * wm.rank_alpha +
+    expected_bd.shared_mem_comm * wm.delta;
+
+  return expected_work;
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomSharedBlocksProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are non-zero; this problem has load and shared blocks but no communication
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Consider all types of memory even though only shared blocks have it
+  cfg.work_model_.has_memory_info = true;
+  cfg.work_model_.has_shared_block_memory_info = true;
+  cfg.work_model_.has_task_footprint_memory_info = true;
+  cfg.work_model_.has_task_working_memory_info = true;
+  cfg.work_model_.has_task_serialized_memory_info = true;
+
+  // Generate graph: shared blocks with a [min_mem, max_mem] size range and an exponential
+  // load distribution for their tasks; there will be no communication
+  int min_blocks_per_rank = 0;
+  int max_blocks_per_rank = 100;
+  double expo_lambda = 10000000.0;  // rate of expo_dist, i.e. its mean is 1/expo_lambda
+  std::exponential_distribution<> expo_dist(expo_lambda);
+  int max_mem = static_cast<int>(expo_dist(gen));  // NOTE(review): likely truncates to 0 - confirm
+  int min_mem = max_mem / 2;
+  int min_tasks_per_block = 1;
+  int max_tasks_per_block = 10;
+  std::exponential_distribution<> load_dist(expo_lambda / 10);
+  generateSharedBlocksWithTasks(
+    pd, gen, min_blocks_per_rank, max_blocks_per_rank, min_mem, max_mem,
+    min_tasks_per_block, max_tasks_per_block, load_dist
+  );
+
+  // Expected breakdown: sum of all task loads and sum of all shared-block sizes
+  double expected_compute = 0.0;
+  auto &taskmap = pd.getTasksMap();
+  for (auto &t : taskmap) {
+    expected_compute += t.second.getLoad();
+  }
+  double expected_block_mem = 0.0;
+  auto &blockmap = pd.getSharedBlocksMap();
+  for (auto &b : blockmap) {
+    expected_block_mem += b.second.getSize();
+  }
+  expected_bd.compute = expected_compute;
+  expected_bd.inter_node_recv_comm = 0.0;
+  expected_bd.inter_node_send_comm = 0.0;
+  expected_bd.intra_node_recv_comm = 0.0;
+  expected_bd.intra_node_send_comm = 0.0;
+  expected_bd.shared_mem_comm = 0.0;  // all at home
+  expected_bd.memory_breakdown = {expected_block_mem, 0.0, 0.0};
+
+  // Expected work: compute term plus the shared-memory term (which is zero here)
+  double expected_work =
+    expected_bd.compute * wm.rank_alpha +
+    expected_bd.shared_mem_comm * wm.delta;
+
+  return expected_work;
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomLoadAndIntraCommNoMemProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are non-zero; this problem has load and intra-rank communication
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Do not consider memory even though we will define some
+  setupNoMemoryInfo(cfg);
+
+  // Generate graph: the task generator is given an exponential load distribution;
+  // no shared blocks; intra-rank communication is added further below
+  int min_tasks_per_rank = 0;
+  int max_tasks_per_rank = 100;
+  double expo_lambda = 5000.0;
+  std::exponential_distribution<> load_dist(expo_lambda);
+  generateTasksWithoutSharedBlocks(
+    pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist
+  );
+
+  // Define random task memory even though we will not use it
+  setupRandomTaskMemory(gen, pd);
+
+  std::uniform_int_distribution<> eps_per_task_dist(0, 10);
+  std::exponential_distribution<> weight_per_edge_dist(100.0);
+  generateIntraRankComm(
+    pd, gen, eps_per_task_dist, weight_per_edge_dist
+  );
+
+  // Expected breakdown: every edge volume counts as both an intra-node send and receive
+  double expected_compute = 0.0;
+  auto &taskmap = pd.getTasksMap();
+  for (auto &t : taskmap) {
+    expected_compute += t.second.getLoad();
+  }
+  double expected_intra_recv = 0.0;
+  double expected_intra_send = 0.0;
+  auto &edges = pd.getCommunications();
+  for (auto &e : edges) {
+    expected_intra_recv += e.getVolume();
+    expected_intra_send += e.getVolume();
+  }
+  expected_bd.compute = expected_compute;
+  expected_bd.inter_node_recv_comm = 0.0;
+  expected_bd.inter_node_send_comm = 0.0;
+  expected_bd.intra_node_recv_comm = expected_intra_recv;
+  expected_bd.intra_node_send_comm = expected_intra_send;
+  expected_bd.shared_mem_comm = 0.0;
+  expected_bd.memory_breakdown = {0.0, 0.0, 0.0};
+
+  // Expected work: compute term plus the larger intra-node direction scaled by gamma
+  double expected_work =
+    expected_bd.compute * wm.rank_alpha +
+    std::max(
+     expected_bd.intra_node_recv_comm, expected_bd.intra_node_send_comm
+    ) * wm.gamma;
+  return expected_work;
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomLoadAndInterCommNoMemProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are non-zero; this problem has load and inter-rank communication
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Do not consider memory even though we will define some
+  setupNoMemoryInfo(cfg);
+
+  // Generate graph: the task generator is given a uniform real load distribution over
+  // [1, 1000); no shared blocks; inter-rank communication is added further below
+  int min_tasks_per_rank = 20;
+  int max_tasks_per_rank = 50;
+  std::uniform_real_distribution<> load_dist(1.0, 1000.0);
+  generateTasksWithoutSharedBlocks(
+    pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist
+  );
+
+  // Define random task memory even though we will not use it
+  setupRandomTaskMemory(gen, pd);
+
+  std::uniform_int_distribution<> eps_per_task_dist(0, 10);
+  std::exponential_distribution<> weight_per_edge_dist(100.0);
+  generateInterRankComm(
+    pd, gen, eps_per_task_dist, weight_per_edge_dist, min_tasks_per_rank,
+    this->comm.numRanks(), 0.5
+  );
+
+  vt_lb::algo::temperedlb::CommunicationsSymmetrizer cs(this->comm, pd);
+  cs.run();  // NOTE(review): presumably makes each edge visible on both endpoint ranks - confirm
+
+  // Expected breakdown: an inter-rank edge is a receive if it targets this rank, else a send
+  double expected_compute = 0.0;
+  auto &taskmap = pd.getTasksMap();
+  for (auto &t : taskmap) {
+    expected_compute += t.second.getLoad();
+  }
+  auto rank = this->comm.getRank();
+  double expected_inter_recv = 0.0;
+  double expected_inter_send = 0.0;
+  auto &edges = pd.getCommunications();
+  for (auto &e : edges) {
+    if (e.getFromRank() != e.getToRank()) {
+      if (e.getToRank() == rank) {
+        expected_inter_recv += e.getVolume();
+      } else {
+        expected_inter_send += e.getVolume();
+      }
+    }
+  }
+  expected_bd.compute = expected_compute;
+  expected_bd.inter_node_recv_comm = expected_inter_recv;
+  expected_bd.inter_node_send_comm = expected_inter_send;
+  expected_bd.intra_node_recv_comm = 0.0;
+  expected_bd.intra_node_send_comm = 0.0;
+  expected_bd.shared_mem_comm = 0.0;
+  expected_bd.memory_breakdown = {0.0, 0.0, 0.0};
+
+  // Expected work: compute term plus the larger inter-node direction scaled by beta
+  double expected_work =
+    expected_bd.compute * wm.rank_alpha +
+    std::max(
+      expected_bd.inter_node_recv_comm, expected_bd.inter_node_send_comm
+    ) * wm.beta;
+  return expected_work;
+}
+
+template <comm::Communicator CommType>
+double TestWorkModelBasic<CommType>::setupRandomLoadAndMixedCommNoMemProblem(
+  std::mt19937 &gen, model::PhaseData &pd, algo::temperedlb::Configuration &cfg,
+  algo::temperedlb::WorkBreakdown &expected_bd, algo::temperedlb::WorkModel &wm
+) {
+  // All coefficients are non-zero; this problem has load plus intra- and inter-rank communication
+  setupRandomNonzeroWorkModel(gen, wm);
+
+  // Do not consider memory even though we will define some
+  setupNoMemoryInfo(cfg);
+
+  // Generate graph: the task generator is given a uniform real load distribution over
+  // [0.1, 50); no shared blocks; communication is added further below
+  int min_tasks_per_rank = 20;
+  int max_tasks_per_rank = 50;
+  std::uniform_real_distribution<> load_dist(0.1, 50.0);
+  generateTasksWithoutSharedBlocks(
+    pd, gen, min_tasks_per_rank, max_tasks_per_rank, load_dist
+  );
+
+  // Define random task memory even though we will not use it
+  setupRandomTaskMemory(gen, pd);
+
+  std::uniform_int_distribution<> eps_per_task_dist(0, 10);
+  std::exponential_distribution<> weight_per_edge_dist(1000.0);
+  generateRankComm(
+    pd, gen, eps_per_task_dist, weight_per_edge_dist, min_tasks_per_rank,
+    this->comm.numRanks(), 0.5
+  );
+
+  vt_lb::algo::temperedlb::CommunicationsSymmetrizer cs(this->comm, pd);
+  cs.run();  // NOTE(review): presumably makes each edge visible on both endpoint ranks - confirm
+
+  // Expected breakdown: same-rank edges count as intra send and receive; others are inter
+  double expected_compute = 0.0;
+  auto &taskmap = pd.getTasksMap();
+  for (auto &t : taskmap) {
+    expected_compute += t.second.getLoad();
+  }
+  auto rank = this->comm.getRank();
+  double expected_intra_recv = 0.0;
+  double expected_intra_send = 0.0;
+  double expected_inter_recv = 0.0;
+  double expected_inter_send = 0.0;
+  auto &edges = pd.getCommunications();
+  for (auto &e : edges) {
+    if (e.getFromRank() != e.getToRank()) {
+      if (e.getToRank() == rank) {
+        expected_inter_recv += e.getVolume();
+      } else {
+        expected_inter_send += e.getVolume();
+      }
+    } else {
+      expected_intra_recv += e.getVolume();
+      expected_intra_send += e.getVolume();
+    }
+  }
+  expected_bd.compute = expected_compute;
+  expected_bd.inter_node_recv_comm = expected_inter_recv;
+  expected_bd.inter_node_send_comm = expected_inter_send;
+  expected_bd.intra_node_recv_comm = expected_intra_recv;
+  expected_bd.intra_node_send_comm = expected_intra_send;
+  expected_bd.shared_mem_comm = 0.0;
+  expected_bd.memory_breakdown = {0.0, 0.0, 0.0};
+
+  // Expected work: compute term plus the larger direction of each comm kind (beta, gamma)
+  double expected_work =
+    expected_bd.compute * wm.rank_alpha +
+    std::max(
+      expected_bd.inter_node_recv_comm, expected_bd.inter_node_send_comm
+    ) * wm.beta +
+    std::max(
+      expected_bd.intra_node_recv_comm, expected_bd.intra_node_send_comm
+    ) * wm.gamma;
+  return expected_work;
+}
+
+template <comm::Communicator CommType>
+void TestWorkModelBasic<CommType>::verifyNoChangeUpdate(
+  const model::PhaseData &pd, const algo::temperedlb::Configuration &cfg,
+  const algo::temperedlb::WorkBreakdown &expected_bd,
+  const algo::temperedlb::WorkModel &wm, double expected_work
+) {
+  // Check breakdown and work for pd, then that an empty update leaves the work unchanged
+  using Calculator = algo::temperedlb::WorkModelCalculator;
+  auto actual = Calculator::computeWorkBreakdown(pd, cfg);
+
+  // Work breakdown terms: compute is compared as float, the others as double
+  EXPECT_FLOAT_EQ(actual.compute, expected_bd.compute);
+  EXPECT_DOUBLE_EQ(actual.inter_node_recv_comm, expected_bd.inter_node_recv_comm);
+  EXPECT_DOUBLE_EQ(actual.inter_node_send_comm, expected_bd.inter_node_send_comm);
+  EXPECT_DOUBLE_EQ(actual.intra_node_recv_comm, expected_bd.intra_node_recv_comm);
+  EXPECT_DOUBLE_EQ(actual.intra_node_send_comm, expected_bd.intra_node_send_comm);
+  EXPECT_DOUBLE_EQ(actual.shared_mem_comm, expected_bd.shared_mem_comm);
+
+  // Memory breakdown terms
+  auto &mem = actual.memory_breakdown;
+  auto &exp_mem = expected_bd.memory_breakdown;
+  EXPECT_DOUBLE_EQ(mem.current_memory_usage, exp_mem.current_memory_usage);
+  EXPECT_DOUBLE_EQ(
+    mem.current_max_task_working_bytes, exp_mem.current_max_task_working_bytes
+  );
+  EXPECT_DOUBLE_EQ(
+    mem.current_max_task_serialized_bytes, exp_mem.current_max_task_serialized_bytes
+  );
+
+  // Baseline work first; an update with empty add/remove lists must then reproduce it
+  double baseline = Calculator::computeWork(wm, actual);
+  EXPECT_FLOAT_EQ(baseline, expected_work);
+  std::vector<model::Task> no_added_tasks;
+  std::vector<model::Edge> no_added_edges;
+  std::vector<model::TaskType> no_removed_ids;
+  double after_update = Calculator::computeWorkUpdate(
+    pd, wm, actual, no_added_tasks, no_added_edges, no_removed_ids
+  );
+  EXPECT_DOUBLE_EQ(after_update, baseline);
+}
+
+TYPED_TEST_SUITE(TestWorkModelBasic, CommTypesForTesting, CommNameGenerator);
+
+TYPED_TEST(TestWorkModelBasic, compute_work_uses_max_comm_components) {
+  namespace tlb = algo::temperedlb;
+  tlb::WorkModel coeffs;
+  coeffs.rank_alpha = 2.0;
+  coeffs.beta = 1.0;
+  coeffs.gamma = 0.5;
+  coeffs.delta = 3.0;
+
+  // Send and receive volumes differ so that the max of each pair is observable
+  tlb::WorkBreakdown terms;
+  terms.compute = 10.0;
+  terms.inter_node_recv_comm = 5.0;
+  terms.inter_node_send_comm = 7.5;  // larger inter-node value
+  terms.intra_node_recv_comm = 1.0;
+  terms.intra_node_send_comm = 2.0;  // larger intra-node value
+  terms.shared_mem_comm = 3.0;
+
+  // 2*10 + 1*7.5 + 0.5*2 + 3*3 = 37.5
+  EXPECT_DOUBLE_EQ(tlb::WorkModelCalculator::computeWork(coeffs, terms), 37.5);
+}
+
+TYPED_TEST(TestWorkModelBasic, compute_memory_usage_rank_only) {
+  model::PhaseData data(this->comm.getRank());
+  data.setRankFootprintBytes(12345.0);
+  data.setRankMaxMemoryAvailable(999999.0);
+
+  // Memory info is on, but every task and shared-block contribution is switched off,
+  // so the rank footprint is the only thing expected in the reported usage
+  algo::temperedlb::Configuration conf;
+  auto &flags = conf.work_model_;
+  flags.has_memory_info = true;
+  flags.has_shared_block_memory_info = false;
+  flags.has_task_footprint_memory_info = false;
+  flags.has_task_working_memory_info = false;
+  flags.has_task_serialized_memory_info = false;
+
+  auto usage = algo::temperedlb::WorkModelCalculator::computeMemoryUsage(conf, data);
+  EXPECT_DOUBLE_EQ(usage.current_memory_usage, 12345.0);
+  EXPECT_DOUBLE_EQ(usage.current_max_task_working_bytes, 0.0);
+  EXPECT_DOUBLE_EQ(usage.current_max_task_serialized_bytes, 0.0);
+}
+
+TYPED_TEST(TestWorkModelBasic, check_memory_fit_basic) {
+  using Calculator = algo::temperedlb::WorkModelCalculator;
+
+  // Turn on memory info so that memory checks are enabled
+  algo::temperedlb::Configuration conf;
+  conf.work_model_.has_memory_info = true;
+
+  model::PhaseData data(this->comm.getRank());
+  data.setRankMaxMemoryAvailable(1000.0);
+
+  // Just below the limit, exactly at the limit, and just above the limit
+  EXPECT_TRUE(Calculator::checkMemoryFit(conf, data, 999.9));
+  EXPECT_TRUE(Calculator::checkMemoryFit(conf, data, 1000.0));
+  EXPECT_FALSE(Calculator::checkMemoryFit(conf, data, 1000.1));
+}
+
+TYPED_TEST(TestWorkModelBasic, compute_work_update_no_changes_is_identity) {
+  using Calculator = algo::temperedlb::WorkModelCalculator;
+  auto my_rank = this->comm.getRank();
+
+  // Every coefficient of the work model is non-zero
+  algo::temperedlb::WorkModel coeffs;
+  coeffs.rank_alpha = 0.5;
+  coeffs.beta = 0.2;
+  coeffs.gamma = 0.3;
+  coeffs.delta = 1.0;
+
+  // Memory info is on, but shared blocks are the only enabled memory contribution
+  algo::temperedlb::Configuration conf;
+  conf.work_model_.has_memory_info = true;
+  conf.work_model_.has_shared_block_memory_info = true;
+  conf.work_model_.has_task_footprint_memory_info = false;
+  conf.work_model_.has_task_working_memory_info = false;
+  conf.work_model_.has_task_serialized_memory_info = false;
+
+  // Two shared blocks: the first is homed on the next rank (wrapping around, so it
+  // is this rank again when there is only one rank), the second on this rank
+  model::PhaseData data(my_rank);
+  model::SharedBlock next_rank_block{1, 100.0, (my_rank + 1) % this->comm.numRanks()};
+  model::SharedBlock local_block{2, 50.0, my_rank};
+  data.addSharedBlock(next_rank_block);
+  data.addSharedBlock(local_block);
+
+  // The first task references both blocks
+  model::Task both_blocks_task{10, 11.0};
+  both_blocks_task.addSharedBlock(next_rank_block.getId());
+  both_blocks_task.addSharedBlock(local_block.getId());
+
+  // The second task has zero load and references only the first block
+  model::Task one_block_task{11, 0.0};
+  one_block_task.addSharedBlock(next_rank_block.getId());
+
+  data.addTask(both_blocks_task);
+  data.addTask(one_block_task);
+
+  // Baseline: breakdown of the phase data and the work derived from it
+  auto breakdown = Calculator::computeWorkBreakdown(data, conf);
+  double baseline = Calculator::computeWork(coeffs, breakdown);
+
+  // Nothing is added and nothing is removed
+  std::vector<model::Task> no_added_tasks;
+  std::vector<model::Edge> no_added_edges;
+  std::vector<model::TaskType> no_removed_ids;
+  double after_update = Calculator::computeWorkUpdate(
+    data, coeffs, breakdown, no_added_tasks, no_added_edges, no_removed_ids
+  );
+
+  EXPECT_DOUBLE_EQ(after_update, baseline);
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesUniformLoadOnlyNoMem) {
+  // Uniform-load problems without memory info, regenerated for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 89 + pass * 3);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomUniformLoadOnlyNoMemProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadOnlyNoMem) {
+  // Load-only problems without memory info, regenerated for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 21 + pass * 4);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomLoadOnlyNoMemProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesUniformSharedBlocks) {
+  // Uniform shared-block problems with memory info, regenerated for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 191 + pass * 5);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomUniformSharedBlocksProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesSharedBlocks) {
+  // Random shared-block problems with memory info, regenerated for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 88 + pass * 6);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomSharedBlocksProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadAndIntraComm) {
+  // Load plus intra-rank communication problems, regenerated for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 67 + pass * 7);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomLoadAndIntraCommNoMemProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadAndInterComm) {
+  // Load plus inter-rank communication problems, regenerated for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 42 + pass * 8);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomLoadAndInterCommNoMemProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+TYPED_TEST(TestWorkModelBasic, computeWorkUpdateNoChangesLoadAndMixedComm) {
+  // Load plus mixed intra-/inter-rank communication problems, for num_seeds seeds
+  auto my_rank = this->comm.getRank();
+
+  // Filled in by the setup helper; the same objects are handed to it on every pass
+  algo::temperedlb::WorkModel coeffs;
+  algo::temperedlb::WorkBreakdown want_bd;
+  algo::temperedlb::Configuration conf;
+
+  std::mt19937 rng;
+  for (int pass = 0; pass < this->num_seeds; ++pass) {
+    rng.seed(my_rank * 11 + pass * 9);
+    model::PhaseData data(my_rank);
+    double want_work = this->setupRandomLoadAndMixedCommNoMemProblem(
+      rng, data, conf, want_bd, coeffs
+    );
+    this->verifyNoChangeUpdate(data, conf, want_bd, coeffs, want_work);
+  }
+}
+
+}}} // end namespace vt_lb::tests::unit