From 4cc237c9b9940810c4c7921b7e532bb6fbef192c Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sat, 1 Mar 2025 11:43:19 -0500
Subject: [PATCH 01/73] working code

---
 src/pcms/interpolator/mls_interpolation.cpp   |  52 +++++-
 .../interpolator/mls_interpolation_impl.hpp   | 172 ++++++++++++++----
 .../pcms_interpolator_aliases.hpp             |   2 +-
 test/CMakeLists.txt                           |   3 +-
 test/test_rbf_interp.cpp                      |  24 ++-
 5 files changed, 209 insertions(+), 44 deletions(-)
diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 113986a1..114aeee0 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -11,12 +11,12 @@ struct RBF_GAUSSIAN
   double operator()(double r_sq, double rho_sq) const
   {
     double phi;
-    OMEGA_H_CHECK_PRINTF(rho_sq > 0,
+    OMEGA_H_CHECK_PRINTF(rho_sq >= 0,
                          "ERROR: square of cutoff distance should always be "
                          "positive but the value is %.16f\n",
                          rho_sq);
 
-    OMEGA_H_CHECK_PRINTF(r_sq > 0,
+    OMEGA_H_CHECK_PRINTF(r_sq >= 0,
                          "ERROR: square of  distance should always be positive "
                          "but the value is %.16f\n",
                          r_sq);
@@ -213,5 +213,53 @@ int calculate_scratch_shared_size(const SupportResults& support,
 
   return shared_size;
 }
+
+Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates,
+                            int dim = 2)
+{
+  int num_points = coordinates.size() / dim;
+
+  int coords_size = coordinates.size();
+
+  Write<Real> x_coordinates(num_points, 0, "x coordinates");
+
+  Write<Real> y_coordinates(num_points, 0, "y coordinates");
+
+  parallel_for(
+    "separates x and y coordinates", num_points, KOKKOS_LAMBDA(const int id) {
+      int index = id * dim;
+
+      x_coordinates[id] = coordinates[index];
+      y_coordinates[id] = coordinates[index + 1];
+    });
+
+  HostRead<Real> host_x(read(x_coordinates));
+  HostRead<Real> host_y(read(y_coordinates));
+
+  const auto min_x = Omega_h::get_min(read(x_coordinates));
+  const auto min_y = Omega_h::get_min(read(y_coordinates));
+
+  const auto max_x = Omega_h::get_max(read(x_coordinates));
+  const auto max_y = Omega_h::get_max(read(y_coordinates));
+
+  printf(" [min_x, max_x] : [%12.6f , %12.6f]\n", min_x, max_x);
+  printf(" [min_y, max_y] : [%12.6f , %12.6f]\n", min_y, max_y);
+  const auto del_x = max_x - min_x;
+  const auto del_y = max_y - min_y;
+
+  printf(" [delx, dely] : [%12.6f , %12.6f]\n", del_x, del_y);
+  Write<Real> normalized_coordinates(coords_size, 0,
+                                     "stores scaled coordinates");
+
+  parallel_for(
+    "scale coordinates", num_points, KOKKOS_LAMBDA(const int id) {
+      int index = id * dim;
+      normalized_coordinates[index] = (x_coordinates[id] - min_x) / del_x;
+      normalized_coordinates[index + 1] = (y_coordinates[id] - min_y) / del_y;
+    });
+
+  return read(normalized_coordinates);
+}
+
 } // namespace detail
 } // namespace pcms
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 5a79f64a..7bfab251 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -84,7 +84,21 @@ int calculate_basis_vector_size(const IntHostMatView& array);
  * @return The scratch size
  */
 int calculate_scratch_shared_size(const SupportResults& support,
-                                  const int nvertices_target, int basis_size);
+                                  const int ntargets, int basis_size);
+/**
+ *
+ * @brief Performs minmax normalization
+ *
+ * This function takes coordinates and dimension
+ * and outputs the normalized coordinates
+ *
+ * @param coordinates The Coordinates
+ * @param dim The Dimension
+ *
+ * @return normalized coordinates The normlaised coordinates
+ */
+Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates,
+                            int dim);
 
 /**
  * @brief Evaluates the polynomial basis
@@ -131,6 +145,49 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p,
   }
 }
 
+/**
+ *  @Brief Normalizes the support coordinates about the given target point
+ *
+ *  This function takes the pivot point Coord object
+ *  and support coordinates as input and gives normalized coordinates
+ *
+ *  @param[in] team The team member
+ *  @param[in, out] pivot The Coord object that stores the coordinate of the
+ *target pivot
+ *  @param[in,out] support_coordinates The orginal coordinates when in, when out
+ *  		normalized coordinates
+ **
+ */
+KOKKOS_INLINE_FUNCTION
+void normalize_coordinates(member_type team, Coord& pivot,
+                           ScratchMatView& support_coordinates)
+{
+  int nsupports = support_coordinates.extent(0);
+  int dim = support_coordinates.extent(1);
+
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int i) {
+    Real pivot_point[MAX_DIM];
+
+    pivot_point[0] = pivot.x;
+    pivot_point[1] = pivot.y;
+
+    if (dim == 3) {
+      pivot_point[2] = pivot.z;
+    }
+
+    for (int j = 0; j < dim; ++j) {
+      support_coordinates(i, j) -= pivot_point[j];
+    }
+  });
+
+  pivot.x = 0;
+  pivot.y = 0;
+
+  if (dim == 3) {
+    pivot.z = 0;
+  }
+}
+
 /**
  * @brief Creates a vandermonde matrix
  *
@@ -427,8 +484,26 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
   static_assert(!std::is_pointer_v<Func>,
                 "function pointer will fail in GPU execution context");
 
-  const auto nvertices_source = source_coordinates.size() / dim;
-  const auto nvertices_target = target_coordinates.size() / dim;
+  int nsources = source_coordinates.size() / dim;
+
+  OMEGA_H_CHECK_PRINTF(
+    source_values.size() == nsources,
+    "[ERROR] The size of the source values and source coordinates is not same. "
+    "The current sizes are :\n"
+    "source_values_size = %d, source_coordinates_size = %d\n",
+    source_values.size(), nsources);
+
+  const auto ntargets = target_coordinates.size() / dim;
+
+  OMEGA_H_CHECK_PRINTF(
+    approx_target_values.size() == ntargets,
+    "[ERROR] The size of the approx target values and the number of targets is "
+    "not same. The current numbers are :\n"
+    "approx_target_values = %d, ntargets = %d\n",
+    approx_target_values.size(), ntargets);
+
+  //  auto source_coordinates = min_max_normalization(src_coordinates, dim);
+  //  auto target_coordinates = min_max_normalization(tgt_coordinates, dim);
 
   IntHostMatView host_slice_length(
     "stores slice length of  polynomial basis in host", degree, dim);
@@ -447,9 +522,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
   Kokkos::deep_copy(slice_length, slice_length_hd);
 
   int shared_size =
-    calculate_scratch_shared_size(support, nvertices_target, basis_size);
+    calculate_scratch_shared_size(support, ntargets, basis_size);
 
-  team_policy tp(nvertices_target, Kokkos::AUTO);
+  team_policy tp(ntargets, Kokkos::AUTO);
 
   int scratch_size = tp.scratch_size_max(0);
 
@@ -464,25 +539,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       int end_ptr = support.supports_ptr[league_rank + 1];
       int nsupports = end_ptr - start_ptr;
 
+      // local_source_point stores the coordinates of source supports of a given
+      // target
       ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim);
-      int count = -1;
-      for (int j = start_ptr; j < end_ptr; ++j) {
-        count++;
-        auto index = support.supports_idx[j];
-        local_source_points(count, 0) = source_coordinates[index * dim];
-        local_source_points(count, 1) = source_coordinates[index * dim + 1];
-        if (dim == 3) {
-          local_source_points(count, 2) = source_coordinates[index * dim + 2];
-        }
-      }
-
-      //  vondermonde matrix P from the vectors of basis vector of supports
-      ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
-                                        basis_size);
 
       // rbf function values of source supports Phi(n,n)
       ScratchVecView phi_vector(team.team_scratch(0), nsupports);
 
+      //  vondermonde matrix P from the vectors of basis vector of supports
+      ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
+                                        basis_size);
       // stores known vector (b)
       ScratchVecView support_values(team.team_scratch(0), nsupports);
 
@@ -490,11 +556,24 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       ScratchVecView target_basis_vector(team.team_scratch(0), basis_size);
 
       // Initialize the scratch matrices and  vectors
+      fill(0.0, team, local_source_points);
       fill(0.0, team, vandermonde_matrix);
       fill(0.0, team, phi_vector);
       fill(0.0, team, support_values);
       fill(0.0, team, target_basis_vector);
 
+      // storing the coords of local supports
+      int count = -1;
+      for (int j = start_ptr; j < end_ptr; ++j) {
+        count++;
+        auto index = support.supports_idx[j];
+        local_source_points(count, 0) = source_coordinates[index * dim];
+        local_source_points(count, 1) = source_coordinates[index * dim + 1];
+        if (dim == 3) {
+          local_source_points(count, 2) = source_coordinates[index * dim + 2];
+        }
+      }
+
       // evaluates the basis vector of a given target point
       Coord target_point;
       target_point.x = target_coordinates[league_rank * dim];
@@ -503,6 +582,40 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       if (dim == 3) {
         target_point.z = target_coordinates[league_rank * dim + 2];
       }
+
+      /** phi(nsupports) is the array of rbf functions evaluated at the
+       * source supports In the actual implementation, Phi(nsupports, nsupports)
+       * is the diagonal matrix & each diagonal element is the phi evaluated at
+       * each source points
+       *
+       * step 1: evaluate the phi vector
+       */
+
+      Kokkos::parallel_for(
+        Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
+          compute_phi_vector(target_point, local_source_points, j,
+                             support.radii2[league_rank], rbf_func, phi_vector);
+        });
+
+      team.team_barrier();
+
+      /**
+       *
+       * the local_source_points is of the type ScratchMatView with
+       * coordinates information; row is number of local supports
+       * & column is dim
+       *
+       * step 2: normalize local source supports and target point
+       */
+
+      // normalize_coordinates(team, target_point, local_source_points);
+
+      // team.team_barrier();
+      /**
+       *
+       * this can evaluate monomial basis vector for any degree of polynomial
+       * step 3: call basis vector evaluation function (eval_basis_vector);
+       */
       eval_basis_vector(slice_length, target_point, target_basis_vector);
 
       /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
@@ -523,20 +636,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
         support.radii2[league_rank]);
 
-      /** phi(nsupports) is the array of rbf functions evaluated at the
-       * source supports In the actual implementation, Phi(nsupports, nsupports)
-       * is the diagonal matrix & each diagonal element is the phi evaluated at
-       * each source points
-       */
-
-      Kokkos::parallel_for(
-        Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
-          compute_phi_vector(target_point, local_source_points, j,
-                             support.radii2[league_rank], rbf_func, phi_vector);
-        });
-
-      team.team_barrier();
-
       /** support_values(nsupports) (or known rhs vector b) is the vector of the
        * quantity that we want interpolate
        */
@@ -595,9 +694,9 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const SupportResults& support, const LO& dim,
                               const LO& degree, Func rbf_func)
 {
-  const auto nvertices_source = source_coordinates.size() / dim;
+  const auto nsources = source_coordinates.size() / dim;
 
-  const auto nvertices_target = target_coordinates.size() / dim;
+  const auto ntargets = target_coordinates.size() / dim;
 
   RealConstDefaultScalarArrayView source_values_array_view(
     source_values.data(), source_values.size());
@@ -611,8 +710,7 @@ Write<Real> mls_interpolation(const Reals source_values,
   RealDefaultScalarArrayView radii2_array_view(support.radii2.data(),
                                                support.radii2.size());
 
-  Write<Real> interpolated_values(nvertices_target, 0,
-                                  "approximated target values");
+  Write<Real> interpolated_values(ntargets, 0, "approximated target values");
 
   RealDefaultScalarArrayView interpolated_values_array_view(
     interpolated_values.data(), interpolated_values.size());
diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp
index 06d83640..76b1bcd6 100644
--- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp
@@ -3,7 +3,7 @@
 
 #include <Kokkos_Core.hpp>
 #include "../arrays.h"
-
+#include <Omega_h_array_ops.hpp>
 namespace pcms
 {
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index a23262b2..c06cd337 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -111,6 +111,7 @@ if(Catch2_FOUND)
 	      test_mls_basis.cpp
 	      test_rbf_interp.cpp
 	      test_linear_solver.cpp
+		  test_normalisation.cpp
               )
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})
@@ -159,4 +160,4 @@ if(PCMS_ENABLE_Fortran)
 endif()
 
 add_executable(xgc_n0_server xgc_n0_coupling_server.cpp)
-target_link_libraries(xgc_n0_server PUBLIC pcms::core test_support)
\ No newline at end of file
+target_link_libraries(xgc_n0_server PUBLIC pcms::core test_support)
diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp
index 31413597..5040fa5e 100644
--- a/test/test_rbf_interp.cpp
+++ b/test/test_rbf_interp.cpp
@@ -63,7 +63,6 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
 
     int m = exact_target_values.size();
     int n = approx_target_values.size();
-
     REQUIRE(m == n);
 
     for (size_t i = 0; i < m; ++i) {
@@ -74,7 +73,7 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
   }
 }
 // Test cases for centroid to node mapping using MLS
-TEST_CASE("mls_interp_test")
+TEST_CASE("test_mls_interpolation")
 {
 
   auto lib = Library{};
@@ -131,7 +130,7 @@ TEST_CASE("mls_interp_test")
 
   SECTION("test interpolation degree 1, function degree 0")
   {
-
+    std::cout << "-------starting test: d0p1------------" << "\n";
     int degree = 1;
     LO min_num_supports = 10;
 
@@ -152,11 +151,15 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+
+    std::cout << " The test for d=0, p=1, passed " << "\n";
+    std::cout << "----------finishing  test: d0p1-------" << "\n";
   }
 
   SECTION("test interpolation degree 1, function degree 1")
   {
 
+    std::cout << "--------starting test: d1p1---------" << "\n";
     int degree = 1;
     LO min_num_supports = 10;
 
@@ -177,11 +180,13 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << "------------finishing test: d1p1--------" << "\n";
   }
 
   SECTION("test interpo degree 2 poly degree 0")
   {
 
+    std::cout << "-------------starting test: d0p2------------" << "\n";
     int degree = 2;
     LO min_num_supports = 16;
 
@@ -202,11 +207,13 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << "-------------finishing test: d0p2------------" << "\n";
   }
 
   SECTION("test interpolation degree 2 poly degree 1")
   {
 
+    std::cout << "----------------satrting test: d1p2--------------" << "\n";
     int degree = 2;
     LO min_num_supports = 16;
 
@@ -227,11 +234,13 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << "----------------finishing test: d1p2--------------" << "\n";
   }
 
   SECTION("test interpolation degree 2, function degree 2")
   {
 
+    std::cout << "-------------starting test: d2p2--------------------" << "\n";
     int degree = 2;
     LO min_num_supports = 16;
 
@@ -252,11 +261,14 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << "-------------finishing test: d2p2--------------------"
+              << "\n";
   }
 
   SECTION("test interpolation degree 3, function degree 2")
   {
 
+    std::cout << "-------------starting test: d2p3--------------------" << "\n";
     int degree = 3;
     LO min_num_supports = 20;
 
@@ -277,11 +289,14 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << "-------------finishing test: d2p3--------------------"
+              << "\n";
   }
 
   SECTION("test interpolation degree 3, function degree 3")
   {
 
+    std::cout << "-------------starting test: d3p3--------------------" << "\n";
     int degree = 3;
     LO min_num_supports = 20;
 
@@ -302,5 +317,8 @@ TEST_CASE("mls_interp_test")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+
+    std::cout << "-------------finishing test: d3p3--------------------"
+              << "\n";
   }
 }

From d5b2cc0d6c5398a203847da3df70b0ec15d32cba Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sat, 1 Mar 2025 11:44:38 -0500
Subject: [PATCH 02/73] add normalization routine

---
 test/test_normalisation.cpp | 202 ++++++++++++++++++++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100644 test/test_normalisation.cpp

diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp
new file mode 100644
index 00000000..41d8a442
--- /dev/null
+++ b/test/test_normalisation.cpp
@@ -0,0 +1,202 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <pcms/interpolator/mls_interpolation.hpp>
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
+#include <Omega_h_mesh.hpp>
+#include <Omega_h_build.hpp>
+#include <Omega_h_file.hpp>
+#include <Omega_h_library.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <vector>
+#include <iostream>
+#include <Omega_h_array_ops.hpp>
+using namespace std;
+using namespace Omega_h;
+using namespace pcms;
+
+Reals fillFromMatrix(const Matrix<2, 10>& matrix)
+{
+  int num_points = 10;
+  Write<Real> array(20, 0, "array to store coordinates");
+  parallel_for(
+    "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) {
+      int index = i * 2;
+
+      array[index] = matrix[i][0];
+      array[index + 1] = matrix[i][1];
+    });
+  return read(array);
+}
+
+TEST_CASE("test_normalization_routine")
+{
+
+  SECTION("check min max omega_h function")
+  {
+    Reals vector({0.45, 0.12, -2.04, 1.52, -0.22, 0.96, 5.6});
+
+    const auto min_value = get_min(vector);
+    const auto max_value = get_max(vector);
+
+    printf("min value : %f , max_value : %f", min_value, max_value);
+    REQUIRE(min_value == -2.04);
+    REQUIRE(max_value == 5.6);
+
+  } // end SECTION
+
+  SECTION("test_minmax_normalization")
+  {
+
+    printf("-------------the test for min_max starts-----------\n");
+
+    Real tolerance = 1E-5;
+    int num_points = 10;
+
+    Omega_h::Matrix<2, 10> matrix{{0.45, 3.18},
+                                  {0.12, -1.2},
+                                  {-2.04, -1.45},
+                                  {
+                                    1.52,
+                                    -0.98,
+                                  },
+                                  {-0.22, 1.45},
+                                  {0.96, 3.62},
+                                  {-0.26, -1.62},
+                                  {0.82, 4.22},
+                                  {5.6, 3.62},
+                                  {1.2, 1.2}};
+
+    auto coordinates = fillFromMatrix(matrix);
+
+    RealConstDefaultScalarArrayView coordinates_scalar_array_view(
+      coordinates.data(), 20);
+
+    auto results = min_max_normalization(coordinates_scalar_array_view, 2);
+
+    printf("results after normalization");
+    HostRead<Real> host_results(results);
+
+    double expected_results[10][2] = {
+      {0.325916, 0.821918}, {0.282723, 0.071918}, {0.000000, 0.029110},
+      {0.465969, 0.109589}, {0.238220, 0.525685}, {0.392670, 0.897260},
+      {0.232984, 0.000000}, {0.374346, 1.000000}, {1.000000, 0.897260},
+      {0.424084, 0.482877}};
+
+    printf("<scaled coordinates>\n");
+
+    for (int i = 0; i < 20; ++i) {
+      printf("%12.6f\n", host_results[i]);
+    }
+
+    for (int i = 0; i < 10; ++i) {
+
+      int index = i * 2;
+
+      CAPTURE(i, host_results[index], expected_results[i][0], tolerance);
+      CHECK_THAT(host_results[index],
+                 Catch::Matchers::WithinAbs(expected_results[i][0], tolerance));
+
+      CAPTURE(i, host_results[index + 1], expected_results[i][1], tolerance);
+      CHECK_THAT(host_results[index + 1],
+                 Catch::Matchers::WithinAbs(expected_results[i][1], tolerance));
+    }
+
+  } // end SECTION
+
+  SECTION("test_normalization_relative_distance")
+  {
+    Real tolerance = 1E-5;
+    int nvertices_target = 3;
+
+    Omega_h::Matrix<3, 3> target_coordinates{
+      {0.5, 2.0, 1.4}, {0.1, 1.2, 0.3}, {0.9, 2.8, 2.2}};
+
+    Omega_h::Matrix<3, 6> supports_target_1{
+      {0.45, 3.18, 0.38}, {0.12, 1.2, 1.0},  {2.04, 1.45, 3.2},
+      {1.52, 0.98, 3.2},  {0.22, 1.45, 3.2}, {0.96, 3.62, 2.2}};
+
+    Omega_h::Matrix<3, 8> supports_target_2{
+      {0.88, 0.98, -0.8}, {1.2, 0.23, -1.21}, {-1.23, 2.85, 1.45},
+      {0.63, 0.98, 0.2},  {-0.5, 1.45, 0.33}, {1.4, 2.61, -0.98},
+      {0.88, 1.72, 0.38}, {1.2, 0.50, 2.1}};
+
+    Omega_h::Matrix<3, 4> supports_target_3{{3.15, 3.12, 2.91},
+                                            {2.24, 1.12, 1.12},
+                                            {0.12, 4.29, 1.98},
+                                            {1.24, 1.54, 4.2}};
+
+    LOs nsupports({6, 8, 4}, "number of supports");
+    int dim = 3;
+
+    auto total_coordinates = Omega_h::get_sum(nsupports) * dim;
+
+    REQUIRE(total_coordinates == 54);
+    Write<Real> normalized_support_coordinates(
+      total_coordinates, 0, "coordinates after normalization");
+    Write<Real> all_support_coordinates(total_coordinates, 0,
+                                        "support coordinates all");
+    Write<Real> all_target_coordinates(total_coordinates, 0,
+                                       "target coordinates all");
+
+    team_policy tp(nvertices_target, Kokkos::AUTO);
+    Kokkos::parallel_for(
+      "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        int league_rank = team.league_rank();
+
+        int num_supports = nsupports[league_rank];
+        ScratchMatView local_supports(team.team_scratch(0), num_supports, dim);
+        detail::fill(0, team, local_supports);
+
+        Coord target_point;
+        target_point.x = target_coordinates[league_rank][0];
+        target_point.y = target_coordinates[league_rank][1];
+        target_point.z = target_coordinates[league_rank][2];
+
+        Kokkos::parallel_for(
+          Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
+            for (int j = 0; j < dim; ++j) {
+              int index = league_rank * num_supports * dim + i * dim + j;
+
+              all_target_coordinates[index] =
+                target_coordinates[league_rank][j];
+              if (league_rank == 0) {
+                local_supports(i, j) = supports_target_1[i][j];
+                all_support_coordinates[index] = supports_target_1[i][j];
+              } else if (league_rank == 1) {
+                local_supports(i, j) = supports_target_2[i][j];
+                all_support_coordinates[index] = supports_target_2[i][j];
+              } else {
+                local_supports(i, j) = supports_target_3[i][j];
+                all_support_coordinates[index] = supports_target_3[i][j];
+              }
+            }
+          });
+
+        team.team_barrier();
+        detail::normalize_coordinates(team, target_point, local_supports);
+
+        Kokkos::parallel_for(
+          Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
+            for (int j = 0; j < dim; ++j) {
+              int index = league_rank * num_supports * dim + i * dim + j;
+              normalized_support_coordinates[index] = local_supports(i, j);
+            }
+          });
+      });
+
+    auto host_all_support_coordinates =
+      HostRead<Real>(read(all_support_coordinates));
+    auto host_normalized_support_coordinates =
+      HostRead<Real>(read(normalized_support_coordinates));
+    auto host_all_target_coordinates =
+      HostRead<Real>(read(all_target_coordinates));
+    for (int i = 0; i < total_coordinates; ++i) {
+      auto result =
+        host_all_support_coordinates[i] - host_all_target_coordinates[i];
+      CHECK_THAT(host_normalized_support_coordinates[i],
+                 Catch::Matchers::WithinAbs(result, tolerance));
+    }
+  } // end SECTION
+
+} // end TESTCASE

From c4b06bc899281bf7175cd3ad59b796be87d713ce Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sun, 2 Mar 2025 08:30:57 -0500
Subject: [PATCH 03/73] add min_max normalization routine

---
 src/pcms/interpolator/mls_interpolation.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 114aeee0..6596a26e 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -214,8 +214,7 @@ int calculate_scratch_shared_size(const SupportResults& support,
   return shared_size;
 }
 
-Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates,
-                            int dim = 2)
+Reals min_max_normalization(Reals& coordinates, int dim = 2)
 {
   int num_points = coordinates.size() / dim;
 

From 0473cbf49ffc04597c8f27c0905bb86e36614224 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sun, 2 Mar 2025 08:31:59 -0500
Subject: [PATCH 04/73] add relative distance normalization

---
 .../interpolator/mls_interpolation_impl.hpp   | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 7bfab251..2986d4e9 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -97,8 +97,7 @@ int calculate_scratch_shared_size(const SupportResults& support,
  *
  * @return normalized coordinates The normlaised coordinates
  */
-Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates,
-                            int dim);
+Reals min_max_normalization(Reals& coordinates, int dim);
 
 /**
  * @brief Evaluates the polynomial basis
@@ -159,8 +158,8 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p,
  **
  */
 KOKKOS_INLINE_FUNCTION
-void normalize_coordinates(member_type team, Coord& pivot,
-                           ScratchMatView& support_coordinates)
+void normalize_supports(member_type team, Coord& pivot,
+                        ScratchMatView& support_coordinates)
 {
   int nsupports = support_coordinates.extent(0);
   int dim = support_coordinates.extent(1);
@@ -588,7 +587,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        * is the diagonal matrix & each diagonal element is the phi evaluated at
        * each source points
        *
-       * step 1: evaluate the phi vector
+       * step 1: evaluate the phi vector with the original dimension
        */
 
       Kokkos::parallel_for(
@@ -608,9 +607,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        * step 2: normalize local source supports and target point
        */
 
-      // normalize_coordinates(team, target_point, local_source_points);
+      normalize_supports(team, target_point, local_source_points);
 
-      // team.team_barrier();
+      team.team_barrier();
       /**
        *
        * this can evaluate monomial basis vector for any degree of polynomial
@@ -621,8 +620,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
        * created with the basis vector of source supports stacking on top of
        * each other
+       *
+       * step 4: create vandermonde matrix
        */
-
       Kokkos::parallel_for(
         Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
           create_vandermonde_matrix(local_source_points, j, slice_length,
@@ -638,6 +638,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       /** support_values(nsupports) (or known rhs vector b) is the vector of the
        * quantity that we want interpolate
+       *
+       *
+       * step 4: find local supports function values
        */
       Kokkos::parallel_for(
         Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
@@ -649,6 +652,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
+      // convert normal equation to Ax = b where A is a square matrix
       auto result = convert_normal_equation(vandermonde_matrix, phi_vector,
                                             support_values, team);
 

From 3b951b5c41c0fd81a3c5783fb5972fff59b77e52 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sun, 2 Mar 2025 08:32:37 -0500
Subject: [PATCH 05/73] add test cases for normalization routine

---
 test/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index c06cd337..fc6ea135 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -112,6 +112,7 @@ if(Catch2_FOUND)
 	      test_rbf_interp.cpp
 	      test_linear_solver.cpp
 		  test_normalisation.cpp
+		  test_spr_meshfields.cpp
               )
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})

From 87ca3a190c2321cb4788d2afee5fac5c976f37db Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sun, 2 Mar 2025 08:33:26 -0500
Subject: [PATCH 06/73] add test for normalization

---
 test/test_normalisation.cpp | 69 +++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp
index 41d8a442..31e5727a 100644
--- a/test/test_normalisation.cpp
+++ b/test/test_normalisation.cpp
@@ -31,19 +31,6 @@ Reals fillFromMatrix(const Matrix<2, 10>& matrix)
 TEST_CASE("test_normalization_routine")
 {
 
-  SECTION("check min max omega_h function")
-  {
-    Reals vector({0.45, 0.12, -2.04, 1.52, -0.22, 0.96, 5.6});
-
-    const auto min_value = get_min(vector);
-    const auto max_value = get_max(vector);
-
-    printf("min value : %f , max_value : %f", min_value, max_value);
-    REQUIRE(min_value == -2.04);
-    REQUIRE(max_value == 5.6);
-
-  } // end SECTION
-
   SECTION("test_minmax_normalization")
   {
 
@@ -52,26 +39,14 @@ TEST_CASE("test_normalization_routine")
     Real tolerance = 1E-5;
     int num_points = 10;
 
-    Omega_h::Matrix<2, 10> matrix{{0.45, 3.18},
-                                  {0.12, -1.2},
-                                  {-2.04, -1.45},
-                                  {
-                                    1.52,
-                                    -0.98,
-                                  },
-                                  {-0.22, 1.45},
-                                  {0.96, 3.62},
-                                  {-0.26, -1.62},
-                                  {0.82, 4.22},
-                                  {5.6, 3.62},
+    Omega_h::Matrix<2, 10> matrix{{0.45, 3.18},   {0.12, -1.2},  {-2.04, -1.45},
+                                  {1.52, -0.98},  {-0.22, 1.45}, {0.96, 3.62},
+                                  {-0.26, -1.62}, {0.82, 4.22},  {5.6, 3.62},
                                   {1.2, 1.2}};
 
     auto coordinates = fillFromMatrix(matrix);
 
-    RealConstDefaultScalarArrayView coordinates_scalar_array_view(
-      coordinates.data(), 20);
-
-    auto results = min_max_normalization(coordinates_scalar_array_view, 2);
+    auto results = pcms::detail::min_max_normalization(coordinates, 2);
 
     printf("results after normalization");
     HostRead<Real> host_results(results);
@@ -99,6 +74,8 @@ TEST_CASE("test_normalization_routine")
       CAPTURE(i, host_results[index + 1], expected_results[i][1], tolerance);
       CHECK_THAT(host_results[index + 1],
                  Catch::Matchers::WithinAbs(expected_results[i][1], tolerance));
+
+      printf("-------------the test for min_max starts-----------\n");
     }
 
   } // end SECTION
@@ -133,6 +110,10 @@ TEST_CASE("test_normalization_routine")
     REQUIRE(total_coordinates == 54);
     Write<Real> normalized_support_coordinates(
       total_coordinates, 0, "coordinates after normalization");
+
+    Write<Real> normalized_target_coordinates(
+      total_coordinates, 0, "coordinates after normalization");
+
     Write<Real> all_support_coordinates(total_coordinates, 0,
                                         "support coordinates all");
     Write<Real> all_target_coordinates(total_coordinates, 0,
@@ -174,28 +155,48 @@ TEST_CASE("test_normalization_routine")
           });
 
         team.team_barrier();
-        detail::normalize_coordinates(team, target_point, local_supports);
+        detail::normalize_supports(team, target_point, local_supports);
 
         Kokkos::parallel_for(
           Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
             for (int j = 0; j < dim; ++j) {
               int index = league_rank * num_supports * dim + i * dim + j;
               normalized_support_coordinates[index] = local_supports(i, j);
+              if (j == 0) {
+                normalized_target_coordinates[index] = target_point.x;
+              } else if (j == 1) {
+                normalized_target_coordinates[index] = target_point.y;
+              } else {
+                normalized_target_coordinates[index] = target_point.z;
+              }
             }
           });
       });
 
     auto host_all_support_coordinates =
       HostRead<Real>(read(all_support_coordinates));
-    auto host_normalized_support_coordinates =
-      HostRead<Real>(read(normalized_support_coordinates));
+
     auto host_all_target_coordinates =
       HostRead<Real>(read(all_target_coordinates));
+
+    auto host_normalized_support_coordinates =
+      HostRead<Real>(read(normalized_support_coordinates));
+
+    auto host_normalized_target_coordinates =
+      HostRead<Real>(read(normalized_target_coordinates));
+
     for (int i = 0; i < total_coordinates; ++i) {
-      auto result =
+      auto result_support =
         host_all_support_coordinates[i] - host_all_target_coordinates[i];
+
+      auto result_target =
+        host_all_target_coordinates[i] - host_all_target_coordinates[i];
+
       CHECK_THAT(host_normalized_support_coordinates[i],
-                 Catch::Matchers::WithinAbs(result, tolerance));
+                 Catch::Matchers::WithinAbs(result_support, tolerance));
+
+      CHECK_THAT(host_normalized_target_coordinates[i],
+                 Catch::Matchers::WithinAbs(result_target, tolerance));
     }
   } // end SECTION
 

From c527ccf19226ea0b2d406bf9db2944f93efbda7e Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sun, 2 Mar 2025 08:34:37 -0500
Subject: [PATCH 07/73] update file

---
 test/test_rbf_interp.cpp | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp
index 5040fa5e..1a92780a 100644
--- a/test/test_rbf_interp.cpp
+++ b/test/test_rbf_interp.cpp
@@ -66,6 +66,8 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
     REQUIRE(m == n);
 
     for (size_t i = 0; i < m; ++i) {
+      CAPTURE(i, host_exact_target_values[i], host_approx_target_values[i],
+              tolerance);
       CHECK_THAT(
         host_exact_target_values[i],
         Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance));
@@ -180,6 +182,8 @@ TEST_CASE("test_mls_interpolation")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+
+    std::cout << " The test for d=1, p=1, passed " << "\n";
     std::cout << "------------finishing test: d1p1--------" << "\n";
   }
 
@@ -207,6 +211,8 @@ TEST_CASE("test_mls_interpolation")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+
+    std::cout << " The test for d=0, p=2, passed " << "\n";
     std::cout << "-------------finishing test: d0p2------------" << "\n";
   }
 
@@ -234,6 +240,8 @@ TEST_CASE("test_mls_interpolation")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+
+    std::cout << " The test for d=1, p=2, passed " << "\n";
     std::cout << "----------------finishing test: d1p2--------------" << "\n";
   }
 
@@ -261,6 +269,7 @@ TEST_CASE("test_mls_interpolation")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << " The test for d=2, p=2, passed " << "\n";
     std::cout << "-------------finishing test: d2p2--------------------"
               << "\n";
   }
@@ -289,6 +298,7 @@ TEST_CASE("test_mls_interpolation")
     test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
+    std::cout << " The test for d=2, p=3, passed " << "\n";
     std::cout << "-------------finishing test: d2p3--------------------"
               << "\n";
   }
@@ -318,6 +328,7 @@ TEST_CASE("test_mls_interpolation")
          Reals(exact_target_values), Reals(source_coordinates),
          Reals(target_coordinates));
 
+    std::cout << " The test for d=3, p=3, passed " << "\n";
     std::cout << "-------------finishing test: d3p3--------------------"
               << "\n";
   }

From 2e3cee71a51b0203a93ba4eac69a7ef951dd9590 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Sun, 2 Mar 2025 08:35:49 -0500
Subject: [PATCH 08/73] add file for spr test

---
 test/test_spr_meshfields.cpp | 215 +++++++++++++++++++++++++++++++++++
 1 file changed, 215 insertions(+)
 create mode 100644 test/test_spr_meshfields.cpp

diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
new file mode 100644
index 00000000..c8757a36
--- /dev/null
+++ b/test/test_spr_meshfields.cpp
@@ -0,0 +1,215 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <pcms/interpolator/adj_search.hpp>
+#include <pcms/interpolator/mls_interpolation.hpp>
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
+#include <Omega_h_mesh.hpp>
+#include <Omega_h_build.hpp>
+#include <Omega_h_file.hpp>
+#include <Omega_h_library.hpp>
+#include <Omega_h_array_ops.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <vector>
+#include <iostream>
+
+using namespace std;
+using namespace Omega_h;
+
+namespace
+{
+
+void print_patches(Omega_h::Graph& patches)
+{
+  auto offsets = HostRead(patches.a2ab);
+  auto values = HostRead(patches.ab2b);
+  std::cout << "num patches " << patches.nnodes() << "\n";
+  for (int patch = 0; patch < patches.nnodes(); patch++) {
+    std::cout << "patch " << patch << " patchElms ";
+    for (auto valIdx = offsets[patch]; valIdx < offsets[patch + 1]; ++valIdx) {
+      auto patchElm = values[valIdx];
+      std::cout << patchElm << " ";
+    }
+    std::cout << "\n";
+  }
+}
+
+std::string getTestName(size_t interp_degree, size_t func_degree)
+{
+  return "test interpolation degree " + std::to_string(interp_degree) +
+         ", polynomial degree " + std::to_string(func_degree);
+}
+
+KOKKOS_INLINE_FUNCTION
+double func(pcms::Coord& p, int degree)
+{
+  [[maybe_unused]] auto x = p.x;
+  [[maybe_unused]] auto y = p.y;
+  if (degree == 0) {
+    return 3;
+  } else if (degree == 1) {
+    return x + y;
+  } else if (degree == 2) {
+    return pow(x, 2) + pow(y, 2);
+  } else if (degree == 3) {
+
+    return pow(x, 3) + pow(y, 3);
+  } else {
+    printf("No polynomials with degree = %d\n", degree);
+  }
+  return -1;
+}
+
+void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values,
+          Reals exact_target_values, Reals source_coordinates,
+          Reals target_coordinates)
+{
+
+  int dim = mesh.dim();
+  Real tolerance = 1e-3;
+
+  Omega_h::Write<Real> ignored(patches.a2ab.size(), 1);
+  SupportResults support{patches.a2ab, patches.ab2b, ignored};
+
+  auto approx_target_values = pcms::mls_interpolation(
+    source_values, source_coordinates, target_coordinates, support, dim, degree,
+    pcms::RadialBasisFunction::RBF_CONST);
+
+  const auto delta_abs = Omega_h::fabs_each(
+    Omega_h::subtract_each(exact_target_values, read(approx_target_values)));
+  const auto max_delta_abs = Omega_h::get_max(delta_abs);
+  std::cout << "max_delta_abs " << max_delta_abs << "\n";
+
+  auto host_approx_target_values = HostRead<Real>(approx_target_values);
+  mesh.add_tag(OMEGA_H_VERT, "approx_target_values", 1,
+               read(approx_target_values));
+  Omega_h::vtk::write_parallel("box.vtk", &mesh);
+
+  auto host_exact_target_values = HostRead<Real>(exact_target_values);
+
+  int m = exact_target_values.size();
+  int n = approx_target_values.size();
+
+  REQUIRE(m == n);
+
+  for (size_t i = 0; i < m; ++i) {
+    //   std::cout << "vtx " << i << "\n";
+    //    std::cout << "vtx " << i << "\n";
+    CHECK_THAT(
+      host_exact_target_values[i],
+      Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance));
+  }
+}
+
+} // end anonymous namespace
+
+// Test cases for centroid to node mapping using MLS
+TEST_CASE("meshfields_spr_test")
+{
+
+  auto lib = Library{};
+  auto world = lib.world();
+  auto rank = lib.world()->rank();
+  const auto boxSize = 1.0;
+  const auto nElms = 6;
+  // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms,
+  // nElms, 0, false);
+  const auto thwaitesMeshFile =
+    "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh";
+  Omega_h::Mesh mesh(&lib);
+  Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh);
+  // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms,
+  // nElms, 0, false);
+  std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts()
+            << "\n";
+
+  const auto dim = mesh.dim();
+
+  // const auto& target_coordinates = mesh.coords();
+  const auto& target_coordinates = mesh.coords();
+
+  const auto& nfaces = mesh.nfaces();
+
+  const auto& ntargets = mesh.nverts();
+
+  Write<Real> source_coordinates(
+    dim * nfaces, 0, "stores coordinates of cell centroid of each tri element");
+
+  const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b;
+
+  Kokkos::parallel_for(
+    "calculate the centroid in each tri element", nfaces,
+    OMEGA_H_LAMBDA(const LO id) {
+      const auto current_el_verts = gather_verts<3>(faces2nodes, id);
+      const Omega_h::Few<Omega_h::Vector<2>, 3> current_el_vert_coords =
+        gather_vectors<3, 2>(target_coordinates, current_el_verts);
+      auto centroid = average(current_el_vert_coords);
+      int index = 2 * id;
+      source_coordinates[index] = centroid[0];
+      source_coordinates[index + 1] = centroid[1];
+    });
+
+  pcms::Points source_points;
+
+  source_points.coordinates =
+    pcms::PointsViewType("Number of local source supports", nfaces);
+  Kokkos::parallel_for(
+    "target points", nfaces, KOKKOS_LAMBDA(int j) {
+      source_points.coordinates(j).x = source_coordinates[j * dim];
+      source_points.coordinates(j).y = source_coordinates[j * dim + 1];
+    });
+
+  pcms::Points target_points;
+
+  target_points.coordinates =
+    pcms::PointsViewType("Number of local source supports", mesh.nverts());
+  Kokkos::parallel_for(
+    "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) {
+      target_points.coordinates(j).x = target_coordinates[j * dim];
+      target_points.coordinates(j).y = target_coordinates[j * dim + 1];
+    });
+
+  // Define tests
+  for (size_t interp_degree = 1; interp_degree <= 3; interp_degree++) {
+    for (int func_degree = interp_degree; func_degree >= 0; func_degree--) {
+      SECTION(getTestName(interp_degree, func_degree).c_str())
+      {
+        std::cerr << "start " << interp_degree << ", " << func_degree << " \n";
+        int minPatchSize;
+        if (interp_degree == 1)
+          minPatchSize = 3;
+        if (interp_degree == 2)
+          minPatchSize = 8; // why so large?
+        if (interp_degree == 3)
+          minPatchSize = 10; // why so large?
+        std::cerr << "minPatchSize " << minPatchSize << "\n";
+        auto patches = mesh.get_vtx_patches(minPatchSize);
+        // print_patches(patches);
+        // print_patches(patches);
+
+        Write<Real> source_values(nfaces, 0, "exact target values");
+
+        Kokkos::parallel_for(
+          nfaces, KOKKOS_LAMBDA(int i) {
+            source_values[i] = func(source_points.coordinates(i), func_degree);
+          });
+        mesh.add_tag(mesh.dim(), "source_values", 1, read(source_values));
+
+        Write<Real> exact_target_values(mesh.nverts(), 0,
+                                        "exact target values");
+
+        Kokkos::parallel_for(
+          mesh.nverts(), KOKKOS_LAMBDA(int i) {
+            exact_target_values[i] =
+              func(target_points.coordinates(i), func_degree);
+          });
+        mesh.add_tag(OMEGA_H_VERT, "target_values", 1,
+                     read(exact_target_values));
+
+        test(mesh, patches, interp_degree, Reals(source_values),
+             Reals(exact_target_values), Reals(source_coordinates),
+             Reals(target_coordinates));
+        std::cerr << "done " << interp_degree << ", " << func_degree << " \n";
+      } // end SECTION
+    }
+  }
+}

From 5557207cccdf64b24a7f6944113e5cc12cbf7834 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 3 Mar 2025 16:34:37 -0500
Subject: [PATCH 09/73] add test for qr serial

---
 test/test_serial_qr.cpp | 147 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 147 insertions(+)
 create mode 100644 test/test_serial_qr.cpp

diff --git a/test/test_serial_qr.cpp b/test/test_serial_qr.cpp
new file mode 100644
index 00000000..9635bb7a
--- /dev/null
+++ b/test/test_serial_qr.cpp
@@ -0,0 +1,147 @@
+#include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
+#include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
+#include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
+#include <KokkosBatched_Util.hpp>        //KokkosBlas::Algo, KokkosBatched::Diag
+#include <KokkosBlas2_gemv.hpp>          //KokkosBlas::gemv
+#include <Kokkos_Core.hpp>
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include <vector>
+#include <iostream>
+
+TEST_CASE("test_qr_serial")
+{
+  /* here is a test case run with Octave */
+  static double const a_data[16][10] = {
+    {1.0000000e+00, -4.9112749e-02, -1.5629814e+00, -8.2662407e-02,
+     7.6762316e-02, 1.2919981e-01, 4.0597781e-03, 2.4120621e-03, 2.4429110e+00,
+     6.8330735e-03},
+    {1.0000000e+00, -7.4718700e-01, 1.1447982e+00, -6.1608208e-01,
+     -8.5537834e-01, -7.0528966e-01, 4.6032852e-01, 5.5828842e-01,
+     1.3105629e+00, 3.7955713e-01},
+    {1.0000000e+00, -4.8564839e-01, -7.2143765e-01, -5.3574860e-02,
+     3.5036503e-01, 3.8650921e-02, 2.6018545e-02, 2.3585436e-01, 5.2047228e-01,
+     2.8702657e-03},
+    {1.0000000e+00, -8.1013248e-01, -1.0234062e+00, 3.4345012e-01,
+     8.2909460e-01, -3.5148898e-01, -2.7824010e-01, 6.5631463e-01,
+     1.0473602e+00, 1.1795799e-01},
+    {1.0000000e+00, -1.0508609e+00, -8.6973926e-01, 1.4570417e+00,
+     9.1397495e-01, -1.2672464e+00, -1.5311481e+00, 1.1043086e+00,
+     7.5644638e-01, 2.1229706e+00},
+    {1.0000000e+00, -1.7802012e-01, 1.8697947e+00, 8.3576559e-01,
+     -3.3286107e-01, 1.5627100e+00, -1.4878309e-01, 3.1691163e-02,
+     3.4961320e+00, 6.9850413e-01},
+    {1.0000000e+00, -6.4594368e-01, -5.5999878e-01, 2.5848452e+00,
+     3.6172768e-01, -1.4475102e+00, -1.6696645e+00, 4.1724324e-01,
+     3.1359864e-01, 6.6814250e+00},
+    {1.0000000e+00, -3.0007589e-01, -5.6481843e-01, -3.0670467e-01,
+     1.6948839e-01, 1.7323245e-01, 9.2034678e-02, 9.0045542e-02, 3.1901985e-01,
+     9.4067754e-02},
+    {1.0000000e+00, 1.8896909e-01, 2.7899549e-01, -2.5907897e-01, 5.2721524e-02,
+     -7.2281865e-02, -4.8957918e-02, 3.5709317e-02, 7.7838484e-02,
+     6.7121914e-02},
+    {1.0000000e+00, 1.8586762e+00, -4.5760801e-01, -9.5892373e-02,
+     -8.5054510e-01, 4.3881118e-02, -1.7823287e-01, 3.4546770e+00,
+     2.0940510e-01, 9.1953472e-03},
+    {1.0000000e+00, -1.0915266e-01, 1.9050743e+00, 3.2218623e-01,
+     -2.0794394e-01, 6.1378871e-01, -3.5167484e-02, 1.1914304e-02,
+     3.6293082e+00, 1.0380396e-01},
+    {1.0000000e+00, -3.2178312e-01, -6.9392454e-01, -6.1112393e-01,
+     2.2329320e-01, 4.2407389e-01, 1.9664936e-01, 1.0354437e-01, 4.8153127e-01,
+     3.7347245e-01},
+    {1.0000000e+00, 1.4229431e+00, 3.8000845e-01, -1.5989849e-01, 5.4073041e-01,
+     -6.0762776e-02, -2.2752645e-01, 2.0247671e+00, 1.4440643e-01,
+     2.5567526e-02},
+    {1.0000000e+00, 9.0145077e-01, -9.6991898e-01, 7.9086551e-01,
+     -8.7433421e-01, -7.6707547e-01, 7.1292632e-01, 8.1261349e-01,
+     9.4074284e-01, 6.2546825e-01},
+    {1.0000000e+00, -7.2533533e-01, 1.4790603e-01, 7.7147564e-01,
+     -1.0728147e-01, 1.1410590e-01, -5.5957854e-01, 5.2611134e-01,
+     2.1876193e-02, 5.9517466e-01},
+    {1.0000000e+00, 5.3974943e-01, -7.7853625e-01, -1.2196455e+00,
+     -4.2021450e-01, 9.4953820e-01, -6.5830294e-01, 2.9132945e-01,
+     6.0611869e-01, 1.4875350e+00},
+  };
+
+  static double const x_data[10] = {
+    4.4283983e-01,  -2.9416715e-01, 2.5654724e-01,  3.3493879e-01,
+    -6.3165447e-01, -5.1360652e-01, -2.4971659e-01, -2.5776427e-01,
+    -8.8432424e-02, -5.2756825e-01};
+
+  using ExecutionSpace = Kokkos::DefaultExecutionSpace;
+  using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
+  using DeviceType = ExecutionSpace::device_type;
+
+  // based on pumi (github.com/SCOREC/core) test/qr.cc
+  typedef Kokkos::View<double[16][10], Kokkos::LayoutLeft, DeviceType>
+    MatrixViewType;
+  typedef Kokkos::View<double[16], Kokkos::LayoutLeft, DeviceType>
+    RowVectorViewType;
+  typedef Kokkos::View<double[10], Kokkos::LayoutLeft, DeviceType>
+    ColVectorViewType;
+  typedef Kokkos::View<double[10], Kokkos::LayoutRight, DeviceType>
+    ColWorkViewType;
+  const auto one = 1.0;
+  const auto zero = 0.0;
+
+  MatrixViewType A("A");
+  typename MatrixViewType::HostMirror A_host = Kokkos::create_mirror_view(A);
+  for (int i = 0; i < 16; i++)
+    for (int j = 0; j < 10; j++)
+      A_host(i, j) = a_data[i][j];
+  Kokkos::deep_copy(A, A_host);
+
+  ColVectorViewType kx("kx");
+  typename ColVectorViewType::HostMirror kx_host =
+    Kokkos::create_mirror_view(kx);
+  for (int j = 0; j < 10; j++)
+    kx_host(j) = x_data[j];
+  Kokkos::deep_copy(kx, kx_host);
+  // b = A*kx
+  RowVectorViewType b("b");
+  KokkosBlas::gemv("N", one, A, kx, zero, b);
+
+  // x = b
+  RowVectorViewType x("x");
+  Kokkos::deep_copy(x, b);
+
+  // t: tau (see SerialQR call below)
+  ColVectorViewType t("t");
+
+  // w: working space for SerialQR
+  ColWorkViewType w("w");
+
+  // roughly following
+  // kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp
+  typedef KokkosBlas::Algo::QR::Unblocked AlgoTagType;
+  Kokkos::parallel_for(
+    "solveQR", 1, KOKKOS_LAMBDA(int) {
+      // compute the QR factorization of A and store the results in A and t
+      // (tau) - see the lapack dgeqp3(...) documentation:
+      // www.netlib.org/lapack/explore-html-3.6.1/dd/d9a/group__double_g_ecomputational_ga1b0500f49e03d2771b797c6e88adabbb.html
+      KokkosBatched::SerialQR<AlgoTagType>::invoke(A, t, w);
+      // x = Q^{T}x
+      KokkosBatched::SerialApplyQ<
+        KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
+        KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(A, t, x, w);
+      // x = R^{-1}x
+      KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper,
+                                KokkosBatched::Trans::NoTranspose,
+                                KokkosBatched::Diag::NonUnit,
+                                KokkosBlas::Algo::Trsv::Unblocked>::invoke(one,
+                                                                           A,
+                                                                           x);
+    });
+
+  typename ColVectorViewType::HostMirror sol_host =
+    Kokkos::create_mirror_view(x);
+  Kokkos::deep_copy(sol_host, x);
+
+  for (int i = 0; i < sol_host.size(); ++i) {
+    REQUIRE(sol_host.size() == kx_host.size());
+    CAPTURE(sol_host(i), kx_host(i), 1e-15);
+    CHECK_THAT(sol_host(i), CATCH::Matchers::WithinAbs(kx_host[i], 1e-15));
+  }
+}

From bc2cae0d9f6faf6b65901a7498833b60965971a8 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 3 Mar 2025 16:35:25 -0500
Subject: [PATCH 10/73] update

---
 .../interpolator/mls_interpolation_impl.hpp   | 35 +++++++++----------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 2986d4e9..6fe28231 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -501,9 +501,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
     "approx_target_values = %d, ntargets = %d\n",
     approx_target_values.size(), ntargets);
 
-  //  auto source_coordinates = min_max_normalization(src_coordinates, dim);
-  //  auto target_coordinates = min_max_normalization(tgt_coordinates, dim);
-
   IntHostMatView host_slice_length(
     "stores slice length of  polynomial basis in host", degree, dim);
 
@@ -598,6 +595,22 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
+      /** support_values(nsupports) (or known rhs vector b) is the vector of the
+       * quantity that we want interpolate
+       *
+       *
+       * step 4: find local supports function values
+       */
+      Kokkos::parallel_for(
+        Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
+          support_values(j) =
+            source_values[support.supports_idx[start_ptr + j]];
+          OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)),
+                               "ERROR: NaN found: at support %d\n", j);
+        });
+
+      team.team_barrier();
+
       /**
        *
        * the local_source_points is of the type ScratchMatView with
@@ -636,22 +649,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
         support.radii2[league_rank]);
 
-      /** support_values(nsupports) (or known rhs vector b) is the vector of the
-       * quantity that we want interpolate
-       *
-       *
-       * step 4: find local supports function values
-       */
-      Kokkos::parallel_for(
-        Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
-          support_values(j) =
-            source_values[support.supports_idx[start_ptr + j]];
-          OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)),
-                               "ERROR: NaN found: at support %d\n", j);
-        });
-
-      team.team_barrier();
-
       // convert normal equation to Ax = b where A is a square matrix
       auto result = convert_normal_equation(vandermonde_matrix, phi_vector,
                                             support_values, team);

From d8eedf32a01882ab9365d2f241b0c0f99a76e47e Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 3 Mar 2025 17:01:28 -0500
Subject: [PATCH 11/73] add serial qr test

---
 test/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index fc6ea135..4ea80c35 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -113,6 +113,7 @@ if(Catch2_FOUND)
 	      test_linear_solver.cpp
 		  test_normalisation.cpp
 		  test_spr_meshfields.cpp
+		  test_serial_qr.cpp
               )
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})

From 914af9ebd6b4180aafa198d28fbfe9dc3deedccb Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 3 Mar 2025 19:53:38 -0500
Subject: [PATCH 12/73] add a header for team level array operations

---
 .../pcms_interpolator_array_ops.cpp           | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.cpp

diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.cpp b/src/pcms/interpolator/pcms_interpolator_array_ops.cpp
new file mode 100644
index 00000000..2233002a
--- /dev/null
+++ b/src/pcms/interpolator/pcms_interpolator_array_ops.cpp
@@ -0,0 +1,20 @@
+#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP
+#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP
+
+#include "pcms_interpolator_aliases.hpp"
+#include <Omega_h_array_ops.hpp>
+#include <cmath>
+
+KOKKOS_INLINE_FUNCTION
+void find_sq_root_each(ScratchVecView& array)
+{
+  int size = array.size();
+
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
+    OMEGA_CHECK_PRINTF(array(i) < 0,
+                       "[Error:] Square root of a negative number is invalid!\n"
+                       "value is %12.6f\d",
+                       array(i));
+    array(i) = sqrt(array(i));
+  });
+}

From 213f1af8150074330d3ddcac7912862b7fde1151 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 3 Mar 2025 19:55:24 -0500
Subject: [PATCH 13/73] add test case for serial qr solver

---
 test/test_serial_qr.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/test_serial_qr.cpp b/test/test_serial_qr.cpp
index 9635bb7a..10e8bffc 100644
--- a/test/test_serial_qr.cpp
+++ b/test/test_serial_qr.cpp
@@ -135,13 +135,13 @@ TEST_CASE("test_qr_serial")
                                                                            x);
     });
 
-  typename ColVectorViewType::HostMirror sol_host =
+  typename RowVectorViewType::HostMirror sol_host =
     Kokkos::create_mirror_view(x);
   Kokkos::deep_copy(sol_host, x);
 
-  for (int i = 0; i < sol_host.size(); ++i) {
-    REQUIRE(sol_host.size() == kx_host.size());
+  for (int i = 0; i < kx_host.size(); ++i) {
+    REQUIRE(sol_host.size() == 16);
     CAPTURE(sol_host(i), kx_host(i), 1e-15);
-    CHECK_THAT(sol_host(i), CATCH::Matchers::WithinAbs(kx_host[i], 1e-15));
+    CHECK_THAT(sol_host(i), Catch::Matchers::WithinAbs(kx_host[i], 1e-15));
   }
 }

From b72696c8fcca41ef201df7585c929fdbb4557184 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 3 Mar 2025 19:56:42 -0500
Subject: [PATCH 14/73] add functions for row scaling

---
 .../interpolator/mls_interpolation_impl.hpp   | 34 +++++++++++++++----
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 6fe28231..73e01471 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -287,6 +287,27 @@ void scale_column_trans_matrix(const ScratchMatView& matrix,
   }
 }
 
+KOKKOS_INLINE_FUNCTION
+void eval_row_scaling(member_type team, int i,
+                      const ScratchVecView& diagonal_entries,
+                      ScratchMatView& matrix)
+{
+
+  int row = matrix.extent(0);
+  int column = matrix.extent(1);
+  int vector_size = diagonal_entries.size() OMEGA_H_CHECK_PRINTF(
+    vector_size == row,
+    "[ERROR]: for row scaling the size of diagonal entries vector should be "
+    "equal to the row of the matrix which is to be scaled\n"
+    "size of vector = %d, row of a matrix = %d\n",
+    vector_size, row);
+  ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL());
+  for (int j = 0; j < column; ++k) {
+    matrix(i, j) *=
+      diagonal_entries(i); // scales each row of a matrix with corresponding
+                           // entry in digonal entries vector
+  }
+}
 /**
  * @struct ResultConvertNormal
  * @brief Stores the results of matrix and vector transformations.
@@ -620,9 +641,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        * step 2: normalize local source supports and target point
        */
 
-      normalize_supports(team, target_point, local_source_points);
+      //      normalize_supports(team, target_point, local_source_points);
 
-      team.team_barrier();
+      //      team.team_barrier();
       /**
        *
        * this can evaluate monomial basis vector for any degree of polynomial
@@ -650,13 +671,14 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         support.radii2[league_rank]);
 
       // convert normal equation to Ax = b where A is a square matrix
-      auto result = convert_normal_equation(vandermonde_matrix, phi_vector,
-                                            support_values, team);
+      //      auto result = convert_normal_equation(vandermonde_matrix,
+      //      phi_vector,
+      //                                            support_values, team);
 
-      team.team_barrier();
+      //      team.team_barrier();
 
       // It stores the solution in rhs vector itself
-      solve_matrix(result.square_matrix, result.transformed_rhs, team);
+      //      solve_matrix(result.square_matrix, result.transformed_rhs, team);
 
       team.team_barrier();
 

From 00d4c95310cd7d1304bf551bce0710224a4ba28a Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 4 Mar 2025 11:38:53 -0500
Subject: [PATCH 15/73] update test_serial_qr.cpp

---
 src/pcms/interpolator/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt
index 08728df7..37f9363b 100644
--- a/src/pcms/interpolator/CMakeLists.txt
+++ b/src/pcms/interpolator/CMakeLists.txt
@@ -8,6 +8,7 @@ set(PCMS_FIELD_TRANSFER_HEADERS
     linear_interpolant.hpp
     multidimarray.hpp
     mls_interpolation.hpp
+	pcms_interpolator_array_ops.hpp
 )
 
 set(PCMS_FIELD_TRANSFER_SOURCES

From 9dc3da0c2aa707cd2af780e0f4c7de044da695a9 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 4 Mar 2025 11:39:43 -0500
Subject: [PATCH 16/73] implement serial qr

---
 .../interpolator/mls_interpolation_impl.hpp   | 140 +++++++++++++-----
 1 file changed, 101 insertions(+), 39 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 73e01471..647f00d6 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -13,11 +13,16 @@
 #include <KokkosBlas.hpp>
 #include <KokkosBlas1_team_dot.hpp>
 #include <Kokkos_StdAlgorithms.hpp>
-
 #include "pcms_interpolator_aliases.hpp"
 #include "adj_search.hpp"
 #include "../assert.h"
 #include "../profile.h"
+#include "pcms_interpolator_array_ops.hpp"
+
+#include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
+#include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
+#include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
+#include <KokkosBlas2_gemv.hpp>          //KokkosBlas::gemv
 
 static constexpr int MAX_DIM = 3;
 
@@ -109,6 +114,9 @@ Reals min_max_normalization(Reals& coordinates, int dim);
  *   @param[in] p A reference to the coordinate struct
  *   @param[in,out] basis_vector The polynomial basis vector
  *
+ *   @note the basis_vector size is more than basis_size
+ *   to accomodate with the output from qr
+ *
  */
 KOKKOS_INLINE_FUNCTION
 void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p,
@@ -187,6 +195,44 @@ void normalize_supports(member_type team, Coord& pivot,
   }
 }
 
+/**
+ * @brief Fills the kokkos scratch view
+ *
+ * @param value The value to be populated
+ * @param team The team member
+ * @param matrix The scratch matrix
+ *
+ */
+KOKKOS_INLINE_FUNCTION
+void fill(double value, member_type team, ScratchMatView matrix)
+{
+
+  int row = matrix.extent(0);
+  int col = matrix.extent(1);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) {
+    for (int k = 0; k < col; ++k) {
+      matrix(j, k) = value;
+    }
+  });
+}
+
+/**
+ * @brief Fills the kokkos scratch view
+ *
+ * @param value The value to be populated
+ * @param team The team member
+ * @param vector The scratch vector
+ *
+ */
+KOKKOS_INLINE_FUNCTION
+void fill(double value, member_type team, ScratchVecView vector)
+{
+
+  int size = vector.extent(0);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
+                       [=](int j) { vector(j) = value; });
+}
+
 /**
  * @brief Creates a vandermonde matrix
  *
@@ -288,21 +334,22 @@ void scale_column_trans_matrix(const ScratchMatView& matrix,
 }
 
 KOKKOS_INLINE_FUNCTION
-void eval_row_scaling(member_type team, int i,
-                      const ScratchVecView& diagonal_entries,
-                      ScratchMatView& matrix)
+void eval_row_scaling(member_type team, int i, ScratchVecView diagonal_entries,
+                      ScratchMatView matrix)
 {
 
   int row = matrix.extent(0);
   int column = matrix.extent(1);
-  int vector_size = diagonal_entries.size() OMEGA_H_CHECK_PRINTF(
+  int vector_size = diagonal_entries.size();
+
+  OMEGA_H_CHECK_PRINTF(
     vector_size == row,
     "[ERROR]: for row scaling the size of diagonal entries vector should be "
     "equal to the row of the matrix which is to be scaled\n"
     "size of vector = %d, row of a matrix = %d\n",
     vector_size, row);
-  ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL());
-  for (int j = 0; j < column; ++k) {
+  ScratchVecView matrix_row = Kokkos::subview(matrix, i, Kokkos::ALL());
+  for (int j = 0; j < column; ++j) {
     matrix(i, j) *=
       diagonal_entries(i); // scales each row of a matrix with corresponding
                            // entry in digonal entries vector
@@ -436,42 +483,54 @@ void solve_matrix(const ScratchMatView& square_matrix,
     KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs);
 }
 
-/**
- * @brief Fills the kokkos scratch view
- *
- * @param value The value to be populated
- * @param team The team member
- * @param matrix The scratch matrix
- *
- */
 KOKKOS_INLINE_FUNCTION
-void fill(double value, member_type team, ScratchMatView matrix)
+void solve_weighted_matrix_serial_qr(member_type team, int league_rank,
+                                     ScratchVecView& weight,
+                                     ScratchMatView& matrix,
+                                     ScratchVecView& rhs_values)
 {
 
   int row = matrix.extent(0);
-  int col = matrix.extent(1);
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) {
-    for (int k = 0; k < col; ++k) {
-      matrix(j, k) = value;
-    }
+
+  int column = matrix.extent(1);
+
+  int weight_size = weight.size();
+
+  find_sq_root_each(team, weight);
+
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+    eval_row_scaling(team, league_rank, weight, matrix);
   });
-}
 
-/**
- * @brief Fills the kokkos scratch view
- *
- * @param value The value to be populated
- * @param team The team member
- * @param vector The scratch vector
- *
- */
-KOKKOS_INLINE_FUNCTION
-void fill(double value, member_type team, ScratchVecView vector)
-{
+  // initilize tau array
+  ScratchVecView tau(team.team_scratch(0), column);
+  fill(0.0, team, tau);
 
-  int size = vector.extent(0);
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
-                       [=](int j) { vector(j) = value; });
+  // initialize work array
+  ScratchVecView work(team.team_scratch(0), column);
+  fill(0.0, team, work);
+
+  // for serial run
+  if (league_rank == 0) {
+
+    // compute QR factorization of matrix and stores the R in A and
+    // tau is used to recover Q
+    KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(matrix,
+                                                                     tau, work);
+
+    // b = Q^{T}b
+
+    KokkosBatched::SerialApplyQ<
+      KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
+      KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
+                                                   work);
+
+    // b = R^{-1}b
+    KokkosBatched::SerialTrsv<
+      KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
+      KokkosBatched::Diag::NonUnit,
+      KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
+  }
 }
 
 /**
@@ -570,7 +629,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       ScratchVecView support_values(team.team_scratch(0), nsupports);
 
       // basis of target
-      ScratchVecView target_basis_vector(team.team_scratch(0), basis_size);
+      ScratchVecView target_basis_vector(team.team_scratch(0), nsupports);
 
       // Initialize the scratch matrices and  vectors
       fill(0.0, team, local_source_points);
@@ -680,10 +739,13 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       // It stores the solution in rhs vector itself
       //      solve_matrix(result.square_matrix, result.transformed_rhs, team);
 
+      solve_weighted_matrix_serial_qr(team, league_rank, phi_vector,
+                                      vandermonde_matrix, support_values);
+
       team.team_barrier();
 
-      double target_value = KokkosBlas::Experimental::dot(
-        team, result.transformed_rhs, target_basis_vector);
+      double target_value = KokkosBlas::Experimental::dot(team, support_values,
+                                                          target_basis_vector);
 
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",

From 42daeb47a9791c55b13713116bce389581b3276c Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 4 Mar 2025 11:40:26 -0500
Subject: [PATCH 17/73] add new file for team level matrix operations

---
 .../pcms_interpolator_array_ops.hpp           | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.hpp

diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.hpp b/src/pcms/interpolator/pcms_interpolator_array_ops.hpp
new file mode 100644
index 00000000..67291dc4
--- /dev/null
+++ b/src/pcms/interpolator/pcms_interpolator_array_ops.hpp
@@ -0,0 +1,28 @@
+#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP
+#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP
+
+#include "pcms_interpolator_aliases.hpp"
+#include <Omega_h_array_ops.hpp>
+#include <cmath>
+#include <Omega_h_fail.hpp>
+
+namespace pcms
+{
+KOKKOS_INLINE_FUNCTION
+void find_sq_root_each(member_type team, ScratchVecView& array)
+{
+  int size = array.size();
+
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
+    OMEGA_H_CHECK_PRINTF(
+      array(i) >= 0,
+      "[Error:] Square root of a negative number is invalid!\n"
+      "value is %12.6f\n",
+      array(i));
+    array(i) = sqrt(array(i));
+  });
+}
+
+} // namespace pcms
+
+#endif

From 08433ceab6b30f1e0b5a6cb55918e0c2681a0c91 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 4 Mar 2025 11:41:09 -0500
Subject: [PATCH 18/73] update a file

---
 src/pcms/interpolator/pcms_interpolator_aliases.hpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp
index 76b1bcd6..55a5f187 100644
--- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp
@@ -17,6 +17,7 @@ using team_policy = typename Kokkos::TeamPolicy<>;
 using member_type = typename Kokkos::TeamPolicy<>::member_type;
 
 // alias for scratch view
+
 using ScratchSpace =
   typename Kokkos::DefaultExecutionSpace::scratch_memory_space;
 

From f6475607676779256dc5830c2472b97e236648da Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 4 Mar 2025 21:55:20 -0500
Subject: [PATCH 19/73] add serial scd routine

---
 src/pcms/interpolator/CMakeLists.txt          |   2 +-
 src/pcms/interpolator/mls_interpolation.cpp   |  21 +-
 .../interpolator/mls_interpolation_impl.hpp   | 729 +++++++++---------
 .../pcms_interpolator_array_ops.hpp           |  28 -
 .../pcms_interpolator_view_utils.hpp          | 148 ++++
 5 files changed, 541 insertions(+), 387 deletions(-)
 delete mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.hpp
 create mode 100644 src/pcms/interpolator/pcms_interpolator_view_utils.hpp

diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt
index 37f9363b..10dd51a1 100644
--- a/src/pcms/interpolator/CMakeLists.txt
+++ b/src/pcms/interpolator/CMakeLists.txt
@@ -8,7 +8,7 @@ set(PCMS_FIELD_TRANSFER_HEADERS
     linear_interpolant.hpp
     multidimarray.hpp
     mls_interpolation.hpp
-	pcms_interpolator_array_ops.hpp
+	pcms_interpolator_view_utils.hpp
 )
 
 set(PCMS_FIELD_TRANSFER_SOURCES
diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 6596a26e..417e2bc4 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -174,7 +174,8 @@ int calculate_basis_vector_size(const IntHostMatView& array)
 }
 
 int calculate_scratch_shared_size(const SupportResults& support,
-                                  const int nvertices_target, int basis_size)
+                                  const int nvertices_target, int basis_size,
+                                  int dim)
 {
 
   IntDeviceVecView shmem_each_team("stores the size required for each team",
@@ -186,15 +187,25 @@ int calculate_scratch_shared_size(const SupportResults& support,
       int end_ptr = support.supports_ptr[i + 1];
       int nsupports = end_ptr - start_ptr;
 
+      int max_size;
+      int min_size;
+      if (nsupports > basis_size) {
+        max_size = nsupports;
+        min_size = basis_size;
+      } else {
+        max_size = basis_size;
+        min_size = nsupports;
+      }
       size_t total_shared_size = 0;
-
       total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size);
       total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports);
       total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size);
       total_shared_size += ScratchVecView::shmem_size(basis_size);
-      total_shared_size += ScratchVecView::shmem_size(nsupports) * 3;
-      total_shared_size += ScratchMatView::shmem_size(nsupports, 2);
-      total_shared_size += ScratchMatView::shmem_size(nsupports, 1);
+      total_shared_size += ScratchVecView::shmem_size(nsupports);
+      total_shared_size += ScratchMatView::shmem_size(nsupports, dim);
+      total_shared_size += ScratchMatView::shmem_size(nsupports, nsupports);
+      // total_shared_size += ScratchVecView::shmem_size(max_size);
+      // total_shared_size += ScratchVecView::shmem_size(min_size);
       shmem_each_team(i) = total_shared_size;
     });
 
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 647f00d6..210b4dfb 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -17,12 +17,16 @@
 #include "adj_search.hpp"
 #include "../assert.h"
 #include "../profile.h"
-#include "pcms_interpolator_array_ops.hpp"
+#include "pcms_interpolator_view_utils.hpp"
 
 #include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
 #include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
 #include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
 #include <KokkosBlas2_gemv.hpp>          //KokkosBlas::gemv
+#include "KokkosBatched_SVD_Decl.hpp"
+#include "KokkosBatched_SVD_Serial_Impl.hpp"
+#include <KokkosBlas2_serial_gemv_impl.hpp>
+#include <KokkosBatched_Gemm_Decl.hpp>
 
 static constexpr int MAX_DIM = 3;
 
@@ -89,7 +93,7 @@ int calculate_basis_vector_size(const IntHostMatView& array);
  * @return The scratch size
  */
 int calculate_scratch_shared_size(const SupportResults& support,
-                                  const int ntargets, int basis_size);
+                                  const int ntargets, int basis_size, int dim);
 /**
  *
  * @brief Performs minmax normalization
@@ -195,44 +199,6 @@ void normalize_supports(member_type team, Coord& pivot,
   }
 }
 
-/**
- * @brief Fills the kokkos scratch view
- *
- * @param value The value to be populated
- * @param team The team member
- * @param matrix The scratch matrix
- *
- */
-KOKKOS_INLINE_FUNCTION
-void fill(double value, member_type team, ScratchMatView matrix)
-{
-
-  int row = matrix.extent(0);
-  int col = matrix.extent(1);
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) {
-    for (int k = 0; k < col; ++k) {
-      matrix(j, k) = value;
-    }
-  });
-}
-
-/**
- * @brief Fills the kokkos scratch view
- *
- * @param value The value to be populated
- * @param team The team member
- * @param vector The scratch vector
- *
- */
-KOKKOS_INLINE_FUNCTION
-void fill(double value, member_type team, ScratchVecView vector)
-{
-
-  int size = vector.extent(0);
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
-                       [=](int j) { vector(j) = value; });
-}
-
 /**
  * @brief Creates a vandermonde matrix
  *
@@ -333,28 +299,6 @@ void scale_column_trans_matrix(const ScratchMatView& matrix,
   }
 }
 
-KOKKOS_INLINE_FUNCTION
-void eval_row_scaling(member_type team, int i, ScratchVecView diagonal_entries,
-                      ScratchMatView matrix)
-{
-
-  int row = matrix.extent(0);
-  int column = matrix.extent(1);
-  int vector_size = diagonal_entries.size();
-
-  OMEGA_H_CHECK_PRINTF(
-    vector_size == row,
-    "[ERROR]: for row scaling the size of diagonal entries vector should be "
-    "equal to the row of the matrix which is to be scaled\n"
-    "size of vector = %d, row of a matrix = %d\n",
-    vector_size, row);
-  ScratchVecView matrix_row = Kokkos::subview(matrix, i, Kokkos::ALL());
-  for (int j = 0; j < column; ++j) {
-    matrix(i, j) *=
-      diagonal_entries(i); // scales each row of a matrix with corresponding
-                           // entry in digonal entries vector
-  }
-}
 /**
  * @struct ResultConvertNormal
  * @brief Stores the results of matrix and vector transformations.
@@ -484,328 +428,407 @@ void solve_matrix(const ScratchMatView& square_matrix,
 }
 
 KOKKOS_INLINE_FUNCTION
-void solve_weighted_matrix_serial_qr(member_type team, int league_rank,
-                                     ScratchVecView& weight,
-                                     ScratchMatView& matrix,
-                                     ScratchVecView& rhs_values)
+void solve_matrix_serial_svd(member_type team, int league_rank,
+                             const ScratchVecView& rhs_values,
+                             ScratchMatView& matrix,
+                             ScratchMatView& solution_vector)
 {
 
   int row = matrix.extent(0);
 
   int column = matrix.extent(1);
 
-  int weight_size = weight.size();
+  int weight_size = solution.size();
 
-  find_sq_root_each(team, weight);
+  // find_sq_root_each(team, weight);
 
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
-    eval_row_scaling(team, league_rank, weight, matrix);
-  });
+  // Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+  //   eval_row_scaling(team, league_rank, weight, matrix);
+  // });
+  //
+  // A = UEV^T
+  // initilize U (orthogonal matrices)  array
+  ScratchMatView U(team.team_scratch(0), row, row);
+  fill(-5.0, team, U);
 
-  // initilize tau array
-  ScratchVecView tau(team.team_scratch(0), column);
-  fill(0.0, team, tau);
+  // initialize Vt
+  ScratchMatView Vt(team.team_scratch(0), column, column);
+  fill(-5.0, team, Vt);
 
-  // initialize work array
-  ScratchVecView work(team.team_scratch(0), column);
-  fill(0.0, team, work);
+  int maxrank = std::min(row, column);
 
-  // for serial run
-  if (league_rank == 0) {
+  ScratchVecView sigma(team.team_scratch(0), maxrank);
+  fill(-5.0, team, sigma);
 
-    // compute QR factorization of matrix and stores the R in A and
-    // tau is used to recover Q
-    KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(matrix,
-                                                                     tau, work);
+  ScratchVecView work(team.team_scratch(0), std::max(row, column));
+  fill(-5.0, team, work);
 
-    // b = Q^{T}b
+  // sigma_mat_inv
+  ScratchMatView temp_matrix(team.team_scratch(0), column, row);
+  fill(0, team, temp_matrix);
 
-    KokkosBatched::SerialApplyQ<
-      KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
-      KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
-                                                   work);
+  ScratchMatView sigmaInvUtMul(team.team_scratch(0), column, row);
+  fill(0, team, sigmaInvUtMul);
 
-    // b = R^{-1}b
-    KokkosBatched::SerialTrsv<
-      KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
-      KokkosBatched::Diag::NonUnit,
-      KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
+  // for serial run
+  if (league_rank == 0) {
+    KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
+                                     sigma, Vt, work);
+    find_inverse_each(team, sigma);
+    auto Ut = find_transpose(team, U);
+    scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
+
+    // performing (S^-1 U^T)
+    //  KokkosBatched::SerialGemm<
+    //    KokkosBatched::Trans::NoTranspose,
+    //    KokkosBatched::Trans::Transpose,
+    //    KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, temp_matrix, U,
+    //    0.0, sigmaInvUtMul);
+    //  }
+
+    // perform V (S^-1 U^T)
+
+    KokkosBatched::SerialGemm<
+      KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose,
+      KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0,
+                                                    sigmaInvUtMul);
+
+    KokkosBlas::SerialGemv<
+      KokkosBlas::Trans::NoTranspose,
+      KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, sigmaInvUtMul, rhs_values,
+                                                 0.0, solution_vector);
   }
-}
 
-/**
- * @brief Maps the data from source mesh to target mesh
- *
- * @param source_values Scalar array view of source field values
- * @param source_coordinates Scalar array view of the coordinates of control
- * points of source field
- * @param target_coordinates Scalar array view of the coordinates of control
- * points of target field
- * @param support The object that enpasulates support info
- * @param dim The dimension of the simulations
- * @param degree The degree of the interpolation order
- * @param rbf_func The radial basis function choice
- * @param[in/out] Scalar array view of the interpolated field in target mesh
- *
- *
- */
-template <typename Func>
-void mls_interpolation(RealConstDefaultScalarArrayView source_values,
-                       RealConstDefaultScalarArrayView source_coordinates,
-                       RealConstDefaultScalarArrayView target_coordinates,
-                       const SupportResults& support, const LO& dim,
-                       const LO& degree, Func rbf_func,
-                       RealDefaultScalarArrayView approx_target_values)
-{
-  PCMS_FUNCTION_TIMER;
-  static_assert(std::is_invocable_r_v<double, Func, double, double>,
-                "rbf_func, takes radius and cutoff, returns weight");
-  static_assert(!std::is_pointer_v<Func>,
-                "function pointer will fail in GPU execution context");
-
-  int nsources = source_coordinates.size() / dim;
-
-  OMEGA_H_CHECK_PRINTF(
-    source_values.size() == nsources,
-    "[ERROR] The size of the source values and source coordinates is not same. "
-    "The current sizes are :\n"
-    "source_values_size = %d, source_coordinates_size = %d\n",
-    source_values.size(), nsources);
-
-  const auto ntargets = target_coordinates.size() / dim;
-
-  OMEGA_H_CHECK_PRINTF(
-    approx_target_values.size() == ntargets,
-    "[ERROR] The size of the approx target values and the number of targets is "
-    "not same. The current numbers are :\n"
-    "approx_target_values = %d, ntargets = %d\n",
-    approx_target_values.size(), ntargets);
-
-  IntHostMatView host_slice_length(
-    "stores slice length of  polynomial basis in host", degree, dim);
-
-  Kokkos::deep_copy(host_slice_length, 0);
-
-  calculate_basis_slice_lengths(host_slice_length);
-
-  auto basis_size = calculate_basis_vector_size(host_slice_length);
-
-  IntDeviceMatView slice_length(
-    "stores slice length of polynomial basis in device", degree, dim);
-
-  auto slice_length_hd = Kokkos::create_mirror_view(slice_length);
-  Kokkos::deep_copy(slice_length_hd, host_slice_length);
-  Kokkos::deep_copy(slice_length, slice_length_hd);
-
-  int shared_size =
-    calculate_scratch_shared_size(support, ntargets, basis_size);
-
-  team_policy tp(ntargets, Kokkos::AUTO);
-
-  int scratch_size = tp.scratch_size_max(0);
-
-  PCMS_ALWAYS_ASSERT(scratch_size > shared_size);
-
-  // calculates the interpolated values
-  Kokkos::parallel_for(
-    "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)),
-    KOKKOS_LAMBDA(const member_type& team) {
-      int league_rank = team.league_rank();
-      int start_ptr = support.supports_ptr[league_rank];
-      int end_ptr = support.supports_ptr[league_rank + 1];
-      int nsupports = end_ptr - start_ptr;
-
-      // local_source_point stores the coordinates of source supports of a given
-      // target
-      ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim);
-
-      // rbf function values of source supports Phi(n,n)
-      ScratchVecView phi_vector(team.team_scratch(0), nsupports);
-
-      //  vondermonde matrix P from the vectors of basis vector of supports
-      ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
-                                        basis_size);
-      // stores known vector (b)
-      ScratchVecView support_values(team.team_scratch(0), nsupports);
-
-      // basis of target
-      ScratchVecView target_basis_vector(team.team_scratch(0), nsupports);
-
-      // Initialize the scratch matrices and  vectors
-      fill(0.0, team, local_source_points);
-      fill(0.0, team, vandermonde_matrix);
-      fill(0.0, team, phi_vector);
-      fill(0.0, team, support_values);
-      fill(0.0, team, target_basis_vector);
-
-      // storing the coords of local supports
-      int count = -1;
-      for (int j = start_ptr; j < end_ptr; ++j) {
-        count++;
-        auto index = support.supports_idx[j];
-        local_source_points(count, 0) = source_coordinates[index * dim];
-        local_source_points(count, 1) = source_coordinates[index * dim + 1];
-        if (dim == 3) {
-          local_source_points(count, 2) = source_coordinates[index * dim + 2];
-        }
-      }
+  KOKKOS_INLINE_FUNCTION
+  void solve_weighted_matrix_serial_qr(
+    member_type team, int league_rank, ScratchVecView& weight,
+    ScratchMatView& matrix, ScratchVecView& rhs_values)
+  {
 
-      // evaluates the basis vector of a given target point
-      Coord target_point;
-      target_point.x = target_coordinates[league_rank * dim];
-      target_point.y = target_coordinates[league_rank * dim + 1];
+    int row = matrix.extent(0);
 
-      if (dim == 3) {
-        target_point.z = target_coordinates[league_rank * dim + 2];
-      }
+    int column = matrix.extent(1);
 
-      /** phi(nsupports) is the array of rbf functions evaluated at the
-       * source supports In the actual implementation, Phi(nsupports, nsupports)
-       * is the diagonal matrix & each diagonal element is the phi evaluated at
-       * each source points
-       *
-       * step 1: evaluate the phi vector with the original dimension
-       */
-
-      Kokkos::parallel_for(
-        Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
-          compute_phi_vector(target_point, local_source_points, j,
-                             support.radii2[league_rank], rbf_func, phi_vector);
-        });
-
-      team.team_barrier();
-
-      /** support_values(nsupports) (or known rhs vector b) is the vector of the
-       * quantity that we want interpolate
-       *
-       *
-       * step 4: find local supports function values
-       */
-      Kokkos::parallel_for(
-        Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
-          support_values(j) =
-            source_values[support.supports_idx[start_ptr + j]];
-          OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)),
-                               "ERROR: NaN found: at support %d\n", j);
-        });
-
-      team.team_barrier();
-
-      /**
-       *
-       * the local_source_points is of the type ScratchMatView with
-       * coordinates information; row is number of local supports
-       * & column is dim
-       *
-       * step 2: normalize local source supports and target point
-       */
-
-      //      normalize_supports(team, target_point, local_source_points);
-
-      //      team.team_barrier();
-      /**
-       *
-       * this can evaluate monomial basis vector for any degree of polynomial
-       * step 3: call basis vector evaluation function (eval_basis_vector);
-       */
-      eval_basis_vector(slice_length, target_point, target_basis_vector);
-
-      /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
-       * created with the basis vector of source supports stacking on top of
-       * each other
-       *
-       * step 4: create vandermonde matrix
-       */
-      Kokkos::parallel_for(
-        Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
-          create_vandermonde_matrix(local_source_points, j, slice_length,
-                                    vandermonde_matrix);
-        });
-
-      team.team_barrier();
-
-      OMEGA_H_CHECK_PRINTF(
-        support.radii2[league_rank] > 0,
-        "ERROR: radius2 has to be positive but found to be %.16f\n",
-        support.radii2[league_rank]);
-
-      // convert normal equation to Ax = b where A is a square matrix
-      //      auto result = convert_normal_equation(vandermonde_matrix,
-      //      phi_vector,
-      //                                            support_values, team);
-
-      //      team.team_barrier();
-
-      // It stores the solution in rhs vector itself
-      //      solve_matrix(result.square_matrix, result.transformed_rhs, team);
-
-      solve_weighted_matrix_serial_qr(team, league_rank, phi_vector,
-                                      vandermonde_matrix, support_values);
-
-      team.team_barrier();
-
-      double target_value = KokkosBlas::Experimental::dot(team, support_values,
-                                                          target_basis_vector);
-
-      if (team.team_rank() == 0) {
-        OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
-                             league_rank);
-        approx_target_values[league_rank] = target_value;
-      }
+    int weight_size = weight.size();
+
+    find_sq_root_each(team, weight);
+
+    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+      eval_row_scaling(team, league_rank, weight, matrix);
     });
-}
 
-/**
- * @brief \overload
- * Maps the data from source mesh to target mesh
- *
- * @param source_values Read array of Source field values
- * @param source_coordinates Read array of the coordinates of control points of
- * source field
- * @param target_coordinates Read array of the coordinates of control points of
- * target field
- * @param support The object that enpasulates support info
- * @param dim The dimension of the simulations
- * @param degree The degree of the interpolation order
- * @param rbf_func The radial basis function choice
- * @return  Write array of the interpolated field in target mesh
- *
- *
- */
-template <typename Func>
-Write<Real> mls_interpolation(const Reals source_values,
-                              const Reals source_coordinates,
-                              const Reals target_coordinates,
-                              const SupportResults& support, const LO& dim,
-                              const LO& degree, Func rbf_func)
-{
-  const auto nsources = source_coordinates.size() / dim;
+    // initilize tau array
+    ScratchVecView tau(team.team_scratch(0), column);
+    fill(0.0, team, tau);
+
+    // initialize work array
+    ScratchVecView work(team.team_scratch(0), column);
+    fill(0.0, team, work);
 
-  const auto ntargets = target_coordinates.size() / dim;
+    // for serial run
+    if (league_rank == 0) {
 
-  RealConstDefaultScalarArrayView source_values_array_view(
-    source_values.data(), source_values.size());
+      // compute QR factorization of matrix and stores the R in A and
+      // tau is used to recover Q
+      KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(
+        matrix, tau, work);
 
-  RealConstDefaultScalarArrayView source_coordinates_array_view(
-    source_coordinates.data(), source_coordinates.size());
+      // b = Q^{T}b
 
-  RealConstDefaultScalarArrayView target_coordinates_array_view(
-    target_coordinates.data(), target_coordinates.size());
+      KokkosBatched::SerialApplyQ<
+        KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
+        KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
+                                                     work);
+
+      // b = R^{-1}b
+      KokkosBatched::SerialTrsv<
+        KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
+        KokkosBatched::Diag::NonUnit,
+        KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
+    }
+  }
 
-  RealDefaultScalarArrayView radii2_array_view(support.radii2.data(),
-                                               support.radii2.size());
+  /**
+   * @brief Maps the data from source mesh to target mesh
+   *
+   * @param source_values Scalar array view of source field values
+   * @param source_coordinates Scalar array view of the coordinates of control
+   * points of source field
+   * @param target_coordinates Scalar array view of the coordinates of control
+   * points of target field
+   * @param support The object that enpasulates support info
+   * @param dim The dimension of the simulations
+   * @param degree The degree of the interpolation order
+   * @param rbf_func The radial basis function choice
+   * @param[in/out] Scalar array view of the interpolated field in target mesh
+   *
+   *
+   */
+  template <typename Func>
+  void mls_interpolation(RealConstDefaultScalarArrayView source_values,
+                         RealConstDefaultScalarArrayView source_coordinates,
+                         RealConstDefaultScalarArrayView target_coordinates,
+                         const SupportResults& support, const LO& dim,
+                         const LO& degree, Func rbf_func,
+                         RealDefaultScalarArrayView approx_target_values)
+  {
+    PCMS_FUNCTION_TIMER;
+    static_assert(std::is_invocable_r_v<double, Func, double, double>,
+                  "rbf_func, takes radius and cutoff, returns weight");
+    static_assert(!std::is_pointer_v<Func>,
+                  "function pointer will fail in GPU execution context");
+
+    int nsources = source_coordinates.size() / dim;
+
+    OMEGA_H_CHECK_PRINTF(
+      source_values.size() == nsources,
+      "[ERROR] The size of the source values and source coordinates is not "
+      "same. "
+      "The current sizes are :\n"
+      "source_values_size = %d, source_coordinates_size = %d\n",
+      source_values.size(), nsources);
+
+    const auto ntargets = target_coordinates.size() / dim;
+
+    OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets,
+                         "[ERROR] The size of the approx target values and the "
+                         "number of targets is "
+                         "not same. The current numbers are :\n"
+                         "approx_target_values = %d, ntargets = %d\n",
+                         approx_target_values.size(), ntargets);
+
+    IntHostMatView host_slice_length(
+      "stores slice length of  polynomial basis in host", degree, dim);
+
+    Kokkos::deep_copy(host_slice_length, 0);
+
+    calculate_basis_slice_lengths(host_slice_length);
+
+    auto basis_size = calculate_basis_vector_size(host_slice_length);
+
+    IntDeviceMatView slice_length(
+      "stores slice length of polynomial basis in device", degree, dim);
+
+    auto slice_length_hd = Kokkos::create_mirror_view(slice_length);
+    Kokkos::deep_copy(slice_length_hd, host_slice_length);
+    Kokkos::deep_copy(slice_length, slice_length_hd);
+
+    int shared_size =
+      calculate_scratch_shared_size(support, ntargets, basis_size, dim);
+
+    team_policy tp(ntargets, Kokkos::AUTO);
+
+    int scratch_size = tp.scratch_size_max(0);
+    printf("Scratch Size = %d\n", scratch_size);
+    printf("Shared Size = %d\n", shared_size);
+    PCMS_ALWAYS_ASSERT(scratch_size > shared_size);
+
+    printf("does it fail or below\n");
+    // calculates the interpolated values
+    Kokkos::parallel_for(
+      "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        int league_rank = team.league_rank();
+        int start_ptr = support.supports_ptr[league_rank];
+        int end_ptr = support.supports_ptr[league_rank + 1];
+        int nsupports = end_ptr - start_ptr;
+
+        // local_source_point stores the coordinates of source supports of a
+        // given target
+        ScratchMatView local_source_points(team.team_scratch(0), nsupports,
+                                           dim);
+
+        // rbf function values of source supports Phi(n,n)
+        ScratchVecView phi_vector(team.team_scratch(0), nsupports);
+
+        //  vondermonde matrix P from the vectors of basis vector of supports
+        ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
+                                          basis_size);
+        // stores known vector (b)
+        ScratchVecView support_values(team.team_scratch(0), nsupports);
+
+        // basis of target
+        ScratchVecView target_basis_vector(team.team_scratch(0), basis_size);
+
+        // Initialize the scratch matrices and  vectors
+        fill(0.0, team, local_source_points);
+        fill(0.0, team, vandermonde_matrix);
+        fill(0.0, team, phi_vector);
+        fill(0.0, team, support_values);
+        fill(0.0, team, target_basis_vector);
+
+        // storing the coords of local supports
+        int count = -1;
+        for (int j = start_ptr; j < end_ptr; ++j) {
+          count++;
+          auto index = support.supports_idx[j];
+          local_source_points(count, 0) = source_coordinates[index * dim];
+          local_source_points(count, 1) = source_coordinates[index * dim + 1];
+          if (dim == 3) {
+            local_source_points(count, 2) = source_coordinates[index * dim + 2];
+          }
+        }
 
-  Write<Real> interpolated_values(ntargets, 0, "approximated target values");
+        // evaluates the basis vector of a given target point
+        Coord target_point;
+        target_point.x = target_coordinates[league_rank * dim];
+        target_point.y = target_coordinates[league_rank * dim + 1];
 
-  RealDefaultScalarArrayView interpolated_values_array_view(
-    interpolated_values.data(), interpolated_values.size());
+        if (dim == 3) {
+          target_point.z = target_coordinates[league_rank * dim + 2];
+        }
 
-  mls_interpolation(source_values_array_view, source_coordinates_array_view,
-                    target_coordinates_array_view, support, dim, degree,
-                    rbf_func, interpolated_values_array_view);
+        /** phi(nsupports) is the array of rbf functions evaluated at the
+         * source supports In the actual implementation, Phi(nsupports,
+         * nsupports) is the diagonal matrix & each diagonal element is the phi
+         * evaluated at each source points
+         *
+         * step 1: evaluate the phi vector with the original dimension
+         */
+
+        Kokkos::parallel_for(
+          Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
+            compute_phi_vector(target_point, local_source_points, j,
+                               support.radii2[league_rank], rbf_func,
+                               phi_vector);
+          });
+
+        team.team_barrier();
+
+        /** support_values(nsupports) (or known rhs vector b) is the vector of
+         * the quantity that we want interpolate
+         *
+         *
+         * step 4: find local supports function values
+         */
+        Kokkos::parallel_for(
+          Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
+            support_values(j) =
+              source_values[support.supports_idx[start_ptr + j]];
+            OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)),
+                                 "ERROR: NaN found: at support %d\n", j);
+          });
+
+        team.team_barrier();
+
+        /**
+         *
+         * the local_source_points is of the type ScratchMatView with
+         * coordinates information; row is number of local supports
+         * & column is dim
+         *
+         * step 2: normalize local source supports and target point
+         */
+
+        //      normalize_supports(team, target_point, local_source_points);
+
+        //      team.team_barrier();
+        /**
+         *
+         * this can evaluate monomial basis vector for any degree of polynomial
+         * step 3: call basis vector evaluation function (eval_basis_vector);
+         */
+        eval_basis_vector(slice_length, target_point, target_basis_vector);
+
+        /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
+         * created with the basis vector of source supports stacking on top of
+         * each other
+         *
+         * step 4: create vandermonde matrix
+         */
+        Kokkos::parallel_for(
+          Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
+            create_vandermonde_matrix(local_source_points, j, slice_length,
+                                      vandermonde_matrix);
+          });
+
+        team.team_barrier();
+
+        OMEGA_H_CHECK_PRINTF(
+          support.radii2[league_rank] > 0,
+          "ERROR: radius2 has to be positive but found to be %.16f\n",
+          support.radii2[league_rank]);
+
+        // convert normal equation to Ax = b where A is a square matrix
+        //      auto result = convert_normal_equation(vandermonde_matrix,
+        //      phi_vector,
+        //                                            support_values, team);
+
+        //      team.team_barrier();
+
+        // It stores the solution in rhs vector itself
+        //      solve_matrix(result.square_matrix, result.transformed_rhs,
+        //      team);
+
+        //      solve_weighted_matrix_serial_qr(team, league_rank, phi_vector,
+        //                                      vandermonde_matrix,
+        //                                      support_values);
+
+        solve_matrix_serial_svd(team, league_rank, phi_vector,
+                                vandermonde_matrix, support_values);
+        team.team_barrier();
+
+        double target_value = KokkosBlas::Experimental::dot(
+          team, solution_coefficients, target_basis_vector);
+
+        if (team.team_rank() == 0) {
+          OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
+                               league_rank);
+          approx_target_values[league_rank] = target_value;
+        }
+      });
+  }
 
-  return interpolated_values;
-}
+  /**
+   * @brief \overload
+   * Maps the data from source mesh to target mesh
+   *
+   * @param source_values Read array of Source field values
+   * @param source_coordinates Read array of the coordinates of control points
+   * of source field
+   * @param target_coordinates Read array of the coordinates of control points
+   * of target field
+   * @param support The object that enpasulates support info
+   * @param dim The dimension of the simulations
+   * @param degree The degree of the interpolation order
+   * @param rbf_func The radial basis function choice
+   * @return  Write array of the interpolated field in target mesh
+   *
+   *
+   */
+  template <typename Func>
+  Write<Real> mls_interpolation(
+    const Reals source_values, const Reals source_coordinates,
+    const Reals target_coordinates, const SupportResults& support,
+    const LO& dim, const LO& degree, Func rbf_func)
+  {
+    const auto nsources = source_coordinates.size() / dim;
+
+    const auto ntargets = target_coordinates.size() / dim;
+
+    RealConstDefaultScalarArrayView source_values_array_view(
+      source_values.data(), source_values.size());
+
+    RealConstDefaultScalarArrayView source_coordinates_array_view(
+      source_coordinates.data(), source_coordinates.size());
+
+    RealConstDefaultScalarArrayView target_coordinates_array_view(
+      target_coordinates.data(), target_coordinates.size());
+
+    RealDefaultScalarArrayView radii2_array_view(support.radii2.data(),
+                                                 support.radii2.size());
+
+    Write<Real> interpolated_values(ntargets, 0, "approximated target values");
+
+    RealDefaultScalarArrayView interpolated_values_array_view(
+      interpolated_values.data(), interpolated_values.size());
+
+    mls_interpolation(source_values_array_view, source_coordinates_array_view,
+                      target_coordinates_array_view, support, dim, degree,
+                      rbf_func, interpolated_values_array_view);
+
+    return interpolated_values;
+  }
 
 } // namespace detail
 } // namespace pcms
diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.hpp b/src/pcms/interpolator/pcms_interpolator_array_ops.hpp
deleted file mode 100644
index 67291dc4..00000000
--- a/src/pcms/interpolator/pcms_interpolator_array_ops.hpp
+++ /dev/null
@@ -1,28 +0,0 @@
-#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP
-#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP
-
-#include "pcms_interpolator_aliases.hpp"
-#include <Omega_h_array_ops.hpp>
-#include <cmath>
-#include <Omega_h_fail.hpp>
-
-namespace pcms
-{
-KOKKOS_INLINE_FUNCTION
-void find_sq_root_each(member_type team, ScratchVecView& array)
-{
-  int size = array.size();
-
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
-    OMEGA_H_CHECK_PRINTF(
-      array(i) >= 0,
-      "[Error:] Square root of a negative number is invalid!\n"
-      "value is %12.6f\n",
-      array(i));
-    array(i) = sqrt(array(i));
-  });
-}
-
-} // namespace pcms
-
-#endif
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
new file mode 100644
index 00000000..cd0fdc1f
--- /dev/null
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -0,0 +1,148 @@
+#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP
+#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP
+
+#include "pcms_interpolator_aliases.hpp"
+#include <Omega_h_array_ops.hpp>
+#include <cmath>
+#include <Omega_h_fail.hpp>
+
+namespace pcms
+{
+namespace detail
+{
+/**
+ * @brief Fills the kokkos scratch view
+ *
+ * @param value The value to be populated
+ * @param team The team member
+ * @param matrix The scratch matrix
+ *
+ */
+KOKKOS_INLINE_FUNCTION
+void fill(double value, member_type team, ScratchMatView matrix)
+{
+
+  int row = matrix.extent(0);
+  int col = matrix.extent(1);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) {
+    for (int k = 0; k < col; ++k) {
+      matrix(j, k) = value;
+    }
+  });
+}
+
+/**
+ * @brief Fills the kokkos scratch view
+ *
+ * @param value The value to be populated
+ * @param team The team member
+ * @param vector The scratch vector
+ *
+ */
+KOKKOS_INLINE_FUNCTION
+void fill(double value, member_type team, ScratchVecView vector)
+{
+
+  int size = vector.extent(0);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
+                       [=](int j) { vector(j) = value; });
+}
+
+KOKKOS_INLINE_FUNCTION
+void find_sq_root_each(member_type team, ScratchVecView& array)
+{
+  int size = array.size();
+
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
+    OMEGA_H_CHECK_PRINTF(
+      array(i) >= 0,
+      "[Error:] Square root of a negative number is invalid!\n"
+      "value is %12.6f\n",
+      array(i));
+    array(i) = sqrt(array(i));
+  });
+}
+
+KOKKOS_INLINE_FUNCTION
+void find_inverse_each(member_type team, ScratchVecView& array)
+{
+  int size = array.size();
+  double eps = 1e-8;
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
+    if (array(i) > eps) {
+      array(i) = 1 / array(i);
+    } else {
+      array(i) = 0.0;
+    }
+  });
+}
+
+KOKKOS_INLINE_FUNCTION
+ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix)
+{
+  int row = matrix.extent(0);
+  int column = matrix.extent(1);
+
+  ScratchMatView transMatrix(team.team_scratch(0), column, row);
+  fill(0.0, team, transMatrix);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+    for (int j = 0; j < column; ++j) {
+      transMatrix(j, i) = matrix(j, i);
+    }
+  });
+  return transMatrix;
+}
+
+KOKKOS_INLINE_FUNCTION
+void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries,
+                   ScratchMatView& matrix)
+{
+  int size = diagonal_entries.size();
+  OMEGA_H_CHECK(size == matrix.extent(0) || size == matrix.extent(1));
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
+                       [=](int i) { matrix(i, i) = diagonal_entries(i); });
+}
+
+KOKKOS_INLINE_FUNCTION
+void eval_row_scaling(member_type team, const ScratchVecView& diagonal_entries,
+                      ScratchMatView& matrix)
+{
+
+  int row = matrix.extent(0);
+  int column = matrix.extent(1);
+  int vector_size = diagonal_entries.size();
+
+  OMEGA_H_CHECK_PRINTF(
+    vector_size <= row,
+    "[ERROR]: for row scaling the size of diagonal entries vector should be "
+    "equal or less than the row of the matrix which is to be scaled\n"
+    "size of vector = %d, row of a matrix = %d\n",
+    vector_size, row);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+    for (int j = 0; j < column; ++j) {
+      matrix(i, j) *=
+        diagonal_entries(i); // scales each row of a matrix with a corresponding
+                             // value in a digonal entries
+    });
+
+}
+
+KOKKOS_INLINE_FUNCTION
+void scale_and_adjust(member_type team, const ScratchVecView& diagonal_entries, ScratchMatView& matrixToScale, ScratchMatView& adjustedMatrix){
+    size_t rowA = matrixToScale.extent(0);
+    size_t columnA = matrixToScale.extent(1);
+
+    size_t rowB = adjustedMatrix.extent(0);
+    size_t columnB = adjustedMatrix.extent(0);
+
+    OMEGA_CHECK(columnB == rowA)
+    eval_row_scaling(team, diagonal_entries, matrixtoScale);
+    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
+      for (int j = 0; j < columnB; == j) {
+        adjustedMatrix(i, j) = matrixtoScale(i, j);
+      }
+    });
+}
+} // end namespace pcms
+} // end namespace detail
+#endif

From 8ec71556071ba4565c57626ebea9169059773bfc Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 11 Mar 2025 16:47:05 -0400
Subject: [PATCH 20/73] implement serial svd

---
 src/pcms/interpolator/mls_interpolation.cpp   |  22 +-
 .../interpolator/mls_interpolation_impl.hpp   | 658 +++++++++---------
 .../pcms_interpolator_view_utils.hpp          |  64 +-
 test/CMakeLists.txt                           |   1 +
 4 files changed, 385 insertions(+), 360 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 417e2bc4..d19e9fc6 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -197,13 +197,21 @@ int calculate_scratch_shared_size(const SupportResults& support,
         min_size = nsupports;
       }
       size_t total_shared_size = 0;
-      total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size);
-      total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports);
-      total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size);
-      total_shared_size += ScratchVecView::shmem_size(basis_size);
-      total_shared_size += ScratchVecView::shmem_size(nsupports);
-      total_shared_size += ScratchMatView::shmem_size(nsupports, dim);
-      total_shared_size += ScratchMatView::shmem_size(nsupports, nsupports);
+      total_shared_size +=
+        ScratchMatView::shmem_size(basis_size, basis_size); // Vt
+      total_shared_size +=
+        ScratchMatView::shmem_size(basis_size, nsupports) * 2; // temp_matrix
+      total_shared_size +=
+        ScratchMatView::shmem_size(nsupports, basis_size); // vandermonde matrix
+      total_shared_size +=
+        ScratchVecView::shmem_size(basis_size) *
+        3; // sigma, target_basis_vector, solution_coefficients
+      total_shared_size += ScratchVecView::shmem_size(nsupports) *
+                           3; // work, phi_vector, support_values
+      total_shared_size +=
+        ScratchMatView::shmem_size(nsupports, dim); // local_source_points
+      total_shared_size +=
+        ScratchMatView::shmem_size(nsupports, nsupports) * 2; // U, Ut
       // total_shared_size += ScratchVecView::shmem_size(max_size);
       // total_shared_size += ScratchVecView::shmem_size(min_size);
       shmem_each_team(i) = total_shared_size;
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 210b4dfb..f4eb6c26 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -357,11 +357,11 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
   int m = matrix.extent(0);
   int n = matrix.extent(1);
 
-  ScratchMatView scaled_matrix(team.team_scratch(0), n, m);
+  ScratchMatView scaled_matrix(team.team_scratch(1), n, m);
 
-  ScratchMatView square_matrix(team.team_scratch(0), n, n);
+  ScratchMatView square_matrix(team.team_scratch(1), n, n);
 
-  ScratchVecView transformed_rhs(team.team_scratch(0), n);
+  ScratchVecView transformed_rhs(team.team_scratch(1), n);
 
   // performing P^T Q
   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) {
@@ -428,50 +428,55 @@ void solve_matrix(const ScratchMatView& square_matrix,
 }
 
 KOKKOS_INLINE_FUNCTION
-void solve_matrix_serial_svd(member_type team, int league_rank,
-                             const ScratchVecView& rhs_values,
-                             ScratchMatView& matrix,
-                             ScratchMatView& solution_vector)
+void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight,
+                             ScratchVecView rhs_values, ScratchMatView matrix,
+                             ScratchVecView solution_vector)
 {
 
   int row = matrix.extent(0);
 
   int column = matrix.extent(1);
 
-  int weight_size = solution.size();
+  int weight_size = weight.size();
 
+  OMEGA_H_CHECK_PRINTF(
+    weight_size == row,
+    "the size of the weight vector should be equal to the row of the matrix\n"
+    "weight vector size = %d, row of matrix = %d\n",
+    weight_size, row);
   // find_sq_root_each(team, weight);
 
-  // Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
-  //   eval_row_scaling(team, league_rank, weight, matrix);
-  // });
-  //
+  eval_row_scaling(team, weight, matrix);
+
+  team.team_barrier();
+
+  eval_rhs_scaling(team, weight, rhs_values);
   // A = UEV^T
   // initilize U (orthogonal matrices)  array
-  ScratchMatView U(team.team_scratch(0), row, row);
+  ScratchMatView U(team.team_scratch(1), row, row);
   fill(-5.0, team, U);
 
   // initialize Vt
-  ScratchMatView Vt(team.team_scratch(0), column, column);
+  ScratchMatView Vt(team.team_scratch(1), column, column);
   fill(-5.0, team, Vt);
 
-  int maxrank = std::min(row, column);
+  // int maxrank = std::min(row, column);
 
-  ScratchVecView sigma(team.team_scratch(0), maxrank);
+  ScratchVecView sigma(team.team_scratch(1), column);
   fill(-5.0, team, sigma);
 
-  ScratchVecView work(team.team_scratch(0), std::max(row, column));
+  ScratchVecView work(team.team_scratch(1), row);
   fill(-5.0, team, work);
 
   // sigma_mat_inv
-  ScratchMatView temp_matrix(team.team_scratch(0), column, row);
+  ScratchMatView temp_matrix(team.team_scratch(1), column, row);
   fill(0, team, temp_matrix);
 
-  ScratchMatView sigmaInvUtMul(team.team_scratch(0), column, row);
+  ScratchMatView sigmaInvUtMul(team.team_scratch(1), column, row);
   fill(0, team, sigmaInvUtMul);
 
   // for serial run
-  if (league_rank == 0) {
+  if (team.league_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
                                      sigma, Vt, work);
     find_inverse_each(team, sigma);
@@ -498,337 +503,326 @@ void solve_matrix_serial_svd(member_type team, int league_rank,
       KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, sigmaInvUtMul, rhs_values,
                                                  0.0, solution_vector);
   }
+}
+KOKKOS_INLINE_FUNCTION
+void solve_weighted_matrix_serial_qr(const member_type& team,
+                                     const int league_rank,
+                                     ScratchVecView weight,
+                                     ScratchMatView& matrix,
+                                     ScratchVecView& rhs_values)
+{
 
-  KOKKOS_INLINE_FUNCTION
-  void solve_weighted_matrix_serial_qr(
-    member_type team, int league_rank, ScratchVecView& weight,
-    ScratchMatView& matrix, ScratchVecView& rhs_values)
-  {
-
-    int row = matrix.extent(0);
+  int row = matrix.extent(0);
 
-    int column = matrix.extent(1);
+  int column = matrix.extent(1);
 
-    int weight_size = weight.size();
+  int weight_size = weight.size();
 
-    find_sq_root_each(team, weight);
+  find_sq_root_each(team, weight);
 
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
-      eval_row_scaling(team, league_rank, weight, matrix);
-    });
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row),
+                       [=](int i) { eval_row_scaling(team, weight, matrix); });
 
-    // initilize tau array
-    ScratchVecView tau(team.team_scratch(0), column);
-    fill(0.0, team, tau);
+  // initilize tau array
+  ScratchVecView tau(team.team_scratch(1), column);
+  fill(0.0, team, tau);
 
-    // initialize work array
-    ScratchVecView work(team.team_scratch(0), column);
-    fill(0.0, team, work);
+  // initialize work array
+  ScratchVecView work(team.team_scratch(1), column);
+  fill(0.0, team, work);
 
-    // for serial run
-    if (league_rank == 0) {
+  // for serial run
+  if (league_rank == 0) {
 
-      // compute QR factorization of matrix and stores the R in A and
-      // tau is used to recover Q
-      KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(
-        matrix, tau, work);
+    // compute QR factorization of matrix and stores the R in A and
+    // tau is used to recover Q
+    KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(matrix,
+                                                                     tau, work);
 
-      // b = Q^{T}b
+    // b = Q^{T}b
 
-      KokkosBatched::SerialApplyQ<
-        KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
-        KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
-                                                     work);
+    KokkosBatched::SerialApplyQ<
+      KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
+      KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
+                                                   work);
 
-      // b = R^{-1}b
-      KokkosBatched::SerialTrsv<
-        KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
-        KokkosBatched::Diag::NonUnit,
-        KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
-    }
+    // b = R^{-1}b
+    KokkosBatched::SerialTrsv<
+      KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
+      KokkosBatched::Diag::NonUnit,
+      KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
   }
+}
 
-  /**
-   * @brief Maps the data from source mesh to target mesh
-   *
-   * @param source_values Scalar array view of source field values
-   * @param source_coordinates Scalar array view of the coordinates of control
-   * points of source field
-   * @param target_coordinates Scalar array view of the coordinates of control
-   * points of target field
-   * @param support The object that enpasulates support info
-   * @param dim The dimension of the simulations
-   * @param degree The degree of the interpolation order
-   * @param rbf_func The radial basis function choice
-   * @param[in/out] Scalar array view of the interpolated field in target mesh
-   *
-   *
-   */
-  template <typename Func>
-  void mls_interpolation(RealConstDefaultScalarArrayView source_values,
-                         RealConstDefaultScalarArrayView source_coordinates,
-                         RealConstDefaultScalarArrayView target_coordinates,
-                         const SupportResults& support, const LO& dim,
-                         const LO& degree, Func rbf_func,
-                         RealDefaultScalarArrayView approx_target_values)
-  {
-    PCMS_FUNCTION_TIMER;
-    static_assert(std::is_invocable_r_v<double, Func, double, double>,
-                  "rbf_func, takes radius and cutoff, returns weight");
-    static_assert(!std::is_pointer_v<Func>,
-                  "function pointer will fail in GPU execution context");
-
-    int nsources = source_coordinates.size() / dim;
-
-    OMEGA_H_CHECK_PRINTF(
-      source_values.size() == nsources,
-      "[ERROR] The size of the source values and source coordinates is not "
-      "same. "
-      "The current sizes are :\n"
-      "source_values_size = %d, source_coordinates_size = %d\n",
-      source_values.size(), nsources);
-
-    const auto ntargets = target_coordinates.size() / dim;
-
-    OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets,
-                         "[ERROR] The size of the approx target values and the "
-                         "number of targets is "
-                         "not same. The current numbers are :\n"
-                         "approx_target_values = %d, ntargets = %d\n",
-                         approx_target_values.size(), ntargets);
-
-    IntHostMatView host_slice_length(
-      "stores slice length of  polynomial basis in host", degree, dim);
-
-    Kokkos::deep_copy(host_slice_length, 0);
-
-    calculate_basis_slice_lengths(host_slice_length);
-
-    auto basis_size = calculate_basis_vector_size(host_slice_length);
-
-    IntDeviceMatView slice_length(
-      "stores slice length of polynomial basis in device", degree, dim);
-
-    auto slice_length_hd = Kokkos::create_mirror_view(slice_length);
-    Kokkos::deep_copy(slice_length_hd, host_slice_length);
-    Kokkos::deep_copy(slice_length, slice_length_hd);
-
-    int shared_size =
-      calculate_scratch_shared_size(support, ntargets, basis_size, dim);
-
-    team_policy tp(ntargets, Kokkos::AUTO);
-
-    int scratch_size = tp.scratch_size_max(0);
-    printf("Scratch Size = %d\n", scratch_size);
-    printf("Shared Size = %d\n", shared_size);
-    PCMS_ALWAYS_ASSERT(scratch_size > shared_size);
-
-    printf("does it fail or below\n");
-    // calculates the interpolated values
-    Kokkos::parallel_for(
-      "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)),
-      KOKKOS_LAMBDA(const member_type& team) {
-        int league_rank = team.league_rank();
-        int start_ptr = support.supports_ptr[league_rank];
-        int end_ptr = support.supports_ptr[league_rank + 1];
-        int nsupports = end_ptr - start_ptr;
-
-        // local_source_point stores the coordinates of source supports of a
-        // given target
-        ScratchMatView local_source_points(team.team_scratch(0), nsupports,
-                                           dim);
-
-        // rbf function values of source supports Phi(n,n)
-        ScratchVecView phi_vector(team.team_scratch(0), nsupports);
-
-        //  vondermonde matrix P from the vectors of basis vector of supports
-        ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
-                                          basis_size);
-        // stores known vector (b)
-        ScratchVecView support_values(team.team_scratch(0), nsupports);
-
-        // basis of target
-        ScratchVecView target_basis_vector(team.team_scratch(0), basis_size);
-
-        // Initialize the scratch matrices and  vectors
-        fill(0.0, team, local_source_points);
-        fill(0.0, team, vandermonde_matrix);
-        fill(0.0, team, phi_vector);
-        fill(0.0, team, support_values);
-        fill(0.0, team, target_basis_vector);
-
-        // storing the coords of local supports
-        int count = -1;
-        for (int j = start_ptr; j < end_ptr; ++j) {
-          count++;
-          auto index = support.supports_idx[j];
-          local_source_points(count, 0) = source_coordinates[index * dim];
-          local_source_points(count, 1) = source_coordinates[index * dim + 1];
-          if (dim == 3) {
-            local_source_points(count, 2) = source_coordinates[index * dim + 2];
-          }
+/**
+ * @brief Maps the data from source mesh to target mesh
+ *
+ * @param source_values Scalar array view of source field values
+ * @param source_coordinates Scalar array view of the coordinates of control
+ * points of source field
+ * @param target_coordinates Scalar array view of the coordinates of control
+ * points of target field
+ * @param support The object that enpasulates support info
+ * @param dim The dimension of the simulations
+ * @param degree The degree of the interpolation order
+ * @param rbf_func The radial basis function choice
+ * @param[in/out] Scalar array view of the interpolated field in target mesh
+ *
+ *
+ */
+template <typename Func>
+void mls_interpolation(RealConstDefaultScalarArrayView source_values,
+                       RealConstDefaultScalarArrayView source_coordinates,
+                       RealConstDefaultScalarArrayView target_coordinates,
+                       const SupportResults& support, const LO& dim,
+                       const LO& degree, Func rbf_func,
+                       RealDefaultScalarArrayView approx_target_values)
+{
+  PCMS_FUNCTION_TIMER;
+  static_assert(std::is_invocable_r_v<double, Func, double, double>,
+                "rbf_func, takes radius and cutoff, returns weight");
+  static_assert(!std::is_pointer_v<Func>,
+                "function pointer will fail in GPU execution context");
+
+  int nsources = source_coordinates.size() / dim;
+
+  OMEGA_H_CHECK_PRINTF(
+    source_values.size() == nsources,
+    "[ERROR] The size of the source values and source coordinates is not "
+    "same. "
+    "The current sizes are :\n"
+    "source_values_size = %d, source_coordinates_size = %d\n",
+    source_values.size(), nsources);
+
+  const auto ntargets = target_coordinates.size() / dim;
+
+  OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets,
+                       "[ERROR] The size of the approx target values and the "
+                       "number of targets is "
+                       "not same. The current numbers are :\n"
+                       "approx_target_values = %d, ntargets = %d\n",
+                       approx_target_values.size(), ntargets);
+
+  IntHostMatView host_slice_length(
+    "stores slice length of  polynomial basis in host", degree, dim);
+
+  Kokkos::deep_copy(host_slice_length, 0);
+
+  calculate_basis_slice_lengths(host_slice_length);
+
+  auto basis_size = calculate_basis_vector_size(host_slice_length);
+
+  IntDeviceMatView slice_length(
+    "stores slice length of polynomial basis in device", degree, dim);
+
+  auto slice_length_hd = Kokkos::create_mirror_view(slice_length);
+  Kokkos::deep_copy(slice_length_hd, host_slice_length);
+  Kokkos::deep_copy(slice_length, slice_length_hd);
+
+  int shared_size =
+    calculate_scratch_shared_size(support, ntargets, basis_size, dim);
+
+  team_policy tp(ntargets, Kokkos::AUTO);
+
+  int scratch_size = tp.scratch_size_max(1);
+  printf("Scratch Size = %d\n", scratch_size);
+  printf("Shared Size = %d\n", shared_size);
+  PCMS_ALWAYS_ASSERT(scratch_size > shared_size);
+
+  printf("does it fail or below\n");
+  // calculates the interpolated values
+  Kokkos::parallel_for(
+    "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)),
+    KOKKOS_LAMBDA(const member_type& team) {
+      int league_rank = team.league_rank();
+      int start_ptr = support.supports_ptr[league_rank];
+      int end_ptr = support.supports_ptr[league_rank + 1];
+      int nsupports = end_ptr - start_ptr;
+
+      // local_source_point stores the coordinates of source supports of a
+      // given target
+      ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim);
+
+      // rbf function values of source supports Phi(n,n)
+      ScratchVecView phi_vector(team.team_scratch(1), nsupports);
+
+      //  vondermonde matrix P from the vectors of basis vector of supports
+      ScratchMatView vandermonde_matrix(team.team_scratch(1), nsupports,
+                                        basis_size);
+      // stores known vector (b)
+      ScratchVecView support_values(team.team_scratch(1), nsupports);
+
+      // basis of target
+      ScratchVecView target_basis_vector(team.team_scratch(1), basis_size);
+
+      // solution coefficients (solution vector)
+      //
+      ScratchVecView solution_coefficients(team.team_scratch(1), basis_size);
+      // Initialize the scratch matrices and  vectors
+      fill(0.0, team, local_source_points);
+      fill(0.0, team, vandermonde_matrix);
+      fill(0.0, team, phi_vector);
+      fill(0.0, team, support_values);
+      fill(0.0, team, target_basis_vector);
+      fill(0.0, team, solution_coefficients);
+
+      // storing the coords of local supports
+      int count = -1;
+      for (int j = start_ptr; j < end_ptr; ++j) {
+        count++;
+        auto index = support.supports_idx[j];
+        local_source_points(count, 0) = source_coordinates[index * dim];
+        local_source_points(count, 1) = source_coordinates[index * dim + 1];
+        if (dim == 3) {
+          local_source_points(count, 2) = source_coordinates[index * dim + 2];
         }
+      }
 
-        // evaluates the basis vector of a given target point
-        Coord target_point;
-        target_point.x = target_coordinates[league_rank * dim];
-        target_point.y = target_coordinates[league_rank * dim + 1];
+      // evaluates the basis vector of a given target point
+      Coord target_point;
+      target_point.x = target_coordinates[league_rank * dim];
+      target_point.y = target_coordinates[league_rank * dim + 1];
 
-        if (dim == 3) {
-          target_point.z = target_coordinates[league_rank * dim + 2];
-        }
+      if (dim == 3) {
+        target_point.z = target_coordinates[league_rank * dim + 2];
+      }
 
-        /** phi(nsupports) is the array of rbf functions evaluated at the
-         * source supports In the actual implementation, Phi(nsupports,
-         * nsupports) is the diagonal matrix & each diagonal element is the phi
-         * evaluated at each source points
-         *
-         * step 1: evaluate the phi vector with the original dimension
-         */
-
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
-            compute_phi_vector(target_point, local_source_points, j,
-                               support.radii2[league_rank], rbf_func,
-                               phi_vector);
-          });
-
-        team.team_barrier();
-
-        /** support_values(nsupports) (or known rhs vector b) is the vector of
-         * the quantity that we want interpolate
-         *
-         *
-         * step 4: find local supports function values
-         */
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
-            support_values(j) =
-              source_values[support.supports_idx[start_ptr + j]];
-            OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)),
-                                 "ERROR: NaN found: at support %d\n", j);
-          });
-
-        team.team_barrier();
-
-        /**
-         *
-         * the local_source_points is of the type ScratchMatView with
-         * coordinates information; row is number of local supports
-         * & column is dim
-         *
-         * step 2: normalize local source supports and target point
-         */
-
-        //      normalize_supports(team, target_point, local_source_points);
-
-        //      team.team_barrier();
-        /**
-         *
-         * this can evaluate monomial basis vector for any degree of polynomial
-         * step 3: call basis vector evaluation function (eval_basis_vector);
-         */
-        eval_basis_vector(slice_length, target_point, target_basis_vector);
-
-        /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
-         * created with the basis vector of source supports stacking on top of
-         * each other
-         *
-         * step 4: create vandermonde matrix
-         */
-        Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
-            create_vandermonde_matrix(local_source_points, j, slice_length,
-                                      vandermonde_matrix);
-          });
-
-        team.team_barrier();
-
-        OMEGA_H_CHECK_PRINTF(
-          support.radii2[league_rank] > 0,
-          "ERROR: radius2 has to be positive but found to be %.16f\n",
-          support.radii2[league_rank]);
-
-        // convert normal equation to Ax = b where A is a square matrix
-        //      auto result = convert_normal_equation(vandermonde_matrix,
-        //      phi_vector,
-        //                                            support_values, team);
-
-        //      team.team_barrier();
-
-        // It stores the solution in rhs vector itself
-        //      solve_matrix(result.square_matrix, result.transformed_rhs,
-        //      team);
-
-        //      solve_weighted_matrix_serial_qr(team, league_rank, phi_vector,
-        //                                      vandermonde_matrix,
-        //                                      support_values);
-
-        solve_matrix_serial_svd(team, league_rank, phi_vector,
-                                vandermonde_matrix, support_values);
-        team.team_barrier();
-
-        double target_value = KokkosBlas::Experimental::dot(
-          team, solution_coefficients, target_basis_vector);
-
-        if (team.team_rank() == 0) {
-          OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
-                               league_rank);
-          approx_target_values[league_rank] = target_value;
-        }
-      });
-  }
+      /** phi(nsupports) is the array of rbf functions evaluated at the
+       * source supports In the actual implementation, Phi(nsupports,
+       * nsupports) is the diagonal matrix & each diagonal element is the phi
+       * evaluated at each source points
+       *
+       * step 1: evaluate the phi vector with the original dimension
+       */
+
+      Kokkos::parallel_for(
+        Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
+          compute_phi_vector(target_point, local_source_points, j,
+                             support.radii2[league_rank], rbf_func, phi_vector);
+        });
+
+      team.team_barrier();
+
+      /** support_values(nsupports) (or known rhs vector b) is the vector of
+       * the quantity that we want interpolate
+       *
+       *
+       * step 4: find local supports function values
+       */
+      Kokkos::parallel_for(
+        Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
+          support_values(j) =
+            source_values[support.supports_idx[start_ptr + j]];
+          OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)),
+                               "ERROR: NaN found: at support %d\n", j);
+        });
+
+      team.team_barrier();
+
+      /**
+       *
+       * the local_source_points is of the type ScratchMatView with
+       * coordinates information; row is number of local supports
+       * & column is dim
+       *
+       * step 2: normalize local source supports and target point
+       */
+
+      normalize_supports(team, target_point, local_source_points);
+
+      team.team_barrier();
+      /**
+       *
+       * this can evaluate monomial basis vector for any degree of polynomial
+       * step 3: call basis vector evaluation function (eval_basis_vector);
+       */
+      eval_basis_vector(slice_length, target_point, target_basis_vector);
+
+      /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
+       * created with the basis vector of source supports stacking on top of
+       * each other
+       *
+       * step 4: create vandermonde matrix
+       */
+      Kokkos::parallel_for(
+        Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
+          create_vandermonde_matrix(local_source_points, j, slice_length,
+                                    vandermonde_matrix);
+        });
+
+      team.team_barrier();
+
+      OMEGA_H_CHECK_PRINTF(
+        support.radii2[league_rank] > 0,
+        "ERROR: radius2 has to be positive but found to be %.16f\n",
+        support.radii2[league_rank]);
+
+      solve_matrix_serial_svd(team, phi_vector, support_values,
+                              vandermonde_matrix, solution_coefficients);
+      team.team_barrier();
+
+      double target_value = KokkosBlas::Experimental::dot(
+        team, solution_coefficients, target_basis_vector);
+
+      if (team.team_rank() == 0) {
+        OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
+                             league_rank);
+        approx_target_values[league_rank] = target_value;
+      }
+    });
+}
 
-  /**
-   * @brief \overload
-   * Maps the data from source mesh to target mesh
-   *
-   * @param source_values Read array of Source field values
-   * @param source_coordinates Read array of the coordinates of control points
-   * of source field
-   * @param target_coordinates Read array of the coordinates of control points
-   * of target field
-   * @param support The object that enpasulates support info
-   * @param dim The dimension of the simulations
-   * @param degree The degree of the interpolation order
-   * @param rbf_func The radial basis function choice
-   * @return  Write array of the interpolated field in target mesh
-   *
-   *
-   */
-  template <typename Func>
-  Write<Real> mls_interpolation(
-    const Reals source_values, const Reals source_coordinates,
-    const Reals target_coordinates, const SupportResults& support,
-    const LO& dim, const LO& degree, Func rbf_func)
-  {
-    const auto nsources = source_coordinates.size() / dim;
-
-    const auto ntargets = target_coordinates.size() / dim;
-
-    RealConstDefaultScalarArrayView source_values_array_view(
-      source_values.data(), source_values.size());
-
-    RealConstDefaultScalarArrayView source_coordinates_array_view(
-      source_coordinates.data(), source_coordinates.size());
-
-    RealConstDefaultScalarArrayView target_coordinates_array_view(
-      target_coordinates.data(), target_coordinates.size());
-
-    RealDefaultScalarArrayView radii2_array_view(support.radii2.data(),
-                                                 support.radii2.size());
-
-    Write<Real> interpolated_values(ntargets, 0, "approximated target values");
-
-    RealDefaultScalarArrayView interpolated_values_array_view(
-      interpolated_values.data(), interpolated_values.size());
-
-    mls_interpolation(source_values_array_view, source_coordinates_array_view,
-                      target_coordinates_array_view, support, dim, degree,
-                      rbf_func, interpolated_values_array_view);
-
-    return interpolated_values;
-  }
+/**
+ * @brief \overload
+ * Maps the data from source mesh to target mesh
+ *
+ * @param source_values Read array of Source field values
+ * @param source_coordinates Read array of the coordinates of control points
+ * of source field
+ * @param target_coordinates Read array of the coordinates of control points
+ * of target field
+ * @param support The object that enpasulates support info
+ * @param dim The dimension of the simulations
+ * @param degree The degree of the interpolation order
+ * @param rbf_func The radial basis function choice
+ * @return  Write array of the interpolated field in target mesh
+ *
+ *
+ */
+template <typename Func>
+Write<Real> mls_interpolation(const Reals source_values,
+                              const Reals source_coordinates,
+                              const Reals target_coordinates,
+                              const SupportResults& support, const LO& dim,
+                              const LO& degree, Func rbf_func)
+{
+  const auto nsources = source_coordinates.size() / dim;
+
+  const auto ntargets = target_coordinates.size() / dim;
+
+  RealConstDefaultScalarArrayView source_values_array_view(
+    source_values.data(), source_values.size());
+
+  RealConstDefaultScalarArrayView source_coordinates_array_view(
+    source_coordinates.data(), source_coordinates.size());
+
+  RealConstDefaultScalarArrayView target_coordinates_array_view(
+    target_coordinates.data(), target_coordinates.size());
+
+  RealDefaultScalarArrayView radii2_array_view(support.radii2.data(),
+                                               support.radii2.size());
+
+  Write<Real> interpolated_values(ntargets, 0, "approximated target values");
+
+  RealDefaultScalarArrayView interpolated_values_array_view(
+    interpolated_values.data(), interpolated_values.size());
+
+  mls_interpolation(source_values_array_view, source_coordinates_array_view,
+                    target_coordinates_array_view, support, dim, degree,
+                    rbf_func, interpolated_values_array_view);
+
+  return interpolated_values;
+}
 
 } // namespace detail
 } // namespace pcms
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index cd0fdc1f..202f7ec8 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -87,7 +87,7 @@ ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix)
   fill(0.0, team, transMatrix);
   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
     for (int j = 0; j < column; ++j) {
-      transMatrix(j, i) = matrix(j, i);
+      transMatrix(i, j) = matrix(j, i);
     }
   });
   return transMatrix;
@@ -104,8 +104,8 @@ void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries,
 }
 
 KOKKOS_INLINE_FUNCTION
-void eval_row_scaling(member_type team, const ScratchVecView& diagonal_entries,
-                      ScratchMatView& matrix)
+void eval_row_scaling(member_type team, ScratchVecView diagonal_entries,
+                      ScratchMatView matrix)
 {
 
   int row = matrix.extent(0);
@@ -122,27 +122,49 @@ void eval_row_scaling(member_type team, const ScratchVecView& diagonal_entries,
     for (int j = 0; j < column; ++j) {
       matrix(i, j) *=
         diagonal_entries(i); // scales each row of a matrix with a corresponding
-                             // value in a digonal entries
-    });
+    } // value in a digonal entries
+  });
+}
+
+KOKKOS_INLINE_FUNCTION
+void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries,
+                      ScratchVecView rhs_values)
+{
+
+  int weight_size = diagonal_entries.size();
+  int rhs_size = rhs_values.size();
 
+  OMEGA_H_CHECK_PRINTF(
+    weight_size == rhs_size,
+    "[ERROR]: for row scaling the size of diagonal entries vector should be "
+    "equal or less than the row of the matrix which is to be scaled\n"
+    "size of weight vector = %d, size of a rhs vector = %d\n",
+    weight_size, rhs_size);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, weight_size), [=](int i) {
+    rhs_values(i) *= diagonal_entries(
+      i); // scales each element  of a rhs with a corresponding weights
+  });
 }
 
 KOKKOS_INLINE_FUNCTION
-void scale_and_adjust(member_type team, const ScratchVecView& diagonal_entries, ScratchMatView& matrixToScale, ScratchMatView& adjustedMatrix){
-    size_t rowA = matrixToScale.extent(0);
-    size_t columnA = matrixToScale.extent(1);
-
-    size_t rowB = adjustedMatrix.extent(0);
-    size_t columnB = adjustedMatrix.extent(0);
-
-    OMEGA_CHECK(columnB == rowA)
-    eval_row_scaling(team, diagonal_entries, matrixtoScale);
-    Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
-      for (int j = 0; j < columnB; == j) {
-        adjustedMatrix(i, j) = matrixtoScale(i, j);
-      }
-    });
+void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries,
+                      ScratchMatView& matrixToScale,
+                      ScratchMatView& adjustedMatrix)
+{
+  size_t rowA = matrixToScale.extent(0);
+  size_t colA = matrixToScale.extent(1);
+
+  size_t rowB = adjustedMatrix.extent(0);
+  size_t colB = adjustedMatrix.extent(1);
+
+  OMEGA_H_CHECK(colB == rowA);
+  eval_row_scaling(team, diagonal_entries, matrixToScale);
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
+    for (int j = 0; j < colB; ++j) {
+      adjustedMatrix(i, j) = matrixToScale(i, j);
+    }
+  });
 }
-} // end namespace pcms
-} // end namespace detail
+} // namespace detail
+} // namespace pcms
 #endif
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 4ea80c35..188dc7ec 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -114,6 +114,7 @@ if(Catch2_FOUND)
 		  test_normalisation.cpp
 		  test_spr_meshfields.cpp
 		  test_serial_qr.cpp
+		  test_svd_serial.cpp
               )
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})

From 646d89c9afa6c7ece908950f512d89b573e04340 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 11 Mar 2025 16:47:44 -0400
Subject: [PATCH 21/73] add test for svd

---
 test/test_svd_serial.cpp | 290 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 290 insertions(+)
 create mode 100644 test/test_svd_serial.cpp

diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp
new file mode 100644
index 00000000..b86a9e4a
--- /dev/null
+++ b/test/test_svd_serial.cpp
@@ -0,0 +1,290 @@
+#include <catch2/catch_test_macros.hpp>
+#include <catch2/catch_approx.hpp>
+#include <pcms/interpolator/mls_interpolation_impl.hpp>
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
+#include <pcms/interpolator/pcms_interpolator_view_utils.hpp>
+#include <catch2/matchers/catch_matchers_floating_point.hpp>
+#include "KokkosBatched_SVD_Decl.hpp"
+#include "KokkosBatched_SVD_Serial_Impl.hpp"
+#include <KokkosBlas2_serial_gemv_impl.hpp>
+#include <KokkosBatched_Gemm_Decl.hpp>
+#include <vector>
+#include <iostream>
+
+using namespace std;
+using namespace Omega_h;
+using namespace pcms;
+
+TEST_CASE("test_serial_svd")
+{
+
+  constexpr int row = 3;
+  constexpr int column = 3;
+
+  double tolerance = 1e-4;
+  double A_data[row][column] = {
+    {4.0, 2.0, 3.0}, {3.0, 5.0, 1.0}, {2.0, 3.0, 6.0}};
+
+  double rhs_data[row] = {1.28571, 1.42857, 3.1428};
+
+  double expected_solution[column] = {-0.142857, 0.285714,
+                                      0.428571}; // Approximate values
+
+  SECTION("test_svd_factorization")
+  {
+    Kokkos::View<double**> result("result", row, column);
+    team_policy tp(1, Kokkos::AUTO);
+    Kokkos::parallel_for(
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        ScratchMatView A(team.team_scratch(1), row, column);
+        ScratchMatView U(team.team_scratch(1), row, row);
+        ScratchMatView Vt(team.team_scratch(1), column, column);
+        ScratchVecView sigma(team.team_scratch(1), column);
+        ScratchVecView work(team.team_scratch(1), row);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            A(i, j) = A_data[i][j];
+          }
+        });
+
+        if (team.league_rank() == 0) {
+          KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U,
+                                           sigma, Vt, work);
+        }
+        team.team_barrier();
+        ScratchMatView sigma_mat(team.team_scratch(1), row, column);
+        ScratchMatView Usigma(team.team_scratch(1), row, column);
+        detail::fill(0.0, team, Usigma);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            sigma_mat(i, j) = (i == j) ? sigma(i) : 0.0;
+          }
+        });
+        team.team_barrier();
+
+        ScratchMatView reconstructedA(team.team_scratch(1), row, column);
+        if (team.league_rank() == 0) {
+          KokkosBatched::SerialGemm<
+            KokkosBatched::Trans::NoTranspose,
+            KokkosBatched::Trans::NoTranspose,
+            KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat,
+                                                          0.0, Usigma);
+
+          KokkosBatched::SerialGemm<
+            KokkosBatched::Trans::NoTranspose,
+            KokkosBatched::Trans::NoTranspose,
+            KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0,
+                                                          reconstructedA);
+        }
+
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            result(i, j) = reconstructedA(i, j);
+          }
+        });
+      });
+    auto host_result = Kokkos::create_mirror_view(result);
+    Kokkos::deep_copy(host_result, result);
+
+    for (int i = 0; i < row; ++i) {
+      for (int j = 0; j < column; ++j) {
+        CHECK_THAT(host_result(i, j),
+                   Catch::Matchers::WithinAbs(A_data[i][j], tolerance));
+      }
+    }
+  } // end section
+
+  SECTION("test_transpose_function")
+  {
+
+    Kokkos::View<double**> result("result", row, row);
+    Kokkos::View<double**> transpose_expected("result", row, row);
+    Kokkos::deep_copy(result, 0.0);
+    Kokkos::deep_copy(transpose_expected, 0.0);
+    team_policy tp(1, Kokkos::AUTO);
+    Kokkos::parallel_for(
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        ScratchMatView A(team.team_scratch(1), row, column);
+        ScratchMatView U(team.team_scratch(1), row, row);
+        ScratchMatView Vt(team.team_scratch(1), column, column);
+        ScratchVecView sigma(team.team_scratch(1), column);
+        ScratchVecView work(team.team_scratch(1), row);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            A(i, j) = A_data[i][j];
+          }
+        });
+
+        if (team.league_rank() == 0) {
+          KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U,
+                                           sigma, Vt, work);
+        }
+        team.team_barrier();
+        // detail::fill(0.0, team, sigma_inv_expected);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < row; ++j) {
+            transpose_expected(i, j) = U(j, i);
+          }
+        });
+        auto transposedU = detail::find_transpose(team, U);
+        team.team_barrier();
+
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < row; ++j) {
+            result(i, j) = transposedU(i, j);
+          }
+        });
+      });
+    auto host_result = Kokkos::create_mirror_view(result);
+    auto host_expected = Kokkos::create_mirror_view(transpose_expected);
+    Kokkos::deep_copy(host_result, result);
+    Kokkos::deep_copy(host_expected, transpose_expected);
+
+    for (int i = 0; i < row; ++i) {
+      for (int j = 0; j < row; ++j) {
+        CHECK_THAT(host_result(i, j),
+                   Catch::Matchers::WithinAbs(host_expected(i, j), tolerance));
+      }
+    }
+
+  } // end section
+
+  SECTION("test_row_scaling_function")
+  {
+    double diagonal_entries[row] = {2.0, 0.5, -1.0};
+    double expected_result[row][column] = {
+      {8.0, 4.0, 6.0}, {1.5, 2.5, 0.5}, {-2.0, -3.0, -6.0}};
+
+    Kokkos::View<double**> result("result", row, column);
+    Kokkos::deep_copy(result, 0.0);
+    team_policy tp(1, Kokkos::AUTO);
+    Kokkos::parallel_for(
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        ScratchMatView A(team.team_scratch(1), row, column);
+        ScratchVecView weight(team.team_scratch(1), row);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            A(i, j) = A_data[i][j];
+          }
+          weight(i) = diagonal_entries[i];
+        });
+
+        team.team_barrier();
+
+        detail::eval_row_scaling(team, weight, A);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            result(i, j) = A(i, j);
+          }
+        });
+      });
+    auto host_result = Kokkos::create_mirror_view(result);
+    Kokkos::deep_copy(host_result, result);
+
+    for (int i = 0; i < row; ++i) {
+      for (int j = 0; j < row; ++j) {
+        CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs(
+                                        expected_result[i][j], tolerance));
+      }
+    }
+
+  } // end section
+
+  SECTION("test_scale_adjust_function")
+  {
+    constexpr int rowA = 6; // Matrix to scale (6x6)
+    constexpr int colA = 6;
+    constexpr int rowB = 3; // Adjusted matrix (3x6)
+    constexpr int colB = 6;
+
+    double A_data[rowA][colA] = {
+      {4.0, 2.0, 3.0, 1.0, 0.0, 5.0}, {3.0, 5.0, 1.0, 4.0, 2.0, 6.0},
+      {2.0, 3.0, 6.0, 5.0, 1.0, 7.0}, {7.0, 8.0, 9.0, 3.0, 2.0, 1.0},
+      {1.0, 3.0, 2.0, 6.0, 4.0, 5.0}, {5.0, 7.0, 3.0, 8.0, 9.0, 6.0}};
+
+    double diagonal_entries_data[rowB] = {2.0, 0.5, -1.0};
+
+    double expected_result[rowB][colB] = {{8.0, 4.0, 6.0, 2.0, 0.0, 10.0},
+                                          {1.5, 2.5, 0.5, 2.0, 1.0, 3.0},
+                                          {-2.0, -3.0, -6.0, -5.0, -1.0, -7.0}};
+
+    Kokkos::View<double**> result("result", rowB, colB);
+    Kokkos::deep_copy(result, 0.0);
+    team_policy tp(1, Kokkos::AUTO);
+    Kokkos::parallel_for(
+      tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        ScratchMatView A(team.team_scratch(1), rowA, colA);
+        ScratchMatView result_matrix(team.team_scratch(1), rowB, colB);
+        ScratchVecView weight(team.team_scratch(1), rowB);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) {
+          for (int j = 0; j < colA; ++j) {
+            A(i, j) = A_data[i][j];
+          }
+        });
+
+        team.team_barrier();
+
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
+          weight(i) = diagonal_entries_data[i];
+        });
+
+        team.team_barrier();
+
+        detail::scale_and_adjust(team, weight, A, result_matrix);
+        team.team_barrier();
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
+          for (int j = 0; j < colB; ++j) {
+            result(i, j) = result_matrix(i, j);
+          }
+        });
+      });
+    auto host_result = Kokkos::create_mirror_view(result);
+    Kokkos::deep_copy(host_result, result);
+
+    for (int i = 0; i < row; ++i) {
+      for (int j = 0; j < row; ++j) {
+        CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs(
+                                        expected_result[i][j], tolerance));
+      }
+    }
+
+  } // end section
+
+  SECTION("test_svd_solver")
+  {
+    Kokkos::View<double*> result("result", column);
+    team_policy tp(1, Kokkos::AUTO);
+    Kokkos::parallel_for(
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(800)),
+      KOKKOS_LAMBDA(const member_type& team) {
+        ScratchMatView A(team.team_scratch(1), row, column);
+        ScratchVecView rhs(team.team_scratch(1), row);
+        ScratchVecView x(team.team_scratch(1), column);
+        detail::fill(0.0, team, x);
+
+        ScratchVecView weight(team.team_scratch(1), row);
+        detail::fill(1.0, team, weight);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
+          for (int j = 0; j < column; ++j) {
+            A(i, j) = A_data[i][j];
+          }
+          rhs(i) = rhs_data[i];
+        });
+
+        detail::solve_matrix_serial_svd(team, weight, rhs, A, x);
+        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column),
+                             [=](int i) { result(i) = x(i); });
+      });
+    auto host_result = Kokkos::create_mirror_view(result);
+    Kokkos::deep_copy(host_result, result);
+
+    for (int i = 0; i < column; ++i) {
+      CHECK_THAT(host_result(i),
+                 Catch::Matchers::WithinAbs(expected_solution[i], tolerance));
+    }
+  } // end section
+}

From 3496b63b37a33568ddb46ce0dfff6acd212330fd Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 11 Mar 2025 16:49:47 -0400
Subject: [PATCH 22/73] add logger

---
 .../interpolator/pcms_interpolator_logger.hpp | 126 ++++++++++++++++++
 1 file changed, 126 insertions(+)
 create mode 100644 src/pcms/interpolator/pcms_interpolator_logger.hpp

diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp
new file mode 100644
index 00000000..c0bbf66a
--- /dev/null
+++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp
@@ -0,0 +1,126 @@
+#ifndef PCMS_INTERPOLATOR_LOGGER_HPP
+#define PCMS_INTERPOLATOR_LOGGER_HPP
+
+#include "pcms_interpolator_aliases.hpp"
+
+namespace pcms
+{
+enum class LogLevel
+{
+  INFO,
+  WARNING,
+  ERROR,
+  DEBUG
+};
+
+class Logger
+{
+private:
+  size_t selected_league_rank_;
+
+public:
+  KOKKOS_INLINE_FUNCTION
+  explicit Logger(size_t selected_league_rank)
+    : selected_league_rank_(selected_league_rank)
+  {
+  }
+
+  // log level info
+  template <typename... Args>
+  KOKKOS_INLINE_FUNCTION void log(const member_type& team, const LogLevel level,
+                                  const char* fmt, Args... args)
+  {
+    if (team.league_rank() == selected_league_rank_) {
+      Kokkos::single(Kokkos::PerTeam(team), [&]() {
+        printf("[%s] (League %d) ", logLevelToString(level),
+               team.league_rank());
+        printf(fmt, args...);
+        printf("\n");
+      });
+    }
+  }
+
+  KOKKOS_INLINE_FUNCTION
+  void logStruct(const member_type& team, const LogLevel level, const Coord& p,
+                 const char* name)
+  {
+
+    if (team.league_rank() == selected_league_rank_) {
+      Kokkos::single(Kokkos::PerTeam(team), [&]() {
+        printf("[%s] (League %d) %s: ", logLevelToString(level),
+               team.league_rank(), name);
+        printf("(%12.6f, %12.6f)", p.x, p.y);
+        printf("\n");
+      });
+    }
+  }
+  // log scratch vector
+  KOKKOS_INLINE_FUNCTION
+  void logVector(const member_type& team, const LogLevel level,
+                 const ScratchVecView& vector, const char* name) const
+  {
+    if (team.league_rank() == selected_league_rank_) {
+      Kokkos::single(Kokkos::PerTeam(team), [&]() {
+        printf("[%s] (League %d) %s: ", logLevelToString(level),
+               team.league_rank(), name);
+        for (int i = 0; i < vector.size(); ++i) {
+          printf("%12.6f", vector(i));
+        }
+        printf("\n");
+      });
+    }
+  }
+
+  // log scratch vector
+  KOKKOS_INLINE_FUNCTION
+  void logMatrix(const member_type& team, const LogLevel level,
+                 const ScratchMatView& matrix, const char* name) const
+  {
+    if (team.league_rank() == selected_league_rank_) {
+      Kokkos::single(Kokkos::PerTeam(team), [&]() {
+        printf("[%s] (League %d) %s: \n", logLevelToString(level),
+               team.league_rank(), name);
+        for (int i = 0; i < matrix.extent(0); ++i) {
+          for (int j = 0; j < matrix.extent(1); ++j) {
+            printf("%12.6f", matrix(i, j));
+          }
+          printf("\n");
+        }
+
+        printf("\n");
+      });
+    }
+  }
+
+  // log scalar
+  KOKKOS_INLINE_FUNCTION
+  void logScalar(const member_type& team, const LogLevel level,
+                 const double value, const char* name) const
+  {
+    if (team.league_rank() == selected_league_rank_) {
+      Kokkos::single(Kokkos::PerTeam(team), [&]() {
+        printf("[%s] (League %d) %s: ", logLevelToString(level),
+               team.league_rank(), name);
+        printf("%12.6f", value);
+        printf("\n");
+      });
+    }
+  }
+
+private:
+  KOKKOS_INLINE_FUNCTION
+  const char* logLevelToString(LogLevel loglevel) const
+  {
+    switch (loglevel) {
+      case LogLevel::INFO: return "INFO";
+
+      case LogLevel::WARNING: return "WARNING";
+
+      case LogLevel::ERROR: return "ERROR";
+
+      case LogLevel::DEBUG: return "DEBUG";
+    }
+  }
+};
+} // end namespace pcms
+#endif

From d5a331e9de84a7b2effa84dd5bf460dbe7b09313 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 11 Mar 2025 16:51:31 -0400
Subject: [PATCH 23/73] add logger file in CMakeList

---
 src/pcms/interpolator/CMakeLists.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt
index 10dd51a1..7d1d0178 100644
--- a/src/pcms/interpolator/CMakeLists.txt
+++ b/src/pcms/interpolator/CMakeLists.txt
@@ -9,6 +9,7 @@ set(PCMS_FIELD_TRANSFER_HEADERS
     multidimarray.hpp
     mls_interpolation.hpp
 	pcms_interpolator_view_utils.hpp
+	pcms_interpolator_logger.hpp
 )
 
 set(PCMS_FIELD_TRANSFER_SOURCES

From 861d8b35b91f1e83cf5f9641109c359b599e064e Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 14 Mar 2025 14:58:08 -0400
Subject: [PATCH 24/73] svd implementation but broken

---
 src/pcms/interpolator/mls_interpolation.cpp   |  14 +-
 src/pcms/interpolator/mls_interpolation.hpp   |   3 +-
 .../interpolator/mls_interpolation_impl.hpp   | 203 +++++++++++-------
 .../interpolator/pcms_interpolator_logger.hpp |   3 +-
 .../pcms_interpolator_view_utils.hpp          |   9 +
 5 files changed, 149 insertions(+), 83 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index d19e9fc6..ce472b34 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -24,7 +24,7 @@ struct RBF_GAUSSIAN
     // 'a' is a spreading factor/decay factor
     // the value of 'a' is higher if the data is localized
     // the value of 'a' is smaller if the data is farther
-    int a = 20;
+    int a = 2;
 
     double r = sqrt(r_sq);
     double rho = sqrt(rho_sq);
@@ -101,6 +101,12 @@ struct RBF_CONST
   }
 };
 
+struct NoOp
+{
+  OMEGA_H_INLINE
+  double operator()(double, double) const { return 1.0; }
+};
+
 Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
@@ -130,6 +136,12 @@ Write<Real> mls_interpolation(const Reals source_values,
         source_values, source_coordinates, target_coordinates, support, dim,
         degree, RBF_CONST{});
       break;
+
+    case RadialBasisFunction::NO_OP:
+      interpolated_values = detail::mls_interpolation(
+        source_values, source_coordinates, target_coordinates, support, dim,
+        degree, NoOp{});
+      break;
   }
 
   return interpolated_values;
diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 17b085aa..8e8218aa 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -11,7 +11,8 @@ enum class RadialBasisFunction : LO
 {
   RBF_GAUSSIAN = 0,
   RBF_C4,
-  RBF_CONST
+  RBF_CONST,
+  NO_OP
 
 };
 
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index f4eb6c26..16ee43d3 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -18,11 +18,12 @@
 #include "../assert.h"
 #include "../profile.h"
 #include "pcms_interpolator_view_utils.hpp"
+#include "pcms_interpolator_logger.hpp"
 
-#include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
-#include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
-#include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
-#include <KokkosBlas2_gemv.hpp>          //KokkosBlas::gemv
+// #include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
+// #include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
+// #include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
+#include <KokkosBlas2_gemv.hpp> //KokkosBlas::gemv
 #include "KokkosBatched_SVD_Decl.hpp"
 #include "KokkosBatched_SVD_Serial_Impl.hpp"
 #include <KokkosBlas2_serial_gemv_impl.hpp>
@@ -428,9 +429,9 @@ void solve_matrix(const ScratchMatView& square_matrix,
 }
 
 KOKKOS_INLINE_FUNCTION
-void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight,
-                             ScratchVecView rhs_values, ScratchMatView matrix,
-                             ScratchVecView solution_vector)
+void solve_matrix_svd(member_type team, const ScratchVecView& weight,
+                      ScratchVecView rhs_values, ScratchMatView matrix,
+                      ScratchVecView solution_vector, double lambda = 0)
 {
 
   int row = matrix.extent(0);
@@ -439,6 +440,8 @@ void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight,
 
   int weight_size = weight.size();
 
+  printf("inside svd solver \n");
+  Logger logger(21);
   OMEGA_H_CHECK_PRINTF(
     weight_size == row,
     "the size of the weight vector should be equal to the row of the matrix\n"
@@ -447,15 +450,17 @@ void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight,
   // find_sq_root_each(team, weight);
 
   eval_row_scaling(team, weight, matrix);
+  logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : ");
 
   team.team_barrier();
 
   eval_rhs_scaling(team, weight, rhs_values);
+  logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : ");
   // A = UEV^T
   // initilize U (orthogonal matrices)  array
   ScratchMatView U(team.team_scratch(1), row, row);
   fill(-5.0, team, U);
-
+  logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : ");
   // initialize Vt
   ScratchMatView Vt(team.team_scratch(1), column, column);
   fill(-5.0, team, Vt);
@@ -472,87 +477,105 @@ void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight,
   ScratchMatView temp_matrix(team.team_scratch(1), column, row);
   fill(0, team, temp_matrix);
 
-  ScratchMatView sigmaInvUtMul(team.team_scratch(1), column, row);
-  fill(0, team, sigmaInvUtMul);
+  ScratchMatView vsigmaInvUtMul(team.team_scratch(1), column, row);
+  fill(0, team, vsigmaInvUtMul);
 
   // for serial run
-  if (team.league_rank() == 0) {
+  // if (team.league_rank() == 0) {
+  if (team.team_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
-                                     sigma, Vt, work);
-    find_inverse_each(team, sigma);
-    auto Ut = find_transpose(team, U);
-    scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
+                                     sigma, Vt, work, 1e-6);
 
-    // performing (S^-1 U^T)
-    //  KokkosBatched::SerialGemm<
-    //    KokkosBatched::Trans::NoTranspose,
-    //    KokkosBatched::Trans::Transpose,
-    //    KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, temp_matrix, U,
-    //    0.0, sigmaInvUtMul);
-    //  }
+    logger.logMatrix(team, LogLevel::DEBUG, U, "U : ");
+    logger.logVector(team, LogLevel::DEBUG, sigma, "S : ");
+    logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : ");
+    // A = UEV^T
+    // find_inverse_each(team, sigma);
 
-    // perform V (S^-1 U^T)
+    calculate_shrinkage_factor(team, lambda, sigma);
 
-    KokkosBatched::SerialGemm<
-      KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose,
-      KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0,
-                                                    sigmaInvUtMul);
+    logger.logVector(team, LogLevel::DEBUG, sigma,
+                     "S/(S^2 + gamma) (basis_size x nsupports) : ");
 
-    KokkosBlas::SerialGemv<
-      KokkosBlas::Trans::NoTranspose,
-      KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, sigmaInvUtMul, rhs_values,
-                                                 0.0, solution_vector);
-  }
-}
-KOKKOS_INLINE_FUNCTION
-void solve_weighted_matrix_serial_qr(const member_type& team,
-                                     const int league_rank,
-                                     ScratchVecView weight,
-                                     ScratchMatView& matrix,
-                                     ScratchVecView& rhs_values)
-{
-
-  int row = matrix.extent(0);
-
-  int column = matrix.extent(1);
+    // A = UEV^T
 
-  int weight_size = weight.size();
-
-  find_sq_root_each(team, weight);
-
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row),
-                       [=](int i) { eval_row_scaling(team, weight, matrix); });
-
-  // initilize tau array
-  ScratchVecView tau(team.team_scratch(1), column);
-  fill(0.0, team, tau);
-
-  // initialize work array
-  ScratchVecView work(team.team_scratch(1), column);
-  fill(0.0, team, work);
+    auto Ut = find_transpose(team, U);
 
-  // for serial run
-  if (league_rank == 0) {
+    logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) : ");
 
-    // compute QR factorization of matrix and stores the R in A and
-    // tau is used to recover Q
-    KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(matrix,
-                                                                     tau, work);
+    scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
 
-    // b = Q^{T}b
+    logger.logMatrix(team, LogLevel::DEBUG, temp_matrix,
+                     "S^-1Ut (basis_size x nsupports) : ");
 
-    KokkosBatched::SerialApplyQ<
-      KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
-      KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
-                                                   work);
+    KokkosBatched::SerialGemm<
+      KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose,
+      KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0,
+                                                    vsigmaInvUtMul);
 
-    // b = R^{-1}b
-    KokkosBatched::SerialTrsv<
-      KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
-      KokkosBatched::Diag::NonUnit,
-      KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
+    logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul,
+                     "VS^-1Ut (basis_size x nsupports) : ");
+    KokkosBlas::SerialGemv<
+      KokkosBlas::Trans::NoTranspose,
+      KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul,
+                                                 rhs_values, 0.0,
+                                                 solution_vector);
+    logger.logVector(team, LogLevel::DEBUG, solution_vector,
+                     "VS^-1Ut (basis_size x nsupports) : ");
   }
+  printf("coming out form svd solver..\n");
 }
+// KOKKOS_INLINE_FUNCTION
+// void solve_weighted_matrix_serial_qr(const member_type& team,
+//                                      const int league_rank,
+//                                      ScratchVecView weight,
+//                                      ScratchMatView& matrix,
+//                                      ScratchVecView& rhs_values)
+//{
+//
+//   int row = matrix.extent(0);
+//
+//   int column = matrix.extent(1);
+//
+//   int weight_size = weight.size();
+//
+//   find_sq_root_each(team, weight);
+//
+//   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row),
+//                        [=](int i) { eval_row_scaling(team, weight, matrix);
+//                        });
+//
+//   // initilize tau array
+//   ScratchVecView tau(team.team_scratch(1), column);
+//   fill(0.0, team, tau);
+//
+//   // initialize work array
+//   ScratchVecView work(team.team_scratch(1), column);
+//   fill(0.0, team, work);
+//
+//   // for serial run
+//   if (league_rank == 0) {
+//
+//     // compute QR factorization of matrix and stores the R in A and
+//     // tau is used to recover Q
+//     KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(matrix,
+//                                                                      tau,
+//                                                                      work);
+//
+//     // b = Q^{T}b
+//
+//     KokkosBatched::SerialApplyQ<
+//       KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
+//       KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
+//                                                    work);
+//
+//     // b = R^{-1}b
+//     KokkosBatched::SerialTrsv<
+//       KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
+//       KokkosBatched::Diag::NonUnit,
+//       KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
+//   }
+// }
 
 /**
  * @brief Maps the data from source mesh to target mesh
@@ -629,7 +652,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
   printf("Shared Size = %d\n", shared_size);
   PCMS_ALWAYS_ASSERT(scratch_size > shared_size);
 
-  printf("does it fail or below\n");
   // calculates the interpolated values
   Kokkos::parallel_for(
     "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)),
@@ -638,7 +660,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       int start_ptr = support.supports_ptr[league_rank];
       int end_ptr = support.supports_ptr[league_rank + 1];
       int nsupports = end_ptr - start_ptr;
-
+      printf("calculating for %d target point\n", league_rank);
+      Logger logger(21);
+      // logger.log(team, LogLevel::INFO, "The search starts from here....\n");
       // local_source_point stores the coordinates of source supports of a
       // given target
       ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim);
@@ -666,6 +690,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       fill(0.0, team, target_basis_vector);
       fill(0.0, team, solution_coefficients);
 
+      // logger.log(team, LogLevel::INFO, "The search starts from here....\n");
       // storing the coords of local supports
       int count = -1;
       for (int j = start_ptr; j < end_ptr; ++j) {
@@ -678,7 +703,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         }
       }
 
-      // evaluates the basis vector of a given target point
+      logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+                       "The local source points (x, y, z):\n ");
+
       Coord target_point;
       target_point.x = target_coordinates[league_rank * dim];
       target_point.y = target_coordinates[league_rank * dim + 1];
@@ -687,6 +714,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         target_point.z = target_coordinates[league_rank * dim + 2];
       }
 
+      logger.logStruct(team, LogLevel::DEBUG, target_point,
+                       "Target Point before pivoting :\n");
       /** phi(nsupports) is the array of rbf functions evaluated at the
        * source supports In the actual implementation, Phi(nsupports,
        * nsupports) is the diagonal matrix & each diagonal element is the phi
@@ -701,6 +730,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                              support.radii2[league_rank], rbf_func, phi_vector);
         });
 
+      logger.logVector(team, LogLevel::DEBUG, phi_vector,
+                       "Phi Values: (phi)\n ");
       team.team_barrier();
 
       /** support_values(nsupports) (or known rhs vector b) is the vector of
@@ -718,6 +749,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         });
 
       team.team_barrier();
+      logger.logVector(team, LogLevel::DEBUG, support_values,
+                       "Support Values (b):\n ");
 
       /**
        *
@@ -729,15 +762,19 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        */
 
       normalize_supports(team, target_point, local_source_points);
-
+      logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+                       "Local Source points after pivoting");
       team.team_barrier();
       /**
        *
+       * evaluates the basis vector of a given target point Coord target_point;
        * this can evaluate monomial basis vector for any degree of polynomial
        * step 3: call basis vector evaluation function (eval_basis_vector);
        */
       eval_basis_vector(slice_length, target_point, target_basis_vector);
 
+      logger.logVector(team, LogLevel::DEBUG, target_basis_vector,
+                       "target basis vector after pivot : ");
       /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
        * created with the basis vector of source supports stacking on top of
        * each other
@@ -752,18 +789,24 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
+      logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
+                       "Vandermonde  matrix : ");
+
       OMEGA_H_CHECK_PRINTF(
         support.radii2[league_rank] > 0,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
         support.radii2[league_rank]);
 
-      solve_matrix_serial_svd(team, phi_vector, support_values,
-                              vandermonde_matrix, solution_coefficients);
+      solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix,
+                       solution_coefficients);
       team.team_barrier();
+      logger.logVector(team, LogLevel::DEBUG, solution_coefficients,
+                       "Solution Vector : ");
 
       double target_value = KokkosBlas::Experimental::dot(
         team, solution_coefficients, target_basis_vector);
 
+      logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value : ");
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
                              league_rank);
diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp
index c0bbf66a..d919ad58 100644
--- a/src/pcms/interpolator/pcms_interpolator_logger.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp
@@ -64,7 +64,7 @@ class Logger
         printf("[%s] (League %d) %s: ", logLevelToString(level),
                team.league_rank(), name);
         for (int i = 0; i < vector.size(); ++i) {
-          printf("%12.6f", vector(i));
+          printf("%12.6f\n", vector(i));
         }
         printf("\n");
       });
@@ -119,6 +119,7 @@ class Logger
       case LogLevel::ERROR: return "ERROR";
 
       case LogLevel::DEBUG: return "DEBUG";
+      default: return "UNKNOWN";
     }
   }
 };
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index 202f7ec8..5dd419e8 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -77,6 +77,15 @@ void find_inverse_each(member_type team, ScratchVecView& array)
   });
 }
 
+KOKKOS_INLINE_FUNCTION
+void calculate_shrinkage_factor(member_type team, double lambda,
+                                ScratchVecView& sigma)
+{
+  int size = sigma.size();
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
+    sigma(i) /= (sigma(i) * sigma(i) + lambda);
+  });
+}
 KOKKOS_INLINE_FUNCTION
 ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix)
 {

From 7d6976bd60978fdbac8f89b538db87d3ffe78848 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 14 Mar 2025 14:59:07 -0400
Subject: [PATCH 25/73] svd implementation but broken

---
 test/CMakeLists.txt          |  4 ++--
 test/test_rbf_interp.cpp     |  3 ++-
 test/test_spr_meshfields.cpp | 20 ++++++++------------
 test/test_svd_serial.cpp     |  2 +-
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 188dc7ec..f538c4af 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -113,8 +113,8 @@ if(Catch2_FOUND)
 	      test_linear_solver.cpp
 		  test_normalisation.cpp
 		  test_spr_meshfields.cpp
-		  test_serial_qr.cpp
-		  test_svd_serial.cpp
+		  test_svd_serial.cpp 
+		  # test_serial_qr.cpp
               )
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})
diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp
index 1a92780a..e76091c6 100644
--- a/test/test_rbf_interp.cpp
+++ b/test/test_rbf_interp.cpp
@@ -41,9 +41,10 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
 {
 
   int dim = mesh.dim();
-  Real tolerance = 0.0005;
+  Real tolerance = 5e-4;
 
   std::vector<RadialBasisFunction> rbf_types = {
+    // RadialBasisFunction::RBF_GAUSSIAN
     RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4,
     RadialBasisFunction::RBF_CONST
 
diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
index c8757a36..16768b5a 100644
--- a/test/test_spr_meshfields.cpp
+++ b/test/test_spr_meshfields.cpp
@@ -65,14 +65,14 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values,
 {
 
   int dim = mesh.dim();
-  Real tolerance = 1e-3;
+  Real tolerance = 0.0005;
 
-  Omega_h::Write<Real> ignored(patches.a2ab.size(), 1);
+  Omega_h::Write<Real> ignored(patches.a2ab.size() - 1, 1);
   SupportResults support{patches.a2ab, patches.ab2b, ignored};
 
   auto approx_target_values = pcms::mls_interpolation(
     source_values, source_coordinates, target_coordinates, support, dim, degree,
-    pcms::RadialBasisFunction::RBF_CONST);
+    pcms::RadialBasisFunction::NO_OP);
 
   const auto delta_abs = Omega_h::fabs_each(
     Omega_h::subtract_each(exact_target_values, read(approx_target_values)));
@@ -92,7 +92,6 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values,
   REQUIRE(m == n);
 
   for (size_t i = 0; i < m; ++i) {
-    //   std::cout << "vtx " << i << "\n";
     //    std::cout << "vtx " << i << "\n";
     CHECK_THAT(
       host_exact_target_values[i],
@@ -111,20 +110,18 @@ TEST_CASE("meshfields_spr_test")
   auto rank = lib.world()->rank();
   const auto boxSize = 1.0;
   const auto nElms = 6;
-  // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms,
-  // nElms, 0, false);
+  //  auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms,
+  //  nElms, 0, false);
+  //
+  Omega_h::Mesh mesh(&lib);
   const auto thwaitesMeshFile =
     "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh";
-  Omega_h::Mesh mesh(&lib);
   Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh);
-  // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms,
-  // nElms, 0, false);
   std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts()
             << "\n";
 
   const auto dim = mesh.dim();
 
-  // const auto& target_coordinates = mesh.coords();
   const auto& target_coordinates = mesh.coords();
 
   const auto& nfaces = mesh.nfaces();
@@ -184,7 +181,6 @@ TEST_CASE("meshfields_spr_test")
         std::cerr << "minPatchSize " << minPatchSize << "\n";
         auto patches = mesh.get_vtx_patches(minPatchSize);
         // print_patches(patches);
-        // print_patches(patches);
 
         Write<Real> source_values(nfaces, 0, "exact target values");
 
@@ -204,7 +200,7 @@ TEST_CASE("meshfields_spr_test")
           });
         mesh.add_tag(OMEGA_H_VERT, "target_values", 1,
                      read(exact_target_values));
-
+        std::cerr << "I am here above test\n";
         test(mesh, patches, interp_degree, Reals(source_values),
              Reals(exact_target_values), Reals(source_coordinates),
              Reals(target_coordinates));
diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp
index b86a9e4a..862ce412 100644
--- a/test/test_svd_serial.cpp
+++ b/test/test_svd_serial.cpp
@@ -275,7 +275,7 @@ TEST_CASE("test_serial_svd")
           rhs(i) = rhs_data[i];
         });
 
-        detail::solve_matrix_serial_svd(team, weight, rhs, A, x);
+        detail::solve_matrix_svd(team, weight, rhs, A, x);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column),
                              [=](int i) { result(i) = x(i); });
       });

From 826e82ccc9d8cf9998ac05252e47f222ad73dd41 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 17 Mar 2025 21:19:42 -0400
Subject: [PATCH 26/73] add ridge regularization

---
 .../interpolator/mls_interpolation_impl.hpp   | 27 +++++++++++++++----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 5a79f64a..8ae936ce 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -231,6 +231,16 @@ void scale_column_trans_matrix(const ScratchMatView& matrix,
   }
 }
 
+KOKKOS_INLINE_FUNCTION
+void add_regularization(const member_type& team, ScratchMatView& square_matrix,
+                        Real lambda_factor)
+{
+  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, square_matrix.extent(0)),
+                       [=](int i) {
+                         square_matrix(i, i) += lambda_factor;
+                       });
+}
+
 /**
  * @struct ResultConvertNormal
  * @brief Stores the results of matrix and vector transformations.
@@ -283,7 +293,8 @@ KOKKOS_INLINE_FUNCTION
 ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
                                             const ScratchVecView& weight_vector,
                                             const ScratchVecView& rhs,
-                                            member_type team)
+                                            member_type team,
+                                            double lambda_factor)
 {
 
   int m = matrix.extent(0);
@@ -319,6 +330,10 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
 
   team.team_barrier();
 
+  add_regularization(team, square_matrix, lambda_factor);
+
+  team.team_barrier();
+
   KokkosBlas::Experimental::
     Gemv<KokkosBlas::Mode::Team, KokkosBlas::Algo::Gemv::Unblocked>::invoke(
       team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs);
@@ -419,7 +434,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                        RealConstDefaultScalarArrayView target_coordinates,
                        const SupportResults& support, const LO& dim,
                        const LO& degree, Func rbf_func,
-                       RealDefaultScalarArrayView approx_target_values)
+                       RealDefaultScalarArrayView approx_target_values,
+                       double lambda_factor)
 {
   PCMS_FUNCTION_TIMER;
   static_assert(std::is_invocable_r_v<double, Func, double, double>,
@@ -550,8 +566,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
-      auto result = convert_normal_equation(vandermonde_matrix, phi_vector,
-                                            support_values, team);
+      auto result = convert_normal_equation(
+        vandermonde_matrix, phi_vector, support_values, team, lambda_factor);
 
       team.team_barrier();
 
@@ -593,7 +609,8 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, Func rbf_func)
+                              const LO& degree, Func rbf_func,
+                              double lambda_factor)
 {
   const auto nvertices_source = source_coordinates.size() / dim;
 

From c5de3e52237614abf4c3fdce6c6bc77cf7349ee0 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 17 Mar 2025 21:20:34 -0400
Subject: [PATCH 27/73] update lambda parameter in function argument

---
 src/pcms/interpolator/mls_interpolation.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 113986a1..3bbf9767 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -105,7 +105,8 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, RadialBasisFunction bf)
+                              const LO& degree, RadialBasisFunction bf,
+                              double lambda_factor)
 {
 
   const auto nvertices_target = target_coordinates.size() / dim;
@@ -116,19 +117,19 @@ Write<Real> mls_interpolation(const Reals source_values,
     case RadialBasisFunction::RBF_GAUSSIAN:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_GAUSSIAN{});
+        degree, RBF_GAUSSIAN{}, lambda_factor);
       break;
 
     case RadialBasisFunction::RBF_C4:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_C4{});
+        degree, RBF_C4{}, lambda_factor);
       break;
 
     case RadialBasisFunction::RBF_CONST:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_CONST{});
+        degree, RBF_CONST{}, lambda_factor);
       break;
   }
 

From 7dc46693a531c4c94ae7fb41eae6d9cce22c4994 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 17 Mar 2025 21:21:20 -0400
Subject: [PATCH 28/73] add defaualt argument for lambda parameter

---
 src/pcms/interpolator/mls_interpolation.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 17b085aa..4a3a5503 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -19,6 +19,7 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, RadialBasisFunction bf);
+                              const LO& degree, RadialBasisFunction bf,
+                              double lambda_factor = 0);
 } // namespace pcms
 #endif

From 1b7388451e597576ea124067076c0fb6d0310c29 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 17 Mar 2025 22:37:04 -0400
Subject: [PATCH 29/73] update lambda

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 8ae936ce..b8bd2553 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -236,9 +236,7 @@ void add_regularization(const member_type& team, ScratchMatView& square_matrix,
                         Real lambda_factor)
 {
   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, square_matrix.extent(0)),
-                       [=](int i) {
-                         square_matrix(i, i) += lambda_factor;
-                       });
+                       [=](int i) { square_matrix(i, i) += lambda_factor; });
 }
 
 /**
@@ -636,7 +634,7 @@ Write<Real> mls_interpolation(const Reals source_values,
 
   mls_interpolation(source_values_array_view, source_coordinates_array_view,
                     target_coordinates_array_view, support, dim, degree,
-                    rbf_func, interpolated_values_array_view);
+                    rbf_func, interpolated_values_array_view, lambda_factor);
 
   return interpolated_values;
 }

From beb74d7de9f2f182c3d73e4cf5dc4021c04e16f9 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 17 Mar 2025 22:37:35 -0400
Subject: [PATCH 30/73] update expected solution

---
 test/test_linear_solver.cpp | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp
index 2f8664ec..e11e8fbb 100644
--- a/test/test_linear_solver.cpp
+++ b/test/test_linear_solver.cpp
@@ -103,8 +103,9 @@ TEST_CASE("solver test")
 
         team.team_barrier();
 
+        double lambda = 0.5;
         auto result = convert_normal_equation(vandermonde_matrix, phi,
-                                              support_values, team);
+                                              support_values, team, lambda);
 
         team.team_barrier();
 
@@ -159,10 +160,10 @@ TEST_CASE("solver test")
     Kokkos::View<double**, Kokkos::HostSpace> expected_solution(
       "expected solution", nvertices_target, size);
 
-    expected_lhs_matrix(0, 0, 0) = 35.0;
+    expected_lhs_matrix(0, 0, 0) = 35.5;
     expected_lhs_matrix(0, 0, 1) = 44.0;
     expected_lhs_matrix(0, 1, 0) = 44.0;
-    expected_lhs_matrix(0, 1, 1) = 56.0;
+    expected_lhs_matrix(0, 1, 1) = 56.5;
 
     expected_rhs_vector(0, 0) = 76.0;
     expected_rhs_vector(0, 1) = 100.0;
@@ -174,13 +175,13 @@ TEST_CASE("solver test")
     expected_scaled_matrix(0, 1, 1) = 4.0;
     expected_scaled_matrix(0, 1, 2) = 6.0;
 
-    expected_solution(0, 0) = -6.0;
-    expected_solution(0, 1) = 6.5;
+    expected_solution(0, 0) = -1.519713;
+    expected_solution(0, 1) = 2.953405;
 
-    expected_lhs_matrix(1, 0, 0) = 30.0;
+    expected_lhs_matrix(1, 0, 0) = 30.5;
     expected_lhs_matrix(1, 0, 1) = 20.0;
     expected_lhs_matrix(1, 1, 0) = 20.0;
-    expected_lhs_matrix(1, 1, 1) = 30.0;
+    expected_lhs_matrix(1, 1, 1) = 30.5;
 
     expected_rhs_vector(1, 0) = 70.0;
     expected_rhs_vector(1, 1) = 40.0;
@@ -192,8 +193,8 @@ TEST_CASE("solver test")
     expected_scaled_matrix(1, 1, 1) = -2.0;
     expected_scaled_matrix(1, 1, 2) = 1.0;
 
-    expected_solution(1, 0) = 2.6;
-    expected_solution(1, 1) = -0.4;
+    expected_solution(1, 0) = 2.517680;
+    expected_solution(1, 1) = -0.339463;
 
     for (int i = 0; i < nvertices_target; ++i) {
       for (int j = 0; j < size; ++j) {
@@ -203,7 +204,7 @@ TEST_CASE("solver test")
                  i, j, l);
           REQUIRE_THAT(
             expected_scaled_matrix(i, j, l),
-            Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-10));
+            Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-6));
         }
       }
       for (int j = 0; j < size; ++j) {
@@ -213,14 +214,14 @@ TEST_CASE("solver test")
                  k);
           REQUIRE_THAT(
             expected_lhs_matrix(i, j, k),
-            Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-10));
+            Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-6));
         }
       }
       for (int j = 0; j < size; ++j) {
         printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j),
                host_result_rhs(i, j), i, j);
         REQUIRE_THAT(expected_rhs_vector(i, j),
-                     Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-10));
+                     Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-6));
       }
 
       for (int j = 0; j < size; ++j) {
@@ -228,7 +229,7 @@ TEST_CASE("solver test")
                host_result_solution(i, j), i, j);
         REQUIRE_THAT(
           expected_solution(i, j),
-          Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-10));
+          Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-6));
       }
     }
   }

From 8fae10b38d632ed8997483ed99df39a723b7925b Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 16:32:52 -0400
Subject: [PATCH 31/73] remove commented lines

---
 .../interpolator/mls_interpolation_impl.hpp   | 91 +++++++++----------
 1 file changed, 45 insertions(+), 46 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 16ee43d3..600eee1f 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -20,9 +20,6 @@
 #include "pcms_interpolator_view_utils.hpp"
 #include "pcms_interpolator_logger.hpp"
 
-// #include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
-// #include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
-// #include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
 #include <KokkosBlas2_gemv.hpp> //KokkosBlas::gemv
 #include "KokkosBatched_SVD_Decl.hpp"
 #include "KokkosBatched_SVD_Serial_Impl.hpp"
@@ -431,7 +428,7 @@ void solve_matrix(const ScratchMatView& square_matrix,
 KOKKOS_INLINE_FUNCTION
 void solve_matrix_svd(member_type team, const ScratchVecView& weight,
                       ScratchVecView rhs_values, ScratchMatView matrix,
-                      ScratchVecView solution_vector, double lambda = 0)
+                      ScratchVecView solution_vector, double lambda = 1e-2)
 {
 
   int row = matrix.extent(0);
@@ -440,8 +437,8 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
 
   int weight_size = weight.size();
 
-  printf("inside svd solver \n");
-  Logger logger(21);
+  // printf("inside svd solver \n");
+  // Logger logger(21);
   OMEGA_H_CHECK_PRINTF(
     weight_size == row,
     "the size of the weight vector should be equal to the row of the matrix\n"
@@ -450,17 +447,17 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
   // find_sq_root_each(team, weight);
 
   eval_row_scaling(team, weight, matrix);
-  logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : ");
+  // logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : ");
 
   team.team_barrier();
 
   eval_rhs_scaling(team, weight, rhs_values);
-  logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : ");
+  //  logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : ");
   // A = UEV^T
   // initilize U (orthogonal matrices)  array
   ScratchMatView U(team.team_scratch(1), row, row);
   fill(-5.0, team, U);
-  logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : ");
+  //  logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : ");
   // initialize Vt
   ScratchMatView Vt(team.team_scratch(1), column, column);
   fill(-5.0, team, Vt);
@@ -486,44 +483,45 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
                                      sigma, Vt, work, 1e-6);
 
-    logger.logMatrix(team, LogLevel::DEBUG, U, "U : ");
-    logger.logVector(team, LogLevel::DEBUG, sigma, "S : ");
-    logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : ");
-    // A = UEV^T
-    // find_inverse_each(team, sigma);
+    // logger.logMatrix(team, LogLevel::DEBUG, U, "U : ");
+    // logger.logVector(team, LogLevel::DEBUG, sigma, "S : ");
+    // logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : ");
+    //  A = UEV^T
+    //  find_inverse_each(team, sigma);
 
     calculate_shrinkage_factor(team, lambda, sigma);
 
-    logger.logVector(team, LogLevel::DEBUG, sigma,
-                     "S/(S^2 + gamma) (basis_size x nsupports) : ");
+    // logger.logVector(team, LogLevel::DEBUG, sigma,
+    //                 "S/(S^2 + gamma) (basis_size x nsupports) : ");
 
     // A = UEV^T
 
     auto Ut = find_transpose(team, U);
 
-    logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) : ");
+    // logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) :
+    // ");
 
     scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
 
-    logger.logMatrix(team, LogLevel::DEBUG, temp_matrix,
-                     "S^-1Ut (basis_size x nsupports) : ");
+    // logger.logMatrix(team, LogLevel::DEBUG, temp_matrix,
+    //                 "S^-1Ut (basis_size x nsupports) : ");
 
     KokkosBatched::SerialGemm<
       KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose,
       KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0,
                                                     vsigmaInvUtMul);
 
-    logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul,
-                     "VS^-1Ut (basis_size x nsupports) : ");
+    // logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul,
+    //                  "VS^-1Ut (basis_size x nsupports) : ");
     KokkosBlas::SerialGemv<
       KokkosBlas::Trans::NoTranspose,
       KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul,
                                                  rhs_values, 0.0,
                                                  solution_vector);
-    logger.logVector(team, LogLevel::DEBUG, solution_vector,
-                     "VS^-1Ut (basis_size x nsupports) : ");
+    // logger.logVector(team, LogLevel::DEBUG, solution_vector,
+    //                  "VS^-1Ut (basis_size x nsupports) : ");
   }
-  printf("coming out form svd solver..\n");
+  // printf("coming out form svd solver..\n");
 }
 // KOKKOS_INLINE_FUNCTION
 // void solve_weighted_matrix_serial_qr(const member_type& team,
@@ -654,17 +652,17 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
   // calculates the interpolated values
   Kokkos::parallel_for(
-    "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)),
+    "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(scratch_size)),
     KOKKOS_LAMBDA(const member_type& team) {
       int league_rank = team.league_rank();
       int start_ptr = support.supports_ptr[league_rank];
       int end_ptr = support.supports_ptr[league_rank + 1];
       int nsupports = end_ptr - start_ptr;
       printf("calculating for %d target point\n", league_rank);
-      Logger logger(21);
-      // logger.log(team, LogLevel::INFO, "The search starts from here....\n");
-      // local_source_point stores the coordinates of source supports of a
-      // given target
+      // Logger logger(21);
+      //  logger.log(team, LogLevel::INFO, "The search starts from here....\n");
+      //  local_source_point stores the coordinates of source supports of a
+      //  given target
       ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim);
 
       // rbf function values of source supports Phi(n,n)
@@ -703,8 +701,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         }
       }
 
-      logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
-                       "The local source points (x, y, z):\n ");
+      // logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+      //                 "The local source points (x, y, z):\n ");
 
       Coord target_point;
       target_point.x = target_coordinates[league_rank * dim];
@@ -714,8 +712,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         target_point.z = target_coordinates[league_rank * dim + 2];
       }
 
-      logger.logStruct(team, LogLevel::DEBUG, target_point,
-                       "Target Point before pivoting :\n");
+      // logger.logStruct(team, LogLevel::DEBUG, target_point,
+      //                  "Target Point before pivoting :\n");
       /** phi(nsupports) is the array of rbf functions evaluated at the
        * source supports In the actual implementation, Phi(nsupports,
        * nsupports) is the diagonal matrix & each diagonal element is the phi
@@ -730,8 +728,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                              support.radii2[league_rank], rbf_func, phi_vector);
         });
 
-      logger.logVector(team, LogLevel::DEBUG, phi_vector,
-                       "Phi Values: (phi)\n ");
+      // logger.logVector(team, LogLevel::DEBUG, phi_vector,
+      //                  "Phi Values: (phi)\n ");
       team.team_barrier();
 
       /** support_values(nsupports) (or known rhs vector b) is the vector of
@@ -749,8 +747,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         });
 
       team.team_barrier();
-      logger.logVector(team, LogLevel::DEBUG, support_values,
-                       "Support Values (b):\n ");
+      // logger.logVector(team, LogLevel::DEBUG, support_values,
+      //                  "Support Values (b):\n ");
 
       /**
        *
@@ -762,8 +760,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        */
 
       normalize_supports(team, target_point, local_source_points);
-      logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
-                       "Local Source points after pivoting");
+      // logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+      //                  "Local Source points after pivoting");
       team.team_barrier();
       /**
        *
@@ -773,8 +771,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        */
       eval_basis_vector(slice_length, target_point, target_basis_vector);
 
-      logger.logVector(team, LogLevel::DEBUG, target_basis_vector,
-                       "target basis vector after pivot : ");
+      // logger.logVector(team, LogLevel::DEBUG, target_basis_vector,
+      //                 "target basis vector after pivot : ");
       /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
        * created with the basis vector of source supports stacking on top of
        * each other
@@ -789,8 +787,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
-      logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
-                       "Vandermonde  matrix : ");
+      // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
+      //                "Vandermonde  matrix : ");
 
       OMEGA_H_CHECK_PRINTF(
         support.radii2[league_rank] > 0,
@@ -800,13 +798,14 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix,
                        solution_coefficients);
       team.team_barrier();
-      logger.logVector(team, LogLevel::DEBUG, solution_coefficients,
-                       "Solution Vector : ");
+      // logger.logVector(team, LogLevel::DEBUG, solution_coefficients,
+      //                 "Solution Vector : ");
 
       double target_value = KokkosBlas::Experimental::dot(
         team, solution_coefficients, target_basis_vector);
 
-      logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value : ");
+      // logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value :
+      // ");
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
                              league_rank);

From 11bd9017d997967b4144682d798878ca1c0c152d Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 16:43:08 -0400
Subject: [PATCH 32/73] remove print statements

---
 src/pcms/interpolator/mls_interpolation.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index ce472b34..f99515a8 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -24,7 +24,7 @@ struct RBF_GAUSSIAN
     // 'a' is a spreading factor/decay factor
     // the value of 'a' is higher if the data is localized
     // the value of 'a' is smaller if the data is farther
-    int a = 2;
+    int a = 5;
 
     double r = sqrt(r_sq);
     double rho = sqrt(rho_sq);
@@ -272,12 +272,9 @@ Reals min_max_normalization(Reals& coordinates, int dim = 2)
   const auto max_x = Omega_h::get_max(read(x_coordinates));
   const auto max_y = Omega_h::get_max(read(y_coordinates));
 
-  printf(" [min_x, max_x] : [%12.6f , %12.6f]\n", min_x, max_x);
-  printf(" [min_y, max_y] : [%12.6f , %12.6f]\n", min_y, max_y);
   const auto del_x = max_x - min_x;
   const auto del_y = max_y - min_y;
 
-  printf(" [delx, dely] : [%12.6f , %12.6f]\n", del_x, del_y);
   Write<Real> normalized_coordinates(coords_size, 0,
                                      "stores scaled coordinates");
 

From 81dc8d4274340048d81d40292b0a092b4dd56eab Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 16:45:47 -0400
Subject: [PATCH 33/73] remove commented code

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 600eee1f..97153e61 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -15,8 +15,8 @@
 #include <Kokkos_StdAlgorithms.hpp>
 #include "pcms_interpolator_aliases.hpp"
 #include "adj_search.hpp"
-#include "../assert.h"
-#include "../profile.h"
+#include "pcms/assert.h"
+#include "pcms/profile.h"
 #include "pcms_interpolator_view_utils.hpp"
 #include "pcms_interpolator_logger.hpp"
 

From a6f3a075a48124ee406cf4244f89d4dd2ea468c4 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 16:47:59 -0400
Subject: [PATCH 34/73] remove note not required for svd

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 97153e61..0a4be4a1 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -116,8 +116,6 @@ Reals min_max_normalization(Reals& coordinates, int dim);
  *   @param[in] p A reference to the coordinate struct
  *   @param[in,out] basis_vector The polynomial basis vector
  *
- *   @note the basis_vector size is more than basis_size
- *   to accomodate with the output from qr
  *
  */
 KOKKOS_INLINE_FUNCTION

From 5419b86dfbedd98776f249036b826b8904a4b5f7 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 16:48:36 -0400
Subject: [PATCH 35/73] update


From 97c2f4d725add9c7f031f234cdc8c4fd089b3655 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 17:09:03 -0400
Subject: [PATCH 36/73] remove commented

---
 .../interpolator/mls_interpolation_impl.hpp   | 122 ++----------------
 test/test_spr_meshfields.cpp                  |   6 +-
 2 files changed, 14 insertions(+), 114 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 0a4be4a1..742ba25a 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -435,143 +435,65 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
 
   int weight_size = weight.size();
 
-  // printf("inside svd solver \n");
-  // Logger logger(21);
   OMEGA_H_CHECK_PRINTF(
     weight_size == row,
     "the size of the weight vector should be equal to the row of the matrix\n"
     "weight vector size = %d, row of matrix = %d\n",
     weight_size, row);
-  // find_sq_root_each(team, weight);
 
   eval_row_scaling(team, weight, matrix);
-  // logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : ");
 
   team.team_barrier();
 
   eval_rhs_scaling(team, weight, rhs_values);
-  //  logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : ");
-  // A = UEV^T
+
   // initilize U (orthogonal matrices)  array
   ScratchMatView U(team.team_scratch(1), row, row);
   fill(-5.0, team, U);
-  //  logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : ");
+
   // initialize Vt
   ScratchMatView Vt(team.team_scratch(1), column, column);
   fill(-5.0, team, Vt);
 
-  // int maxrank = std::min(row, column);
-
+  // initialize sigma
   ScratchVecView sigma(team.team_scratch(1), column);
   fill(-5.0, team, sigma);
 
+  // initialize work
   ScratchVecView work(team.team_scratch(1), row);
   fill(-5.0, team, work);
 
-  // sigma_mat_inv
+  // initialise sigma_mat_inv  S^-1
   ScratchMatView temp_matrix(team.team_scratch(1), column, row);
   fill(0, team, temp_matrix);
 
+  // initialise V S^-1 U^T
   ScratchMatView vsigmaInvUtMul(team.team_scratch(1), column, row);
   fill(0, team, vsigmaInvUtMul);
 
-  // for serial run
-  // if (team.league_rank() == 0) {
   if (team.team_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
                                      sigma, Vt, work, 1e-6);
 
-    // logger.logMatrix(team, LogLevel::DEBUG, U, "U : ");
-    // logger.logVector(team, LogLevel::DEBUG, sigma, "S : ");
-    // logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : ");
-    //  A = UEV^T
-    //  find_inverse_each(team, sigma);
-
+    // calculate sigma(i) / (sigma(i)^2 + lambda)
     calculate_shrinkage_factor(team, lambda, sigma);
 
-    // logger.logVector(team, LogLevel::DEBUG, sigma,
-    //                 "S/(S^2 + gamma) (basis_size x nsupports) : ");
-
-    // A = UEV^T
-
     auto Ut = find_transpose(team, U);
 
-    // logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) :
-    // ");
-
     scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
 
-    // logger.logMatrix(team, LogLevel::DEBUG, temp_matrix,
-    //                 "S^-1Ut (basis_size x nsupports) : ");
-
     KokkosBatched::SerialGemm<
       KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose,
       KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0,
                                                     vsigmaInvUtMul);
 
-    // logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul,
-    //                  "VS^-1Ut (basis_size x nsupports) : ");
     KokkosBlas::SerialGemv<
       KokkosBlas::Trans::NoTranspose,
       KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul,
                                                  rhs_values, 0.0,
                                                  solution_vector);
-    // logger.logVector(team, LogLevel::DEBUG, solution_vector,
-    //                  "VS^-1Ut (basis_size x nsupports) : ");
   }
-  // printf("coming out form svd solver..\n");
 }
-// KOKKOS_INLINE_FUNCTION
-// void solve_weighted_matrix_serial_qr(const member_type& team,
-//                                      const int league_rank,
-//                                      ScratchVecView weight,
-//                                      ScratchMatView& matrix,
-//                                      ScratchVecView& rhs_values)
-//{
-//
-//   int row = matrix.extent(0);
-//
-//   int column = matrix.extent(1);
-//
-//   int weight_size = weight.size();
-//
-//   find_sq_root_each(team, weight);
-//
-//   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row),
-//                        [=](int i) { eval_row_scaling(team, weight, matrix);
-//                        });
-//
-//   // initilize tau array
-//   ScratchVecView tau(team.team_scratch(1), column);
-//   fill(0.0, team, tau);
-//
-//   // initialize work array
-//   ScratchVecView work(team.team_scratch(1), column);
-//   fill(0.0, team, work);
-//
-//   // for serial run
-//   if (league_rank == 0) {
-//
-//     // compute QR factorization of matrix and stores the R in A and
-//     // tau is used to recover Q
-//     KokkosBatched::SerialQR<KokkosBlas::Algo::QR::Unblocked>::invoke(matrix,
-//                                                                      tau,
-//                                                                      work);
-//
-//     // b = Q^{T}b
-//
-//     KokkosBatched::SerialApplyQ<
-//       KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
-//       KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values,
-//                                                    work);
-//
-//     // b = R^{-1}b
-//     KokkosBatched::SerialTrsv<
-//       KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose,
-//       KokkosBatched::Diag::NonUnit,
-//       KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values);
-//   }
-// }
 
 /**
  * @brief Maps the data from source mesh to target mesh
@@ -581,7 +503,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
  * points of source field
  * @param target_coordinates Scalar array view of the coordinates of control
  * points of target field
- * @param support The object that enpasulates support info
+ * @param support The object that encapsulates support info
  * @param dim The dimension of the simulations
  * @param degree The degree of the interpolation order
  * @param rbf_func The radial basis function choice
@@ -656,9 +578,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       int start_ptr = support.supports_ptr[league_rank];
       int end_ptr = support.supports_ptr[league_rank + 1];
       int nsupports = end_ptr - start_ptr;
-      printf("calculating for %d target point\n", league_rank);
-      // Logger logger(21);
-      //  logger.log(team, LogLevel::INFO, "The search starts from here....\n");
+
       //  local_source_point stores the coordinates of source supports of a
       //  given target
       ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim);
@@ -676,8 +596,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       ScratchVecView target_basis_vector(team.team_scratch(1), basis_size);
 
       // solution coefficients (solution vector)
-      //
       ScratchVecView solution_coefficients(team.team_scratch(1), basis_size);
+
       // Initialize the scratch matrices and  vectors
       fill(0.0, team, local_source_points);
       fill(0.0, team, vandermonde_matrix);
@@ -686,7 +606,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       fill(0.0, team, target_basis_vector);
       fill(0.0, team, solution_coefficients);
 
-      // logger.log(team, LogLevel::INFO, "The search starts from here....\n");
       // storing the coords of local supports
       int count = -1;
       for (int j = start_ptr; j < end_ptr; ++j) {
@@ -699,9 +618,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         }
       }
 
-      // logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
-      //                 "The local source points (x, y, z):\n ");
-
+      // target_point
       Coord target_point;
       target_point.x = target_coordinates[league_rank * dim];
       target_point.y = target_coordinates[league_rank * dim + 1];
@@ -710,8 +627,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         target_point.z = target_coordinates[league_rank * dim + 2];
       }
 
-      // logger.logStruct(team, LogLevel::DEBUG, target_point,
-      //                  "Target Point before pivoting :\n");
       /** phi(nsupports) is the array of rbf functions evaluated at the
        * source supports In the actual implementation, Phi(nsupports,
        * nsupports) is the diagonal matrix & each diagonal element is the phi
@@ -726,8 +641,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                              support.radii2[league_rank], rbf_func, phi_vector);
         });
 
-      // logger.logVector(team, LogLevel::DEBUG, phi_vector,
-      //                  "Phi Values: (phi)\n ");
       team.team_barrier();
 
       /** support_values(nsupports) (or known rhs vector b) is the vector of
@@ -745,8 +658,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         });
 
       team.team_barrier();
-      // logger.logVector(team, LogLevel::DEBUG, support_values,
-      //                  "Support Values (b):\n ");
 
       /**
        *
@@ -758,8 +669,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        */
 
       normalize_supports(team, target_point, local_source_points);
-      // logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
-      //                  "Local Source points after pivoting");
       team.team_barrier();
       /**
        *
@@ -769,8 +678,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        */
       eval_basis_vector(slice_length, target_point, target_basis_vector);
 
-      // logger.logVector(team, LogLevel::DEBUG, target_basis_vector,
-      //                 "target basis vector after pivot : ");
       /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is
        * created with the basis vector of source supports stacking on top of
        * each other
@@ -785,9 +692,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
-      // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
-      //                "Vandermonde  matrix : ");
-
       OMEGA_H_CHECK_PRINTF(
         support.radii2[league_rank] > 0,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
@@ -796,14 +700,10 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix,
                        solution_coefficients);
       team.team_barrier();
-      // logger.logVector(team, LogLevel::DEBUG, solution_coefficients,
-      //                 "Solution Vector : ");
 
       double target_value = KokkosBlas::Experimental::dot(
         team, solution_coefficients, target_basis_vector);
 
-      // logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value :
-      // ");
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
                              league_rank);
diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
index 16768b5a..535a9010 100644
--- a/test/test_spr_meshfields.cpp
+++ b/test/test_spr_meshfields.cpp
@@ -173,11 +173,11 @@ TEST_CASE("meshfields_spr_test")
         std::cerr << "start " << interp_degree << ", " << func_degree << " \n";
         int minPatchSize;
         if (interp_degree == 1)
-          minPatchSize = 3;
+          minPatchSize = 4;
         if (interp_degree == 2)
-          minPatchSize = 8; // why so large?
-        if (interp_degree == 3)
           minPatchSize = 10; // why so large?
+        if (interp_degree == 3)
+          minPatchSize = 12; // why so large?
         std::cerr << "minPatchSize " << minPatchSize << "\n";
         auto patches = mesh.get_vtx_patches(minPatchSize);
         // print_patches(patches);

From 0a3cf0c195a45d0f369f0519ab830c3fee6e9ea0 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 21:26:55 -0400
Subject: [PATCH 37/73] generalize for any dimension

---
 .../interpolator/mls_interpolation_impl.hpp   | 65 ++++++++-----------
 1 file changed, 27 insertions(+), 38 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 742ba25a..3bcd8831 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -119,7 +119,7 @@ Reals min_max_normalization(Reals& coordinates, int dim);
  *
  */
 KOKKOS_INLINE_FUNCTION
-void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p,
+void eval_basis_vector(const IntDeviceMatView& slice_length, const double* p,
                        ScratchVecView& basis_vector)
 {
   basis_vector(0) = 1;
@@ -130,11 +130,9 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p,
   int curr_col = 1;
 
   double point[MAX_DIM];
-  point[0] = p.x;
-  point[1] = p.y;
 
-  if (dim == 3) {
-    point[2] = p.z;
+  for (int i = 0; i < dim; ++i) {
+    point[i] = p[i];
   }
 
   for (int i = 0; i < degree; ++i) {
@@ -166,7 +164,7 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p,
  **
  */
 KOKKOS_INLINE_FUNCTION
-void normalize_supports(member_type team, Coord& pivot,
+void normalize_supports(member_type team, double* target_point,
                         ScratchMatView& support_coordinates)
 {
   int nsupports = support_coordinates.extent(0);
@@ -175,23 +173,13 @@ void normalize_supports(member_type team, Coord& pivot,
   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int i) {
     Real pivot_point[MAX_DIM];
 
-    pivot_point[0] = pivot.x;
-    pivot_point[1] = pivot.y;
-
-    if (dim == 3) {
-      pivot_point[2] = pivot.z;
-    }
-
     for (int j = 0; j < dim; ++j) {
-      support_coordinates(i, j) -= pivot_point[j];
+      support_coordinates(i, j) -= target_point[j];
     }
   });
 
-  pivot.x = 0;
-  pivot.y = 0;
-
-  if (dim == 3) {
-    pivot.z = 0;
+  for (int j = 0; j < dim; ++j) {
+    target_point[j] = 0.0;
   }
 }
 
@@ -212,12 +200,11 @@ void create_vandermonde_matrix(const ScratchMatView& local_source_points, int j,
   int N = local_source_points.extent(0);
   int dim = local_source_points.extent(1);
 
-  Coord source_point;
-  source_point.x = local_source_points(j, 0);
-  source_point.y = local_source_points(j, 1);
-  if (dim == 3) {
-    source_point.z = local_source_points(j, 2);
+  double source_point[MAX_DIM] = {};
+  for (int i = 0; i < dim; ++i) {
+    source_point[i] = local_source_points(j, i);
   }
+
   ScratchVecView basis_vector_supports =
     Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL());
   eval_basis_vector(slice_length, source_point, basis_vector_supports);
@@ -243,13 +230,19 @@ template <typename Func,
           std::enable_if_t<std::is_invocable_r_v<double, Func, double, double>,
                            bool> = true>
 KOKKOS_INLINE_FUNCTION void compute_phi_vector(
-  const Coord& target_point, const ScratchMatView& local_source_points, int j,
+  const double* target_point, const ScratchMatView& local_source_points, int j,
   double cuttoff_dis_sq, Func rbf_func, ScratchVecView phi)
 {
   int N = local_source_points.extent(0);
-  double dx = target_point.x - local_source_points(j, 0);
-  double dy = target_point.y - local_source_points(j, 1);
-  double ds_sq = dx * dx + dy * dy;
+  int dim = local_source_points.extent(1);
+
+  double ds_sq = 0;
+
+  double ds_sq = 0;
+  for (int i = 0; i < dim; ++i) {
+    double temp = target_point[i] - local_source_points(j, i);
+    ds_sq += temp * temp;
+  }
   phi(j) = rbf_func(ds_sq, cuttoff_dis_sq);
   OMEGA_H_CHECK_PRINTF(!std::isnan(phi(j)),
                        "ERROR: Phi(j) in compute_phi_vector is NaN for j = %d "
@@ -611,20 +604,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       for (int j = start_ptr; j < end_ptr; ++j) {
         count++;
         auto index = support.supports_idx[j];
-        local_source_points(count, 0) = source_coordinates[index * dim];
-        local_source_points(count, 1) = source_coordinates[index * dim + 1];
-        if (dim == 3) {
-          local_source_points(count, 2) = source_coordinates[index * dim + 2];
+
+        for (int i = 0; i < dim; ++i) {
+          local_source_points(count, i) = source_coordinates[index * dim + i];
         }
       }
 
-      // target_point
-      Coord target_point;
-      target_point.x = target_coordinates[league_rank * dim];
-      target_point.y = target_coordinates[league_rank * dim + 1];
+      double target_point[MAX_DIM] = {};
 
-      if (dim == 3) {
-        target_point.z = target_coordinates[league_rank * dim + 2];
+      for (int i = 0; i < dim; ++i) {
+        target_point[i] = target_coordinates[league_rank * dim + i];
       }
 
       /** phi(nsupports) is the array of rbf functions evaluated at the

From 3c8d4921f9d6fc156f1c23527e87064b9a1e4ea3 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 21:34:48 -0400
Subject: [PATCH 38/73] remove min max normalization function

---
 src/pcms/interpolator/mls_interpolation.cpp | 48 ---------------------
 1 file changed, 48 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index f99515a8..6bda3b86 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -224,14 +224,9 @@ int calculate_scratch_shared_size(const SupportResults& support,
         ScratchMatView::shmem_size(nsupports, dim); // local_source_points
       total_shared_size +=
         ScratchMatView::shmem_size(nsupports, nsupports) * 2; // U, Ut
-      // total_shared_size += ScratchVecView::shmem_size(max_size);
-      // total_shared_size += ScratchVecView::shmem_size(min_size);
       shmem_each_team(i) = total_shared_size;
     });
 
-  // namespace KE = Kokkos::Experimental;
-  // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(),
-  // shmem_each_team); printf("shared size = %d", shared_size);
   int shared_size = 0;
   Kokkos::parallel_reduce(
     "find_max", nvertices_target,
@@ -245,48 +240,5 @@ int calculate_scratch_shared_size(const SupportResults& support,
   return shared_size;
 }
 
-Reals min_max_normalization(Reals& coordinates, int dim = 2)
-{
-  int num_points = coordinates.size() / dim;
-
-  int coords_size = coordinates.size();
-
-  Write<Real> x_coordinates(num_points, 0, "x coordinates");
-
-  Write<Real> y_coordinates(num_points, 0, "y coordinates");
-
-  parallel_for(
-    "separates x and y coordinates", num_points, KOKKOS_LAMBDA(const int id) {
-      int index = id * dim;
-
-      x_coordinates[id] = coordinates[index];
-      y_coordinates[id] = coordinates[index + 1];
-    });
-
-  HostRead<Real> host_x(read(x_coordinates));
-  HostRead<Real> host_y(read(y_coordinates));
-
-  const auto min_x = Omega_h::get_min(read(x_coordinates));
-  const auto min_y = Omega_h::get_min(read(y_coordinates));
-
-  const auto max_x = Omega_h::get_max(read(x_coordinates));
-  const auto max_y = Omega_h::get_max(read(y_coordinates));
-
-  const auto del_x = max_x - min_x;
-  const auto del_y = max_y - min_y;
-
-  Write<Real> normalized_coordinates(coords_size, 0,
-                                     "stores scaled coordinates");
-
-  parallel_for(
-    "scale coordinates", num_points, KOKKOS_LAMBDA(const int id) {
-      int index = id * dim;
-      normalized_coordinates[index] = (x_coordinates[id] - min_x) / del_x;
-      normalized_coordinates[index + 1] = (y_coordinates[id] - min_y) / del_y;
-    });
-
-  return read(normalized_coordinates);
-}
-
 } // namespace detail
 } // namespace pcms

From 1a80098b7447c264865adf913f6cf3e7022ccb44 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 23:00:21 -0400
Subject: [PATCH 39/73] add new argument

---
 src/pcms/interpolator/mls_interpolation.cpp | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 6bda3b86..a98ed14e 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -111,7 +111,8 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, RadialBasisFunction bf)
+                              const LO& degree, RadialBasisFunction bf,
+                              double lambda)
 {
 
   const auto nvertices_target = target_coordinates.size() / dim;
@@ -122,25 +123,25 @@ Write<Real> mls_interpolation(const Reals source_values,
     case RadialBasisFunction::RBF_GAUSSIAN:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_GAUSSIAN{});
+        degree, RBF_GAUSSIAN{}, lambda);
       break;
 
     case RadialBasisFunction::RBF_C4:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_C4{});
+        degree, RBF_C4{}, lambda);
       break;
 
     case RadialBasisFunction::RBF_CONST:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_CONST{});
+        degree, RBF_CONST{}, lambda);
       break;
 
     case RadialBasisFunction::NO_OP:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, NoOp{});
+        degree, NoOp{}, lambda);
       break;
   }
 

From 4ce25f56bd5ad466efefd2f87a6c6071927ed228 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 23:00:54 -0400
Subject: [PATCH 40/73] add new argument lambda

---
 src/pcms/interpolator/mls_interpolation.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 8e8218aa..8169d453 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -20,6 +20,7 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, RadialBasisFunction bf);
+                              const LO& degree, RadialBasisFunction bf,
+                              double lambda = 0);
 } // namespace pcms
 #endif

From 1a2511143386848ff7bdf62084f83e8e15d6360a Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 23:01:16 -0400
Subject: [PATCH 41/73] add new argument lambda

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 3bcd8831..90ba7555 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -238,7 +238,6 @@ KOKKOS_INLINE_FUNCTION void compute_phi_vector(
 
   double ds_sq = 0;
 
-  double ds_sq = 0;
   for (int i = 0; i < dim; ++i) {
     double temp = target_point[i] - local_source_points(j, i);
     ds_sq += temp * temp;
@@ -419,7 +418,7 @@ void solve_matrix(const ScratchMatView& square_matrix,
 KOKKOS_INLINE_FUNCTION
 void solve_matrix_svd(member_type team, const ScratchVecView& weight,
                       ScratchVecView rhs_values, ScratchMatView matrix,
-                      ScratchVecView solution_vector, double lambda = 1e-2)
+                      ScratchVecView solution_vector, double lambda)
 {
 
   int row = matrix.extent(0);
@@ -466,7 +465,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
 
   if (team.team_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
-                                     sigma, Vt, work, 1e-6);
+                                     sigma, Vt, work, 1e-8);
 
     // calculate sigma(i) / (sigma(i)^2 + lambda)
     calculate_shrinkage_factor(team, lambda, sigma);
@@ -510,7 +509,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                        RealConstDefaultScalarArrayView target_coordinates,
                        const SupportResults& support, const LO& dim,
                        const LO& degree, Func rbf_func,
-                       RealDefaultScalarArrayView approx_target_values)
+                       RealDefaultScalarArrayView approx_target_values,
+                       double lambda)
 {
   PCMS_FUNCTION_TIMER;
   static_assert(std::is_invocable_r_v<double, Func, double, double>,
@@ -687,7 +687,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         support.radii2[league_rank]);
 
       solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix,
-                       solution_coefficients);
+                       solution_coefficients, lambda);
       team.team_barrier();
 
       double target_value = KokkosBlas::Experimental::dot(
@@ -723,7 +723,7 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, Func rbf_func)
+                              const LO& degree, Func rbf_func, double lambda)
 {
   const auto nsources = source_coordinates.size() / dim;
 
@@ -748,7 +748,7 @@ Write<Real> mls_interpolation(const Reals source_values,
 
   mls_interpolation(source_values_array_view, source_coordinates_array_view,
                     target_coordinates_array_view, support, dim, degree,
-                    rbf_func, interpolated_values_array_view);
+                    rbf_func, interpolated_values_array_view, lambda);
 
   return interpolated_values;
 }

From 7b6e325421c8110f5100674a3ec2acc815d57997 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 23:03:27 -0400
Subject: [PATCH 42/73] add lambda

---
 test/test_mls_basis.cpp      | 21 ++++++------
 test/test_normalisation.cpp  | 66 ++++--------------------------------
 test/test_spr_meshfields.cpp |  2 +-
 test/test_svd_serial.cpp     |  2 +-
 4 files changed, 19 insertions(+), 72 deletions(-)

diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp
index 5664508d..d50ed4f8 100644
--- a/test/test_mls_basis.cpp
+++ b/test/test_mls_basis.cpp
@@ -47,13 +47,13 @@ TEST_CASE("basis test")
       KOKKOS_LAMBDA(const member_type& team) {
         int i = team.league_rank();
         ScratchVecView basis_vector(team.team_scratch(0), size);
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
-                             [=](int j) { basis_vector(j) = 0; });
+        pcms::detail::fill(0, team, basis_vector);
+
+        double target_point[MAX_DIM] = {};
 
-        Coord target_point;
-        target_point.x = coords[i][0];
-        target_point.y = coords[i][1];
-        target_point.z = coords[i][2];
+        for (int j = 0; j < dim; ++j) {
+          target_point[j] = coords[i][j];
+        }
 
         detail::eval_basis_vector(d_array, target_point, basis_vector);
 
@@ -145,10 +145,11 @@ TEST_CASE("basis test")
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
                              [=](int j) { basis_vector(j) = 0; });
 
-        Coord target_point;
-        target_point.x = coords[i][0];
-        target_point.y = coords[i][1];
-        target_point.z = coords[i][2];
+        double target_point[MAX_DIM] = {};
+
+        for (int j = 0; j < dim; ++j) {
+          target_point[j] = coords[i][j];
+        }
 
         detail::eval_basis_vector(d_array, target_point, basis_vector);
 
diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp
index 31e5727a..88773567 100644
--- a/test/test_normalisation.cpp
+++ b/test/test_normalisation.cpp
@@ -31,55 +31,6 @@ Reals fillFromMatrix(const Matrix<2, 10>& matrix)
 TEST_CASE("test_normalization_routine")
 {
 
-  SECTION("test_minmax_normalization")
-  {
-
-    printf("-------------the test for min_max starts-----------\n");
-
-    Real tolerance = 1E-5;
-    int num_points = 10;
-
-    Omega_h::Matrix<2, 10> matrix{{0.45, 3.18},   {0.12, -1.2},  {-2.04, -1.45},
-                                  {1.52, -0.98},  {-0.22, 1.45}, {0.96, 3.62},
-                                  {-0.26, -1.62}, {0.82, 4.22},  {5.6, 3.62},
-                                  {1.2, 1.2}};
-
-    auto coordinates = fillFromMatrix(matrix);
-
-    auto results = pcms::detail::min_max_normalization(coordinates, 2);
-
-    printf("results after normalization");
-    HostRead<Real> host_results(results);
-
-    double expected_results[10][2] = {
-      {0.325916, 0.821918}, {0.282723, 0.071918}, {0.000000, 0.029110},
-      {0.465969, 0.109589}, {0.238220, 0.525685}, {0.392670, 0.897260},
-      {0.232984, 0.000000}, {0.374346, 1.000000}, {1.000000, 0.897260},
-      {0.424084, 0.482877}};
-
-    printf("<scaled coordinates>\n");
-
-    for (int i = 0; i < 20; ++i) {
-      printf("%12.6f\n", host_results[i]);
-    }
-
-    for (int i = 0; i < 10; ++i) {
-
-      int index = i * 2;
-
-      CAPTURE(i, host_results[index], expected_results[i][0], tolerance);
-      CHECK_THAT(host_results[index],
-                 Catch::Matchers::WithinAbs(expected_results[i][0], tolerance));
-
-      CAPTURE(i, host_results[index + 1], expected_results[i][1], tolerance);
-      CHECK_THAT(host_results[index + 1],
-                 Catch::Matchers::WithinAbs(expected_results[i][1], tolerance));
-
-      printf("-------------the test for min_max starts-----------\n");
-    }
-
-  } // end SECTION
-
   SECTION("test_normalization_relative_distance")
   {
     Real tolerance = 1E-5;
@@ -129,10 +80,11 @@ TEST_CASE("test_normalization_routine")
         ScratchMatView local_supports(team.team_scratch(0), num_supports, dim);
         detail::fill(0, team, local_supports);
 
-        Coord target_point;
-        target_point.x = target_coordinates[league_rank][0];
-        target_point.y = target_coordinates[league_rank][1];
-        target_point.z = target_coordinates[league_rank][2];
+        double target_point[MAX_DIM] = {};
+
+        for (int i = 0; i < dim; ++i) {
+          target_point[i] = target_coordinates[league_rank][i];
+        }
 
         Kokkos::parallel_for(
           Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
@@ -162,13 +114,7 @@ TEST_CASE("test_normalization_routine")
             for (int j = 0; j < dim; ++j) {
               int index = league_rank * num_supports * dim + i * dim + j;
               normalized_support_coordinates[index] = local_supports(i, j);
-              if (j == 0) {
-                normalized_target_coordinates[index] = target_point.x;
-              } else if (j == 1) {
-                normalized_target_coordinates[index] = target_point.y;
-              } else {
-                normalized_target_coordinates[index] = target_point.z;
-              }
+              normalized_target_coordinates[index] = target_point[j];
             }
           });
       });
diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
index 535a9010..5b81bd7f 100644
--- a/test/test_spr_meshfields.cpp
+++ b/test/test_spr_meshfields.cpp
@@ -72,7 +72,7 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values,
 
   auto approx_target_values = pcms::mls_interpolation(
     source_values, source_coordinates, target_coordinates, support, dim, degree,
-    pcms::RadialBasisFunction::NO_OP);
+    pcms::RadialBasisFunction::NO_OP, 1e-5);
 
   const auto delta_abs = Omega_h::fabs_each(
     Omega_h::subtract_each(exact_target_values, read(approx_target_values)));
diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp
index 862ce412..8d02de0e 100644
--- a/test/test_svd_serial.cpp
+++ b/test/test_svd_serial.cpp
@@ -275,7 +275,7 @@ TEST_CASE("test_serial_svd")
           rhs(i) = rhs_data[i];
         });
 
-        detail::solve_matrix_svd(team, weight, rhs, A, x);
+        detail::solve_matrix_svd(team, weight, rhs, A, x, 0);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column),
                              [=](int i) { result(i) = x(i); });
       });

From 7d46e8d22aabd64f365126f9a4290e81d6eaa79d Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 25 Mar 2025 23:21:00 -0400
Subject: [PATCH 43/73] delete file

---
 .../pcms_interpolator_array_ops.cpp           | 20 -------------------
 1 file changed, 20 deletions(-)
 delete mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.cpp

diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.cpp b/src/pcms/interpolator/pcms_interpolator_array_ops.cpp
deleted file mode 100644
index 2233002a..00000000
--- a/src/pcms/interpolator/pcms_interpolator_array_ops.cpp
+++ /dev/null
@@ -1,20 +0,0 @@
-#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP
-#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP
-
-#include "pcms_interpolator_aliases.hpp"
-#include <Omega_h_array_ops.hpp>
-#include <cmath>
-
-KOKKOS_INLINE_FUNCTION
-void find_sq_root_each(ScratchVecView& array)
-{
-  int size = array.size();
-
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) {
-    OMEGA_CHECK_PRINTF(array(i) < 0,
-                       "[Error:] Square root of a negative number is invalid!\n"
-                       "value is %12.6f\d",
-                       array(i));
-    array(i) = sqrt(array(i));
-  });
-}

From 43c42efb610c52fc641c152a1b3d610ca795aef7 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:37:20 -0400
Subject: [PATCH 44/73] change MAX_DIM to 6

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 90ba7555..d451ca5d 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -26,7 +26,7 @@
 #include <KokkosBlas2_serial_gemv_impl.hpp>
 #include <KokkosBatched_Gemm_Decl.hpp>
 
-static constexpr int MAX_DIM = 3;
+static constexpr int MAX_DIM = 6;
 
 /**
  * calculate_basis_slice_lengths, calculate_basis_vector_size and
@@ -164,7 +164,7 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const double* p,
  **
  */
 KOKKOS_INLINE_FUNCTION
-void normalize_supports(member_type team, double* target_point,
+void normalize_supports(const member_type& team, double* target_point,
                         ScratchMatView& support_coordinates)
 {
   int nsupports = support_coordinates.extent(0);
@@ -465,7 +465,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
 
   if (team.team_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
-                                     sigma, Vt, work, 1e-8);
+                                     sigma, Vt, work, 1e-6);
 
     // calculate sigma(i) / (sigma(i)^2 + lambda)
     calculate_shrinkage_factor(team, lambda, sigma);

From e8bb7fad35487d7d00036157b658dd190cbda73f Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:38:06 -0400
Subject: [PATCH 45/73] remove Omega_h_array_ops

---
 src/pcms/interpolator/pcms_interpolator_aliases.hpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp
index 55a5f187..fd482371 100644
--- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp
@@ -2,8 +2,8 @@
 #define PCMS_INTERPOLATOR_ALIASES_HPP
 
 #include <Kokkos_Core.hpp>
-#include "../arrays.h"
-#include <Omega_h_array_ops.hpp>
+#include "pcms/arrays.h"
+
 namespace pcms
 {
 

From ceae062bcf085e285bba4c661b593fc80c45c344 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:38:56 -0400
Subject: [PATCH 46/73] use kokkos sqrt function

---
 src/pcms/interpolator/pcms_interpolator_view_utils.hpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index 5dd419e8..8edd50f4 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -5,6 +5,7 @@
 #include <Omega_h_array_ops.hpp>
 #include <cmath>
 #include <Omega_h_fail.hpp>
+#include <Kokkos_MathematicalFunctions.hpp>
 
 namespace pcms
 {
@@ -59,7 +60,7 @@ void find_sq_root_each(member_type team, ScratchVecView& array)
       "[Error:] Square root of a negative number is invalid!\n"
       "value is %12.6f\n",
       array(i));
-    array(i) = sqrt(array(i));
+    array(i) = Kokkos::sqrt(array(i));
   });
 }
 

From 5337afac6b1e91a9340185ab20f01cb699b4354a Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:47:36 -0400
Subject: [PATCH 47/73] remove commented line

---
 test/CMakeLists.txt | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index f538c4af..0510c846 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -104,18 +104,16 @@ if(Catch2_FOUND)
       add_executable(field_transfer_example field_transfer_example.cpp)
       target_link_libraries(field_transfer_example PUBLIC pcms::core pcms_interpolator)
       list(APPEND PCMS_UNIT_TEST_SOURCES
-              test_field_transfer.cpp
-              test_uniform_grid.cpp
-              test_omega_h_copy.cpp
-              test_point_search.cpp
-	      test_mls_basis.cpp
-	      test_rbf_interp.cpp
-	      test_linear_solver.cpp
-		  test_normalisation.cpp
+          test_field_transfer.cpp
+          test_uniform_grid.cpp
+          test_omega_h_copy.cpp
+          test_point_search.cpp
+          test_mls_basis.cpp
+          test_rbf_interp.cpp
+          test_linear_solver.cpp
+          test_normalisation.cpp
 		  test_spr_meshfields.cpp
-		  test_svd_serial.cpp 
-		  # test_serial_qr.cpp
-              )
+		  test_svd_serial.cpp)
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})
   target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core pcms_interpolator)

From 0b3b7d5615b8a7cd9ad6b0bb5209b3ca392f4af3 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:54:41 -0400
Subject: [PATCH 48/73] remove using namespace

---
 test/test_normalisation.cpp | 42 ++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 22 deletions(-)

diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp
index 88773567..c2612e21 100644
--- a/test/test_normalisation.cpp
+++ b/test/test_normalisation.cpp
@@ -10,14 +10,11 @@
 #include <vector>
 #include <iostream>
 #include <Omega_h_array_ops.hpp>
-using namespace std;
-using namespace Omega_h;
-using namespace pcms;
 
-Reals fillFromMatrix(const Matrix<2, 10>& matrix)
+Omega_h::Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix)
 {
   int num_points = 10;
-  Write<Real> array(20, 0, "array to store coordinates");
+  Omega_h::Write<Omega_h::Real> array(20, 0, "array to store coordinates");
   parallel_for(
     "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) {
       int index = i * 2;
@@ -25,7 +22,7 @@ Reals fillFromMatrix(const Matrix<2, 10>& matrix)
       array[index] = matrix[i][0];
       array[index + 1] = matrix[i][1];
     });
-  return read(array);
+  return Omega_h::read(array);
 }
 
 TEST_CASE("test_normalization_routine")
@@ -33,7 +30,7 @@ TEST_CASE("test_normalization_routine")
 
   SECTION("test_normalization_relative_distance")
   {
-    Real tolerance = 1E-5;
+    Omega_h::Real tolerance = 1E-5;
     int nvertices_target = 3;
 
     Omega_h::Matrix<3, 3> target_coordinates{
@@ -59,16 +56,16 @@ TEST_CASE("test_normalization_routine")
     auto total_coordinates = Omega_h::get_sum(nsupports) * dim;
 
     REQUIRE(total_coordinates == 54);
-    Write<Real> normalized_support_coordinates(
+    Omega_h::Write<Omega_h::Real> normalized_support_coordinates(
       total_coordinates, 0, "coordinates after normalization");
 
-    Write<Real> normalized_target_coordinates(
+    Omega_h::Write<Omega_h::Real> normalized_target_coordinates(
       total_coordinates, 0, "coordinates after normalization");
 
-    Write<Real> all_support_coordinates(total_coordinates, 0,
-                                        "support coordinates all");
-    Write<Real> all_target_coordinates(total_coordinates, 0,
-                                       "target coordinates all");
+    Omega_h::Write<Omega_h::Real> all_support_coordinates(
+      total_coordinates, 0, "support coordinates all");
+    Omega_h::Write<Omega_h::Real> all_target_coordinates(
+      total_coordinates, 0, "target coordinates all");
 
     team_policy tp(nvertices_target, Kokkos::AUTO);
     Kokkos::parallel_for(
@@ -77,7 +74,8 @@ TEST_CASE("test_normalization_routine")
         int league_rank = team.league_rank();
 
         int num_supports = nsupports[league_rank];
-        ScratchMatView local_supports(team.team_scratch(0), num_supports, dim);
+        pcms::ScratchMatView local_supports(team.team_scratch(0), num_supports,
+                                            dim);
         detail::fill(0, team, local_supports);
 
         double target_point[MAX_DIM] = {};
@@ -87,7 +85,7 @@ TEST_CASE("test_normalization_routine")
         }
 
         Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
+          Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) {
             for (int j = 0; j < dim; ++j) {
               int index = league_rank * num_supports * dim + i * dim + j;
 
@@ -110,7 +108,7 @@ TEST_CASE("test_normalization_routine")
         detail::normalize_supports(team, target_point, local_supports);
 
         Kokkos::parallel_for(
-          Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
+          Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) {
             for (int j = 0; j < dim; ++j) {
               int index = league_rank * num_supports * dim + i * dim + j;
               normalized_support_coordinates[index] = local_supports(i, j);
@@ -120,16 +118,16 @@ TEST_CASE("test_normalization_routine")
       });
 
     auto host_all_support_coordinates =
-      HostRead<Real>(read(all_support_coordinates));
+      Omega_h::HostRead<Omega_h::Real>(Omega_h::read(all_support_coordinates));
 
     auto host_all_target_coordinates =
-      HostRead<Real>(read(all_target_coordinates));
+      Omega_h::HostRead<Omega_h::Real>(Omega_h::read(all_target_coordinates));
 
-    auto host_normalized_support_coordinates =
-      HostRead<Real>(read(normalized_support_coordinates));
+    auto host_normalized_support_coordinates = Omega_h::HostRead<Omega_h::Real>(
+      Omega_h::read(normalized_support_coordinates));
 
-    auto host_normalized_target_coordinates =
-      HostRead<Real>(read(normalized_target_coordinates));
+    auto host_normalized_target_coordinates = Omega_h::HostRead<Omega_h::Real>(
+      Omega_h::read(normalized_target_coordinates));
 
     for (int i = 0; i < total_coordinates; ++i) {
       auto result_support =

From c1d10f7e101381d6f0300bef690b08926d2597f6 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:55:26 -0400
Subject: [PATCH 49/73] remove commented line

---
 test/test_rbf_interp.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp
index e76091c6..85d9bef9 100644
--- a/test/test_rbf_interp.cpp
+++ b/test/test_rbf_interp.cpp
@@ -44,7 +44,6 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
   Real tolerance = 5e-4;
 
   std::vector<RadialBasisFunction> rbf_types = {
-    // RadialBasisFunction::RBF_GAUSSIAN
     RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4,
     RadialBasisFunction::RBF_CONST
 

From f8cd5585d93f8b75eedfe5826722045979765174 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 14:56:30 -0400
Subject: [PATCH 50/73] remove test_serial_qr.cpp

---
 test/test_serial_qr.cpp | 147 ----------------------------------------
 1 file changed, 147 deletions(-)
 delete mode 100644 test/test_serial_qr.cpp

diff --git a/test/test_serial_qr.cpp b/test/test_serial_qr.cpp
deleted file mode 100644
index 10e8bffc..00000000
--- a/test/test_serial_qr.cpp
+++ /dev/null
@@ -1,147 +0,0 @@
-#include <KokkosBatched_ApplyQ_Decl.hpp> //KokkosBlas::ApplyQ
-#include <KokkosBatched_QR_Decl.hpp>     //KokkosBlas::QR
-#include <KokkosBatched_Trsv_Decl.hpp>   //KokkosBlas::Trsv
-#include <KokkosBatched_Util.hpp>        //KokkosBlas::Algo, KokkosBatched::Diag
-#include <KokkosBlas2_gemv.hpp>          //KokkosBlas::gemv
-#include <Kokkos_Core.hpp>
-#include <catch2/catch_test_macros.hpp>
-#include <catch2/catch_approx.hpp>
-#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
-#include <catch2/matchers/catch_matchers_floating_point.hpp>
-#include <vector>
-#include <iostream>
-
-TEST_CASE("test_qr_serial")
-{
-  /* here is a test case run with Octave */
-  static double const a_data[16][10] = {
-    {1.0000000e+00, -4.9112749e-02, -1.5629814e+00, -8.2662407e-02,
-     7.6762316e-02, 1.2919981e-01, 4.0597781e-03, 2.4120621e-03, 2.4429110e+00,
-     6.8330735e-03},
-    {1.0000000e+00, -7.4718700e-01, 1.1447982e+00, -6.1608208e-01,
-     -8.5537834e-01, -7.0528966e-01, 4.6032852e-01, 5.5828842e-01,
-     1.3105629e+00, 3.7955713e-01},
-    {1.0000000e+00, -4.8564839e-01, -7.2143765e-01, -5.3574860e-02,
-     3.5036503e-01, 3.8650921e-02, 2.6018545e-02, 2.3585436e-01, 5.2047228e-01,
-     2.8702657e-03},
-    {1.0000000e+00, -8.1013248e-01, -1.0234062e+00, 3.4345012e-01,
-     8.2909460e-01, -3.5148898e-01, -2.7824010e-01, 6.5631463e-01,
-     1.0473602e+00, 1.1795799e-01},
-    {1.0000000e+00, -1.0508609e+00, -8.6973926e-01, 1.4570417e+00,
-     9.1397495e-01, -1.2672464e+00, -1.5311481e+00, 1.1043086e+00,
-     7.5644638e-01, 2.1229706e+00},
-    {1.0000000e+00, -1.7802012e-01, 1.8697947e+00, 8.3576559e-01,
-     -3.3286107e-01, 1.5627100e+00, -1.4878309e-01, 3.1691163e-02,
-     3.4961320e+00, 6.9850413e-01},
-    {1.0000000e+00, -6.4594368e-01, -5.5999878e-01, 2.5848452e+00,
-     3.6172768e-01, -1.4475102e+00, -1.6696645e+00, 4.1724324e-01,
-     3.1359864e-01, 6.6814250e+00},
-    {1.0000000e+00, -3.0007589e-01, -5.6481843e-01, -3.0670467e-01,
-     1.6948839e-01, 1.7323245e-01, 9.2034678e-02, 9.0045542e-02, 3.1901985e-01,
-     9.4067754e-02},
-    {1.0000000e+00, 1.8896909e-01, 2.7899549e-01, -2.5907897e-01, 5.2721524e-02,
-     -7.2281865e-02, -4.8957918e-02, 3.5709317e-02, 7.7838484e-02,
-     6.7121914e-02},
-    {1.0000000e+00, 1.8586762e+00, -4.5760801e-01, -9.5892373e-02,
-     -8.5054510e-01, 4.3881118e-02, -1.7823287e-01, 3.4546770e+00,
-     2.0940510e-01, 9.1953472e-03},
-    {1.0000000e+00, -1.0915266e-01, 1.9050743e+00, 3.2218623e-01,
-     -2.0794394e-01, 6.1378871e-01, -3.5167484e-02, 1.1914304e-02,
-     3.6293082e+00, 1.0380396e-01},
-    {1.0000000e+00, -3.2178312e-01, -6.9392454e-01, -6.1112393e-01,
-     2.2329320e-01, 4.2407389e-01, 1.9664936e-01, 1.0354437e-01, 4.8153127e-01,
-     3.7347245e-01},
-    {1.0000000e+00, 1.4229431e+00, 3.8000845e-01, -1.5989849e-01, 5.4073041e-01,
-     -6.0762776e-02, -2.2752645e-01, 2.0247671e+00, 1.4440643e-01,
-     2.5567526e-02},
-    {1.0000000e+00, 9.0145077e-01, -9.6991898e-01, 7.9086551e-01,
-     -8.7433421e-01, -7.6707547e-01, 7.1292632e-01, 8.1261349e-01,
-     9.4074284e-01, 6.2546825e-01},
-    {1.0000000e+00, -7.2533533e-01, 1.4790603e-01, 7.7147564e-01,
-     -1.0728147e-01, 1.1410590e-01, -5.5957854e-01, 5.2611134e-01,
-     2.1876193e-02, 5.9517466e-01},
-    {1.0000000e+00, 5.3974943e-01, -7.7853625e-01, -1.2196455e+00,
-     -4.2021450e-01, 9.4953820e-01, -6.5830294e-01, 2.9132945e-01,
-     6.0611869e-01, 1.4875350e+00},
-  };
-
-  static double const x_data[10] = {
-    4.4283983e-01,  -2.9416715e-01, 2.5654724e-01,  3.3493879e-01,
-    -6.3165447e-01, -5.1360652e-01, -2.4971659e-01, -2.5776427e-01,
-    -8.8432424e-02, -5.2756825e-01};
-
-  using ExecutionSpace = Kokkos::DefaultExecutionSpace;
-  using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space;
-  using DeviceType = ExecutionSpace::device_type;
-
-  // based on pumi (github.com/SCOREC/core) test/qr.cc
-  typedef Kokkos::View<double[16][10], Kokkos::LayoutLeft, DeviceType>
-    MatrixViewType;
-  typedef Kokkos::View<double[16], Kokkos::LayoutLeft, DeviceType>
-    RowVectorViewType;
-  typedef Kokkos::View<double[10], Kokkos::LayoutLeft, DeviceType>
-    ColVectorViewType;
-  typedef Kokkos::View<double[10], Kokkos::LayoutRight, DeviceType>
-    ColWorkViewType;
-  const auto one = 1.0;
-  const auto zero = 0.0;
-
-  MatrixViewType A("A");
-  typename MatrixViewType::HostMirror A_host = Kokkos::create_mirror_view(A);
-  for (int i = 0; i < 16; i++)
-    for (int j = 0; j < 10; j++)
-      A_host(i, j) = a_data[i][j];
-  Kokkos::deep_copy(A, A_host);
-
-  ColVectorViewType kx("kx");
-  typename ColVectorViewType::HostMirror kx_host =
-    Kokkos::create_mirror_view(kx);
-  for (int j = 0; j < 10; j++)
-    kx_host(j) = x_data[j];
-  Kokkos::deep_copy(kx, kx_host);
-  // b = A*kx
-  RowVectorViewType b("b");
-  KokkosBlas::gemv("N", one, A, kx, zero, b);
-
-  // x = b
-  RowVectorViewType x("x");
-  Kokkos::deep_copy(x, b);
-
-  // t: tau (see SerialQR call below)
-  ColVectorViewType t("t");
-
-  // w: working space for SerialQR
-  ColWorkViewType w("w");
-
-  // roughly following
-  // kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp
-  typedef KokkosBlas::Algo::QR::Unblocked AlgoTagType;
-  Kokkos::parallel_for(
-    "solveQR", 1, KOKKOS_LAMBDA(int) {
-      // compute the QR factorization of A and store the results in A and t
-      // (tau) - see the lapack dgeqp3(...) documentation:
-      // www.netlib.org/lapack/explore-html-3.6.1/dd/d9a/group__double_g_ecomputational_ga1b0500f49e03d2771b797c6e88adabbb.html
-      KokkosBatched::SerialQR<AlgoTagType>::invoke(A, t, w);
-      // x = Q^{T}x
-      KokkosBatched::SerialApplyQ<
-        KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose,
-        KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(A, t, x, w);
-      // x = R^{-1}x
-      KokkosBatched::SerialTrsv<KokkosBatched::Uplo::Upper,
-                                KokkosBatched::Trans::NoTranspose,
-                                KokkosBatched::Diag::NonUnit,
-                                KokkosBlas::Algo::Trsv::Unblocked>::invoke(one,
-                                                                           A,
-                                                                           x);
-    });
-
-  typename RowVectorViewType::HostMirror sol_host =
-    Kokkos::create_mirror_view(x);
-  Kokkos::deep_copy(sol_host, x);
-
-  for (int i = 0; i < kx_host.size(); ++i) {
-    REQUIRE(sol_host.size() == 16);
-    CAPTURE(sol_host(i), kx_host(i), 1e-15);
-    CHECK_THAT(sol_host(i), Catch::Matchers::WithinAbs(kx_host[i], 1e-15));
-  }
-}

From af76eb320fb42a2357368072e7f28db816cceea5 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 15:03:52 -0400
Subject: [PATCH 51/73] remove using namespace

---
 test/test_spr_meshfields.cpp | 51 ++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 25 deletions(-)

diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
index 5b81bd7f..946dfd1c 100644
--- a/test/test_spr_meshfields.cpp
+++ b/test/test_spr_meshfields.cpp
@@ -12,16 +12,13 @@
 #include <vector>
 #include <iostream>
 
-using namespace std;
-using namespace Omega_h;
-
 namespace
 {
 
 void print_patches(Omega_h::Graph& patches)
 {
-  auto offsets = HostRead(patches.a2ab);
-  auto values = HostRead(patches.ab2b);
+  auto offsets = Omega_h::HostRead(patches.a2ab);
+  auto values = Omega_h::HostRead(patches.ab2b);
   std::cout << "num patches " << patches.nnodes() << "\n";
   for (int patch = 0; patch < patches.nnodes(); patch++) {
     std::cout << "patch " << patch << " patchElms ";
@@ -59,32 +56,34 @@ double func(pcms::Coord& p, int degree)
   return -1;
 }
 
-void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values,
-          Reals exact_target_values, Reals source_coordinates,
-          Reals target_coordinates)
+void test(Mesh& mesh, Omega_h::Graph& patches, int degree,
+          Omega_h::Reals source_values, Omega_h::Reals exact_target_values,
+          Omega_h::Reals source_coordinates, Omega_h::Reals target_coordinates)
 {
 
   int dim = mesh.dim();
-  Real tolerance = 0.0005;
+  Omega_h::Realtolerance = 0.0005;
 
-  Omega_h::Write<Real> ignored(patches.a2ab.size() - 1, 1);
+  Omega_h::Write<Omega_h::Real> ignored(patches.a2ab.size() - 1, 1);
   SupportResults support{patches.a2ab, patches.ab2b, ignored};
 
   auto approx_target_values = pcms::mls_interpolation(
     source_values, source_coordinates, target_coordinates, support, dim, degree,
     pcms::RadialBasisFunction::NO_OP, 1e-5);
 
-  const auto delta_abs = Omega_h::fabs_each(
-    Omega_h::subtract_each(exact_target_values, read(approx_target_values)));
+  const auto delta_abs = Omega_h::fabs_each(Omega_h::subtract_each(
+    exact_target_values, Omega_h::read(approx_target_values)));
   const auto max_delta_abs = Omega_h::get_max(delta_abs);
   std::cout << "max_delta_abs " << max_delta_abs << "\n";
 
-  auto host_approx_target_values = HostRead<Real>(approx_target_values);
+  auto host_approx_target_values =
+    Omega_h::HostRead<Omega_h::Real>(approx_target_values);
   mesh.add_tag(OMEGA_H_VERT, "approx_target_values", 1,
-               read(approx_target_values));
+               Omega_h::read(approx_target_values));
   Omega_h::vtk::write_parallel("box.vtk", &mesh);
 
-  auto host_exact_target_values = HostRead<Real>(exact_target_values);
+  auto host_exact_target_values =
+    Omega_h::HostRead<Omega_h::Real>(exact_target_values);
 
   int m = exact_target_values.size();
   int n = approx_target_values.size();
@@ -116,7 +115,7 @@ TEST_CASE("meshfields_spr_test")
   Omega_h::Mesh mesh(&lib);
   const auto thwaitesMeshFile =
     "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh";
-  Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh);
+  Omega_h::binary::Omega_h::read(thwaitesMeshFile, lib.world(), &mesh);
   std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts()
             << "\n";
 
@@ -128,7 +127,7 @@ TEST_CASE("meshfields_spr_test")
 
   const auto& ntargets = mesh.nverts();
 
-  Write<Real> source_coordinates(
+  Write<Omega_h::Real> source_coordinates(
     dim * nfaces, 0, "stores coordinates of cell centroid of each tri element");
 
   const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b;
@@ -182,16 +181,17 @@ TEST_CASE("meshfields_spr_test")
         auto patches = mesh.get_vtx_patches(minPatchSize);
         // print_patches(patches);
 
-        Write<Real> source_values(nfaces, 0, "exact target values");
+        Write<Omega_h::Real> source_values(nfaces, 0, "exact target values");
 
         Kokkos::parallel_for(
           nfaces, KOKKOS_LAMBDA(int i) {
             source_values[i] = func(source_points.coordinates(i), func_degree);
           });
-        mesh.add_tag(mesh.dim(), "source_values", 1, read(source_values));
+        mesh.add_tag(mesh.dim(), "source_values", 1,
+                     Omega_h::read(source_values));
 
-        Write<Real> exact_target_values(mesh.nverts(), 0,
-                                        "exact target values");
+        Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                 "exact target values");
 
         Kokkos::parallel_for(
           mesh.nverts(), KOKKOS_LAMBDA(int i) {
@@ -199,11 +199,12 @@ TEST_CASE("meshfields_spr_test")
               func(target_points.coordinates(i), func_degree);
           });
         mesh.add_tag(OMEGA_H_VERT, "target_values", 1,
-                     read(exact_target_values));
+                     Omega_h::read(exact_target_values));
         std::cerr << "I am here above test\n";
-        test(mesh, patches, interp_degree, Reals(source_values),
-             Reals(exact_target_values), Reals(source_coordinates),
-             Reals(target_coordinates));
+        test(mesh, patches, interp_degree, Omega_h::Reals(source_values),
+             Omega_h::Reals(exact_target_values),
+             Omega_h::Reals(source_coordinates),
+             Omega_h::Reals(target_coordinates));
         std::cerr << "done " << interp_degree << ", " << func_degree << " \n";
       } // end SECTION
     }

From 64b150098742c8ecd4b9c67ec5f74c338bab16bf Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 15:06:16 -0400
Subject: [PATCH 52/73] remove print statement

---
 test/test_spr_meshfields.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
index 946dfd1c..7c27af5c 100644
--- a/test/test_spr_meshfields.cpp
+++ b/test/test_spr_meshfields.cpp
@@ -179,7 +179,6 @@ TEST_CASE("meshfields_spr_test")
           minPatchSize = 12; // why so large?
         std::cerr << "minPatchSize " << minPatchSize << "\n";
         auto patches = mesh.get_vtx_patches(minPatchSize);
-        // print_patches(patches);
 
         Write<Omega_h::Real> source_values(nfaces, 0, "exact target values");
 

From a34d804f8be0ea23baaf3b13311764ab0a407ff3 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 15:07:02 -0400
Subject: [PATCH 53/73] fix indentation

---
 src/pcms/interpolator/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt
index 7d1d0178..5e3a5589 100644
--- a/src/pcms/interpolator/CMakeLists.txt
+++ b/src/pcms/interpolator/CMakeLists.txt
@@ -8,8 +8,8 @@ set(PCMS_FIELD_TRANSFER_HEADERS
     linear_interpolant.hpp
     multidimarray.hpp
     mls_interpolation.hpp
-	pcms_interpolator_view_utils.hpp
-	pcms_interpolator_logger.hpp
+    pcms_interpolator_view_utils.hpp
+    pcms_interpolator_logger.hpp
 )
 
 set(PCMS_FIELD_TRANSFER_SOURCES

From 2a7077fe0f8797c1ce1e74518eac91262b3d7ab7 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 16:13:33 -0400
Subject: [PATCH 54/73] remove using namespace

---
 test/test_linear_solver.cpp | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp
index 2f8664ec..97b43692 100644
--- a/test/test_linear_solver.cpp
+++ b/test/test_linear_solver.cpp
@@ -7,10 +7,7 @@
 #include <Omega_h_fail.hpp>
 #include <catch2/matchers/catch_matchers_floating_point.hpp>
 
-using namespace pcms;
-using namespace pcms::detail;
-
-TEST_CASE("solver test")
+TEST_CASE("LU solver test")
 {
   // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ
   // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal
@@ -82,15 +79,15 @@ TEST_CASE("solver test")
 
     Kokkos::deep_copy(b, host_b);
 
-    team_policy tp(nvertices_target, Kokkos::AUTO);
+    pcms::team_policy tp(nvertices_target, Kokkos::AUTO);
     Kokkos::parallel_for(
       "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
-      KOKKOS_LAMBDA(const member_type& team) {
+      KOKKOS_LAMBDA(const pcms::member_type& team) {
         int i = team.league_rank();
-        ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
-                                          size);
-        ScratchVecView phi(team.team_scratch(0), nsupports);
-        ScratchVecView support_values(team.team_scratch(0), nsupports);
+        pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
+                                                size);
+        pcms::ScratchVecView phi(team.team_scratch(0), nsupports);
+        pcms::ScratchVecView support_values(team.team_scratch(0), nsupports);
 
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports),
                              [=](int j) {
@@ -103,8 +100,8 @@ TEST_CASE("solver test")
 
         team.team_barrier();
 
-        auto result = convert_normal_equation(vandermonde_matrix, phi,
-                                              support_values, team);
+        auto result = pcms::detail::convert_normal_equation(
+          vandermonde_matrix, phi, support_values, team);
 
         team.team_barrier();
 
@@ -126,7 +123,8 @@ TEST_CASE("solver test")
 
         team.team_barrier();
 
-        solve_matrix(result.square_matrix, result.transformed_rhs, team);
+        pcms::detail::solve_matrix(result.square_matrix, result.transformed_rhs,
+                                   team);
 
         team.team_barrier();
 

From 8bfb6a682adcaf98b2d8419d8a251b66dc5d043f Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 28 Mar 2025 16:14:22 -0400
Subject: [PATCH 55/73] remove using namespace

---
 test/test_mls_basis.cpp      |  51 +++++++-------
 test/test_normalisation.cpp  |  14 ++--
 test/test_rbf_interp.cpp     | 130 ++++++++++++++++++++---------------
 test/test_spr_meshfields.cpp |   5 +-
 4 files changed, 109 insertions(+), 91 deletions(-)

diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp
index d50ed4f8..d35092b0 100644
--- a/test/test_mls_basis.cpp
+++ b/test/test_mls_basis.cpp
@@ -5,8 +5,6 @@
 #include <Omega_h_mesh.hpp>
 #include <Omega_h_build.hpp>
 
-using namespace pcms;
-
 TEST_CASE("basis test")
 {
   SECTION("check basis slice lengths, size of the basis vector and basis "
@@ -17,8 +15,8 @@ TEST_CASE("basis test")
     const int degree = 3;
     Kokkos::View<int**, Kokkos::HostSpace> array("array", degree, dim);
     Kokkos::deep_copy(array, 0);
-    detail::calculate_basis_slice_lengths(array);
-    auto size = detail::calculate_basis_vector_size(array);
+    pcms::detail::calculate_basis_slice_lengths(array);
+    auto size = pcms::detail::calculate_basis_vector_size(array);
 
     int expected[degree][dim] = {{1, 1, 1}, {1, 2, 3}, {1, 3, 6}};
 
@@ -30,7 +28,7 @@ TEST_CASE("basis test")
 
     REQUIRE(size == 20);
 
-    IntDeviceMatView d_array("array in device", degree, dim);
+    pcms::IntDeviceMatView d_array("array in device", degree, dim);
     auto array_hd = Kokkos::create_mirror_view(d_array);
     Kokkos::deep_copy(array_hd, array);
     Kokkos::deep_copy(d_array, array_hd);
@@ -41,12 +39,12 @@ TEST_CASE("basis test")
 
     auto host_results = Kokkos::create_mirror_view(results);
 
-    team_policy tp(nvertices_target, Kokkos::AUTO);
+    pcms::team_policy tp(nvertices_target, Kokkos::AUTO);
     Kokkos::parallel_for(
       "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
-      KOKKOS_LAMBDA(const member_type& team) {
+      KOKKOS_LAMBDA(const pcms::member_type& team) {
         int i = team.league_rank();
-        ScratchVecView basis_vector(team.team_scratch(0), size);
+        pcms::ScratchVecView basis_vector(team.team_scratch(0), size);
         pcms::detail::fill(0, team, basis_vector);
 
         double target_point[MAX_DIM] = {};
@@ -55,7 +53,7 @@ TEST_CASE("basis test")
           target_point[j] = coords[i][j];
         }
 
-        detail::eval_basis_vector(d_array, target_point, basis_vector);
+        pcms::detail::eval_basis_vector(d_array, target_point, basis_vector);
 
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
                              [=](int j) { results(i, j) = basis_vector(j); });
@@ -109,9 +107,9 @@ TEST_CASE("basis test")
     const int degree = 1;
     Kokkos::View<int**, Kokkos::HostSpace> array("array", degree, dim);
     Kokkos::deep_copy(array, 0);
-    detail::calculate_basis_slice_lengths(array);
+    pcms::detail::calculate_basis_slice_lengths(array);
 
-    auto size = detail::calculate_basis_vector_size(array);
+    auto size = pcms::detail::calculate_basis_vector_size(array);
 
     int expected[degree][dim] = {
       {1, 1, 1},
@@ -125,7 +123,7 @@ TEST_CASE("basis test")
 
     REQUIRE(size == 4);
 
-    IntDeviceMatView d_array("array in device", degree, dim);
+    pcms::IntDeviceMatView d_array("array in device", degree, dim);
     auto array_hd = Kokkos::create_mirror_view(d_array);
     Kokkos::deep_copy(array_hd, array);
     Kokkos::deep_copy(d_array, array_hd);
@@ -136,12 +134,12 @@ TEST_CASE("basis test")
 
     auto host_results = Kokkos::create_mirror_view(results);
 
-    team_policy tp(nvertices_target, Kokkos::AUTO);
+    pcms::team_policy tp(nvertices_target, Kokkos::AUTO);
     Kokkos::parallel_for(
       "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
-      KOKKOS_LAMBDA(const member_type& team) {
+      KOKKOS_LAMBDA(const pcms::member_type& team) {
         int i = team.league_rank();
-        ScratchVecView basis_vector(team.team_scratch(0), size);
+        pcms::ScratchVecView basis_vector(team.team_scratch(0), size);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
                              [=](int j) { basis_vector(j) = 0; });
 
@@ -151,7 +149,7 @@ TEST_CASE("basis test")
           target_point[j] = coords[i][j];
         }
 
-        detail::eval_basis_vector(d_array, target_point, basis_vector);
+        pcms::detail::eval_basis_vector(d_array, target_point, basis_vector);
 
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size),
                              [=](int j) { results(i, j) = basis_vector(j); });
@@ -187,13 +185,13 @@ TEST_CASE("basis test")
     Kokkos::View<int**, Kokkos::HostSpace> host_slice_length("array", degree,
                                                              dim);
     Kokkos::deep_copy(host_slice_length, 0);
-    detail::calculate_basis_slice_lengths(host_slice_length);
+    pcms::detail::calculate_basis_slice_lengths(host_slice_length);
 
-    IntDeviceMatView slice_length("slice array in device", degree, dim);
+    pcms::IntDeviceMatView slice_length("slice array in device", degree, dim);
     auto slice_length_hd = Kokkos::create_mirror_view(slice_length);
     Kokkos::deep_copy(slice_length_hd, host_slice_length);
     Kokkos::deep_copy(slice_length, slice_length_hd);
-    auto size = detail::calculate_basis_vector_size(host_slice_length);
+    auto size = pcms::detail::calculate_basis_vector_size(host_slice_length);
     int nvertices_target = 2;
     int nsupports = 5;
     Kokkos::View<double**> points("vandermonde matrix", nvertices_target,
@@ -220,15 +218,16 @@ TEST_CASE("basis test")
 
     Kokkos::deep_copy(points, host_points);
 
-    team_policy tp(nvertices_target, Kokkos::AUTO);
+    pcms::team_policy tp(nvertices_target, Kokkos::AUTO);
     Kokkos::parallel_for(
       "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
-      KOKKOS_LAMBDA(const member_type& team) {
+      KOKKOS_LAMBDA(const pcms::member_type& team) {
         int i = team.league_rank();
-        ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
-                                          size);
+        pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
+                                                size);
 
-        ScratchMatView local_source_points(team.team_scratch(0), nsupports, 1);
+        pcms::ScratchMatView local_source_points(team.team_scratch(0),
+                                                 nsupports, 1);
         for (int j = 0; j < nsupports; ++j) {
           local_source_points(j, 0) = points(i, j);
         }
@@ -244,8 +243,8 @@ TEST_CASE("basis test")
 
         Kokkos::parallel_for(
           Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
-            detail::create_vandermonde_matrix(local_source_points, j,
-                                              slice_length, vandermonde_matrix);
+            pcms::detail::create_vandermonde_matrix(
+              local_source_points, j, slice_length, vandermonde_matrix);
           });
 
         Kokkos::parallel_for(
diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp
index c2612e21..4c6c38d9 100644
--- a/test/test_normalisation.cpp
+++ b/test/test_normalisation.cpp
@@ -11,7 +11,7 @@
 #include <iostream>
 #include <Omega_h_array_ops.hpp>
 
-Omega_h::Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix)
+Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix)
 {
   int num_points = 10;
   Omega_h::Write<Omega_h::Real> array(20, 0, "array to store coordinates");
@@ -67,16 +67,16 @@ TEST_CASE("test_normalization_routine")
     Omega_h::Write<Omega_h::Real> all_target_coordinates(
       total_coordinates, 0, "target coordinates all");
 
-    team_policy tp(nvertices_target, Kokkos::AUTO);
+    pcms::team_policy tp(nvertices_target, Kokkos::AUTO);
     Kokkos::parallel_for(
       "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
-      KOKKOS_LAMBDA(const member_type& team) {
+      KOKKOS_LAMBDA(const pcms::member_type& team) {
         int league_rank = team.league_rank();
 
         int num_supports = nsupports[league_rank];
         pcms::ScratchMatView local_supports(team.team_scratch(0), num_supports,
                                             dim);
-        detail::fill(0, team, local_supports);
+        pcms::detail::fill(0, team, local_supports);
 
         double target_point[MAX_DIM] = {};
 
@@ -85,7 +85,7 @@ TEST_CASE("test_normalization_routine")
         }
 
         Kokkos::parallel_for(
-          Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) {
+          Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
             for (int j = 0; j < dim; ++j) {
               int index = league_rank * num_supports * dim + i * dim + j;
 
@@ -105,10 +105,10 @@ TEST_CASE("test_normalization_routine")
           });
 
         team.team_barrier();
-        detail::normalize_supports(team, target_point, local_supports);
+        pcms::detail::normalize_supports(team, target_point, local_supports);
 
         Kokkos::parallel_for(
-          Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) {
+          Kokkos::TeamThreadRange(team, num_supports), [=](int i) {
             for (int j = 0; j < dim; ++j) {
               int index = league_rank * num_supports * dim + i * dim + j;
               normalized_support_coordinates[index] = local_supports(i, j);
diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp
index 85d9bef9..cdafea62 100644
--- a/test/test_rbf_interp.cpp
+++ b/test/test_rbf_interp.cpp
@@ -11,12 +11,8 @@
 #include <vector>
 #include <iostream>
 
-using namespace std;
-using namespace Omega_h;
-using namespace pcms;
-
 KOKKOS_INLINE_FUNCTION
-double func(Coord& p, int degree)
+double func(pcms::Coord& p, int degree)
 {
   [[maybe_unused]] auto x = p.x;
   [[maybe_unused]] auto y = p.y;
@@ -35,17 +31,18 @@ double func(Coord& p, int degree)
   return -1;
 }
 
-void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
-          Reals source_values, Reals exact_target_values,
-          Reals source_coordinates, Reals target_coordinates)
+void test(Mesh& mesh, Omega_h::Real cutoffDistance, int degree,
+          LO min_num_supports, Omega_h::Reals source_values,
+          Omega_h::Reals exact_target_values, Omega_h::Reals source_coordinates,
+          Omega_h::Reals target_coordinates)
 {
 
   int dim = mesh.dim();
-  Real tolerance = 5e-4;
+  Omega_h::Real tolerance = 5e-4;
 
-  std::vector<RadialBasisFunction> rbf_types = {
-    RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4,
-    RadialBasisFunction::RBF_CONST
+  std::vector<pcms::RadialBasisFunction> rbf_types = {
+    pcms::RadialBasisFunction::RBF_GAUSSIAN, pcms::RadialBasisFunction::RBF_C4,
+    pcms::RadialBasisFunction::RBF_CONST
 
   };
 
@@ -57,9 +54,11 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports,
       mls_interpolation(source_values, source_coordinates, target_coordinates,
                         support, dim, degree, rbf);
 
-    auto host_approx_target_values = HostRead<Real>(approx_target_values);
+    auto host_approx_target_values =
+      Omega_h::HostRead<Omega_h::Real>(approx_target_values);
 
-    auto host_exact_target_values = HostRead<Real>(exact_target_values);
+    auto host_exact_target_values =
+      Omega_h::HostRead<Omega_h::Real>(exact_target_values);
 
     int m = exact_target_values.size();
     int n = approx_target_values.size();
@@ -83,7 +82,7 @@ TEST_CASE("test_mls_interpolation")
   auto rank = lib.world()->rank();
   auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false);
 
-  Real cutoffDistance = 0.3;
+  Omega_h::Real cutoffDistance = 0.3;
   cutoffDistance = cutoffDistance * cutoffDistance;
 
   const auto dim = mesh.dim();
@@ -94,7 +93,7 @@ TEST_CASE("test_mls_interpolation")
 
   const auto& ntargets = mesh.nverts();
 
-  Write<Real> source_coordinates(
+  Omega_h::Write<Omega_h::Real> source_coordinates(
     dim * nfaces, 0, "stores coordinates of cell centroid of each tri element");
 
   const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b;
@@ -111,19 +110,19 @@ TEST_CASE("test_mls_interpolation")
       source_coordinates[index + 1] = centroid[1];
     });
 
-  Points source_points;
+  pcms::Points source_points;
   source_points.coordinates =
-    PointsViewType("Number of local source supports", nfaces);
+    pcms::PointsViewType("Number of local source supports", nfaces);
   Kokkos::parallel_for(
     "target points", nfaces, KOKKOS_LAMBDA(int j) {
       source_points.coordinates(j).x = source_coordinates[j * dim];
       source_points.coordinates(j).y = source_coordinates[j * dim + 1];
     });
 
-  Points target_points;
+  pcms::Points target_points;
 
   target_points.coordinates =
-    PointsViewType("Number of local source supports", mesh.nverts());
+    pcms::PointsViewType("Number of local source supports", mesh.nverts());
   Kokkos::parallel_for(
     "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) {
       target_points.coordinates(j).x = target_coordinates[j * dim];
@@ -136,23 +135,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 1;
     LO min_num_supports = 10;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree - 1);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree - 1);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
 
     std::cout << " The test for d=0, p=1, passed " << "\n";
     std::cout << "----------finishing  test: d0p1-------" << "\n";
@@ -165,23 +167,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 1;
     LO min_num_supports = 10;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
 
     std::cout << " The test for d=1, p=1, passed " << "\n";
     std::cout << "------------finishing test: d1p1--------" << "\n";
@@ -194,23 +199,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 2;
     LO min_num_supports = 16;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree - 2);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree - 2);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
 
     std::cout << " The test for d=0, p=2, passed " << "\n";
     std::cout << "-------------finishing test: d0p2------------" << "\n";
@@ -223,23 +231,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 2;
     LO min_num_supports = 16;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree - 1);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree - 1);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
 
     std::cout << " The test for d=1, p=2, passed " << "\n";
     std::cout << "----------------finishing test: d1p2--------------" << "\n";
@@ -252,23 +263,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 2;
     LO min_num_supports = 16;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
     std::cout << " The test for d=2, p=2, passed " << "\n";
     std::cout << "-------------finishing test: d2p2--------------------"
               << "\n";
@@ -281,23 +295,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 3;
     LO min_num_supports = 20;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree - 1);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree - 1);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
     std::cout << " The test for d=2, p=3, passed " << "\n";
     std::cout << "-------------finishing test: d2p3--------------------"
               << "\n";
@@ -310,23 +327,26 @@ TEST_CASE("test_mls_interpolation")
     int degree = 3;
     LO min_num_supports = 20;
 
-    Write<Real> source_values(nfaces, 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
+                                                "exact target values");
 
     Kokkos::parallel_for(
       nfaces, KOKKOS_LAMBDA(int i) {
         source_values[i] = func(source_points.coordinates(i), degree);
       });
 
-    Write<Real> exact_target_values(mesh.nverts(), 0, "exact target values");
+    Omega_h::Write<Omega_h::Real> exact_target_values(mesh.nverts(), 0,
+                                                      "exact target values");
 
     Kokkos::parallel_for(
       mesh.nverts(), KOKKOS_LAMBDA(int i) {
         exact_target_values[i] = func(target_points.coordinates(i), degree);
       });
 
-    test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values),
-         Reals(exact_target_values), Reals(source_coordinates),
-         Reals(target_coordinates));
+    test(mesh, cutoffDistance, degree, min_num_supports,
+         Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values),
+         Omega_h::Reals(source_coordinates),
+         Omega_h::Reals(target_coordinates));
 
     std::cout << " The test for d=3, p=3, passed " << "\n";
     std::cout << "-------------finishing test: d3p3--------------------"
diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp
index 7c27af5c..3d58e21f 100644
--- a/test/test_spr_meshfields.cpp
+++ b/test/test_spr_meshfields.cpp
@@ -62,7 +62,7 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree,
 {
 
   int dim = mesh.dim();
-  Omega_h::Realtolerance = 0.0005;
+  Omega_h::Real tolerance = 0.0005;
 
   Omega_h::Write<Omega_h::Real> ignored(patches.a2ab.size() - 1, 1);
   SupportResults support{patches.a2ab, patches.ab2b, ignored};
@@ -115,7 +115,7 @@ TEST_CASE("meshfields_spr_test")
   Omega_h::Mesh mesh(&lib);
   const auto thwaitesMeshFile =
     "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh";
-  Omega_h::binary::Omega_h::read(thwaitesMeshFile, lib.world(), &mesh);
+  Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh);
   std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts()
             << "\n";
 
@@ -199,7 +199,6 @@ TEST_CASE("meshfields_spr_test")
           });
         mesh.add_tag(OMEGA_H_VERT, "target_values", 1,
                      Omega_h::read(exact_target_values));
-        std::cerr << "I am here above test\n";
         test(mesh, patches, interp_degree, Omega_h::Reals(source_values),
              Omega_h::Reals(exact_target_values),
              Omega_h::Reals(source_coordinates),

From 0d7f5a9abf9953ef0b492a658eb8e3183e842384 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 7 Apr 2025 04:44:58 -0400
Subject: [PATCH 56/73] works for gtc-gnet coupling

---
 .../interpolator/mls_interpolation_impl.hpp   |  17 +-
 .../interpolator/pcms_interpolator_logger.hpp |  26 ++-
 .../pcms_interpolator_view_utils.hpp          |   1 +
 test/test_svd_serial.cpp                      | 171 ++++++++++++++----
 4 files changed, 177 insertions(+), 38 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index d451ca5d..8effb6ea 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -9,7 +9,6 @@
 #include <KokkosBatched_LU_Decl.hpp>
 #include <KokkosBatched_LU_Team_Impl.hpp>
 #include <KokkosBatched_SolveLU_Decl.hpp>
-#include <KokkosBatched_Gemm_Decl.hpp>
 #include <KokkosBlas.hpp>
 #include <KokkosBlas1_team_dot.hpp>
 #include <Kokkos_StdAlgorithms.hpp>
@@ -467,7 +466,6 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
                                      sigma, Vt, work, 1e-6);
 
-    // calculate sigma(i) / (sigma(i)^2 + lambda)
     calculate_shrinkage_factor(team, lambda, sigma);
 
     auto Ut = find_transpose(team, U);
@@ -599,6 +597,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       fill(0.0, team, target_basis_vector);
       fill(0.0, team, solution_coefficients);
 
+      Logger logger(10);
       // storing the coords of local supports
       int count = -1;
       for (int j = start_ptr; j < end_ptr; ++j) {
@@ -610,12 +609,17 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         }
       }
 
+      logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+                       "Support Coordinates");
       double target_point[MAX_DIM] = {};
 
       for (int i = 0; i < dim; ++i) {
         target_point[i] = target_coordinates[league_rank * dim + i];
       }
 
+      logger.logArray(team, LogLevel::DEBUG, target_point, dim,
+                      "Target points");
+
       /** phi(nsupports) is the array of rbf functions evaluated at the
        * source supports In the actual implementation, Phi(nsupports,
        * nsupports) is the diagonal matrix & each diagonal element is the phi
@@ -648,6 +652,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
+      logger.log(team, LogLevel::DEBUG, "The search  starts");
+      logger.logVector(team, LogLevel::DEBUG, support_values, "Support values");
+
       /**
        *
        * the local_source_points is of the type ScratchMatView with
@@ -681,6 +688,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
+      logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
+                       "vandermonde matrix");
+
       OMEGA_H_CHECK_PRINTF(
         support.radii2[league_rank] > 0,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
@@ -692,7 +702,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       double target_value = KokkosBlas::Experimental::dot(
         team, solution_coefficients, target_basis_vector);
-
+      logger.logScalar(team, LogLevel::DEBUG, target_value,
+                       "interpolated value");
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
                              league_rank);
diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp
index d919ad58..e825f72a 100644
--- a/src/pcms/interpolator/pcms_interpolator_logger.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp
@@ -47,13 +47,31 @@ class Logger
 
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: ", logLevelToString(level),
+        printf("[%s] (League %d) %s: \n", logLevelToString(level),
                team.league_rank(), name);
         printf("(%12.6f, %12.6f)", p.x, p.y);
         printf("\n");
       });
     }
   }
+
+  KOKKOS_INLINE_FUNCTION
+  void logArray(const member_type& team, const LogLevel level,
+                const double* array, const int size, const char* name)
+  {
+
+    if (team.league_rank() == selected_league_rank_) {
+      Kokkos::single(Kokkos::PerTeam(team), [&]() {
+        printf("[%s] (League %d) %s: \n", logLevelToString(level),
+               team.league_rank(), name);
+
+        for (int i = 0; i < size; ++i) {
+          printf("%12.6f\n", array[i]);
+        }
+        printf("\n");
+      });
+    }
+  }
   // log scratch vector
   KOKKOS_INLINE_FUNCTION
   void logVector(const member_type& team, const LogLevel level,
@@ -61,7 +79,7 @@ class Logger
   {
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: ", logLevelToString(level),
+        printf("[%s] (League %d) %s: \n", logLevelToString(level),
                team.league_rank(), name);
         for (int i = 0; i < vector.size(); ++i) {
           printf("%12.6f\n", vector(i));
@@ -82,7 +100,7 @@ class Logger
                team.league_rank(), name);
         for (int i = 0; i < matrix.extent(0); ++i) {
           for (int j = 0; j < matrix.extent(1); ++j) {
-            printf("%12.6f", matrix(i, j));
+            printf("%20.8f", matrix(i, j));
           }
           printf("\n");
         }
@@ -99,7 +117,7 @@ class Logger
   {
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: ", logLevelToString(level),
+        printf("[%s] (League %d) %s: \n", logLevelToString(level),
                team.league_rank(), name);
         printf("%12.6f", value);
         printf("\n");
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index 8edd50f4..5e1f2b88 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -175,6 +175,7 @@ void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries,
     }
   });
 }
+
 } // namespace detail
 } // namespace pcms
 #endif
diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp
index 8d02de0e..e7feb253 100644
--- a/test/test_svd_serial.cpp
+++ b/test/test_svd_serial.cpp
@@ -22,10 +22,30 @@ TEST_CASE("test_serial_svd")
   constexpr int column = 3;
 
   double tolerance = 1e-4;
-  double A_data[row][column] = {
-    {4.0, 2.0, 3.0}, {3.0, 5.0, 1.0}, {2.0, 3.0, 6.0}};
 
-  double rhs_data[row] = {1.28571, 1.42857, 3.1428};
+  Kokkos::View<double**> A_data("Device A data", row, column);
+  auto host_A_data = Kokkos::create_mirror_view(A_data);
+
+  host_A_data(0, 0) = 4.0;
+  host_A_data(0, 1) = 2.0;
+  host_A_data(0, 2) = 3.0;
+  host_A_data(1, 0) = 3.0;
+  host_A_data(1, 1) = 5.0;
+  host_A_data(1, 2) = 1.0;
+  host_A_data(2, 0) = 2.0;
+  host_A_data(2, 1) = 3.0;
+  host_A_data(2, 2) = 6.0;
+
+  Kokkos::deep_copy(A_data, host_A_data);
+
+  Kokkos::View<double*> rhs_data("Device rhs data", row, column);
+  auto host_rhs_data = Kokkos::create_mirror_view(rhs_data);
+
+  host_rhs_data(0) = 1.28571;
+  host_rhs_data(1) = 1.42857;
+  host_rhs_data(2) = 3.1428;
+
+  Kokkos::deep_copy(rhs_data, host_rhs_data);
 
   double expected_solution[column] = {-0.142857, 0.285714,
                                       0.428571}; // Approximate values
@@ -42,17 +62,13 @@ TEST_CASE("test_serial_svd")
         ScratchMatView Vt(team.team_scratch(1), column, column);
         ScratchVecView sigma(team.team_scratch(1), column);
         ScratchVecView work(team.team_scratch(1), row);
+        ScratchMatView reconstructedA(team.team_scratch(1), row, column);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < column; ++j) {
-            A(i, j) = A_data[i][j];
+            A(i, j) = A_data(i, j);
           }
         });
 
-        if (team.league_rank() == 0) {
-          KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U,
-                                           sigma, Vt, work);
-        }
-        team.team_barrier();
         ScratchMatView sigma_mat(team.team_scratch(1), row, column);
         ScratchMatView Usigma(team.team_scratch(1), row, column);
         detail::fill(0.0, team, Usigma);
@@ -61,36 +77,76 @@ TEST_CASE("test_serial_svd")
             sigma_mat(i, j) = (i == j) ? sigma(i) : 0.0;
           }
         });
-        team.team_barrier();
 
-        ScratchMatView reconstructedA(team.team_scratch(1), row, column);
-        if (team.league_rank() == 0) {
-          KokkosBatched::SerialGemm<
+        if (team.team_rank() == 0) {
+          printf("I am above svd solver function");
+          printf("A before SVD:\n");
+          for (int i = 0; i < row; ++i) {
+            for (int j = 0; j < column; ++j) {
+              printf("%f ", A(i, j));
+            }
+            printf("\n");
+          }
+
+          KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U,
+                                           sigma, Vt, work, 1e-6);
+
+          printf("=== Sigma Values ===\n");
+          for (int i = 0; i < column; ++i) {
+            printf("sigma[%d] = %f\n", i, sigma(i));
+          }
+
+          printf("\n=== First few entries of U ===\n");
+          for (int i = 0; i < row; ++i) {
+            for (int j = 0; j < row; ++j) {
+              printf("%f ", U(i, j));
+            }
+            printf("\n");
+          }
+
+          printf("\n=== First few entries of Vt ===\n");
+          for (int i = 0; i < column; ++i) {
+            for (int j = 0; j < column; ++j) {
+              printf("%f ", Vt(i, j));
+            }
+            printf("\n");
+          }
+
+          printf("=============================\n");
+
+          printf("I am below svd solver function");
+          KokkosBatched::TeamGemm<
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat,
                                                           0.0, Usigma);
 
-          KokkosBatched::SerialGemm<
+          printf("I am below serialgemm");
+          KokkosBatched::TeamGemm<
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0,
                                                           reconstructedA);
+          printf("I am below another serialgemm");
         }
-
+        team.team_barrier();
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < column; ++j) {
             result(i, j) = reconstructedA(i, j);
+            printf("Copied reconstructedA(%d,%d) = %f\n", i, j,
+                   reconstructedA(i, j));
           }
         });
       });
+
+    Kokkos::fence();
     auto host_result = Kokkos::create_mirror_view(result);
     Kokkos::deep_copy(host_result, result);
 
     for (int i = 0; i < row; ++i) {
       for (int j = 0; j < column; ++j) {
         CHECK_THAT(host_result(i, j),
-                   Catch::Matchers::WithinAbs(A_data[i][j], tolerance));
+                   Catch::Matchers::WithinAbs(host_A_data(i, j), tolerance));
       }
     }
   } // end section
@@ -113,11 +169,11 @@ TEST_CASE("test_serial_svd")
         ScratchVecView work(team.team_scratch(1), row);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < column; ++j) {
-            A(i, j) = A_data[i][j];
+            A(i, j) = A_data(i, j);
           }
         });
 
-        if (team.league_rank() == 0) {
+        if (team.team_rank() == 0) {
           KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U,
                                            sigma, Vt, work);
         }
@@ -153,7 +209,15 @@ TEST_CASE("test_serial_svd")
 
   SECTION("test_row_scaling_function")
   {
-    double diagonal_entries[row] = {2.0, 0.5, -1.0};
+    Kokkos::View<double*> diagonal_entries("Device diagonal enetries", row);
+    auto host_diagonal_entries = Kokkos::create_mirror_view(diagonal_entries);
+
+    host_diagonal_entries(0) = 2.0;
+    host_diagonal_entries(1) = 0.5;
+    host_diagonal_entries(2) = -1.0;
+
+    Kokkos::deep_copy(diagonal_entries, host_diagonal_entries);
+
     double expected_result[row][column] = {
       {8.0, 4.0, 6.0}, {1.5, 2.5, 0.5}, {-2.0, -3.0, -6.0}};
 
@@ -167,9 +231,9 @@ TEST_CASE("test_serial_svd")
         ScratchVecView weight(team.team_scratch(1), row);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < column; ++j) {
-            A(i, j) = A_data[i][j];
+            A(i, j) = A_data(i, j);
           }
-          weight(i) = diagonal_entries[i];
+          weight(i) = diagonal_entries(i);
         });
 
         team.team_barrier();
@@ -200,12 +264,57 @@ TEST_CASE("test_serial_svd")
     constexpr int rowB = 3; // Adjusted matrix (3x6)
     constexpr int colB = 6;
 
-    double A_data[rowA][colA] = {
-      {4.0, 2.0, 3.0, 1.0, 0.0, 5.0}, {3.0, 5.0, 1.0, 4.0, 2.0, 6.0},
-      {2.0, 3.0, 6.0, 5.0, 1.0, 7.0}, {7.0, 8.0, 9.0, 3.0, 2.0, 1.0},
-      {1.0, 3.0, 2.0, 6.0, 4.0, 5.0}, {5.0, 7.0, 3.0, 8.0, 9.0, 6.0}};
-
-    double diagonal_entries_data[rowB] = {2.0, 0.5, -1.0};
+    Kokkos::View<double**> A_data("Device A data", row, column);
+    auto host_A_data = Kokkos::create_mirror_view(A_data);
+
+    host_A_data(0, 0) = 4.0;
+    host_A_data(0, 1) = 2.0;
+    host_A_data(0, 2) = 3.0;
+    host_A_data(0, 3) = 1.0;
+    host_A_data(0, 4) = 0.0;
+    host_A_data(0, 5) = 5.0;
+    host_A_data(1, 0) = 3.0;
+    host_A_data(1, 1) = 5.0;
+    host_A_data(1, 2) = 1.0;
+    host_A_data(1, 3) = 4.0;
+    host_A_data(1, 4) = 2.0;
+    host_A_data(1, 5) = 6.0;
+    host_A_data(2, 0) = 2.0;
+    host_A_data(2, 1) = 3.0;
+    host_A_data(2, 2) = 6.0;
+    host_A_data(2, 3) = 5.0;
+    host_A_data(2, 4) = 1.0;
+    host_A_data(2, 5) = 7.0;
+    host_A_data(3, 0) = 7.0;
+    host_A_data(3, 1) = 8.0;
+    host_A_data(3, 2) = 9.0;
+    host_A_data(3, 3) = 3.0;
+    host_A_data(3, 4) = 2.0;
+    host_A_data(3, 5) = 1.0;
+    host_A_data(4, 0) = 1.0;
+    host_A_data(4, 1) = 3.0;
+    host_A_data(4, 2) = 2.0;
+    host_A_data(4, 3) = 6.0;
+    host_A_data(4, 4) = 4.0;
+    host_A_data(4, 5) = 5.0;
+    host_A_data(5, 0) = 5.0;
+    host_A_data(5, 1) = 7.0;
+    host_A_data(5, 2) = 3.0;
+    host_A_data(5, 3) = 8.0;
+    host_A_data(5, 4) = 9.0;
+    host_A_data(5, 5) = 6.0;
+
+    Kokkos::deep_copy(A_data, host_A_data);
+
+    Kokkos::View<double*> diagonal_entries_data("Device rhs data");
+    auto host_diagonal_entries_data =
+      Kokkos::create_mirror_view(diagonal_entries_data);
+
+    host_diagonal_entries_data(0) = 2.0;
+    host_diagonal_entries_data(1) = 0.5;
+    host_diagonal_entries_data(2) = -1.0;
+
+    Kokkos::deep_copy(diagonal_entries_data, host_diagonal_entries_data);
 
     double expected_result[rowB][colB] = {{8.0, 4.0, 6.0, 2.0, 0.0, 10.0},
                                           {1.5, 2.5, 0.5, 2.0, 1.0, 3.0},
@@ -222,14 +331,14 @@ TEST_CASE("test_serial_svd")
         ScratchVecView weight(team.team_scratch(1), rowB);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) {
           for (int j = 0; j < colA; ++j) {
-            A(i, j) = A_data[i][j];
+            A(i, j) = A_data(i, j);
           }
         });
 
         team.team_barrier();
 
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
-          weight(i) = diagonal_entries_data[i];
+          weight(i) = diagonal_entries_data(i);
         });
 
         team.team_barrier();
@@ -270,9 +379,9 @@ TEST_CASE("test_serial_svd")
         detail::fill(1.0, team, weight);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < column; ++j) {
-            A(i, j) = A_data[i][j];
+            A(i, j) = A_data(i, j);
           }
-          rhs(i) = rhs_data[i];
+          rhs(i) = rhs_data(i);
         });
 
         detail::solve_matrix_svd(team, weight, rhs, A, x, 0);

From c7ff6c4d9952b5eed590ab7df90493e199e4de29 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 8 Apr 2025 13:25:12 -0400
Subject: [PATCH 57/73] CUDA backend works

---
 .../interpolator/mls_interpolation_impl.hpp   |  31 ++---
 .../pcms_interpolator_view_utils.hpp          |   3 +-
 test/test_svd_serial.cpp                      | 110 +++++++++---------
 3 files changed, 77 insertions(+), 67 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 8effb6ea..effae5dc 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -437,6 +437,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
   team.team_barrier();
 
   eval_rhs_scaling(team, weight, rhs_values);
+  team.team_barrier();
 
   // initilize U (orthogonal matrices)  array
   ScratchMatView U(team.team_scratch(1), row, row);
@@ -465,24 +466,26 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
   if (team.team_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
                                      sigma, Vt, work, 1e-6);
+  }
+  team.team_barrier();
 
-    calculate_shrinkage_factor(team, lambda, sigma);
-
-    auto Ut = find_transpose(team, U);
+  calculate_shrinkage_factor(team, lambda, sigma);
 
-    scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
+  auto Ut = find_transpose(team, U);
 
-    KokkosBatched::SerialGemm<
-      KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose,
-      KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0,
-                                                    vsigmaInvUtMul);
+  scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T
 
-    KokkosBlas::SerialGemv<
-      KokkosBlas::Trans::NoTranspose,
-      KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul,
-                                                 rhs_values, 0.0,
-                                                 solution_vector);
-  }
+  KokkosBatched::TeamGemm<
+    member_type, KokkosBatched::Trans::Transpose,
+    KokkosBatched::Trans::NoTranspose,
+    KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, Vt, temp_matrix,
+                                                  0.0, vsigmaInvUtMul);
+
+  KokkosBlas::TeamGemv<
+    member_type, KokkosBlas::Trans::NoTranspose,
+    KokkosBlas::Algo::Gemv::Unblocked>::invoke(team, 1.0, vsigmaInvUtMul,
+                                               rhs_values, 0.0,
+                                               solution_vector);
 }
 
 /**
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index 5e1f2b88..f07c9a89 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -93,7 +93,7 @@ ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix)
   int row = matrix.extent(0);
   int column = matrix.extent(1);
 
-  ScratchMatView transMatrix(team.team_scratch(0), column, row);
+  ScratchMatView transMatrix(team.team_scratch(1), column, row);
   fill(0.0, team, transMatrix);
   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
     for (int j = 0; j < column; ++j) {
@@ -167,6 +167,7 @@ void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries,
   size_t rowB = adjustedMatrix.extent(0);
   size_t colB = adjustedMatrix.extent(1);
 
+  size_t nWeights = diagonal_entries.extent(0);
   OMEGA_H_CHECK(colB == rowA);
   eval_row_scaling(team, diagonal_entries, matrixToScale);
   Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) {
diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp
index e7feb253..9087e233 100644
--- a/test/test_svd_serial.cpp
+++ b/test/test_svd_serial.cpp
@@ -55,7 +55,7 @@ TEST_CASE("test_serial_svd")
     Kokkos::View<double**> result("result", row, column);
     team_policy tp(1, Kokkos::AUTO);
     Kokkos::parallel_for(
-      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(2000)),
       KOKKOS_LAMBDA(const member_type& team) {
         ScratchMatView A(team.team_scratch(1), row, column);
         ScratchMatView U(team.team_scratch(1), row, row);
@@ -70,71 +70,35 @@ TEST_CASE("test_serial_svd")
         });
 
         ScratchMatView sigma_mat(team.team_scratch(1), row, column);
+        detail::fill(0.0, team, sigma_mat);
         ScratchMatView Usigma(team.team_scratch(1), row, column);
         detail::fill(0.0, team, Usigma);
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
-          for (int j = 0; j < column; ++j) {
-            sigma_mat(i, j) = (i == j) ? sigma(i) : 0.0;
-          }
-        });
 
         if (team.team_rank() == 0) {
-          printf("I am above svd solver function");
-          printf("A before SVD:\n");
-          for (int i = 0; i < row; ++i) {
-            for (int j = 0; j < column; ++j) {
-              printf("%f ", A(i, j));
-            }
-            printf("\n");
-          }
 
           KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U,
                                            sigma, Vt, work, 1e-6);
 
-          printf("=== Sigma Values ===\n");
           for (int i = 0; i < column; ++i) {
-            printf("sigma[%d] = %f\n", i, sigma(i));
-          }
-
-          printf("\n=== First few entries of U ===\n");
-          for (int i = 0; i < row; ++i) {
-            for (int j = 0; j < row; ++j) {
-              printf("%f ", U(i, j));
-            }
-            printf("\n");
+            sigma_mat(i, i) = sigma(i);
           }
 
-          printf("\n=== First few entries of Vt ===\n");
-          for (int i = 0; i < column; ++i) {
-            for (int j = 0; j < column; ++j) {
-              printf("%f ", Vt(i, j));
-            }
-            printf("\n");
-          }
-
-          printf("=============================\n");
-
-          printf("I am below svd solver function");
-          KokkosBatched::TeamGemm<
+          KokkosBatched::SerialGemm<
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat,
                                                           0.0, Usigma);
 
-          printf("I am below serialgemm");
-          KokkosBatched::TeamGemm<
+          KokkosBatched::SerialGemm<
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Trans::NoTranspose,
             KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0,
                                                           reconstructedA);
-          printf("I am below another serialgemm");
         }
         team.team_barrier();
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < column; ++j) {
             result(i, j) = reconstructedA(i, j);
-            printf("Copied reconstructedA(%d,%d) = %f\n", i, j,
-                   reconstructedA(i, j));
           }
         });
       });
@@ -160,7 +124,7 @@ TEST_CASE("test_serial_svd")
     Kokkos::deep_copy(transpose_expected, 0.0);
     team_policy tp(1, Kokkos::AUTO);
     Kokkos::parallel_for(
-      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)),
       KOKKOS_LAMBDA(const member_type& team) {
         ScratchMatView A(team.team_scratch(1), row, column);
         ScratchMatView U(team.team_scratch(1), row, row);
@@ -178,21 +142,23 @@ TEST_CASE("test_serial_svd")
                                            sigma, Vt, work);
         }
         team.team_barrier();
-        // detail::fill(0.0, team, sigma_inv_expected);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < row; ++j) {
             transpose_expected(i, j) = U(j, i);
           }
         });
-        auto transposedU = detail::find_transpose(team, U);
+
         team.team_barrier();
+        auto transposedU = detail::find_transpose(team, U);
 
+        team.team_barrier();
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) {
           for (int j = 0; j < row; ++j) {
             result(i, j) = transposedU(i, j);
           }
         });
       });
+
     auto host_result = Kokkos::create_mirror_view(result);
     auto host_expected = Kokkos::create_mirror_view(transpose_expected);
     Kokkos::deep_copy(host_result, result);
@@ -225,7 +191,7 @@ TEST_CASE("test_serial_svd")
     Kokkos::deep_copy(result, 0.0);
     team_policy tp(1, Kokkos::AUTO);
     Kokkos::parallel_for(
-      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)),
       KOKKOS_LAMBDA(const member_type& team) {
         ScratchMatView A(team.team_scratch(1), row, column);
         ScratchVecView weight(team.team_scratch(1), row);
@@ -264,7 +230,7 @@ TEST_CASE("test_serial_svd")
     constexpr int rowB = 3; // Adjusted matrix (3x6)
     constexpr int colB = 6;
 
-    Kokkos::View<double**> A_data("Device A data", row, column);
+    Kokkos::View<double**> A_data("Device A data", rowA, colA);
     auto host_A_data = Kokkos::create_mirror_view(A_data);
 
     host_A_data(0, 0) = 4.0;
@@ -306,7 +272,8 @@ TEST_CASE("test_serial_svd")
 
     Kokkos::deep_copy(A_data, host_A_data);
 
-    Kokkos::View<double*> diagonal_entries_data("Device rhs data");
+    Kokkos::View<double*> diagonal_entries_data("Device diagonal entries data",
+                                                rowB);
     auto host_diagonal_entries_data =
       Kokkos::create_mirror_view(diagonal_entries_data);
 
@@ -324,9 +291,10 @@ TEST_CASE("test_serial_svd")
     Kokkos::deep_copy(result, 0.0);
     team_policy tp(1, Kokkos::AUTO);
     Kokkos::parallel_for(
-      tp.set_scratch_size(1, Kokkos::PerTeam(500)),
+      tp.set_scratch_size(1, Kokkos::PerTeam(1000)),
       KOKKOS_LAMBDA(const member_type& team) {
         ScratchMatView A(team.team_scratch(1), rowA, colA);
+        detail::fill(0.0, team, A);
         ScratchMatView result_matrix(team.team_scratch(1), rowB, colB);
         ScratchVecView weight(team.team_scratch(1), rowB);
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) {
@@ -342,6 +310,17 @@ TEST_CASE("test_serial_svd")
         });
 
         team.team_barrier();
+        if (team.team_rank() == 0) {
+          for (int i = 0; i < rowA; ++i) {
+            for (int j = 0; j < colA; ++j) {
+              printf("A(%d,%d) = %f\n", i, j, A(i, j));
+            }
+          }
+
+          for (int i = 0; i < rowB; ++i) {
+            printf("weight(%d) = %f\n", i, weight(i));
+          }
+        }
 
         detail::scale_and_adjust(team, weight, A, result_matrix);
         team.team_barrier();
@@ -354,8 +333,9 @@ TEST_CASE("test_serial_svd")
     auto host_result = Kokkos::create_mirror_view(result);
     Kokkos::deep_copy(host_result, result);
 
-    for (int i = 0; i < row; ++i) {
-      for (int j = 0; j < row; ++j) {
+    for (int i = 0; i < rowB; ++i) {
+      for (int j = 0; j < colB; ++j) {
+        CAPTURE(i, j, host_result(i, j), expected_result[i][j]);
         CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs(
                                         expected_result[i][j], tolerance));
       }
@@ -368,7 +348,7 @@ TEST_CASE("test_serial_svd")
     Kokkos::View<double*> result("result", column);
     team_policy tp(1, Kokkos::AUTO);
     Kokkos::parallel_for(
-      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(800)),
+      "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)),
       KOKKOS_LAMBDA(const member_type& team) {
         ScratchMatView A(team.team_scratch(1), row, column);
         ScratchVecView rhs(team.team_scratch(1), row);
@@ -384,7 +364,32 @@ TEST_CASE("test_serial_svd")
           rhs(i) = rhs_data(i);
         });
 
-        detail::solve_matrix_svd(team, weight, rhs, A, x, 0);
+        team.team_barrier();
+
+        if (team.team_rank() == 0) {
+          printf("\n[A matrix before solve_matrix_svd]\n");
+          for (int i = 0; i < row; ++i) {
+            for (int j = 0; j < column; ++j) {
+              printf("A(%d, %d) = %f\t", i, j, A(i, j));
+            }
+            printf("\n");
+          }
+
+          printf("[rhs vector before solve_matrix_svd]\n");
+          for (int i = 0; i < row; ++i) {
+            printf("rhs[%d] = %f\n", i, rhs(i));
+          }
+        }
+
+        team.team_barrier();
+        detail::solve_matrix_svd(team, weight, rhs, A, x, 1e-12);
+        team.team_barrier();
+        if (team.team_rank() == 0) {
+          printf("[solution vector x after solve_matrix_svd]\n");
+          for (int i = 0; i < column; ++i) {
+            printf("x[%d] = %f\n", i, x(i));
+          }
+        }
         Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column),
                              [=](int i) { result(i) = x(i); });
       });
@@ -392,6 +397,7 @@ TEST_CASE("test_serial_svd")
     Kokkos::deep_copy(host_result, result);
 
     for (int i = 0; i < column; ++i) {
+      CAPTURE(i, host_result(i), expected_solution[i]);
       CHECK_THAT(host_result(i),
                  Catch::Matchers::WithinAbs(expected_solution[i], tolerance));
     }

From c038fceab8c496de15400b109f81830568d2f7df Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Wed, 9 Apr 2025 10:42:23 -0400
Subject: [PATCH 58/73] change lambda factor to lambda

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 6 ++----
 test/test_linear_solver.cpp                      | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index bd36d17f..95be580d 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -346,8 +346,7 @@ KOKKOS_INLINE_FUNCTION
 ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
                                             const ScratchVecView& weight_vector,
                                             const ScratchVecView& rhs,
-                                            member_type team,
-                                            double lambda_factor)
+                                            member_type team, double lambda)
 {
 
   int m = matrix.extent(0);
@@ -383,7 +382,7 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
 
   team.team_barrier();
 
-  add_regularization(team, square_matrix, lambda_factor);
+  add_regularization(team, square_matrix, lambda);
 
   team.team_barrier();
 
@@ -701,7 +700,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
           create_vandermonde_matrix(local_source_points, j, slice_length,
                                     vandermonde_matrix);
         });
->>>>>>> mls_interpolation_svd_solver
 
       team.team_barrier();
 
diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp
index a4fe9df2..d63477dd 100644
--- a/test/test_linear_solver.cpp
+++ b/test/test_linear_solver.cpp
@@ -101,7 +101,7 @@ TEST_CASE("LU solver test")
         team.team_barrier();
 
         auto result = pcms::detail::convert_normal_equation(
-          vandermonde_matrix, phi, support_values, team);
+          vandermonde_matrix, phi, support_values, team, 0.0);
 
         team.team_barrier();
 

From fe5b7d1d7e155620dedc39f487501d316d892518 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 11:10:33 -0400
Subject: [PATCH 59/73] clean up

---
 src/pcms/interpolator/mls_interpolation_impl.hpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 95be580d..59486cd3 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -612,7 +612,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       fill(0.0, team, target_basis_vector);
       fill(0.0, team, solution_coefficients);
 
-      Logger logger(10);
+      Logger logger(15);
       // storing the coords of local supports
       int count = -1;
       for (int j = start_ptr; j < end_ptr; ++j) {
@@ -707,6 +707,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                        "vandermonde matrix");
 
       OMEGA_H_CHECK_PRINTF(
+
         support.radii2[league_rank] > 0,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
         support.radii2[league_rank]);
@@ -717,6 +718,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       double target_value = KokkosBlas::Experimental::dot(
         team, solution_coefficients, target_basis_vector);
+
+      // printf("Target Point : %d \t\t Value: %5.6f\n", league_rank,
+      // target_value);
       logger.logScalar(team, LogLevel::DEBUG, target_value,
                        "interpolated value");
       if (team.team_rank() == 0) {

From 8d6d705f256724e138e7d6d1404d3ddf1bf47e43 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 11:15:10 -0400
Subject: [PATCH 60/73] comment out logger

---
 .../interpolator/mls_interpolation_impl.hpp   | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 59486cd3..d60aec97 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -612,8 +612,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       fill(0.0, team, target_basis_vector);
       fill(0.0, team, solution_coefficients);
 
-      Logger logger(15);
-      // storing the coords of local supports
+      // Logger logger(15);
+      //  storing the coords of local supports
       int count = -1;
       for (int j = start_ptr; j < end_ptr; ++j) {
         count++;
@@ -624,16 +624,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         }
       }
 
-      logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
-                       "Support Coordinates");
+      // logger.logMatrix(team, LogLevel::DEBUG, local_source_points,
+      //                  "Support Coordinates");
       double target_point[MAX_DIM] = {};
 
       for (int i = 0; i < dim; ++i) {
         target_point[i] = target_coordinates[league_rank * dim + i];
       }
 
-      logger.logArray(team, LogLevel::DEBUG, target_point, dim,
-                      "Target points");
+      // logger.logArray(team, LogLevel::DEBUG, target_point, dim,
+      //                 "Target points");
 
       /** phi(nsupports) is the array of rbf functions evaluated at the
        * source supports In the actual implementation, Phi(nsupports,
@@ -667,8 +667,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
-      logger.log(team, LogLevel::DEBUG, "The search  starts");
-      logger.logVector(team, LogLevel::DEBUG, support_values, "Support values");
+      // logger.log(team, LogLevel::DEBUG, "The search  starts");
+      // logger.logVector(team, LogLevel::DEBUG, support_values, "Support
+      // values");
 
       /**
        *
@@ -703,8 +704,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       team.team_barrier();
 
-      logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
-                       "vandermonde matrix");
+      // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix,
+      //                  "vandermonde matrix");
 
       OMEGA_H_CHECK_PRINTF(
 
@@ -721,8 +722,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
 
       // printf("Target Point : %d \t\t Value: %5.6f\n", league_rank,
       // target_value);
-      logger.logScalar(team, LogLevel::DEBUG, target_value,
-                       "interpolated value");
+      // logger.logScalar(team, LogLevel::DEBUG, target_value,
+      //                  "interpolated value");
       if (team.team_rank() == 0) {
         OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n",
                              league_rank);

From abae37a592144a19dd164486991d61013a9de399 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 11:44:56 -0400
Subject: [PATCH 61/73] bring tol to user level

---
 src/pcms/interpolator/mls_interpolation.cpp   | 10 ++---
 src/pcms/interpolator/mls_interpolation.hpp   |  2 +-
 .../interpolator/mls_interpolation_impl.hpp   | 43 +++++++++++++++----
 3 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index a98ed14e..65af2b5e 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -112,7 +112,7 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
                               const LO& degree, RadialBasisFunction bf,
-                              double lambda)
+                              double lambda, double tol)
 {
 
   const auto nvertices_target = target_coordinates.size() / dim;
@@ -123,25 +123,25 @@ Write<Real> mls_interpolation(const Reals source_values,
     case RadialBasisFunction::RBF_GAUSSIAN:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_GAUSSIAN{}, lambda);
+        degree, RBF_GAUSSIAN{}, lambda, tol);
       break;
 
     case RadialBasisFunction::RBF_C4:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_C4{}, lambda);
+        degree, RBF_C4{}, lambda, tol);
       break;
 
     case RadialBasisFunction::RBF_CONST:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_CONST{}, lambda);
+        degree, RBF_CONST{}, lambda, tol);
       break;
 
     case RadialBasisFunction::NO_OP:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, NoOp{}, lambda);
+        degree, NoOp{}, lambda, tol);
       break;
   }
 
diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 8169d453..842e2015 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -21,6 +21,6 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
                               const LO& degree, RadialBasisFunction bf,
-                              double lambda = 0);
+                              double lambda = 0, double tol = 1e-6);
 } // namespace pcms
 #endif
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index d60aec97..f80d0a59 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -409,8 +409,8 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
  * @return None. The solution is directly written to the `rhs` vector.
  */
 KOKKOS_INLINE_FUNCTION
-void solve_matrix(const ScratchMatView& square_matrix,
-                  const ScratchVecView& rhs, member_type team)
+void solve_matrix_lu(const ScratchMatView& square_matrix,
+                     const ScratchVecView& rhs, member_type team)
 {
 
   // Perform LU decomposition
@@ -426,10 +426,36 @@ void solve_matrix(const ScratchMatView& square_matrix,
     KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs);
 }
 
+/**
+ * @brief Solves a regularized linear system Ax = b using SVD decomposition
+ * within a Kokkos parallel team.
+ *
+ * This function solves a (possibly overdetermined) linear system using Singular
+ * Value Decomposition (SVD), The regularization term (lambda) helps stabilize
+ * the solution especially in ill-conditioned cases.
+ *
+ *
+ * @param team The Kokkos team member executing this function.
+ * @param weight A scratch-space vector of weights (RBF weights).
+ * @param rhs_values A scratch-space vector representing the right-hand side
+ * vector b (m). It may be modified during the computation.
+ * @param matrix A scratch-space matrix representing the system matrix A (m x
+ * n).
+ * @param solution_vector The output vector x (n). It will contain the computed
+ * solution after the function completes.
+ * @param lambda The regularization coefficient. Set to 0.0 for pure SVD; use a
+ * positive value for Tikhonov regularization.
+ * @param tol  The  tolerance threshold used to discard small singular values in
+ * the decomposition
+ *
+ *
+ * @note This function does not return a value; the solution is written directly
+ * to `solution_vector`.
+ */
 KOKKOS_INLINE_FUNCTION
 void solve_matrix_svd(member_type team, const ScratchVecView& weight,
                       ScratchVecView rhs_values, ScratchMatView matrix,
-                      ScratchVecView solution_vector, double lambda)
+                      ScratchVecView solution_vector, double lambda, double tol)
 {
 
   int row = matrix.extent(0);
@@ -477,7 +503,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight,
 
   if (team.team_rank() == 0) {
     KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U,
-                                     sigma, Vt, work, 1e-6);
+                                     sigma, Vt, work, tol);
   }
   team.team_barrier();
 
@@ -523,7 +549,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                        const SupportResults& support, const LO& dim,
                        const LO& degree, Func rbf_func,
                        RealDefaultScalarArrayView approx_target_values,
-                       double lambda)
+                       double lambda, double tol)
 {
   PCMS_FUNCTION_TIMER;
   static_assert(std::is_invocable_r_v<double, Func, double, double>,
@@ -714,7 +740,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         support.radii2[league_rank]);
 
       solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix,
-                       solution_coefficients, lambda);
+                       solution_coefficients, lambda, tol);
       team.team_barrier();
 
       double target_value = KokkosBlas::Experimental::dot(
@@ -754,7 +780,8 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
-                              const LO& degree, Func rbf_func, double lambda)
+                              const LO& degree, Func rbf_func, double lambda,
+                              double tol)
 {
   const auto nsources = source_coordinates.size() / dim;
 
@@ -779,7 +806,7 @@ Write<Real> mls_interpolation(const Reals source_values,
 
   mls_interpolation(source_values_array_view, source_coordinates_array_view,
                     target_coordinates_array_view, support, dim, degree,
-                    rbf_func, interpolated_values_array_view, lambda);
+                    rbf_func, interpolated_values_array_view, lambda, tol);
 
   return interpolated_values;
 }

From 16776a8f5be7fec3a00707536407c0ac60a314a8 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 12:01:19 -0400
Subject: [PATCH 62/73] add comment

---
 src/pcms/interpolator/mls_interpolation.hpp | 62 +++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 842e2015..0bbbc74a 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -7,6 +7,33 @@ using namespace Omega_h;
 
 namespace pcms
 {
+
+/**
+ * @brief Enumeration of supported radial basis functions (RBF) for MLS
+ *interpolation.
+ *
+ * This enum specifies the type of radial basis function used to weight source
+ *points in the Moving Least Squares (MLS) interpolation process.
+ *
+ * Members:
+ * - RBF_GAUSSIAN:
+ *     A Gaussian RBF: `exp(- a^2 * r^2)`, smooth and commonly used. Good for
+ *localized support. `a` is a spreading/decay factor
+ *
+ * - RBF_C4:
+ *     A compactly supported C4-continuous function. Useful for bounded support
+ *and efficiency.
+ *
+ * - RBF_CONST:
+ *     Constant basis function. Effectively uses uniform weights.
+ *
+ * - NO_OP:
+ *     No operation. Disables RBF weighting — typically used when weights
+ *     are externally defined or not needed.
+ *
+ * @note These are intended to be passed into function `mls_interpolation` to
+ *control weighting behavior.
+ */
 enum class RadialBasisFunction : LO
 {
   RBF_GAUSSIAN = 0,
@@ -16,6 +43,41 @@ enum class RadialBasisFunction : LO
 
 };
 
+/**
+ * @brief Performs Moving Least Squares (MLS) interpolation at target points.
+ *
+ * This function computes interpolated values at a set of target coordinates
+ * using Moving Least Squares (MLS) based on the provided source values and
+ * coordinates. It supports different radial basis functions (RBFs), polynomial
+ * degrees, dimension, optional regularization and tolerance
+ *
+ * @param source_values        A flat array of source data values. Length should
+ * be `num_sources`.
+ * @param source_coordinates   A flat array of source point coordinates. Length
+ * should be `num_sources * dim`.
+ * @param target_coordinates   A flat array of target point coordinates. Length
+ * should be `num_targets * dim`.
+ * @param support              A data structure holding neighbor information for
+ * each target (in CSR format).
+ * @param dim                  Dimension
+ * @param degree               Polynomial degree
+ * @param bf                   The radial basis function used for weighting
+ * (e.g., Gaussian, C4).
+ * @param lambda               Optional regularization parameter (default is
+ * 0.0). Helps with stability in ill-conditioned systems.
+ * @param tol                  Optional solver tolerance (default is 1e-6).
+ * Small singular values below this are discarded.
+ *
+ * @return A Write<Real> array containing the interpolated values at each target
+ * point.
+ *
+ * @note
+ * - All input arrays are expected to reside in device memory (e.g., Kokkos
+ * device views).
+ * - Ensure consistency in dimensions: coordinate arrays must be sized as
+ * `num_points * dim`.
+ * - The result array length will match the number of target points.
+ */
 Write<Real> mls_interpolation(const Reals source_values,
                               const Reals source_coordinates,
                               const Reals target_coordinates,

From 86b1458e0a97ada45e1bb66a47b77b316930fe6e Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 12:35:17 -0400
Subject: [PATCH 63/73] update the arguments

---
 test/test_svd_serial.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp
index 9087e233..c638d048 100644
--- a/test/test_svd_serial.cpp
+++ b/test/test_svd_serial.cpp
@@ -382,7 +382,7 @@ TEST_CASE("test_serial_svd")
         }
 
         team.team_barrier();
-        detail::solve_matrix_svd(team, weight, rhs, A, x, 1e-12);
+        detail::solve_matrix_svd(team, weight, rhs, A, x, 0, 1e-12);
         team.team_barrier();
         if (team.team_rank() == 0) {
           printf("[solution vector x after solve_matrix_svd]\n");

From 2d867fb1040d52efb7b3c7dc60bbc7dfc869c920 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 12:35:49 -0400
Subject: [PATCH 64/73] delete test_linear_solver.cpp

---
 test/test_linear_solver.cpp | 233 ------------------------------------
 1 file changed, 233 deletions(-)
 delete mode 100644 test/test_linear_solver.cpp

diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp
deleted file mode 100644
index d63477dd..00000000
--- a/test/test_linear_solver.cpp
+++ /dev/null
@@ -1,233 +0,0 @@
-#include <catch2/catch_test_macros.hpp>
-#include <catch2/catch_approx.hpp>
-#include <pcms/interpolator/mls_interpolation_impl.hpp>
-#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
-#include <Omega_h_mesh.hpp>
-#include <Omega_h_build.hpp>
-#include <Omega_h_fail.hpp>
-#include <catch2/matchers/catch_matchers_floating_point.hpp>
-
-TEST_CASE("LU solver test")
-{
-  // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ
-  // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal
-  // matrix with diagonal elements stored as vector and b(n,1) is a vector
-  SECTION("check convert normal equation ")
-  {
-
-    int nvertices_target = 2;
-
-    int nsupports = 3;
-
-    int size = 2;
-
-    Kokkos::View<double**> Q("phi vector", nvertices_target, nsupports);
-
-    Kokkos::View<double**> b("rhs vector", nvertices_target, nsupports);
-
-    Kokkos::View<double***> P("vandermonde matrix", nvertices_target, nsupports,
-                              size);
-
-    Kokkos::View<double***> TransPQP("moment matrix", nvertices_target, size,
-                                     size);
-
-    Kokkos::View<double***> TransPQ("scaled matrix", nvertices_target, size,
-                                    nsupports);
-
-    Kokkos::View<double**> TransPQb("transformed rhs", nvertices_target, size);
-
-    Kokkos::View<double**> solution("solution unknown vector", nvertices_target,
-                                    size);
-
-    auto host_Q = Kokkos::create_mirror_view(Q);
-    auto host_P = Kokkos::create_mirror_view(P);
-    auto host_b = Kokkos::create_mirror_view(b);
-
-    host_Q(0, 0) = 1.0;
-    host_Q(0, 1) = 1.0;
-    host_Q(0, 2) = 1.0;
-
-    host_Q(1, 0) = 3.0;
-    host_Q(1, 1) = 2.0;
-    host_Q(1, 2) = 1.0;
-
-    Kokkos::deep_copy(Q, host_Q);
-
-    host_P(0, 0, 0) = 1.0;
-    host_P(0, 0, 1) = 2.0;
-    host_P(0, 1, 0) = 3.0;
-    host_P(0, 1, 1) = 4.0;
-    host_P(0, 2, 0) = 5.0;
-    host_P(0, 2, 1) = 6.0;
-
-    host_P(1, 0, 0) = 2.0;
-    host_P(1, 0, 1) = 3.0;
-    host_P(1, 1, 0) = 1.0;
-    host_P(1, 1, 1) = -1.0;
-    host_P(1, 2, 0) = 4.0;
-    host_P(1, 2, 1) = 1.0;
-
-    Kokkos::deep_copy(P, host_P);
-
-    host_b(0, 0) = 7.0;
-    host_b(0, 1) = 8.0;
-    host_b(0, 2) = 9.0;
-
-    host_b(1, 0) = 5.0;
-    host_b(1, 1) = 6.0;
-    host_b(1, 2) = 7.0;
-
-    Kokkos::deep_copy(b, host_b);
-
-    pcms::team_policy tp(nvertices_target, Kokkos::AUTO);
-    Kokkos::parallel_for(
-      "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)),
-      KOKKOS_LAMBDA(const pcms::member_type& team) {
-        int i = team.league_rank();
-        pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports,
-                                                size);
-        pcms::ScratchVecView phi(team.team_scratch(0), nsupports);
-        pcms::ScratchVecView support_values(team.team_scratch(0), nsupports);
-
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports),
-                             [=](int j) {
-                               phi(j) = Q(i, j);
-                               support_values(j) = b(i, j);
-                               for (int k = 0; k < size; ++k) {
-                                 vandermonde_matrix(j, k) = P(i, j, k);
-                               }
-                             });
-
-        team.team_barrier();
-
-        auto result = pcms::detail::convert_normal_equation(
-          vandermonde_matrix, phi, support_values, team, 0.0);
-
-        team.team_barrier();
-
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) {
-          TransPQb(i, j) = result.transformed_rhs(j);
-          for (int k = 0; k < size; ++k) {
-            TransPQP(i, j, k) = result.square_matrix(j, k);
-          }
-        });
-
-        team.team_barrier();
-
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports),
-                             [=](int j) {
-                               for (int k = 0; k < size; ++k) {
-                                 TransPQ(i, k, j) = result.scaled_matrix(k, j);
-                               }
-                             });
-
-        team.team_barrier();
-
-        pcms::detail::solve_matrix(result.square_matrix, result.transformed_rhs,
-                                   team);
-
-        team.team_barrier();
-
-        Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) {
-          solution(i, j) = result.transformed_rhs(j);
-        });
-
-        team.team_barrier();
-      });
-
-    auto host_result_lhs = Kokkos::create_mirror_view(TransPQP);
-    auto host_result_rhs = Kokkos::create_mirror_view(TransPQb);
-    auto host_result_scaled = Kokkos::create_mirror_view(TransPQ);
-    auto host_result_solution = Kokkos::create_mirror_view(solution);
-
-    Kokkos::deep_copy(host_result_lhs, TransPQP);
-    Kokkos::deep_copy(host_result_rhs, TransPQb);
-    Kokkos::deep_copy(host_result_scaled, TransPQ);
-    Kokkos::deep_copy(host_result_solution, solution);
-
-    Kokkos::View<double***, Kokkos::HostSpace> expected_lhs_matrix(
-      "expected lhs matrix", nvertices_target, size, size);
-
-    Kokkos::View<double**, Kokkos::HostSpace> expected_rhs_vector(
-      "expected rhs vector", nvertices_target, size);
-
-    Kokkos::View<double***, Kokkos::HostSpace> expected_scaled_matrix(
-      "expected scaled matrix", nvertices_target, size, nsupports);
-
-    Kokkos::View<double**, Kokkos::HostSpace> expected_solution(
-      "expected solution", nvertices_target, size);
-
-    expected_lhs_matrix(0, 0, 0) = 35.5;
-    expected_lhs_matrix(0, 0, 1) = 44.0;
-    expected_lhs_matrix(0, 1, 0) = 44.0;
-    expected_lhs_matrix(0, 1, 1) = 56.5;
-
-    expected_rhs_vector(0, 0) = 76.0;
-    expected_rhs_vector(0, 1) = 100.0;
-
-    expected_scaled_matrix(0, 0, 0) = 1.0;
-    expected_scaled_matrix(0, 0, 1) = 3.0;
-    expected_scaled_matrix(0, 0, 2) = 5.0;
-    expected_scaled_matrix(0, 1, 0) = 2.0;
-    expected_scaled_matrix(0, 1, 1) = 4.0;
-    expected_scaled_matrix(0, 1, 2) = 6.0;
-
-    expected_solution(0, 0) = -1.519713;
-    expected_solution(0, 1) = 2.953405;
-
-    expected_lhs_matrix(1, 0, 0) = 30.5;
-    expected_lhs_matrix(1, 0, 1) = 20.0;
-    expected_lhs_matrix(1, 1, 0) = 20.0;
-    expected_lhs_matrix(1, 1, 1) = 30.5;
-
-    expected_rhs_vector(1, 0) = 70.0;
-    expected_rhs_vector(1, 1) = 40.0;
-
-    expected_scaled_matrix(1, 0, 0) = 6.0;
-    expected_scaled_matrix(1, 0, 1) = 2.0;
-    expected_scaled_matrix(1, 0, 2) = 4.0;
-    expected_scaled_matrix(1, 1, 0) = 9.0;
-    expected_scaled_matrix(1, 1, 1) = -2.0;
-    expected_scaled_matrix(1, 1, 2) = 1.0;
-
-    expected_solution(1, 0) = 2.517680;
-    expected_solution(1, 1) = -0.339463;
-
-    for (int i = 0; i < nvertices_target; ++i) {
-      for (int j = 0; j < size; ++j) {
-        for (int l = 0; l < nsupports; ++l) {
-          printf("scaled_matrix (A^T Q) (%f, %f) at (%d,%d,%d)\n",
-                 expected_scaled_matrix(i, j, l), host_result_scaled(i, j, l),
-                 i, j, l);
-          REQUIRE_THAT(
-            expected_scaled_matrix(i, j, l),
-            Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-6));
-        }
-      }
-      for (int j = 0; j < size; ++j) {
-        for (int k = 0; k < size; ++k) {
-          printf("A^T Q A (%f, %f) at (%d,%d,%d)\n",
-                 expected_lhs_matrix(i, j, k), host_result_lhs(i, j, k), i, j,
-                 k);
-          REQUIRE_THAT(
-            expected_lhs_matrix(i, j, k),
-            Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-6));
-        }
-      }
-      for (int j = 0; j < size; ++j) {
-        printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j),
-               host_result_rhs(i, j), i, j);
-        REQUIRE_THAT(expected_rhs_vector(i, j),
-                     Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-6));
-      }
-
-      for (int j = 0; j < size; ++j) {
-        printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_solution(i, j),
-               host_result_solution(i, j), i, j);
-        REQUIRE_THAT(
-          expected_solution(i, j),
-          Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-6));
-      }
-    }
-  }
-}

From 0f6b255b2b861e513412a5ff893174836261b11f Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 12:40:24 -0400
Subject: [PATCH 65/73] remove test_linear_solver

---
 test/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index ad3a7b25..5f9662cc 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -110,7 +110,6 @@ if(Catch2_FOUND)
           test_point_search.cpp
           test_mls_basis.cpp
           test_rbf_interp.cpp
-          test_linear_solver.cpp
           test_normalisation.cpp
 		  test_spr_meshfields.cpp
 		  test_svd_serial.cpp)

From cfd8c8bbd8fef29b0b7e9a3bd48e6e25d6c513f9 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Tue, 6 May 2025 19:05:08 -0400
Subject: [PATCH 66/73] remmove lu solver and add svd solver

Previously we implemented a normal equation based solver using LU
decomposition. The solver didn't work well for ill conditioned systems.
The current implementation is based on requalrized SVD decomposition
---
 .../interpolator/mls_interpolation_impl.hpp   | 151 ++----------------
 1 file changed, 10 insertions(+), 141 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index f80d0a59..4bc8ba59 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -286,149 +286,11 @@ void scale_column_trans_matrix(const ScratchMatView& matrix,
   }
 }
 
-KOKKOS_INLINE_FUNCTION
-void add_regularization(const member_type& team, ScratchMatView& square_matrix,
-                        Real lambda_factor)
-{
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, square_matrix.extent(0)),
-                       [=](int i) { square_matrix(i, i) += lambda_factor; });
 }
 
 /**
- * @struct ResultConvertNormal
- * @brief Stores the results of matrix and vector transformations.
- *
- * This struct represents the results of the following operations:
- * - `P^T Q` (scaled matrix): The matrix obtained after scaling the columns of
- * the given matrix.
- * - `P^T Q P` (square matrix): The matrix obtained after performing the `P^T Q
- * P` operation.
- * - `P^T b` (transformed RHS): The vector obtained after applying the `P^T b`
- * operation.
- *
- * The struct is used to store:
- * - The scaled matrix (`scaled_matrix`).
- * - The square matrix (`square_matrix`).
- * - The transformed right-hand side vector (`transformed_rhs`).
- */
-struct ResultConvertNormal
-{
-  ScratchMatView scaled_matrix;
-  ScratchMatView square_matrix;
-  ScratchVecView transformed_rhs;
-};
-
-/**
- * @brief Converts a normal equation into a simplified form.
- *
- * This function takes a matrix, a basis/weight vector, and a right-hand side
- * (RHS) vector and computes the scaled matrix, square matrix, and transformed
- * RHS.
- *
- * For example, the normal equation `P^T Q P x = P^T Q b` is converted into the
- * form `Ax = b'`, where:
- * - `A` (square matrix) = `P^T Q P`
- * - `b'` (transformed RHS) = `P^T Q b`
- *
- * @param matrix The input matrix.
- * @param weight_vector The weight or basis vector.
- * @param rhs The right-hand side vector of the equation.
- * @param team The team member executing the task (optional, if applicable).
- *
- * @return The result of the normal equation conversion as a struct containing:
- * - The scaled matrix.
- * - The square matrix.
- * - The transformed RHS.
- *
- * @todo Implement QR decomposition to solve the linear system.
- */
-KOKKOS_INLINE_FUNCTION
-ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix,
-                                            const ScratchVecView& weight_vector,
-                                            const ScratchVecView& rhs,
-                                            member_type team, double lambda)
-{
-
-  int m = matrix.extent(0);
-  int n = matrix.extent(1);
-
-  ScratchMatView scaled_matrix(team.team_scratch(1), n, m);
-
-  ScratchMatView square_matrix(team.team_scratch(1), n, n);
-
-  ScratchVecView transformed_rhs(team.team_scratch(1), n);
-
-  // performing P^T Q
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) {
-    for (int k = 0; k < m; ++k) {
-      scaled_matrix(j, k) = 0;
-    }
-  });
-
-  team.team_barrier();
-
-  Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) {
-    scale_column_trans_matrix(matrix, weight_vector, team, j, scaled_matrix);
-  });
-
-  team.team_barrier();
-
-  // performing (P^T Q P)
-  KokkosBatched::TeamGemm<
-    member_type, KokkosBatched::Trans::NoTranspose,
-    KokkosBatched::Trans::NoTranspose,
-    KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, scaled_matrix,
-                                                  matrix, 0.0, square_matrix);
-
-  team.team_barrier();
-
-  add_regularization(team, square_matrix, lambda);
-
-  team.team_barrier();
-
-  KokkosBlas::Experimental::
-    Gemv<KokkosBlas::Mode::Team, KokkosBlas::Algo::Gemv::Unblocked>::invoke(
-      team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs);
-
-  team.team_barrier();
-
-  return ResultConvertNormal{scaled_matrix, square_matrix, transformed_rhs};
-}
-
-/**
- * @brief Solves the matrix equation Ax = b' using LU decomposition.
- *
- * This function solves the given matrix equation using LU decomposition.
- * The solution vector `x'` overwrites the input `rhs` vector.
- *
- * @param square_matrix The input matrix A (must be square).
- * @param rhs The right-hand side vector b. After execution, it is overwritten
- * with the solution vector x.
- * @param team The team member executing the task.
- *
- * @return None. The solution is directly written to the `rhs` vector.
- */
-KOKKOS_INLINE_FUNCTION
-void solve_matrix_lu(const ScratchMatView& square_matrix,
-                     const ScratchVecView& rhs, member_type team)
-{
-
-  // Perform LU decomposition
-  KokkosBatched::TeamLU<
-    member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team,
-                                                             square_matrix);
-
-  team.team_barrier();
-
-  //   Solve the equation with forward and backward solves
-  KokkosBatched::TeamSolveLU<
-    member_type, KokkosBatched::Trans::NoTranspose,
-    KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs);
-}
-
-/**
- * @brief Solves a regularized linear system Ax = b using SVD decomposition
- * within a Kokkos parallel team.
+ * @brief Solves a regularized weighted linear system WAx = Wb using SVD
+ * decomposition within a Kokkos parallel team.
  *
  * This function solves a (possibly overdetermined) linear system using Singular
  * Value Decomposition (SVD), The regularization term (lambda) helps stabilize
@@ -436,15 +298,22 @@ void solve_matrix_lu(const ScratchMatView& square_matrix,
  *
  *
  * @param team The Kokkos team member executing this function.
- * @param weight A scratch-space vector of weights (RBF weights).
+ *
+ * @param weight A scratch-space vector of RBF weights (diagonal elements of
+ * weight matrix W). size is m.
+ *
  * @param rhs_values A scratch-space vector representing the right-hand side
  * vector b (m). It may be modified during the computation.
+ *
  * @param matrix A scratch-space matrix representing the system matrix A (m x
  * n).
+ *
  * @param solution_vector The output vector x (n). It will contain the computed
  * solution after the function completes.
+ *
  * @param lambda The regularization coefficient. Set to 0.0 for pure SVD; use a
  * positive value for Tikhonov regularization.
+ *
  * @param tol  The  tolerance threshold used to discard small singular values in
  * the decomposition
  *

From cb74274b42d2983fb11e9447a121fb7e31262765 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Thu, 8 May 2025 12:31:12 -0400
Subject: [PATCH 67/73] make decay factor user defined parameter

---
 src/pcms/interpolator/mls_interpolation.hpp | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 0bbbc74a..94480802 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -3,8 +3,6 @@
 
 #include "mls_interpolation_impl.hpp"
 
-using namespace Omega_h;
-
 namespace pcms
 {
 
@@ -58,7 +56,8 @@ enum class RadialBasisFunction : LO
  * @param target_coordinates   A flat array of target point coordinates. Length
  * should be `num_targets * dim`.
  * @param support              A data structure holding neighbor information for
- * each target (in CSR format).
+ * 								each target (in
+ * CSR format).
  * @param dim                  Dimension
  * @param degree               Polynomial degree
  * @param bf                   The radial basis function used for weighting
@@ -78,11 +77,12 @@ enum class RadialBasisFunction : LO
  * `num_points * dim`.
  * - The result array length will match the number of target points.
  */
-Write<Real> mls_interpolation(const Reals source_values,
-                              const Reals source_coordinates,
-                              const Reals target_coordinates,
-                              const SupportResults& support, const LO& dim,
-                              const LO& degree, RadialBasisFunction bf,
-                              double lambda = 0, double tol = 1e-6);
+Write<Real> mls_interpolation(const Omega_h::Reals source_values,
+                              const Omega_h::Reals source_coordinates,
+                              const Omega_h::Reals target_coordinates,
+                              const SupportResults& support,
+                              const Omega_h::LO& dim, const Omega_h::LO& degree,
+                              RadialBasisFunction bf, double lambda = 0,
+                              double tol = 1e-6);
 } // namespace pcms
 #endif

From bf1229f37ad680ea108b682eac876b3e9395ca53 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Thu, 8 May 2025 12:32:13 -0400
Subject: [PATCH 68/73] make decay factor user defined parameter

---
 src/pcms/interpolator/mls_interpolation.cpp | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 65af2b5e..c0e59179 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -7,6 +7,14 @@ namespace pcms
 // RBF_GAUSSIAN Functor
 struct RBF_GAUSSIAN
 {
+  // 'a' is a spreading factor/decay factor
+  // the value of 'a' is higher if the data is localized
+  // the value of 'a' is smaller if the data is farther
+
+  double a;
+
+  RBF_GAUSSIAN(double a_val) : a(a_val) {}
+
   OMEGA_H_INLINE
   double operator()(double r_sq, double rho_sq) const
   {
@@ -21,11 +29,6 @@ struct RBF_GAUSSIAN
                          "but the value is %.16f\n",
                          r_sq);
 
-    // 'a' is a spreading factor/decay factor
-    // the value of 'a' is higher if the data is localized
-    // the value of 'a' is smaller if the data is farther
-    int a = 5;
-
     double r = sqrt(r_sq);
     double rho = sqrt(rho_sq);
     double ratio = r / rho;
@@ -45,6 +48,7 @@ struct RBF_GAUSSIAN
 // RBF_C4 Functor
 struct RBF_C4
 {
+
   OMEGA_H_INLINE
   double operator()(double r_sq, double rho_sq) const
   {
@@ -76,6 +80,7 @@ struct RBF_C4
 //
 struct RBF_CONST
 {
+
   OMEGA_H_INLINE
   double operator()(double r_sq, double rho_sq) const
   {
@@ -112,7 +117,8 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
                               const LO& degree, RadialBasisFunction bf,
-                              double lambda, double tol)
+                              double lambda, double tol,
+                              double decay_factor = 5.0)
 {
 
   const auto nvertices_target = target_coordinates.size() / dim;
@@ -123,7 +129,7 @@ Write<Real> mls_interpolation(const Reals source_values,
     case RadialBasisFunction::RBF_GAUSSIAN:
       interpolated_values = detail::mls_interpolation(
         source_values, source_coordinates, target_coordinates, support, dim,
-        degree, RBF_GAUSSIAN{}, lambda, tol);
+        degree, RBF_GAUSSIAN{decay_factor}, lambda, tol);
       break;
 
     case RadialBasisFunction::RBF_C4:

From ef56be9070170f2591950c1694db8cbfcfab8a18 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Thu, 8 May 2025 12:34:30 -0400
Subject: [PATCH 69/73] make decay factor user defined parameter

---
 src/pcms/interpolator/mls_interpolation.cpp | 3 +--
 src/pcms/interpolator/mls_interpolation.hpp | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index c0e59179..904c83ae 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -117,8 +117,7 @@ Write<Real> mls_interpolation(const Reals source_values,
                               const Reals target_coordinates,
                               const SupportResults& support, const LO& dim,
                               const LO& degree, RadialBasisFunction bf,
-                              double lambda, double tol,
-                              double decay_factor = 5.0)
+                              double lambda, double tol, double decay_factor)
 {
 
   const auto nvertices_target = target_coordinates.size() / dim;
diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 94480802..2a490614 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -83,6 +83,6 @@ Write<Real> mls_interpolation(const Omega_h::Reals source_values,
                               const SupportResults& support,
                               const Omega_h::LO& dim, const Omega_h::LO& degree,
                               RadialBasisFunction bf, double lambda = 0,
-                              double tol = 1e-6);
+                              double tol = 1e-6, double decay_factor = 5.0);
 } // namespace pcms
 #endif

From 0c76714d471e99f764657093342b9d4b67284549 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Fri, 9 May 2025 09:22:04 -0400
Subject: [PATCH 70/73] compilation error

---
 src/pcms/interpolator/adj_search.hpp          | 325 +++++++++---------
 src/pcms/interpolator/mls_interpolation.cpp   |  15 +-
 .../interpolator/mls_interpolation_impl.hpp   |  66 ++--
 .../interpolator/pcms_interpolator_logger.hpp |   3 +-
 .../pcms_interpolator_view_utils.hpp          |  91 +++++
 src/pcms/interpolator/queue_visited.hpp       |   6 +-
 6 files changed, 309 insertions(+), 197 deletions(-)

diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp
index a5a20b78..aca256fb 100644
--- a/src/pcms/interpolator/adj_search.hpp
+++ b/src/pcms/interpolator/adj_search.hpp
@@ -5,15 +5,14 @@
 
 #include "queue_visited.hpp"
 
-using namespace Omega_h;
-
 static constexpr int max_dim = 3;
 
 // TODO change this into span/mdspan
 OMEGA_H_INLINE
-Real calculateDistance(const Real* p1, const Real* p2, const int dim)
+Omega_h::Real calculateDistance(const Omega_h::Real* p1,
+                                const Omega_h::Real* p2, const int dim)
 {
-  Real dx, dy, dz;
+  Omega_h::Real dx, dy, dz;
   dx = p1[0] - p2[0];
   dy = p1[1] - p2[1];
   if (dim != 3) {
@@ -29,7 +28,7 @@ inline void checkTargetPoints(
 {
   Kokkos::fence();
   printf("INFO: Checking target points...\n");
-  auto check_target_points = OMEGA_H_LAMBDA(LO i)
+  auto check_target_points = OMEGA_H_LAMBDA(Omega_h::LO i)
   {
     if (results(i).tri_id < 0) {
       OMEGA_H_CHECK_PRINTF(results(i).tri_id >= 0,
@@ -38,25 +37,26 @@ inline void checkTargetPoints(
       printf("%d, ", i);
     }
   };
-  parallel_for(results.size(), check_target_points, "check_target_points");
+  Omega_h::parallel_for(results.size(), check_target_points,
+                        "check_target_points");
   Kokkos::fence();
   printf("\n");
 }
 
-inline void printSupportsForTarget(const LO target_id,
-                                   const Write<LO>& supports_ptr,
-                                   const Write<LO>& nSupports,
-                                   const Write<LO>& support_idx)
+inline void printSupportsForTarget(
+  const Omega_h::LO target_id, const Omega_h::Write<Omega_h::LO>& supports_ptr,
+  const Omega_h::Write<Omega_h::LO>& nSupports,
+  const Omega_h::Write<Omega_h::LO>& support_idx)
 {
-  parallel_for(
-    nSupports.size(), OMEGA_H_LAMBDA(const LO id) {
+  Omega_h::parallel_for(
+    nSupports.size(), OMEGA_H_LAMBDA(const Omega_h::LO id) {
       if (id == target_id) {
-        LO start_ptr = supports_ptr[id];
-        LO end_ptr = supports_ptr[id + 1];
+        Omega_h::LO start_ptr = supports_ptr[id];
+        Omega_h::LO end_ptr = supports_ptr[id + 1];
         printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id,
                end_ptr - start_ptr, nSupports[id]);
-        for (LO i = start_ptr; i < end_ptr; ++i) {
-          LO cell_id = support_idx[i];
+        for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) {
+          Omega_h::LO cell_id = support_idx[i];
           printf(", %d", cell_id);
         }
         printf("\n");
@@ -67,29 +67,34 @@ inline void printSupportsForTarget(const LO target_id,
 class FindSupports
 {
 private:
-  Mesh& source_mesh;
-  Mesh& target_mesh; // TODO it's null when one mesh is used
+  Omega_h::Mesh& source_mesh;
+  Omega_h::Mesh& target_mesh; // TODO it's null when one mesh is used
 
 public:
-  FindSupports(Mesh& source_mesh_, Mesh& target_mesh_)
+  FindSupports(Omega_h::Mesh& source_mesh_, Omega_h::Mesh& target_mesh_)
     : source_mesh(source_mesh_), target_mesh(target_mesh_) {};
 
-  FindSupports(Mesh& mesh_) : source_mesh(mesh_), target_mesh(mesh_) {};
+  FindSupports(Omega_h::Mesh& mesh_)
+    : source_mesh(mesh_), target_mesh(mesh_) {};
 
-  void adjBasedSearch(Write<LO>& supports_ptr, Write<LO>& nSupports,
-                      Write<LO>& support_idx, Write<Real>& radii2,
+  void adjBasedSearch(Omega_h::Write<Omega_h::LO>& supports_ptr,
+                      Omega_h::Write<Omega_h::LO>& nSupports,
+                      Omega_h::Write<Omega_h::LO>& support_idx,
+                      Omega_h::Write<Omega_h::Real>& radii2,
                       bool is_build_csr_call);
 
-  void adjBasedSearchCentroidNodes(Write<LO>& supports_ptr,
-                                   Write<LO>& nSupports, Write<LO>& support_idx,
-                                   Write<Real>& radii2, bool is_build_csr_call);
+  void adjBasedSearchCentroidNodes(Omega_h::Write<Omega_h::LO>& supports_ptr,
+                                   Omega_h::Write<Omega_h::LO>& nSupports,
+                                   Omega_h::Write<Omega_h::LO>& support_idx,
+                                   Omega_h::Write<Omega_h::Real>& radii2,
+                                   bool is_build_csr_call);
 };
 
-inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
-                                         Write<LO>& nSupports,
-                                         Write<LO>& support_idx,
-                                         Write<Real>& radii2,
-                                         bool is_build_csr_call)
+inline void FindSupports::adjBasedSearch(
+  Omega_h::Write<Omega_h::LO>& supports_ptr,
+  Omega_h::Write<Omega_h::LO>& nSupports,
+  Omega_h::Write<Omega_h::LO>& support_idx,
+  Omega_h::Write<Omega_h::Real>& radii2, bool is_build_csr_call)
 {
 
   const auto& sourcePoints_coords = source_mesh.coords();
@@ -100,14 +105,15 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
   const auto nvertices_target = target_mesh.nverts();
   OMEGA_H_CHECK(radii2.size() == nvertices_target);
 
-  const auto& vert2vert = source_mesh.ask_star(VERT);
+  const auto& vert2vert = source_mesh.ask_star(Omega_h::VERT);
   const auto& v2v_ptr = vert2vert.a2ab;
   const auto& v2v_data = vert2vert.ab2b;
   const auto& cells2verts = source_mesh.ask_verts_of(dim);
 
-  Kokkos::View<pcms::Real* [2]> target_points("test_points", nvertices_target);
-  parallel_for(
-    nvertices_target, OMEGA_H_LAMBDA(const LO i) {
+  Kokkos::View<Omega_h::Real* [2]> target_points("test_points",
+                                                 nvertices_target);
+  Omega_h::parallel_for(
+    nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) {
       target_points(i, 0) = targetPoints_coords[i * dim];
       target_points(i, 1) = targetPoints_coords[i * dim + 1];
     });
@@ -117,30 +123,30 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
   auto results = search_cell(target_points);
   checkTargetPoints(results);
 
-  parallel_for(
+  Omega_h::parallel_for(
     nvertices_target,
-    OMEGA_H_LAMBDA(const LO id) {
+    OMEGA_H_LAMBDA(const Omega_h::LO id) {
       Queue queue;
       Track visited;
-      Real cutoffDistance = radii2[id];
+      Omega_h::Real cutoffDistance = radii2[id];
 
-      LO source_cell_id = results(id).tri_id;
+      Omega_h::LO source_cell_id = results(id).tri_id;
       OMEGA_H_CHECK_PRINTF(
         source_cell_id >= 0,
         "ERROR: Source cell id not found for target %d (%f,%f)\n", id,
         target_points(id, 0), target_points(id, 1));
 
-      const LO num_verts_in_dim = dim + 1;
-      LO start_ptr = source_cell_id * num_verts_in_dim;
-      LO end_ptr = start_ptr + num_verts_in_dim;
-      Real target_coords[max_dim];
-      Real support_coords[max_dim];
+      const Omega_h::LO num_verts_in_dim = dim + 1;
+      Omega_h::LO start_ptr = source_cell_id * num_verts_in_dim;
+      Omega_h::LO end_ptr = start_ptr + num_verts_in_dim;
+      Omega_h::Real target_coords[max_dim];
+      Omega_h::Real support_coords[max_dim];
 
-      for (LO k = 0; k < dim; ++k) {
+      for (Omega_h::LO k = 0; k < dim; ++k) {
         target_coords[k] = target_points(id, k);
       }
 
-      LO start_counter;
+      Omega_h::LO start_counter;
       if (!is_build_csr_call) {
         start_counter = supports_ptr[id];
       }
@@ -156,15 +162,16 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
       // * Method
 
       int count = 0;
-      for (LO i = start_ptr; i < end_ptr; ++i) {
-        LO vert_id = cells2verts[i];
+      for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) {
+        Omega_h::LO vert_id = cells2verts[i];
         visited.push_back(vert_id);
 
-        for (LO k = 0; k < dim; ++k) {
+        for (Omega_h::LO k = 0; k < dim; ++k) {
           support_coords[k] = sourcePoints_coords[vert_id * dim + k];
         }
 
-        Real dist = calculateDistance(target_coords, support_coords, dim);
+        Omega_h::Real dist =
+          calculateDistance(target_coords, support_coords, dim);
         if (dist <= cutoffDistance) {
           count++;
           if (count >= 500) {
@@ -178,19 +185,19 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
           }
           queue.push_back(vert_id);
           if (!is_build_csr_call) {
-            LO idx_count = count - 1;
+            Omega_h::LO idx_count = count - 1;
             support_idx[start_counter + idx_count] = vert_id;
           }
         }
       }
 
       while (!queue.isEmpty()) {
-        LO currentVertex = queue.front();
+        Omega_h::LO currentVertex = queue.front();
         queue.pop_front();
-        LO start = v2v_ptr[currentVertex];
-        LO end = v2v_ptr[currentVertex + 1];
+        Omega_h::LO start = v2v_ptr[currentVertex];
+        Omega_h::LO end = v2v_ptr[currentVertex + 1];
 
-        for (LO i = start; i < end; ++i) {
+        for (Omega_h::LO i = start; i < end; ++i) {
           auto neighborIndex = v2v_data[i];
 
           // check if neighbor index is already in the queue to be checked
@@ -202,7 +209,8 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
               support_coords[k] = sourcePoints_coords[neighborIndex * dim + k];
             }
 
-            Real dist = calculateDistance(target_coords, support_coords, dim);
+            Omega_h::Real dist =
+              calculateDistance(target_coords, support_coords, dim);
 
             if (dist <= cutoffDistance) {
               count++;
@@ -213,7 +221,7 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
               }
               queue.push_back(neighborIndex);
               if (!is_build_csr_call) {
-                LO idx_count = count - 1;
+                Omega_h::LO idx_count = count - 1;
                 support_idx[start_counter + idx_count] = neighborIndex;
               }
             }
@@ -229,11 +237,11 @@ inline void FindSupports::adjBasedSearch(Write<LO>& supports_ptr,
   }
 }
 
-inline void FindSupports::adjBasedSearchCentroidNodes(Write<LO>& supports_ptr,
-                                                      Write<LO>& nSupports,
-                                                      Write<LO>& support_idx,
-                                                      Write<Real>& radii2,
-                                                      bool is_build_csr_call)
+inline void FindSupports::adjBasedSearchCentroidNodes(
+  Omega_h::Write<Omega_h::LO>& supports_ptr,
+  Omega_h::Write<Omega_h::LO>& nSupports,
+  Omega_h::Write<Omega_h::LO>& support_idx,
+  Omega_h::Write<Omega_h::Real>& radii2, bool is_build_csr_call)
 {
   // Mesh Info
   const auto& mesh_coords = source_mesh.coords();
@@ -241,81 +249,83 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write<LO>& supports_ptr,
   const auto& dim = source_mesh.dim();
   const auto& nfaces = source_mesh.nfaces();
 
-  const auto& nodes2faces = source_mesh.ask_up(VERT, FACE);
+  const auto& nodes2faces = source_mesh.ask_up(Omega_h::VERT, Omega_h::FACE);
   const auto& n2f_ptr = nodes2faces.a2ab;
   const auto& n2f_data = nodes2faces.ab2b;
-  const auto& faces2nodes = source_mesh.ask_down(FACE, VERT).ab2b;
+  const auto& faces2nodes =
+    source_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
 
-  Write<Real> cell_centroids(
+  Omega_h::Write<Omega_h::Real> cell_centroids(
     dim * nfaces, 0, "stores coordinates of cell centroid of each tri element");
 
-  parallel_for(
+  Omega_h::parallel_for(
     "calculate the centroid in each tri element", nfaces,
-    OMEGA_H_LAMBDA(const LO id) {
-      const auto current_el_verts = gather_verts<3>(faces2nodes, id);
+    OMEGA_H_LAMBDA(const Omega_h::LO id) {
+      const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id);
       const Omega_h::Few<Omega_h::Vector<2>, 3> current_el_vert_coords =
-        gather_vectors<3, 2>(mesh_coords, current_el_verts);
-      auto centroid = average(current_el_vert_coords);
+        Omega_h::gather_vectors<3, 2>(mesh_coords, current_el_verts);
+      auto centroid = Omega_h::average(current_el_vert_coords);
       int index = dim * id;
       cell_centroids[index] = centroid[0];
       cell_centroids[index + 1] = centroid[1];
     });
   // * Got the adj data and cell centroids
 
-  parallel_for(
+  Omega_h::parallel_for(
     nvertices,
-    OMEGA_H_LAMBDA(const LO id) {
+    OMEGA_H_LAMBDA(const Omega_h::LO id) {
       Queue queue;
       Track visited;
-      const LO num_verts_in_dim = dim + 1;
-      Real target_coords[max_dim];
-      Real support_coords[max_dim];
-      Real cutoffDistance = radii2[id];
+      const Omega_h::LO num_verts_in_dim = dim + 1;
+      Omega_h::Real target_coords[max_dim];
+      Omega_h::Real support_coords[max_dim];
+      Omega_h::Real cutoffDistance = radii2[id];
 
       //? copying the target vertex coordinates
-      for (LO k = 0; k < dim; ++k) {
+      for (Omega_h::LO k = 0; k < dim; ++k) {
         target_coords[k] = mesh_coords[id * dim + k];
       }
 
-      LO start_counter;
+      Omega_h::LO start_counter;
       if (!is_build_csr_call) {
         start_counter = supports_ptr[id];
       }
-      LO start_ptr = n2f_ptr[id];
-      LO end_ptr = n2f_ptr[id + 1];
+      Omega_h::LO start_ptr = n2f_ptr[id];
+      Omega_h::LO end_ptr = n2f_ptr[id + 1];
 
       int count = 0;
-      for (LO i = start_ptr; i < end_ptr; ++i) {
-        LO cell_id = n2f_data[i];
+      for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) {
+        Omega_h::LO cell_id = n2f_data[i];
         visited.push_back(cell_id);
 
-        for (LO k = 0; k < dim; ++k) {
+        for (Omega_h::LO k = 0; k < dim; ++k) {
           support_coords[k] = cell_centroids[cell_id * dim + k];
         }
 
-        Real dist = calculateDistance(target_coords, support_coords, dim);
+        Omega_h::Real dist =
+          calculateDistance(target_coords, support_coords, dim);
         if (dist <= cutoffDistance) {
           count++;
           queue.push_back(cell_id);
           if (!is_build_csr_call) {
-            LO idx_count = count - 1;
+            Omega_h::LO idx_count = count - 1;
             support_idx[start_counter + idx_count] = cell_id;
           }
         }
       }
 
       while (!queue.isEmpty()) { // ? can queue be empty?
-        LO currentCell = queue.front();
+        Omega_h::LO currentCell = queue.front();
         queue.pop_front();
-        LO start = currentCell * num_verts_in_dim;
-        LO end = start + num_verts_in_dim;
-
-        for (LO i = start; i < end; ++i) {
-          LO current_vert_id = faces2nodes[i];
-          LO start_ptr_current_vert = n2f_ptr[current_vert_id];
-          LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1];
-          for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert;
-               ++j) {
+        Omega_h::LO start = currentCell * num_verts_in_dim;
+        Omega_h::LO end = start + num_verts_in_dim;
+
+        for (Omega_h::LO i = start; i < end; ++i) {
+          Omega_h::LO current_vert_id = faces2nodes[i];
+          Omega_h::LO start_ptr_current_vert = n2f_ptr[current_vert_id];
+          Omega_h::LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1];
+          for (Omega_h::LO j = start_ptr_current_vert;
+               j < end_ptr_vert_current_vert; ++j) {
             auto neighbor_cell_index = n2f_data[j];
 
             // check if neighbor index is already in the queue to be checked
@@ -328,13 +338,14 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write<LO>& supports_ptr,
                   cell_centroids[neighbor_cell_index * dim + k];
               }
 
-              Real dist = calculateDistance(target_coords, support_coords, dim);
+              Omega_h::Real dist =
+                calculateDistance(target_coords, support_coords, dim);
 
               if (dist <= cutoffDistance) {
                 count++;
                 queue.push_back(neighbor_cell_index);
                 if (!is_build_csr_call) {
-                  LO idx_count = count - 1;
+                  Omega_h::LO idx_count = count - 1;
                   support_idx[start_counter + idx_count] = neighbor_cell_index;
                 } // end of support_idx check
               } // end of distance check
@@ -355,28 +366,29 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write<LO>& supports_ptr,
 
 struct SupportResults
 {
-  LOs supports_ptr;
-  LOs supports_idx;
-  Write<Real> radii2;
+  Omega_h::LOs supports_ptr;
+  Omega_h::LOs supports_idx;
+  Omega_h::Write<Omega_h::Real> radii2;
 };
 
-inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh,
-                                      Real& cutoffDistance,
-                                      LO min_req_support = 12,
+inline SupportResults searchNeighbors(Omega_h::Mesh& source_mesh,
+                                      Omega_h::Mesh& target_mesh,
+                                      Omega_h::Real& cutoffDistance,
+                                      Omega_h::LO min_req_support = 12,
                                       bool adapt_radius = true)
 {
   FindSupports search(source_mesh, target_mesh);
-  LO nvertices_source = source_mesh.nverts();
-  LO nvertices_target = target_mesh.nverts();
+  Omega_h::LO nvertices_source = source_mesh.nverts();
+  Omega_h::LO nvertices_target = target_mesh.nverts();
 
-  Write<LO> nSupports(nvertices_target, 0,
-                      "number of supports in each target vertex");
+  Omega_h::Write<Omega_h::LO> nSupports(
+    nvertices_target, 0, "number of supports in each target vertex");
   printf("INFO: Cut off distance: %f\n", cutoffDistance);
-  Write<Real> radii2 = Write<Real>(nvertices_target, cutoffDistance,
-                                   "squared radii of the supports");
+  Omega_h::Write<Omega_h::Real> radii2 = Omega_h::Write<Omega_h::Real>(
+    nvertices_target, cutoffDistance, "squared radii of the supports");
 
-  Write<LO> supports_ptr;
-  Write<LO> supports_idx;
+  Omega_h::Write<Omega_h::LO> supports_ptr;
+  Omega_h::Write<Omega_h::LO> supports_idx;
 
   if (!adapt_radius) {
     printf("INFO: Fixed radius search *(disregarding required minimum "
@@ -386,31 +398,31 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh,
     printf("INFO: Adaptive radius search... \n");
     int r_adjust_loop = 0;
     while (true) {
-      nSupports = Write<LO>(nvertices_target, 0,
-                            "number of supports in each target vertex");
+      nSupports = Omega_h::Write<Omega_h::LO>(
+        nvertices_target, 0, "number of supports in each target vertex");
 
-      Real max_radius = 0.0;
+      Omega_h::Real max_radius = 0.0;
       Kokkos::parallel_reduce(
         "find max radius", nvertices_target,
-        OMEGA_H_LAMBDA(const LO i, Real& local_max) {
+        OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::Real& local_max) {
           local_max = (radii2[i] > local_max) ? radii2[i] : local_max;
         },
-        Kokkos::Max<Real>(max_radius));
+        Kokkos::Max<Omega_h::Real>(max_radius));
       printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius);
 
       // create storage every time to avoid complexity
-      Write<LO> supports_ptr;
-      Write<LO> supports_idx;
+      Omega_h::Write<Omega_h::LO> supports_ptr;
+      Omega_h::Write<Omega_h::LO> supports_idx;
       Kokkos::fence();
       search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2,
                             true);
       Kokkos::fence();
 
-      LO min_supports_found = 0;
-      Kokkos::Min<LO> min_reducer(min_supports_found);
+      Omega_h::LO min_supports_found = 0;
+      Kokkos::Min<Omega_h::LO> min_reducer(min_supports_found);
       Kokkos::parallel_reduce(
         "find min number of supports", nvertices_target,
-        OMEGA_H_LAMBDA(const LO i, LO& local_min) {
+        OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::LO& local_min) {
           min_reducer.join(local_min, nSupports[i]);
         },
         min_reducer);
@@ -424,10 +436,11 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh,
       }
 
       Kokkos::fence();
-      parallel_for(
-        nvertices_target, OMEGA_H_LAMBDA(const LO i) {
+      Omega_h::parallel_for(
+        nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) {
           if (nSupports[i] < min_req_support) {
-            Real factor = Real(min_req_support) / Real(nSupports[i]);
+            Omega_h::Real factor =
+              Omega_h::Real(min_req_support) / Omega_h::Real(nSupports[i]);
             factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor;
             radii2[i] = radii2[i] * factor;
           }
@@ -438,10 +451,10 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh,
     printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop);
   }
 
-  supports_ptr = Write<LO>(nvertices_target + 1, 0,
-                           "number of support source vertices in CSR format");
+  supports_ptr = Omega_h::Write<Omega_h::LO>(
+    nvertices_target + 1, 0, "number of support source vertices in CSR format");
 
-  LO total_supports = 0;
+  Omega_h::LO total_supports = 0;
 
   Kokkos::parallel_scan(
     nvertices_target,
@@ -455,29 +468,30 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh,
 
   Kokkos::fence();
 
-  supports_idx = Write<LO>(total_supports, 0,
-                           "index of source supports of each target node");
+  supports_idx = Omega_h::Write<Omega_h::LO>(
+    total_supports, 0, "index of source supports of each target node");
 
   search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, false);
 
-  target_mesh.add_tag<Real>(VERT, "radii2", 1, radii2);
+  target_mesh.add_tag<Omega_h::Real>(Omega_h::VERT, "radii2", 1, radii2);
   return SupportResults{read(supports_ptr), read(supports_idx), radii2};
 }
 
-inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance,
-                                      LO min_support = 12,
+inline SupportResults searchNeighbors(Omega_h::Mesh& mesh,
+                                      Omega_h::Real cutoffDistance,
+                                      Omega_h::LO min_support = 12,
                                       bool adapt_radius = true)
 {
-  Write<LO> supports_ptr;
-  Write<LO> supports_idx;
+  Omega_h::Write<Omega_h::LO> supports_ptr;
+  Omega_h::Write<Omega_h::LO> supports_idx;
   FindSupports search(mesh);
-  LO nvertices_target = mesh.nverts();
-  Write<LO> nSupports(nvertices_target, 0,
-                      "number of supports in each target vertex");
+  Omega_h::LO nvertices_target = mesh.nverts();
+  Omega_h::Write<Omega_h::LO> nSupports(
+    nvertices_target, 0, "number of supports in each target vertex");
 
   printf("INFO: Inside searchNeighbors 1\n");
-  Write<Real> radii2 = Write<Real>(nvertices_target, cutoffDistance,
-                                   "squared radii of the supports");
+  Omega_h::Write<Omega_h::Real> radii2 = Omega_h::Write<Omega_h::Real>(
+    nvertices_target, cutoffDistance, "squared radii of the supports");
   printf("INFO: Cutoff distance: %f\n", cutoffDistance);
 
   if (!adapt_radius) {
@@ -488,29 +502,29 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance,
     printf("INFO: Adaptive radius search... \n");
     int r_adjust_loop = 0;
     while (true) { // until the number of minimum support is met
-      Real max_radius = 0.0;
+      Omega_h::Real max_radius = 0.0;
       Kokkos::parallel_reduce(
         "find max radius", nvertices_target,
-        OMEGA_H_LAMBDA(const LO i, Real& local_max) {
+        OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::Real& local_max) {
           local_max = (radii2[i] > local_max) ? radii2[i] : local_max;
         },
-        Kokkos::Max<Real>(max_radius));
+        Kokkos::Max<Omega_h::Real>(max_radius));
       printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius);
 
-      nSupports = Write<LO>(nvertices_target, 0,
-                            "number of supports in each target vertex");
+      nSupports = Omega_h::Write<Omega_h::LO>(
+        nvertices_target, 0, "number of supports in each target vertex");
       SupportResults support; // create support every time to avoid complexity
       search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx,
                                          radii2, true);
 
       Kokkos::fence();
-      LO min_nSupports = 0;
+      Omega_h::LO min_nSupports = 0;
       Kokkos::parallel_reduce(
         "find min number of supports", nvertices_target,
-        OMEGA_H_LAMBDA(const LO i, LO& local_min) {
+        OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::LO& local_min) {
           local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min;
         },
-        Kokkos::Min<LO>(min_nSupports));
+        Kokkos::Min<Omega_h::LO>(min_nSupports));
 
       printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports,
              r_adjust_loop, max_radius);
@@ -521,10 +535,11 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance,
       }
 
       Kokkos::fence();
-      parallel_for(
-        nvertices_target, OMEGA_H_LAMBDA(const LO i) {
+      Omega_h::parallel_for(
+        nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) {
           if (nSupports[i] < min_support) {
-            Real factor = Real(min_support) / Real(nSupports[i]);
+            Omega_h::Real factor =
+              Omega_h::Real(min_support) / Omega_h::Real(nSupports[i]);
             factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor;
             radii2[i] *= factor;
           }
@@ -535,10 +550,10 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance,
   } // adaptive radius search
 
   // offset array for the supports of each target vertex
-  supports_ptr = Write<LO>(nvertices_target + 1, 0,
-                           "number of support source vertices in CSR format");
+  supports_ptr = Omega_h::Write<Omega_h::LO>(
+    nvertices_target + 1, 0, "number of support source vertices in CSR format");
 
-  LO total_supports = 0;
+  Omega_h::LO total_supports = 0;
   Kokkos::parallel_scan(
     nvertices_target,
     OMEGA_H_LAMBDA(int j, int& update, bool final) {
@@ -552,14 +567,14 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance,
   printf("INFO: Inside searchNeighbors 3\n");
   Kokkos::fence();
 
-  supports_idx = Write<LO>(total_supports, 0,
-                           "index of source supports of each target node");
+  supports_idx = Omega_h::Write<Omega_h::LO>(
+    total_supports, 0, "index of source supports of each target node");
   printf("INFO: Total_supports: %d\n", total_supports);
 
   search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx,
                                      radii2, false);
 
-  mesh.add_tag<Omega_h::Real>(VERT, "support_radius", 1, radii2);
+  mesh.add_tag<Omega_h::Real>(Omega_h::VERT, "support_radius", 1, radii2);
   return SupportResults{read(supports_ptr), read(supports_idx), radii2};
 }
 
diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 904c83ae..51005fc2 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -112,18 +112,17 @@ struct NoOp
   double operator()(double, double) const { return 1.0; }
 };
 
-Write<Real> mls_interpolation(const Reals source_values,
-                              const Reals source_coordinates,
-                              const Reals target_coordinates,
-                              const SupportResults& support, const LO& dim,
-                              const LO& degree, RadialBasisFunction bf,
-                              double lambda, double tol, double decay_factor)
+Omega_h::Write<Omega_h::Real> mls_interpolation(
+  const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates,
+  const Omega_h::Reals target_coordinates, const SupportResults& support,
+  const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf,
+  double lambda, double tol, double decay_factor)
 {
 
   const auto nvertices_target = target_coordinates.size() / dim;
 
-  Write<Real> interpolated_values(nvertices_target, 0,
-                                  "approximated target values");
+  Omega_h::Write<Omega_h::Real> interpolated_values(
+    nvertices_target, 0, "approximated target values");
   switch (bf) {
     case RadialBasisFunction::RBF_GAUSSIAN:
       interpolated_values = detail::mls_interpolation(
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 4bc8ba59..27a239fb 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -12,16 +12,16 @@
 #include <KokkosBlas.hpp>
 #include <KokkosBlas1_team_dot.hpp>
 #include <Kokkos_StdAlgorithms.hpp>
-#include "pcms_interpolator_aliases.hpp"
-#include "adj_search.hpp"
-#include "pcms/assert.h"
-#include "pcms/profile.h"
-#include "pcms_interpolator_view_utils.hpp"
-#include "pcms_interpolator_logger.hpp"
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
+#include <pcms/interpolator/adj_search.hpp>
+#include <pcms/assert.h>
+#include <pcms/profile.h>
+#include <pcms/interpolator/pcms_interpolator_view_utils.hpp>
+#include <pcms/interpolator/pcms_interpolator_logger.hpp>
 
 #include <KokkosBlas2_gemv.hpp> //KokkosBlas::gemv
-#include "KokkosBatched_SVD_Decl.hpp"
-#include "KokkosBatched_SVD_Serial_Impl.hpp"
+#include <KokkosBatched_SVD_Decl.hpp>
+#include <KokkosBatched_SVD_Serial_Impl.hpp>
 #include <KokkosBlas2_serial_gemv_impl.hpp>
 #include <KokkosBatched_Gemm_Decl.hpp>
 
@@ -103,7 +103,7 @@ int calculate_scratch_shared_size(const SupportResults& support,
  *
  * @return normalized coordinates The normlaised coordinates
  */
-Reals min_max_normalization(Reals& coordinates, int dim);
+Omega_h::Reals min_max_normalization(Omega_h::Reals& coordinates, int dim);
 
 /**
  * @brief Evaluates the polynomial basis
@@ -415,8 +415,8 @@ template <typename Func>
 void mls_interpolation(RealConstDefaultScalarArrayView source_values,
                        RealConstDefaultScalarArrayView source_coordinates,
                        RealConstDefaultScalarArrayView target_coordinates,
-                       const SupportResults& support, const LO& dim,
-                       const LO& degree, Func rbf_func,
+                       const SupportResults& support, const Omega_h::LO& dim,
+                       const Omega_h::LO& degree, Func rbf_func,
                        RealDefaultScalarArrayView approx_target_values,
                        double lambda, double tol)
 {
@@ -508,7 +508,14 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       fill(0.0, team, solution_coefficients);
 
       // Logger logger(15);
-      //  storing the coords of local supports
+
+      /**
+       *
+       * the local_source_points is of the type ScratchMatView with
+       * coordinates information; row is number of local supports
+       * & column is dim
+       */
+
       int count = -1;
       for (int j = start_ptr; j < end_ptr; ++j) {
         count++;
@@ -550,7 +557,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        * the quantity that we want interpolate
        *
        *
-       * step 4: find local supports function values
+       * step 2: find local supports function values
        */
       Kokkos::parallel_for(
         Kokkos::TeamThreadRange(team, nsupports), [=](const int j) {
@@ -567,12 +574,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
       // values");
 
       /**
-       *
-       * the local_source_points is of the type ScratchMatView with
-       * coordinates information; row is number of local supports
-       * & column is dim
-       *
-       * step 2: normalize local source supports and target point
+       * step 3: normalize local source supports and target point
        */
 
       normalize_supports(team, target_point, local_source_points);
@@ -581,7 +583,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        *
        * evaluates the basis vector of a given target point Coord target_point;
        * this can evaluate monomial basis vector for any degree of polynomial
-       * step 3: call basis vector evaluation function (eval_basis_vector);
+       *
+       * step 4: call basis vector evaluation function (eval_basis_vector);
        */
       eval_basis_vector(slice_length, target_point, target_basis_vector);
 
@@ -589,7 +592,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
        * created with the basis vector of source supports stacking on top of
        * each other
        *
-       * step 4: create vandermonde matrix
+       * step 5: create vandermonde matrix
        */
       Kokkos::parallel_for(
         Kokkos::TeamThreadRange(team, nsupports), [=](int j) {
@@ -608,6 +611,10 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
         "ERROR: radius2 has to be positive but found to be %.16f\n",
         support.radii2[league_rank]);
 
+      /**
+       * step 6: solve overdetermined system using SVD
+       *
+       */
       solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix,
                        solution_coefficients, lambda, tol);
       team.team_barrier();
@@ -640,17 +647,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values,
  * @param dim The dimension of the simulations
  * @param degree The degree of the interpolation order
  * @param rbf_func The radial basis function choice
- * @return  Write array of the interpolated field in target mesh
+ * @return  Omega_h::Write array of the interpolated field in target mesh
  *
  *
  */
 template <typename Func>
-Write<Real> mls_interpolation(const Reals source_values,
-                              const Reals source_coordinates,
-                              const Reals target_coordinates,
-                              const SupportResults& support, const LO& dim,
-                              const LO& degree, Func rbf_func, double lambda,
-                              double tol)
+Omega_h::Write<Omega_h::Real> mls_interpolation(
+  const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates,
+  const Omega_h::Reals target_coordinates, const SupportResults& support,
+  const Omega_h::LO& dim, const Omega_h::LO& degree, Func rbf_func,
+  double lambda, double tol)
 {
   const auto nsources = source_coordinates.size() / dim;
 
@@ -668,7 +674,8 @@ Write<Real> mls_interpolation(const Reals source_values,
   RealDefaultScalarArrayView radii2_array_view(support.radii2.data(),
                                                support.radii2.size());
 
-  Write<Real> interpolated_values(ntargets, 0, "approximated target values");
+  Omega_h::Write<Omega_h::Real> interpolated_values(
+    ntargets, 0, "approximated target values");
 
   RealDefaultScalarArrayView interpolated_values_array_view(
     interpolated_values.data(), interpolated_values.size());
@@ -682,4 +689,5 @@ Write<Real> mls_interpolation(const Reals source_values,
 
 } // namespace detail
 } // namespace pcms
+
 #endif
diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp
index e825f72a..1a43da6d 100644
--- a/src/pcms/interpolator/pcms_interpolator_logger.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp
@@ -55,6 +55,7 @@ class Logger
     }
   }
 
+  // log array
   KOKKOS_INLINE_FUNCTION
   void logArray(const member_type& team, const LogLevel level,
                 const double* array, const int size, const char* name)
@@ -89,7 +90,7 @@ class Logger
     }
   }
 
-  // log scratch vector
+  // log scratch matrix
   KOKKOS_INLINE_FUNCTION
   void logMatrix(const member_type& team, const LogLevel level,
                  const ScratchMatView& matrix, const char* name) const
diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index f07c9a89..bf803f11 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -64,6 +64,14 @@ void find_sq_root_each(member_type team, ScratchVecView& array)
   });
 }
 
+/**
+ *
+ *@brief Finds the inverse of each element in an array
+ *
+ *@param team The team member
+ *@param array The scratch vector view
+ *
+ */
 KOKKOS_INLINE_FUNCTION
 void find_inverse_each(member_type team, ScratchVecView& array)
 {
@@ -78,6 +86,14 @@ void find_inverse_each(member_type team, ScratchVecView& array)
   });
 }
 
+/**
+ *@brief Evaluates the shrinkage factor in SVD (S/(S^2 + lambda)
+ *
+ *@param team The team member
+ *@param lambda The regularization parameter
+ *@param sigma The scratch vector view of singular values
+ *
+ */
 KOKKOS_INLINE_FUNCTION
 void calculate_shrinkage_factor(member_type team, double lambda,
                                 ScratchVecView& sigma)
@@ -87,6 +103,15 @@ void calculate_shrinkage_factor(member_type team, double lambda,
     sigma(i) /= (sigma(i) * sigma(i) + lambda);
   });
 }
+
+/**
+ *
+ *@brief Finds the transpose of the scratch matrix
+ *
+ *@param team The team member
+ *@param array The scratch matrix view
+ *
+ */
 KOKKOS_INLINE_FUNCTION
 ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix)
 {
@@ -103,6 +128,16 @@ ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix)
   return transMatrix;
 }
 
+/**
+ *
+ *@brief Fills the diagonal entries of a matrix
+ *
+ *@param team The team member
+ *@param diagonal_entries The scratch vector view of the entries thats to be
+ *filled in a matrix diagonal
+ *@param array The scratch matrix whose digonal entries to be filled
+ *
+ */
 KOKKOS_INLINE_FUNCTION
 void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries,
                    ScratchMatView& matrix)
@@ -113,6 +148,24 @@ void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries,
                        [=](int i) { matrix(i, i) = diagonal_entries(i); });
 }
 
+/**
+ * @brief Scales each row of the input matrix by corresponding diagonal values.
+ *
+ * This function performs **row scaling** of the matrix, meaning each row `i`
+ * of the matrix is multiplied by the scalar value `diagonal_entries(i)`.
+ *
+ * Mathematically, if the matrix is `A` and the diagonal scaling matrix is `D_A
+ * = diag(diagonal_entries)`, the operation performed is:
+ *
+ *     A_scaled = D_A * A
+ *
+ * where `D_A` is a diagonal matrix that scales rows of `A`.
+ *
+ * @param team Kokkos team member used for team-level parallelism
+ * @param diagonal_entries A vector of diagonal values used for scaling (should
+ * match or be smaller than the number of rows)
+ * @param matrix The 2D matrix whose rows will be scaled
+ */
 KOKKOS_INLINE_FUNCTION
 void eval_row_scaling(member_type team, ScratchVecView diagonal_entries,
                       ScratchMatView matrix)
@@ -136,6 +189,21 @@ void eval_row_scaling(member_type team, ScratchVecView diagonal_entries,
   });
 }
 
+/**
+ * @brief Scales the right-hand-side (RHS) vector using the given diagonal
+ * weights.
+ *
+ * This function performs element-wise scaling of the RHS vector. Each entry
+ * `rhs_values(i)` is multiplied by the corresponding entry
+ * `diagonal_entries(i)`.
+ *
+ * Mathematically:
+ *     rhs_scaled(i) = diagonal_entries(i) * rhs_values(i)
+ *
+ * @param team Kokkos team member used for team-level parallelism
+ * @param diagonal_entries A vector of scaling weights (same size as rhs_values)
+ * @param rhs_values The RHS vector to be scaled
+ */
 KOKKOS_INLINE_FUNCTION
 void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries,
                       ScratchVecView rhs_values)
@@ -156,6 +224,28 @@ void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries,
   });
 }
 
+/**
+ * @brief Scales the input matrix row-wise and writes the result into an
+ * adjusted matrix.
+ *
+ * This function first scales the rows of `matrixToScale` using the
+ * corresponding entries from `diagonal_entries` (i.e., performs D * A, where D
+ * is a diagonal matrix), and then copies the scaled values into
+ * `adjustedMatrix`.
+ *
+ * Preconditions:
+ * - The number of columns in `adjustedMatrix` must match the number of rows in
+ * `matrixToScale` (colB == rowA)
+ * - `diagonal_entries` must have size ≤ rowA (validated inside
+ * `eval_row_scaling`)
+ *
+ * Typical use case: used in evaluating SV in U^TSV for SVD solver
+ *
+ * @param team Kokkos team member used for team-level parallelism
+ * @param diagonal_entries A vector of scaling values for each row
+ * @param matrixToScale The matrix to be row-scaled (modified in place)
+ * @param adjustedMatrix The output matrix to store the scaled version
+ */
 KOKKOS_INLINE_FUNCTION
 void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries,
                       ScratchMatView& matrixToScale,
@@ -179,4 +269,5 @@ void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries,
 
 } // namespace detail
 } // namespace pcms
+
 #endif
diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp
index f09d38af..5a366c8f 100644
--- a/src/pcms/interpolator/queue_visited.hpp
+++ b/src/pcms/interpolator/queue_visited.hpp
@@ -9,13 +9,11 @@
 #include <Omega_h_reduce.hpp>
 #define MAX_SIZE_QUEUE 500
 #define MAX_SIZE_TRACK 800
-using namespace std;
-using namespace Omega_h;
 
 class Queue
 {
 private:
-  LO queue_array[MAX_SIZE_QUEUE];
+  Omega_h::LO queue_array[MAX_SIZE_QUEUE];
   int first = 0, last = -1, count = 0;
 
 public:
@@ -44,7 +42,7 @@ class Queue
 class Track
 {
 private:
-  LO tracking_array[MAX_SIZE_TRACK];
+  Omega_h::LO tracking_array[MAX_SIZE_TRACK];
   int first = 0, last = -1, count = 0;
 
 public:

From ab7e764b30b06bde413f2cae1f38784076970b98 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 12 May 2025 09:54:23 -0400
Subject: [PATCH 71/73] update parameters

---
 src/pcms/interpolator/mls_interpolation.hpp   | 12 +++----
 .../interpolator/mls_interpolation_impl.hpp   |  2 --
 test/CMakeLists.txt                           |  1 -
 test/test_normalisation.cpp                   |  4 +--
 test/test_rbf_interp.cpp                      | 33 ++++++++++---------
 5 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp
index 2a490614..dfb387b1 100644
--- a/src/pcms/interpolator/mls_interpolation.hpp
+++ b/src/pcms/interpolator/mls_interpolation.hpp
@@ -77,12 +77,10 @@ enum class RadialBasisFunction : LO
  * `num_points * dim`.
  * - The result array length will match the number of target points.
  */
-Write<Real> mls_interpolation(const Omega_h::Reals source_values,
-                              const Omega_h::Reals source_coordinates,
-                              const Omega_h::Reals target_coordinates,
-                              const SupportResults& support,
-                              const Omega_h::LO& dim, const Omega_h::LO& degree,
-                              RadialBasisFunction bf, double lambda = 0,
-                              double tol = 1e-6, double decay_factor = 5.0);
+Omega_h::Write<Omega_h::Real> mls_interpolation(
+  const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates,
+  const Omega_h::Reals target_coordinates, const SupportResults& support,
+  const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf,
+  double lambda = 0, double tol = 1e-6, double decay_factor = 5.0);
 } // namespace pcms
 #endif
diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp
index 27a239fb..ad91047b 100644
--- a/src/pcms/interpolator/mls_interpolation_impl.hpp
+++ b/src/pcms/interpolator/mls_interpolation_impl.hpp
@@ -286,8 +286,6 @@ void scale_column_trans_matrix(const ScratchMatView& matrix,
   }
 }
 
-}
-
 /**
  * @brief Solves a regularized weighted linear system WAx = Wb using SVD
  * decomposition within a Kokkos parallel team.
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 5f9662cc..27b0a3c0 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -111,7 +111,6 @@ if(Catch2_FOUND)
           test_mls_basis.cpp
           test_rbf_interp.cpp
           test_normalisation.cpp
-		  test_spr_meshfields.cpp
 		  test_svd_serial.cpp)
   endif ()
   add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES})
diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp
index 4c6c38d9..ae45ad35 100644
--- a/test/test_normalisation.cpp
+++ b/test/test_normalisation.cpp
@@ -15,7 +15,7 @@ Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix)
 {
   int num_points = 10;
   Omega_h::Write<Omega_h::Real> array(20, 0, "array to store coordinates");
-  parallel_for(
+  Kokkos::parallel_for(
     "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) {
       int index = i * 2;
 
@@ -50,7 +50,7 @@ TEST_CASE("test_normalization_routine")
                                             {0.12, 4.29, 1.98},
                                             {1.24, 1.54, 4.2}};
 
-    LOs nsupports({6, 8, 4}, "number of supports");
+    Omega_h::LOs nsupports({6, 8, 4}, "number of supports");
     int dim = 3;
 
     auto total_coordinates = Omega_h::get_sum(nsupports) * dim;
diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp
index cdafea62..24ac9a18 100644
--- a/test/test_rbf_interp.cpp
+++ b/test/test_rbf_interp.cpp
@@ -31,8 +31,8 @@ double func(pcms::Coord& p, int degree)
   return -1;
 }
 
-void test(Mesh& mesh, Omega_h::Real cutoffDistance, int degree,
-          LO min_num_supports, Omega_h::Reals source_values,
+void test(Omega_h::Mesh& mesh, Omega_h::Real cutoffDistance, int degree,
+          Omega_h::LO min_num_supports, Omega_h::Reals source_values,
           Omega_h::Reals exact_target_values, Omega_h::Reals source_coordinates,
           Omega_h::Reals target_coordinates)
 {
@@ -77,10 +77,11 @@ void test(Mesh& mesh, Omega_h::Real cutoffDistance, int degree,
 TEST_CASE("test_mls_interpolation")
 {
 
-  auto lib = Library{};
+  auto lib = Omega_h::Library{};
   auto world = lib.world();
   auto rank = lib.world()->rank();
-  auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false);
+  auto mesh =
+    Omega_h::build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false);
 
   Omega_h::Real cutoffDistance = 0.3;
   cutoffDistance = cutoffDistance * cutoffDistance;
@@ -96,15 +97,15 @@ TEST_CASE("test_mls_interpolation")
   Omega_h::Write<Omega_h::Real> source_coordinates(
     dim * nfaces, 0, "stores coordinates of cell centroid of each tri element");
 
-  const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b;
+  const auto& faces2nodes = mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b;
 
   Kokkos::parallel_for(
     "calculate the centroid in each tri element", nfaces,
-    OMEGA_H_LAMBDA(const LO id) {
-      const auto current_el_verts = gather_verts<3>(faces2nodes, id);
+    OMEGA_H_LAMBDA(const Omega_h::LO id) {
+      const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id);
       const Omega_h::Few<Omega_h::Vector<2>, 3> current_el_vert_coords =
-        gather_vectors<3, 2>(target_coordinates, current_el_verts);
-      auto centroid = average(current_el_vert_coords);
+        Omega_h::gather_vectors<3, 2>(target_coordinates, current_el_verts);
+      auto centroid = Omega_h::average(current_el_vert_coords);
       int index = 2 * id;
       source_coordinates[index] = centroid[0];
       source_coordinates[index + 1] = centroid[1];
@@ -133,7 +134,7 @@ TEST_CASE("test_mls_interpolation")
   {
     std::cout << "-------starting test: d0p1------------" << "\n";
     int degree = 1;
-    LO min_num_supports = 10;
+    Omega_h::LO min_num_supports = 10;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");
@@ -165,7 +166,7 @@ TEST_CASE("test_mls_interpolation")
 
     std::cout << "--------starting test: d1p1---------" << "\n";
     int degree = 1;
-    LO min_num_supports = 10;
+    Omega_h::LO min_num_supports = 10;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");
@@ -197,7 +198,7 @@ TEST_CASE("test_mls_interpolation")
 
     std::cout << "-------------starting test: d0p2------------" << "\n";
     int degree = 2;
-    LO min_num_supports = 16;
+    Omega_h::LO min_num_supports = 16;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");
@@ -229,7 +230,7 @@ TEST_CASE("test_mls_interpolation")
 
     std::cout << "----------------satrting test: d1p2--------------" << "\n";
     int degree = 2;
-    LO min_num_supports = 16;
+    Omega_h::LO min_num_supports = 16;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");
@@ -261,7 +262,7 @@ TEST_CASE("test_mls_interpolation")
 
     std::cout << "-------------starting test: d2p2--------------------" << "\n";
     int degree = 2;
-    LO min_num_supports = 16;
+    Omega_h::LO min_num_supports = 16;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");
@@ -293,7 +294,7 @@ TEST_CASE("test_mls_interpolation")
 
     std::cout << "-------------starting test: d2p3--------------------" << "\n";
     int degree = 3;
-    LO min_num_supports = 20;
+    Omega_h::LO min_num_supports = 20;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");
@@ -325,7 +326,7 @@ TEST_CASE("test_mls_interpolation")
 
     std::cout << "-------------starting test: d3p3--------------------" << "\n";
     int degree = 3;
-    LO min_num_supports = 20;
+    Omega_h::LO min_num_supports = 20;
 
     Omega_h::Write<Omega_h::Real> source_values(nfaces, 0,
                                                 "exact target values");

From 88d49d267d72dde4c290c56c9b7f571eb3269c81 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 12 May 2025 09:55:02 -0400
Subject: [PATCH 72/73] update parameter

---
 src/pcms/interpolator/pcms_interpolator_view_utils.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
index bf803f11..1c52752f 100644
--- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp
@@ -1,7 +1,7 @@
 #ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP
 #define PCMS_INTERPOLATOR_ARRAY_OPS_HPP
 
-#include "pcms_interpolator_aliases.hpp"
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
 #include <Omega_h_array_ops.hpp>
 #include <cmath>
 #include <Omega_h_fail.hpp>

From 2e0de9d3d7b365b5547493a972316f547303a671 Mon Sep 17 00:00:00 2001
From: abhiyanpaudel <abhiyanp93@gmail.com>
Date: Mon, 12 May 2025 10:36:15 -0400
Subject: [PATCH 73/73] replace printf with Kokkos::printf

---
 src/pcms/interpolator/mls_interpolation.cpp   | 17 +++---
 .../interpolator/pcms_interpolator_logger.hpp | 53 ++++++++++---------
 2 files changed, 36 insertions(+), 34 deletions(-)

diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp
index 51005fc2..00140f8e 100644
--- a/src/pcms/interpolator/mls_interpolation.cpp
+++ b/src/pcms/interpolator/mls_interpolation.cpp
@@ -1,5 +1,6 @@
+#include <pcms/interpolator/mls_interpolation.hpp>
+#include <Kokkos_MathematicalFunctions.hpp>
 #include <cmath>
-#include "mls_interpolation.hpp"
 
 namespace pcms
 {
@@ -29,8 +30,8 @@ struct RBF_GAUSSIAN
                          "but the value is %.16f\n",
                          r_sq);
 
-    double r = sqrt(r_sq);
-    double rho = sqrt(rho_sq);
+    double r = Kokkos::sqrt(r_sq);
+    double rho = Kokkos::sqrt(rho_sq);
     double ratio = r / rho;
     double limit = 1 - ratio;
 
@@ -38,7 +39,7 @@ struct RBF_GAUSSIAN
       phi = 0;
 
     } else {
-      phi = exp(-a * a * r * r);
+      phi = Kokkos::exp(-a * a * r * r);
     }
 
     return phi;
@@ -53,11 +54,11 @@ struct RBF_C4
   double operator()(double r_sq, double rho_sq) const
   {
     double phi;
-    double r = sqrt(r_sq);
+    double r = Kokkos::sqrt(r_sq);
     OMEGA_H_CHECK_PRINTF(
       rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n",
       rho_sq);
-    double rho = sqrt(rho_sq);
+    double rho = Kokkos::sqrt(rho_sq);
     double ratio = r / rho;
     double limit = 1 - ratio;
     if (limit < 0) {
@@ -85,11 +86,11 @@ struct RBF_CONST
   double operator()(double r_sq, double rho_sq) const
   {
     double phi;
-    double r = sqrt(r_sq);
+    double r = Kokkos::sqrt(r_sq);
     OMEGA_H_CHECK_PRINTF(
       rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n",
       rho_sq);
-    double rho = sqrt(rho_sq);
+    double rho = Kokkos::sqrt(rho_sq);
     double ratio = r / rho;
     double limit = 1 - ratio;
     if (limit < 0) {
diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp
index 1a43da6d..60c7310f 100644
--- a/src/pcms/interpolator/pcms_interpolator_logger.hpp
+++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp
@@ -1,7 +1,8 @@
 #ifndef PCMS_INTERPOLATOR_LOGGER_HPP
 #define PCMS_INTERPOLATOR_LOGGER_HPP
 
-#include "pcms_interpolator_aliases.hpp"
+#include <pcms/interpolator/pcms_interpolator_aliases.hpp>
+#include <Kokkos_Printf.hpp>
 
 namespace pcms
 {
@@ -32,10 +33,10 @@ class Logger
   {
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) ", logLevelToString(level),
-               team.league_rank());
-        printf(fmt, args...);
-        printf("\n");
+        Kokkos::printf("[%s] (League %d) ", logLevelToString(level),
+                       team.league_rank());
+        Kokkos::printf(fmt, args...);
+        Kokkos::printf("\n");
       });
     }
   }
@@ -47,10 +48,10 @@ class Logger
 
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: \n", logLevelToString(level),
-               team.league_rank(), name);
-        printf("(%12.6f, %12.6f)", p.x, p.y);
-        printf("\n");
+        Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level),
+                       team.league_rank(), name);
+        Kokkos::printf("(%12.6f, %12.6f)", p.x, p.y);
+        Kokkos::printf("\n");
       });
     }
   }
@@ -63,13 +64,13 @@ class Logger
 
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: \n", logLevelToString(level),
-               team.league_rank(), name);
+        Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level),
+                       team.league_rank(), name);
 
         for (int i = 0; i < size; ++i) {
-          printf("%12.6f\n", array[i]);
+          Kokkos::printf("%12.6f\n", array[i]);
         }
-        printf("\n");
+        Kokkos::printf("\n");
       });
     }
   }
@@ -80,12 +81,12 @@ class Logger
   {
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: \n", logLevelToString(level),
-               team.league_rank(), name);
+        Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level),
+                       team.league_rank(), name);
         for (int i = 0; i < vector.size(); ++i) {
-          printf("%12.6f\n", vector(i));
+          Kokkos::printf("%12.6f\n", vector(i));
         }
-        printf("\n");
+        Kokkos::printf("\n");
       });
     }
   }
@@ -97,16 +98,16 @@ class Logger
   {
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: \n", logLevelToString(level),
-               team.league_rank(), name);
+        Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level),
+                       team.league_rank(), name);
         for (int i = 0; i < matrix.extent(0); ++i) {
           for (int j = 0; j < matrix.extent(1); ++j) {
-            printf("%20.8f", matrix(i, j));
+            Kokkos::printf("%20.8f", matrix(i, j));
           }
-          printf("\n");
+          Kokkos::printf("\n");
         }
 
-        printf("\n");
+        Kokkos::printf("\n");
       });
     }
   }
@@ -118,10 +119,10 @@ class Logger
   {
     if (team.league_rank() == selected_league_rank_) {
       Kokkos::single(Kokkos::PerTeam(team), [&]() {
-        printf("[%s] (League %d) %s: \n", logLevelToString(level),
-               team.league_rank(), name);
-        printf("%12.6f", value);
-        printf("\n");
+        Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level),
+                       team.league_rank(), name);
+        Kokkos::printf("%12.6f", value);
+        Kokkos::printf("\n");
       });
     }
   }