From 4cc237c9b9940810c4c7921b7e532bb6fbef192c Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sat, 1 Mar 2025 11:43:19 -0500 Subject: [PATCH 01/73] working code --- src/pcms/interpolator/mls_interpolation.cpp | 52 +++++- .../interpolator/mls_interpolation_impl.hpp | 172 ++++++++++++++---- .../pcms_interpolator_aliases.hpp | 2 +- test/CMakeLists.txt | 3 +- test/test_rbf_interp.cpp | 24 ++- 5 files changed, 209 insertions(+), 44 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 113986a1..114aeee0 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -11,12 +11,12 @@ struct RBF_GAUSSIAN double operator()(double r_sq, double rho_sq) const { double phi; - OMEGA_H_CHECK_PRINTF(rho_sq > 0, + OMEGA_H_CHECK_PRINTF(rho_sq >= 0, "ERROR: square of cutoff distance should always be " "positive but the value is %.16f\n", rho_sq); - OMEGA_H_CHECK_PRINTF(r_sq > 0, + OMEGA_H_CHECK_PRINTF(r_sq >= 0, "ERROR: square of distance should always be positive " "but the value is %.16f\n", r_sq); @@ -213,5 +213,53 @@ int calculate_scratch_shared_size(const SupportResults& support, return shared_size; } + +Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates, + int dim = 2) +{ + int num_points = coordinates.size() / dim; + + int coords_size = coordinates.size(); + + Write x_coordinates(num_points, 0, "x coordinates"); + + Write y_coordinates(num_points, 0, "y coordinates"); + + parallel_for( + "separates x and y coordinates", num_points, KOKKOS_LAMBDA(const int id) { + int index = id * dim; + + x_coordinates[id] = coordinates[index]; + y_coordinates[id] = coordinates[index + 1]; + }); + + HostRead host_x(read(x_coordinates)); + HostRead host_y(read(y_coordinates)); + + const auto min_x = Omega_h::get_min(read(x_coordinates)); + const auto min_y = Omega_h::get_min(read(y_coordinates)); + + const auto max_x = Omega_h::get_max(read(x_coordinates)); + const auto max_y = Omega_h::get_max(read(y_coordinates)); + + printf(" [min_x, max_x] : [%12.6f , %12.6f]\n", min_x, max_x); + printf(" [min_y, max_y] : [%12.6f , %12.6f]\n", min_y, max_y); + const auto del_x = max_x - min_x; + const auto del_y = max_y - min_y; + + printf(" [delx, dely] : [%12.6f , %12.6f]\n", del_x, del_y); + Write normalized_coordinates(coords_size, 0, + "stores scaled coordinates"); + + parallel_for( + "scale coordinates", num_points, KOKKOS_LAMBDA(const int id) { + int index = id * dim; + normalized_coordinates[index] = (x_coordinates[id] - min_x) / del_x; + normalized_coordinates[index + 1] = (y_coordinates[id] - min_y) / del_y; + }); + + return read(normalized_coordinates); +} + } // namespace detail } // namespace pcms diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 5a79f64a..7bfab251 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -84,7 +84,21 @@ int calculate_basis_vector_size(const IntHostMatView& array); * @return The scratch size */ int calculate_scratch_shared_size(const SupportResults& support, - const int nvertices_target, int basis_size); + const int ntargets, int basis_size); +/** + * + * @brief Performs minmax normalization + * + * This function takes coordinates and dimension + * and outputs the normalized coordinates + * + * @param coordinates The Coordinates + * @param dim The Dimension + * + * @return normalized coordinates The normlaised coordinates + */ +Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates, + int dim); /** * @brief Evaluates the polynomial basis @@ -131,6 +145,49 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, } } +/** + * @Brief Normalizes the support coordinates about the given target point + * + * This function takes the pivot point Coord object + * and support coordinates as input and gives normalized coordinates + * + * @param[in] team The team member + * @param[in, out] pivot The Coord object that stores the coordinate of the + *target pivot + * @param[in,out] support_coordinates The orginal coordinates when in, when out + * normalized coordinates + ** + */ +KOKKOS_INLINE_FUNCTION +void normalize_coordinates(member_type team, Coord& pivot, + ScratchMatView& support_coordinates) +{ + int nsupports = support_coordinates.extent(0); + int dim = support_coordinates.extent(1); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int i) { + Real pivot_point[MAX_DIM]; + + pivot_point[0] = pivot.x; + pivot_point[1] = pivot.y; + + if (dim == 3) { + pivot_point[2] = pivot.z; + } + + for (int j = 0; j < dim; ++j) { + support_coordinates(i, j) -= pivot_point[j]; + } + }); + + pivot.x = 0; + pivot.y = 0; + + if (dim == 3) { + pivot.z = 0; + } +} + /** * @brief Creates a vandermonde matrix * @@ -427,8 +484,26 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, static_assert(!std::is_pointer_v, "function pointer will fail in GPU execution context"); - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; + int nsources = source_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF( + source_values.size() == nsources, + "[ERROR] The size of the source values and source coordinates is not same. " + "The current sizes are :\n" + "source_values_size = %d, source_coordinates_size = %d\n", + source_values.size(), nsources); + + const auto ntargets = target_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF( + approx_target_values.size() == ntargets, + "[ERROR] The size of the approx target values and the number of targets is " + "not same. The current numbers are :\n" + "approx_target_values = %d, ntargets = %d\n", + approx_target_values.size(), ntargets); + + // auto source_coordinates = min_max_normalization(src_coordinates, dim); + // auto target_coordinates = min_max_normalization(tgt_coordinates, dim); IntHostMatView host_slice_length( "stores slice length of polynomial basis in host", degree, dim); @@ -447,9 +522,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, Kokkos::deep_copy(slice_length, slice_length_hd); int shared_size = - calculate_scratch_shared_size(support, nvertices_target, basis_size); + calculate_scratch_shared_size(support, ntargets, basis_size); - team_policy tp(nvertices_target, Kokkos::AUTO); + team_policy tp(ntargets, Kokkos::AUTO); int scratch_size = tp.scratch_size_max(0); @@ -464,25 +539,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; + // local_source_point stores the coordinates of source supports of a given + // target ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } - } - - // vondermonde matrix P from the vectors of basis vector of supports - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - basis_size); // rbf function values of source supports Phi(n,n) ScratchVecView phi_vector(team.team_scratch(0), nsupports); + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + basis_size); // stores known vector (b) ScratchVecView support_values(team.team_scratch(0), nsupports); @@ -490,11 +556,24 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); // Initialize the scratch matrices and vectors + fill(0.0, team, local_source_points); fill(0.0, team, vandermonde_matrix); fill(0.0, team, phi_vector); fill(0.0, team, support_values); fill(0.0, team, target_basis_vector); + // storing the coords of local supports + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; + } + } + // evaluates the basis vector of a given target point Coord target_point; target_point.x = target_coordinates[league_rank * dim]; @@ -503,6 +582,40 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, if (dim == 3) { target_point.z = target_coordinates[league_rank * dim + 2]; } + + /** phi(nsupports) is the array of rbf functions evaluated at the + * source supports In the actual implementation, Phi(nsupports, nsupports) + * is the diagonal matrix & each diagonal element is the phi evaluated at + * each source points + * + * step 1: evaluate the phi vector + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + compute_phi_vector(target_point, local_source_points, j, + support.radii2[league_rank], rbf_func, phi_vector); + }); + + team.team_barrier(); + + /** + * + * the local_source_points is of the type ScratchMatView with + * coordinates information; row is number of local supports + * & column is dim + * + * step 2: normalize local source supports and target point + */ + + // normalize_coordinates(team, target_point, local_source_points); + + // team.team_barrier(); + /** + * + * this can evaluate monomial basis vector for any degree of polynomial + * step 3: call basis vector evaluation function (eval_basis_vector); + */ eval_basis_vector(slice_length, target_point, target_basis_vector); /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is @@ -523,20 +636,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, "ERROR: radius2 has to be positive but found to be %.16f\n", support.radii2[league_rank]); - /** phi(nsupports) is the array of rbf functions evaluated at the - * source supports In the actual implementation, Phi(nsupports, nsupports) - * is the diagonal matrix & each diagonal element is the phi evaluated at - * each source points - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - compute_phi_vector(target_point, local_source_points, j, - support.radii2[league_rank], rbf_func, phi_vector); - }); - - team.team_barrier(); - /** support_values(nsupports) (or known rhs vector b) is the vector of the * quantity that we want interpolate */ @@ -595,9 +694,9 @@ Write mls_interpolation(const Reals source_values, const SupportResults& support, const LO& dim, const LO& degree, Func rbf_func) { - const auto nvertices_source = source_coordinates.size() / dim; + const auto nsources = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; + const auto ntargets = target_coordinates.size() / dim; RealConstDefaultScalarArrayView source_values_array_view( source_values.data(), source_values.size()); @@ -611,8 +710,7 @@ Write mls_interpolation(const Reals source_values, RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), support.radii2.size()); - Write interpolated_values(nvertices_target, 0, - "approximated target values"); + Write interpolated_values(ntargets, 0, "approximated target values"); RealDefaultScalarArrayView interpolated_values_array_view( interpolated_values.data(), interpolated_values.size()); diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp index 06d83640..76b1bcd6 100644 --- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -3,7 +3,7 @@ #include #include "../arrays.h" - +#include namespace pcms { diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index a23262b2..c06cd337 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -111,6 +111,7 @@ if(Catch2_FOUND) test_mls_basis.cpp test_rbf_interp.cpp test_linear_solver.cpp + test_normalisation.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) @@ -159,4 +160,4 @@ if(PCMS_ENABLE_Fortran) endif() add_executable(xgc_n0_server xgc_n0_coupling_server.cpp) -target_link_libraries(xgc_n0_server PUBLIC pcms::core test_support) \ No newline at end of file +target_link_libraries(xgc_n0_server PUBLIC pcms::core test_support) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 31413597..5040fa5e 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -63,7 +63,6 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, int m = exact_target_values.size(); int n = approx_target_values.size(); - REQUIRE(m == n); for (size_t i = 0; i < m; ++i) { @@ -74,7 +73,7 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, } } // Test cases for centroid to node mapping using MLS -TEST_CASE("mls_interp_test") +TEST_CASE("test_mls_interpolation") { auto lib = Library{}; @@ -131,7 +130,7 @@ TEST_CASE("mls_interp_test") SECTION("test interpolation degree 1, function degree 0") { - + std::cout << "-------starting test: d0p1------------" << "\n"; int degree = 1; LO min_num_supports = 10; @@ -152,11 +151,15 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + + std::cout << " The test for d=0, p=1, passed " << "\n"; + std::cout << "----------finishing test: d0p1-------" << "\n"; } SECTION("test interpolation degree 1, function degree 1") { + std::cout << "--------starting test: d1p1---------" << "\n"; int degree = 1; LO min_num_supports = 10; @@ -177,11 +180,13 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << "------------finishing test: d1p1--------" << "\n"; } SECTION("test interpo degree 2 poly degree 0") { + std::cout << "-------------starting test: d0p2------------" << "\n"; int degree = 2; LO min_num_supports = 16; @@ -202,11 +207,13 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << "-------------finishing test: d0p2------------" << "\n"; } SECTION("test interpolation degree 2 poly degree 1") { + std::cout << "----------------satrting test: d1p2--------------" << "\n"; int degree = 2; LO min_num_supports = 16; @@ -227,11 +234,13 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << "----------------finishing test: d1p2--------------" << "\n"; } SECTION("test interpolation degree 2, function degree 2") { + std::cout << "-------------starting test: d2p2--------------------" << "\n"; int degree = 2; LO min_num_supports = 16; @@ -252,11 +261,14 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << "-------------finishing test: d2p2--------------------" + << "\n"; } SECTION("test interpolation degree 3, function degree 2") { + std::cout << "-------------starting test: d2p3--------------------" << "\n"; int degree = 3; LO min_num_supports = 20; @@ -277,11 +289,14 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << "-------------finishing test: d2p3--------------------" + << "\n"; } SECTION("test interpolation degree 3, function degree 3") { + std::cout << "-------------starting test: d3p3--------------------" << "\n"; int degree = 3; LO min_num_supports = 20; @@ -302,5 +317,8 @@ TEST_CASE("mls_interp_test") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + + std::cout << "-------------finishing test: d3p3--------------------" + << "\n"; } } From d5b2cc0d6c5398a203847da3df70b0ec15d32cba Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sat, 1 Mar 2025 11:44:38 -0500 Subject: [PATCH 02/73] add normalization routine --- test/test_normalisation.cpp | 202 ++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 test/test_normalisation.cpp diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp new file mode 100644 index 00000000..41d8a442 --- /dev/null +++ b/test/test_normalisation.cpp @@ -0,0 +1,202 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; +using namespace Omega_h; +using namespace pcms; + +Reals fillFromMatrix(const Matrix<2, 10>& matrix) +{ + int num_points = 10; + Write array(20, 0, "array to store coordinates"); + parallel_for( + "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) { + int index = i * 2; + + array[index] = matrix[i][0]; + array[index + 1] = matrix[i][1]; + }); + return read(array); +} + +TEST_CASE("test_normalization_routine") +{ + + SECTION("check min max omega_h function") + { + Reals vector({0.45, 0.12, -2.04, 1.52, -0.22, 0.96, 5.6}); + + const auto min_value = get_min(vector); + const auto max_value = get_max(vector); + + printf("min value : %f , max_value : %f", min_value, max_value); + REQUIRE(min_value == -2.04); + REQUIRE(max_value == 5.6); + + } // end SECTION + + SECTION("test_minmax_normalization") + { + + printf("-------------the test for min_max starts-----------\n"); + + Real tolerance = 1E-5; + int num_points = 10; + + Omega_h::Matrix<2, 10> matrix{{0.45, 3.18}, + {0.12, -1.2}, + {-2.04, -1.45}, + { + 1.52, + -0.98, + }, + {-0.22, 1.45}, + {0.96, 3.62}, + {-0.26, -1.62}, + {0.82, 4.22}, + {5.6, 3.62}, + {1.2, 1.2}}; + + auto coordinates = fillFromMatrix(matrix); + + RealConstDefaultScalarArrayView coordinates_scalar_array_view( + coordinates.data(), 20); + + auto results = min_max_normalization(coordinates_scalar_array_view, 2); + + printf("results after normalization"); + HostRead host_results(results); + + double expected_results[10][2] = { + {0.325916, 0.821918}, {0.282723, 0.071918}, {0.000000, 0.029110}, + {0.465969, 0.109589}, {0.238220, 0.525685}, {0.392670, 0.897260}, + {0.232984, 0.000000}, {0.374346, 1.000000}, {1.000000, 0.897260}, + {0.424084, 0.482877}}; + + printf("\n"); + + for (int i = 0; i < 20; ++i) { + printf("%12.6f\n", host_results[i]); + } + + for (int i = 0; i < 10; ++i) { + + int index = i * 2; + + CAPTURE(i, host_results[index], expected_results[i][0], tolerance); + CHECK_THAT(host_results[index], + Catch::Matchers::WithinAbs(expected_results[i][0], tolerance)); + + CAPTURE(i, host_results[index + 1], expected_results[i][1], tolerance); + CHECK_THAT(host_results[index + 1], + Catch::Matchers::WithinAbs(expected_results[i][1], tolerance)); + } + + } // end SECTION + + SECTION("test_normalization_relative_distance") + { + Real tolerance = 1E-5; + int nvertices_target = 3; + + Omega_h::Matrix<3, 3> target_coordinates{ + {0.5, 2.0, 1.4}, {0.1, 1.2, 0.3}, {0.9, 2.8, 2.2}}; + + Omega_h::Matrix<3, 6> supports_target_1{ + {0.45, 3.18, 0.38}, {0.12, 1.2, 1.0}, {2.04, 1.45, 3.2}, + {1.52, 0.98, 3.2}, {0.22, 1.45, 3.2}, {0.96, 3.62, 2.2}}; + + Omega_h::Matrix<3, 8> supports_target_2{ + {0.88, 0.98, -0.8}, {1.2, 0.23, -1.21}, {-1.23, 2.85, 1.45}, + {0.63, 0.98, 0.2}, {-0.5, 1.45, 0.33}, {1.4, 2.61, -0.98}, + {0.88, 1.72, 0.38}, {1.2, 0.50, 2.1}}; + + Omega_h::Matrix<3, 4> supports_target_3{{3.15, 3.12, 2.91}, + {2.24, 1.12, 1.12}, + {0.12, 4.29, 1.98}, + {1.24, 1.54, 4.2}}; + + LOs nsupports({6, 8, 4}, "number of supports"); + int dim = 3; + + auto total_coordinates = Omega_h::get_sum(nsupports) * dim; + + REQUIRE(total_coordinates == 54); + Write normalized_support_coordinates( + total_coordinates, 0, "coordinates after normalization"); + Write all_support_coordinates(total_coordinates, 0, + "support coordinates all"); + Write all_target_coordinates(total_coordinates, 0, + "target coordinates all"); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int league_rank = team.league_rank(); + + int num_supports = nsupports[league_rank]; + ScratchMatView local_supports(team.team_scratch(0), num_supports, dim); + detail::fill(0, team, local_supports); + + Coord target_point; + target_point.x = target_coordinates[league_rank][0]; + target_point.y = target_coordinates[league_rank][1]; + target_point.z = target_coordinates[league_rank][2]; + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, num_supports), [=](int i) { + for (int j = 0; j < dim; ++j) { + int index = league_rank * num_supports * dim + i * dim + j; + + all_target_coordinates[index] = + target_coordinates[league_rank][j]; + if (league_rank == 0) { + local_supports(i, j) = supports_target_1[i][j]; + all_support_coordinates[index] = supports_target_1[i][j]; + } else if (league_rank == 1) { + local_supports(i, j) = supports_target_2[i][j]; + all_support_coordinates[index] = supports_target_2[i][j]; + } else { + local_supports(i, j) = supports_target_3[i][j]; + all_support_coordinates[index] = supports_target_3[i][j]; + } + } + }); + + team.team_barrier(); + detail::normalize_coordinates(team, target_point, local_supports); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, num_supports), [=](int i) { + for (int j = 0; j < dim; ++j) { + int index = league_rank * num_supports * dim + i * dim + j; + normalized_support_coordinates[index] = local_supports(i, j); + } + }); + }); + + auto host_all_support_coordinates = + HostRead(read(all_support_coordinates)); + auto host_normalized_support_coordinates = + HostRead(read(normalized_support_coordinates)); + auto host_all_target_coordinates = + HostRead(read(all_target_coordinates)); + for (int i = 0; i < total_coordinates; ++i) { + auto result = + host_all_support_coordinates[i] - host_all_target_coordinates[i]; + CHECK_THAT(host_normalized_support_coordinates[i], + Catch::Matchers::WithinAbs(result, tolerance)); + } + } // end SECTION + +} // end TESTCASE From c4b06bc899281bf7175cd3ad59b796be87d713ce Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 2 Mar 2025 08:30:57 -0500 Subject: [PATCH 03/73] add min_max normalization routine --- src/pcms/interpolator/mls_interpolation.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 114aeee0..6596a26e 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -214,8 +214,7 @@ int calculate_scratch_shared_size(const SupportResults& support, return shared_size; } -Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates, - int dim = 2) +Reals min_max_normalization(Reals& coordinates, int dim = 2) { int num_points = coordinates.size() / dim; From 0473cbf49ffc04597c8f27c0905bb86e36614224 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 2 Mar 2025 08:31:59 -0500 Subject: [PATCH 04/73] add relative distance normalization --- .../interpolator/mls_interpolation_impl.hpp | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 7bfab251..2986d4e9 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -97,8 +97,7 @@ int calculate_scratch_shared_size(const SupportResults& support, * * @return normalized coordinates The normlaised coordinates */ -Reals min_max_normalization(RealConstDefaultScalarArrayView coordinates, - int dim); +Reals min_max_normalization(Reals& coordinates, int dim); /** * @brief Evaluates the polynomial basis @@ -159,8 +158,8 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, ** */ KOKKOS_INLINE_FUNCTION -void normalize_coordinates(member_type team, Coord& pivot, - ScratchMatView& support_coordinates) +void normalize_supports(member_type team, Coord& pivot, + ScratchMatView& support_coordinates) { int nsupports = support_coordinates.extent(0); int dim = support_coordinates.extent(1); @@ -588,7 +587,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * is the diagonal matrix & each diagonal element is the phi evaluated at * each source points * - * step 1: evaluate the phi vector + * step 1: evaluate the phi vector with the original dimension */ Kokkos::parallel_for( @@ -608,9 +607,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * step 2: normalize local source supports and target point */ - // normalize_coordinates(team, target_point, local_source_points); + normalize_supports(team, target_point, local_source_points); - // team.team_barrier(); + team.team_barrier(); /** * * this can evaluate monomial basis vector for any degree of polynomial @@ -621,8 +620,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is * created with the basis vector of source supports stacking on top of * each other + * + * step 4: create vandermonde matrix */ - Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { create_vandermonde_matrix(local_source_points, j, slice_length, @@ -638,6 +638,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, /** support_values(nsupports) (or known rhs vector b) is the vector of the * quantity that we want interpolate + * + * + * step 4: find local supports function values */ Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { @@ -649,6 +652,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); + // convert normal equation to Ax = b where A is a square matrix auto result = convert_normal_equation(vandermonde_matrix, phi_vector, support_values, team); From 3b951b5c41c0fd81a3c5783fb5972fff59b77e52 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 2 Mar 2025 08:32:37 -0500 Subject: [PATCH 05/73] add test cases for normalization routine --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c06cd337..fc6ea135 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -112,6 +112,7 @@ if(Catch2_FOUND) test_rbf_interp.cpp test_linear_solver.cpp test_normalisation.cpp + test_spr_meshfields.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) From 87ca3a190c2321cb4788d2afee5fac5c976f37db Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 2 Mar 2025 08:33:26 -0500 Subject: [PATCH 06/73] add test for normalization --- test/test_normalisation.cpp | 69 +++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 34 deletions(-) diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp index 41d8a442..31e5727a 100644 --- a/test/test_normalisation.cpp +++ b/test/test_normalisation.cpp @@ -31,19 +31,6 @@ Reals fillFromMatrix(const Matrix<2, 10>& matrix) TEST_CASE("test_normalization_routine") { - SECTION("check min max omega_h function") - { - Reals vector({0.45, 0.12, -2.04, 1.52, -0.22, 0.96, 5.6}); - - const auto min_value = get_min(vector); - const auto max_value = get_max(vector); - - printf("min value : %f , max_value : %f", min_value, max_value); - REQUIRE(min_value == -2.04); - REQUIRE(max_value == 5.6); - - } // end SECTION - SECTION("test_minmax_normalization") { @@ -52,26 +39,14 @@ TEST_CASE("test_normalization_routine") Real tolerance = 1E-5; int num_points = 10; - Omega_h::Matrix<2, 10> matrix{{0.45, 3.18}, - {0.12, -1.2}, - {-2.04, -1.45}, - { - 1.52, - -0.98, - }, - {-0.22, 1.45}, - {0.96, 3.62}, - {-0.26, -1.62}, - {0.82, 4.22}, - {5.6, 3.62}, + Omega_h::Matrix<2, 10> matrix{{0.45, 3.18}, {0.12, -1.2}, {-2.04, -1.45}, + {1.52, -0.98}, {-0.22, 1.45}, {0.96, 3.62}, + {-0.26, -1.62}, {0.82, 4.22}, {5.6, 3.62}, {1.2, 1.2}}; auto coordinates = fillFromMatrix(matrix); - RealConstDefaultScalarArrayView coordinates_scalar_array_view( - coordinates.data(), 20); - - auto results = min_max_normalization(coordinates_scalar_array_view, 2); + auto results = pcms::detail::min_max_normalization(coordinates, 2); printf("results after normalization"); HostRead host_results(results); @@ -99,6 +74,8 @@ TEST_CASE("test_normalization_routine") CAPTURE(i, host_results[index + 1], expected_results[i][1], tolerance); CHECK_THAT(host_results[index + 1], Catch::Matchers::WithinAbs(expected_results[i][1], tolerance)); + + printf("-------------the test for min_max starts-----------\n"); } } // end SECTION @@ -133,6 +110,10 @@ TEST_CASE("test_normalization_routine") REQUIRE(total_coordinates == 54); Write normalized_support_coordinates( total_coordinates, 0, "coordinates after normalization"); + + Write normalized_target_coordinates( + total_coordinates, 0, "coordinates after normalization"); + Write all_support_coordinates(total_coordinates, 0, "support coordinates all"); Write all_target_coordinates(total_coordinates, 0, @@ -174,28 +155,48 @@ TEST_CASE("test_normalization_routine") }); team.team_barrier(); - detail::normalize_coordinates(team, target_point, local_supports); + detail::normalize_supports(team, target_point, local_supports); Kokkos::parallel_for( Kokkos::TeamThreadRange(team, num_supports), [=](int i) { for (int j = 0; j < dim; ++j) { int index = league_rank * num_supports * dim + i * dim + j; normalized_support_coordinates[index] = local_supports(i, j); + if (j == 0) { + normalized_target_coordinates[index] = target_point.x; + } else if (j == 1) { + normalized_target_coordinates[index] = target_point.y; + } else { + normalized_target_coordinates[index] = target_point.z; + } } }); }); auto host_all_support_coordinates = HostRead(read(all_support_coordinates)); - auto host_normalized_support_coordinates = - HostRead(read(normalized_support_coordinates)); + auto host_all_target_coordinates = HostRead(read(all_target_coordinates)); + + auto host_normalized_support_coordinates = + HostRead(read(normalized_support_coordinates)); + + auto host_normalized_target_coordinates = + HostRead(read(normalized_target_coordinates)); + for (int i = 0; i < total_coordinates; ++i) { - auto result = + auto result_support = host_all_support_coordinates[i] - host_all_target_coordinates[i]; + + auto result_target = + host_all_target_coordinates[i] - host_all_target_coordinates[i]; + CHECK_THAT(host_normalized_support_coordinates[i], - Catch::Matchers::WithinAbs(result, tolerance)); + Catch::Matchers::WithinAbs(result_support, tolerance)); + + CHECK_THAT(host_normalized_target_coordinates[i], + Catch::Matchers::WithinAbs(result_target, tolerance)); } } // end SECTION From c527ccf19226ea0b2d406bf9db2944f93efbda7e Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 2 Mar 2025 08:34:37 -0500 Subject: [PATCH 07/73] update file --- test/test_rbf_interp.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 5040fa5e..1a92780a 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -66,6 +66,8 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, REQUIRE(m == n); for (size_t i = 0; i < m; ++i) { + CAPTURE(i, host_exact_target_values[i], host_approx_target_values[i], + tolerance); CHECK_THAT( host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); @@ -180,6 +182,8 @@ TEST_CASE("test_mls_interpolation") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + + std::cout << " The test for d=1, p=1, passed " << "\n"; std::cout << "------------finishing test: d1p1--------" << "\n"; } @@ -207,6 +211,8 @@ TEST_CASE("test_mls_interpolation") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + + std::cout << " The test for d=0, p=2, passed " << "\n"; std::cout << "-------------finishing test: d0p2------------" << "\n"; } @@ -234,6 +240,8 @@ TEST_CASE("test_mls_interpolation") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + + std::cout << " The test for d=1, p=2, passed " << "\n"; std::cout << "----------------finishing test: d1p2--------------" << "\n"; } @@ -261,6 +269,7 @@ TEST_CASE("test_mls_interpolation") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << " The test for d=2, p=2, passed " << "\n"; std::cout << "-------------finishing test: d2p2--------------------" << "\n"; } @@ -289,6 +298,7 @@ TEST_CASE("test_mls_interpolation") test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << " The test for d=2, p=3, passed " << "\n"; std::cout << "-------------finishing test: d2p3--------------------" << "\n"; } @@ -318,6 +328,7 @@ TEST_CASE("test_mls_interpolation") Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); + std::cout << " The test for d=3, p=3, passed " << "\n"; std::cout << "-------------finishing test: d3p3--------------------" << "\n"; } From 2e3cee71a51b0203a93ba4eac69a7ef951dd9590 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 2 Mar 2025 08:35:49 -0500 Subject: [PATCH 08/73] add file for spr test --- test/test_spr_meshfields.cpp | 215 +++++++++++++++++++++++++++++++++++ 1 file changed, 215 insertions(+) create mode 100644 test/test_spr_meshfields.cpp diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp new file mode 100644 index 00000000..c8757a36 --- /dev/null +++ b/test/test_spr_meshfields.cpp @@ -0,0 +1,215 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace Omega_h; + +namespace +{ + +void print_patches(Omega_h::Graph& patches) +{ + auto offsets = HostRead(patches.a2ab); + auto values = HostRead(patches.ab2b); + std::cout << "num patches " << patches.nnodes() << "\n"; + for (int patch = 0; patch < patches.nnodes(); patch++) { + std::cout << "patch " << patch << " patchElms "; + for (auto valIdx = offsets[patch]; valIdx < offsets[patch + 1]; ++valIdx) { + auto patchElm = values[valIdx]; + std::cout << patchElm << " "; + } + std::cout << "\n"; + } +} + +std::string getTestName(size_t interp_degree, size_t func_degree) +{ + return "test interpolation degree " + std::to_string(interp_degree) + + ", polynomial degree " + std::to_string(func_degree); +} + +KOKKOS_INLINE_FUNCTION +double func(pcms::Coord& p, int degree) +{ + [[maybe_unused]] auto x = p.x; + [[maybe_unused]] auto y = p.y; + if (degree == 0) { + return 3; + } else if (degree == 1) { + return x + y; + } else if (degree == 2) { + return pow(x, 2) + pow(y, 2); + } else if (degree == 3) { + + return pow(x, 3) + pow(y, 3); + } else { + printf("No polynomials with degree = %d\n", degree); + } + return -1; +} + +void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values, + Reals exact_target_values, Reals source_coordinates, + Reals target_coordinates) +{ + + int dim = mesh.dim(); + Real tolerance = 1e-3; + + Omega_h::Write ignored(patches.a2ab.size(), 1); + SupportResults support{patches.a2ab, patches.ab2b, ignored}; + + auto approx_target_values = pcms::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, degree, + pcms::RadialBasisFunction::RBF_CONST); + + const auto delta_abs = Omega_h::fabs_each( + Omega_h::subtract_each(exact_target_values, read(approx_target_values))); + const auto max_delta_abs = Omega_h::get_max(delta_abs); + std::cout << "max_delta_abs " << max_delta_abs << "\n"; + + auto host_approx_target_values = HostRead(approx_target_values); + mesh.add_tag(OMEGA_H_VERT, "approx_target_values", 1, + read(approx_target_values)); + Omega_h::vtk::write_parallel("box.vtk", &mesh); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + // std::cout << "vtx " << i << "\n"; + // std::cout << "vtx " << i << "\n"; + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } +} + +} // end anonymous namespace + +// Test cases for centroid to node mapping using MLS +TEST_CASE("meshfields_spr_test") +{ + + auto lib = Library{}; + auto world = lib.world(); + auto rank = lib.world()->rank(); + const auto boxSize = 1.0; + const auto nElms = 6; + // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms, + // nElms, 0, false); + const auto thwaitesMeshFile = + "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh"; + Omega_h::Mesh mesh(&lib); + Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh); + // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms, + // nElms, 0, false); + std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts() + << "\n"; + + const auto dim = mesh.dim(); + + // const auto& target_coordinates = mesh.coords(); + const auto& target_coordinates = mesh.coords(); + + const auto& nfaces = mesh.nfaces(); + + const auto& ntargets = mesh.nverts(); + + Write source_coordinates( + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); + + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Kokkos::parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = 2 * id; + source_coordinates[index] = centroid[0]; + source_coordinates[index + 1] = centroid[1]; + }); + + pcms::Points source_points; + + source_points.coordinates = + pcms::PointsViewType("Number of local source supports", nfaces); + Kokkos::parallel_for( + "target points", nfaces, KOKKOS_LAMBDA(int j) { + source_points.coordinates(j).x = source_coordinates[j * dim]; + source_points.coordinates(j).y = source_coordinates[j * dim + 1]; + }); + + pcms::Points target_points; + + target_points.coordinates = + pcms::PointsViewType("Number of local source supports", mesh.nverts()); + Kokkos::parallel_for( + "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { + target_points.coordinates(j).x = target_coordinates[j * dim]; + target_points.coordinates(j).y = target_coordinates[j * dim + 1]; + }); + + // Define tests + for (size_t interp_degree = 1; interp_degree <= 3; interp_degree++) { + for (int func_degree = interp_degree; func_degree >= 0; func_degree--) { + SECTION(getTestName(interp_degree, func_degree).c_str()) + { + std::cerr << "start " << interp_degree << ", " << func_degree << " \n"; + int minPatchSize; + if (interp_degree == 1) + minPatchSize = 3; + if (interp_degree == 2) + minPatchSize = 8; // why so large? + if (interp_degree == 3) + minPatchSize = 10; // why so large? + std::cerr << "minPatchSize " << minPatchSize << "\n"; + auto patches = mesh.get_vtx_patches(minPatchSize); + // print_patches(patches); + // print_patches(patches); + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), func_degree); + }); + mesh.add_tag(mesh.dim(), "source_values", 1, read(source_values)); + + Write exact_target_values(mesh.nverts(), 0, + "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = + func(target_points.coordinates(i), func_degree); + }); + mesh.add_tag(OMEGA_H_VERT, "target_values", 1, + read(exact_target_values)); + + test(mesh, patches, interp_degree, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + std::cerr << "done " << interp_degree << ", " << func_degree << " \n"; + } // end SECTION + } + } +} From 5557207cccdf64b24a7f6944113e5cc12cbf7834 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 3 Mar 2025 16:34:37 -0500 Subject: [PATCH 09/73] add test for qr serial --- test/test_serial_qr.cpp | 147 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 test/test_serial_qr.cpp diff --git a/test/test_serial_qr.cpp b/test/test_serial_qr.cpp new file mode 100644 index 00000000..9635bb7a --- /dev/null +++ b/test/test_serial_qr.cpp @@ -0,0 +1,147 @@ +#include //KokkosBlas::ApplyQ +#include //KokkosBlas::QR +#include //KokkosBlas::Trsv +#include //KokkosBlas::Algo, KokkosBatched::Diag +#include //KokkosBlas::gemv +#include +#include +#include +#include +#include +#include +#include + +TEST_CASE("test_qr_serial") +{ + /* here is a test case run with Octave */ + static double const a_data[16][10] = { + {1.0000000e+00, -4.9112749e-02, -1.5629814e+00, -8.2662407e-02, + 7.6762316e-02, 1.2919981e-01, 4.0597781e-03, 2.4120621e-03, 2.4429110e+00, + 6.8330735e-03}, + {1.0000000e+00, -7.4718700e-01, 1.1447982e+00, -6.1608208e-01, + -8.5537834e-01, -7.0528966e-01, 4.6032852e-01, 5.5828842e-01, + 1.3105629e+00, 3.7955713e-01}, + {1.0000000e+00, -4.8564839e-01, -7.2143765e-01, -5.3574860e-02, + 3.5036503e-01, 3.8650921e-02, 2.6018545e-02, 2.3585436e-01, 5.2047228e-01, + 2.8702657e-03}, + {1.0000000e+00, -8.1013248e-01, -1.0234062e+00, 3.4345012e-01, + 8.2909460e-01, -3.5148898e-01, -2.7824010e-01, 6.5631463e-01, + 1.0473602e+00, 1.1795799e-01}, + {1.0000000e+00, -1.0508609e+00, -8.6973926e-01, 1.4570417e+00, + 9.1397495e-01, -1.2672464e+00, -1.5311481e+00, 1.1043086e+00, + 7.5644638e-01, 2.1229706e+00}, + {1.0000000e+00, -1.7802012e-01, 1.8697947e+00, 8.3576559e-01, + -3.3286107e-01, 1.5627100e+00, -1.4878309e-01, 3.1691163e-02, + 3.4961320e+00, 6.9850413e-01}, + {1.0000000e+00, -6.4594368e-01, -5.5999878e-01, 2.5848452e+00, + 3.6172768e-01, -1.4475102e+00, -1.6696645e+00, 4.1724324e-01, + 3.1359864e-01, 6.6814250e+00}, + {1.0000000e+00, -3.0007589e-01, -5.6481843e-01, -3.0670467e-01, + 1.6948839e-01, 1.7323245e-01, 9.2034678e-02, 9.0045542e-02, 3.1901985e-01, + 9.4067754e-02}, + {1.0000000e+00, 1.8896909e-01, 2.7899549e-01, -2.5907897e-01, 5.2721524e-02, + -7.2281865e-02, -4.8957918e-02, 3.5709317e-02, 7.7838484e-02, + 6.7121914e-02}, + {1.0000000e+00, 1.8586762e+00, -4.5760801e-01, -9.5892373e-02, + -8.5054510e-01, 4.3881118e-02, -1.7823287e-01, 3.4546770e+00, + 2.0940510e-01, 9.1953472e-03}, + {1.0000000e+00, -1.0915266e-01, 1.9050743e+00, 3.2218623e-01, + -2.0794394e-01, 6.1378871e-01, -3.5167484e-02, 1.1914304e-02, + 3.6293082e+00, 1.0380396e-01}, + {1.0000000e+00, -3.2178312e-01, -6.9392454e-01, -6.1112393e-01, + 2.2329320e-01, 4.2407389e-01, 1.9664936e-01, 1.0354437e-01, 4.8153127e-01, + 3.7347245e-01}, + {1.0000000e+00, 1.4229431e+00, 3.8000845e-01, -1.5989849e-01, 5.4073041e-01, + -6.0762776e-02, -2.2752645e-01, 2.0247671e+00, 1.4440643e-01, + 2.5567526e-02}, + {1.0000000e+00, 9.0145077e-01, -9.6991898e-01, 7.9086551e-01, + -8.7433421e-01, -7.6707547e-01, 7.1292632e-01, 8.1261349e-01, + 9.4074284e-01, 6.2546825e-01}, + {1.0000000e+00, -7.2533533e-01, 1.4790603e-01, 7.7147564e-01, + -1.0728147e-01, 1.1410590e-01, -5.5957854e-01, 5.2611134e-01, + 2.1876193e-02, 5.9517466e-01}, + {1.0000000e+00, 5.3974943e-01, -7.7853625e-01, -1.2196455e+00, + -4.2021450e-01, 9.4953820e-01, -6.5830294e-01, 2.9132945e-01, + 6.0611869e-01, 1.4875350e+00}, + }; + + static double const x_data[10] = { + 4.4283983e-01, -2.9416715e-01, 2.5654724e-01, 3.3493879e-01, + -6.3165447e-01, -5.1360652e-01, -2.4971659e-01, -2.5776427e-01, + -8.8432424e-02, -5.2756825e-01}; + + using ExecutionSpace = Kokkos::DefaultExecutionSpace; + using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space; + using DeviceType = ExecutionSpace::device_type; + + // based on pumi (github.com/SCOREC/core) test/qr.cc + typedef Kokkos::View + MatrixViewType; + typedef Kokkos::View + RowVectorViewType; + typedef Kokkos::View + ColVectorViewType; + typedef Kokkos::View + ColWorkViewType; + const auto one = 1.0; + const auto zero = 0.0; + + MatrixViewType A("A"); + typename MatrixViewType::HostMirror A_host = Kokkos::create_mirror_view(A); + for (int i = 0; i < 16; i++) + for (int j = 0; j < 10; j++) + A_host(i, j) = a_data[i][j]; + Kokkos::deep_copy(A, A_host); + + ColVectorViewType kx("kx"); + typename ColVectorViewType::HostMirror kx_host = + Kokkos::create_mirror_view(kx); + for (int j = 0; j < 10; j++) + kx_host(j) = x_data[j]; + Kokkos::deep_copy(kx, kx_host); + // b = A*kx + RowVectorViewType b("b"); + KokkosBlas::gemv("N", one, A, kx, zero, b); + + // x = b + RowVectorViewType x("x"); + Kokkos::deep_copy(x, b); + + // t: tau (see SerialQR call below) + ColVectorViewType t("t"); + + // w: working space for SerialQR + ColWorkViewType w("w"); + + // roughly following + // kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp + typedef KokkosBlas::Algo::QR::Unblocked AlgoTagType; + Kokkos::parallel_for( + "solveQR", 1, KOKKOS_LAMBDA(int) { + // compute the QR factorization of A and store the results in A and t + // (tau) - see the lapack dgeqp3(...) documentation: + // www.netlib.org/lapack/explore-html-3.6.1/dd/d9a/group__double_g_ecomputational_ga1b0500f49e03d2771b797c6e88adabbb.html + KokkosBatched::SerialQR::invoke(A, t, w); + // x = Q^{T}x + KokkosBatched::SerialApplyQ< + KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, + KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(A, t, x, w); + // x = R^{-1}x + KokkosBatched::SerialTrsv::invoke(one, + A, + x); + }); + + typename ColVectorViewType::HostMirror sol_host = + Kokkos::create_mirror_view(x); + Kokkos::deep_copy(sol_host, x); + + for (int i = 0; i < sol_host.size(); ++i) { + REQUIRE(sol_host.size() == kx_host.size()); + CAPTURE(sol_host(i), kx_host(i), 1e-15); + CHECK_THAT(sol_host(i), CATCH::Matchers::WithinAbs(kx_host[i], 1e-15)); + } +} From bc2cae0d9f6faf6b65901a7498833b60965971a8 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 3 Mar 2025 16:35:25 -0500 Subject: [PATCH 10/73] update --- .../interpolator/mls_interpolation_impl.hpp | 35 +++++++++---------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 2986d4e9..6fe28231 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -501,9 +501,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, "approx_target_values = %d, ntargets = %d\n", approx_target_values.size(), ntargets); - // auto source_coordinates = min_max_normalization(src_coordinates, dim); - // auto target_coordinates = min_max_normalization(tgt_coordinates, dim); - IntHostMatView host_slice_length( "stores slice length of polynomial basis in host", degree, dim); @@ -598,6 +595,22 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); + /** support_values(nsupports) (or known rhs vector b) is the vector of the + * quantity that we want interpolate + * + * + * step 4: find local supports function values + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { + support_values(j) = + source_values[support.supports_idx[start_ptr + j]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), + "ERROR: NaN found: at support %d\n", j); + }); + + team.team_barrier(); + /** * * the local_source_points is of the type ScratchMatView with @@ -636,22 +649,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, "ERROR: radius2 has to be positive but found to be %.16f\n", support.radii2[league_rank]); - /** support_values(nsupports) (or known rhs vector b) is the vector of the - * quantity that we want interpolate - * - * - * step 4: find local supports function values - */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { - support_values(j) = - source_values[support.supports_idx[start_ptr + j]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), - "ERROR: NaN found: at support %d\n", j); - }); - - team.team_barrier(); - // convert normal equation to Ax = b where A is a square matrix auto result = convert_normal_equation(vandermonde_matrix, phi_vector, support_values, team); From d8eedf32a01882ab9365d2f241b0c0f99a76e47e Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 3 Mar 2025 17:01:28 -0500 Subject: [PATCH 11/73] add serial qr test --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index fc6ea135..4ea80c35 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -113,6 +113,7 @@ if(Catch2_FOUND) test_linear_solver.cpp test_normalisation.cpp test_spr_meshfields.cpp + test_serial_qr.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) From 914af9ebd6b4180aafa198d28fbfe9dc3deedccb Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 3 Mar 2025 19:53:38 -0500 Subject: [PATCH 12/73] add a header for team level array operations --- .../pcms_interpolator_array_ops.cpp | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.cpp diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.cpp b/src/pcms/interpolator/pcms_interpolator_array_ops.cpp new file mode 100644 index 00000000..2233002a --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_array_ops.cpp @@ -0,0 +1,20 @@ +#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP +#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP + +#include "pcms_interpolator_aliases.hpp" +#include +#include + +KOKKOS_INLINE_FUNCTION +void find_sq_root_each(ScratchVecView& array) +{ + int size = array.size(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + OMEGA_CHECK_PRINTF(array(i) < 0, + "[Error:] Square root of a negative number is invalid!\n" + "value is %12.6f\d", + array(i)); + array(i) = sqrt(array(i)); + }); +} From 213f1af8150074330d3ddcac7912862b7fde1151 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 3 Mar 2025 19:55:24 -0500 Subject: [PATCH 13/73] add test case for serial qr solver --- test/test_serial_qr.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_serial_qr.cpp b/test/test_serial_qr.cpp index 9635bb7a..10e8bffc 100644 --- a/test/test_serial_qr.cpp +++ b/test/test_serial_qr.cpp @@ -135,13 +135,13 @@ TEST_CASE("test_qr_serial") x); }); - typename ColVectorViewType::HostMirror sol_host = + typename RowVectorViewType::HostMirror sol_host = Kokkos::create_mirror_view(x); Kokkos::deep_copy(sol_host, x); - for (int i = 0; i < sol_host.size(); ++i) { - REQUIRE(sol_host.size() == kx_host.size()); + for (int i = 0; i < kx_host.size(); ++i) { + REQUIRE(sol_host.size() == 16); CAPTURE(sol_host(i), kx_host(i), 1e-15); - CHECK_THAT(sol_host(i), CATCH::Matchers::WithinAbs(kx_host[i], 1e-15)); + CHECK_THAT(sol_host(i), Catch::Matchers::WithinAbs(kx_host[i], 1e-15)); } } From b72696c8fcca41ef201df7585c929fdbb4557184 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 3 Mar 2025 19:56:42 -0500 Subject: [PATCH 14/73] add functions for row scaling --- .../interpolator/mls_interpolation_impl.hpp | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 6fe28231..73e01471 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -287,6 +287,27 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } } +KOKKOS_INLINE_FUNCTION +void eval_row_scaling(member_type team, int i, + const ScratchVecView& diagonal_entries, + ScratchMatView& matrix) +{ + + int row = matrix.extent(0); + int column = matrix.extent(1); + int vector_size = diagonal_entries.size() OMEGA_H_CHECK_PRINTF( + vector_size == row, + "[ERROR]: for row scaling the size of diagonal entries vector should be " + "equal to the row of the matrix which is to be scaled\n" + "size of vector = %d, row of a matrix = %d\n", + vector_size, row); + ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); + for (int j = 0; j < column; ++k) { + matrix(i, j) *= + diagonal_entries(i); // scales each row of a matrix with corresponding + // entry in digonal entries vector + } +} /** * @struct ResultConvertNormal * @brief Stores the results of matrix and vector transformations. @@ -620,9 +641,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * step 2: normalize local source supports and target point */ - normalize_supports(team, target_point, local_source_points); + // normalize_supports(team, target_point, local_source_points); - team.team_barrier(); + // team.team_barrier(); /** * * this can evaluate monomial basis vector for any degree of polynomial @@ -650,13 +671,14 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, support.radii2[league_rank]); // convert normal equation to Ax = b where A is a square matrix - auto result = convert_normal_equation(vandermonde_matrix, phi_vector, - support_values, team); + // auto result = convert_normal_equation(vandermonde_matrix, + // phi_vector, + // support_values, team); - team.team_barrier(); + // team.team_barrier(); // It stores the solution in rhs vector itself - solve_matrix(result.square_matrix, result.transformed_rhs, team); + // solve_matrix(result.square_matrix, result.transformed_rhs, team); team.team_barrier(); From 00d4c95310cd7d1304bf551bce0710224a4ba28a Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 4 Mar 2025 11:38:53 -0500 Subject: [PATCH 15/73] update test_serial_qr.cpp --- src/pcms/interpolator/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 08728df7..37f9363b 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -8,6 +8,7 @@ set(PCMS_FIELD_TRANSFER_HEADERS linear_interpolant.hpp multidimarray.hpp mls_interpolation.hpp + pcms_interpolator_array_ops.hpp ) set(PCMS_FIELD_TRANSFER_SOURCES From 9dc3da0c2aa707cd2af780e0f4c7de044da695a9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 4 Mar 2025 11:39:43 -0500 Subject: [PATCH 16/73] implement serial qr --- .../interpolator/mls_interpolation_impl.hpp | 140 +++++++++++++----- 1 file changed, 101 insertions(+), 39 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 73e01471..647f00d6 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -13,11 +13,16 @@ #include #include #include - #include "pcms_interpolator_aliases.hpp" #include "adj_search.hpp" #include "../assert.h" #include "../profile.h" +#include "pcms_interpolator_array_ops.hpp" + +#include //KokkosBlas::ApplyQ +#include //KokkosBlas::QR +#include //KokkosBlas::Trsv +#include //KokkosBlas::gemv static constexpr int MAX_DIM = 3; @@ -109,6 +114,9 @@ Reals min_max_normalization(Reals& coordinates, int dim); * @param[in] p A reference to the coordinate struct * @param[in,out] basis_vector The polynomial basis vector * + * @note the basis_vector size is more than basis_size + * to accomodate with the output from qr + * */ KOKKOS_INLINE_FUNCTION void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, @@ -187,6 +195,44 @@ void normalize_supports(member_type team, Coord& pivot, } } +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param matrix The scratch matrix + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchMatView matrix) +{ + + int row = matrix.extent(0); + int col = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { + for (int k = 0; k < col; ++k) { + matrix(j, k) = value; + } + }); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param vector The scratch vector + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchVecView vector) +{ + + int size = vector.extent(0); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { vector(j) = value; }); +} + /** * @brief Creates a vandermonde matrix * @@ -288,21 +334,22 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } KOKKOS_INLINE_FUNCTION -void eval_row_scaling(member_type team, int i, - const ScratchVecView& diagonal_entries, - ScratchMatView& matrix) +void eval_row_scaling(member_type team, int i, ScratchVecView diagonal_entries, + ScratchMatView matrix) { int row = matrix.extent(0); int column = matrix.extent(1); - int vector_size = diagonal_entries.size() OMEGA_H_CHECK_PRINTF( + int vector_size = diagonal_entries.size(); + + OMEGA_H_CHECK_PRINTF( vector_size == row, "[ERROR]: for row scaling the size of diagonal entries vector should be " "equal to the row of the matrix which is to be scaled\n" "size of vector = %d, row of a matrix = %d\n", vector_size, row); - ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); - for (int j = 0; j < column; ++k) { + ScratchVecView matrix_row = Kokkos::subview(matrix, i, Kokkos::ALL()); + for (int j = 0; j < column; ++j) { matrix(i, j) *= diagonal_entries(i); // scales each row of a matrix with corresponding // entry in digonal entries vector @@ -436,42 +483,54 @@ void solve_matrix(const ScratchMatView& square_matrix, KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); } -/** - * @brief Fills the kokkos scratch view - * - * @param value The value to be populated - * @param team The team member - * @param matrix The scratch matrix - * - */ KOKKOS_INLINE_FUNCTION -void fill(double value, member_type team, ScratchMatView matrix) +void solve_weighted_matrix_serial_qr(member_type team, int league_rank, + ScratchVecView& weight, + ScratchMatView& matrix, + ScratchVecView& rhs_values) { int row = matrix.extent(0); - int col = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { - for (int k = 0; k < col; ++k) { - matrix(j, k) = value; - } + + int column = matrix.extent(1); + + int weight_size = weight.size(); + + find_sq_root_each(team, weight); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + eval_row_scaling(team, league_rank, weight, matrix); }); -} -/** - * @brief Fills the kokkos scratch view - * - * @param value The value to be populated - * @param team The team member - * @param vector The scratch vector - * - */ -KOKKOS_INLINE_FUNCTION -void fill(double value, member_type team, ScratchVecView vector) -{ + // initilize tau array + ScratchVecView tau(team.team_scratch(0), column); + fill(0.0, team, tau); - int size = vector.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), - [=](int j) { vector(j) = value; }); + // initialize work array + ScratchVecView work(team.team_scratch(0), column); + fill(0.0, team, work); + + // for serial run + if (league_rank == 0) { + + // compute QR factorization of matrix and stores the R in A and + // tau is used to recover Q + KokkosBatched::SerialQR::invoke(matrix, + tau, work); + + // b = Q^{T}b + + KokkosBatched::SerialApplyQ< + KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, + KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, + work); + + // b = R^{-1}b + KokkosBatched::SerialTrsv< + KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Diag::NonUnit, + KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); + } } /** @@ -570,7 +629,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, ScratchVecView support_values(team.team_scratch(0), nsupports); // basis of target - ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); + ScratchVecView target_basis_vector(team.team_scratch(0), nsupports); // Initialize the scratch matrices and vectors fill(0.0, team, local_source_points); @@ -680,10 +739,13 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, // It stores the solution in rhs vector itself // solve_matrix(result.square_matrix, result.transformed_rhs, team); + solve_weighted_matrix_serial_qr(team, league_rank, phi_vector, + vandermonde_matrix, support_values); + team.team_barrier(); - double target_value = KokkosBlas::Experimental::dot( - team, result.transformed_rhs, target_basis_vector); + double target_value = KokkosBlas::Experimental::dot(team, support_values, + target_basis_vector); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", From 42daeb47a9791c55b13713116bce389581b3276c Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 4 Mar 2025 11:40:26 -0500 Subject: [PATCH 17/73] add new file for team level matrix operations --- .../pcms_interpolator_array_ops.hpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.hpp diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.hpp b/src/pcms/interpolator/pcms_interpolator_array_ops.hpp new file mode 100644 index 00000000..67291dc4 --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_array_ops.hpp @@ -0,0 +1,28 @@ +#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP +#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP + +#include "pcms_interpolator_aliases.hpp" +#include +#include +#include + +namespace pcms +{ +KOKKOS_INLINE_FUNCTION +void find_sq_root_each(member_type team, ScratchVecView& array) +{ + int size = array.size(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + OMEGA_H_CHECK_PRINTF( + array(i) >= 0, + "[Error:] Square root of a negative number is invalid!\n" + "value is %12.6f\n", + array(i)); + array(i) = sqrt(array(i)); + }); +} + +} // namespace pcms + +#endif From 08433ceab6b30f1e0b5a6cb55918e0c2681a0c91 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 4 Mar 2025 11:41:09 -0500 Subject: [PATCH 18/73] update a file --- src/pcms/interpolator/pcms_interpolator_aliases.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp index 76b1bcd6..55a5f187 100644 --- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -17,6 +17,7 @@ using team_policy = typename Kokkos::TeamPolicy<>; using member_type = typename Kokkos::TeamPolicy<>::member_type; // alias for scratch view + using ScratchSpace = typename Kokkos::DefaultExecutionSpace::scratch_memory_space; From f6475607676779256dc5830c2472b97e236648da Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 4 Mar 2025 21:55:20 -0500 Subject: [PATCH 19/73] add serial scd routine --- src/pcms/interpolator/CMakeLists.txt | 2 +- src/pcms/interpolator/mls_interpolation.cpp | 21 +- .../interpolator/mls_interpolation_impl.hpp | 729 +++++++++--------- .../pcms_interpolator_array_ops.hpp | 28 - .../pcms_interpolator_view_utils.hpp | 148 ++++ 5 files changed, 541 insertions(+), 387 deletions(-) delete mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.hpp create mode 100644 src/pcms/interpolator/pcms_interpolator_view_utils.hpp diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 37f9363b..10dd51a1 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -8,7 +8,7 @@ set(PCMS_FIELD_TRANSFER_HEADERS linear_interpolant.hpp multidimarray.hpp mls_interpolation.hpp - pcms_interpolator_array_ops.hpp + pcms_interpolator_view_utils.hpp ) set(PCMS_FIELD_TRANSFER_SOURCES diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 6596a26e..417e2bc4 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -174,7 +174,8 @@ int calculate_basis_vector_size(const IntHostMatView& array) } int calculate_scratch_shared_size(const SupportResults& support, - const int nvertices_target, int basis_size) + const int nvertices_target, int basis_size, + int dim) { IntDeviceVecView shmem_each_team("stores the size required for each team", @@ -186,15 +187,25 @@ int calculate_scratch_shared_size(const SupportResults& support, int end_ptr = support.supports_ptr[i + 1]; int nsupports = end_ptr - start_ptr; + int max_size; + int min_size; + if (nsupports > basis_size) { + max_size = nsupports; + min_size = basis_size; + } else { + max_size = basis_size; + min_size = nsupports; + } size_t total_shared_size = 0; - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - total_shared_size += ScratchMatView::shmem_size(nsupports, 1); + total_shared_size += ScratchVecView::shmem_size(nsupports); + total_shared_size += ScratchMatView::shmem_size(nsupports, dim); + total_shared_size += ScratchMatView::shmem_size(nsupports, nsupports); + // total_shared_size += ScratchVecView::shmem_size(max_size); + // total_shared_size += ScratchVecView::shmem_size(min_size); shmem_each_team(i) = total_shared_size; }); diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 647f00d6..210b4dfb 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -17,12 +17,16 @@ #include "adj_search.hpp" #include "../assert.h" #include "../profile.h" -#include "pcms_interpolator_array_ops.hpp" +#include "pcms_interpolator_view_utils.hpp" #include //KokkosBlas::ApplyQ #include //KokkosBlas::QR #include //KokkosBlas::Trsv #include //KokkosBlas::gemv +#include "KokkosBatched_SVD_Decl.hpp" +#include "KokkosBatched_SVD_Serial_Impl.hpp" +#include +#include static constexpr int MAX_DIM = 3; @@ -89,7 +93,7 @@ int calculate_basis_vector_size(const IntHostMatView& array); * @return The scratch size */ int calculate_scratch_shared_size(const SupportResults& support, - const int ntargets, int basis_size); + const int ntargets, int basis_size, int dim); /** * * @brief Performs minmax normalization @@ -195,44 +199,6 @@ void normalize_supports(member_type team, Coord& pivot, } } -/** - * @brief Fills the kokkos scratch view - * - * @param value The value to be populated - * @param team The team member - * @param matrix The scratch matrix - * - */ -KOKKOS_INLINE_FUNCTION -void fill(double value, member_type team, ScratchMatView matrix) -{ - - int row = matrix.extent(0); - int col = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { - for (int k = 0; k < col; ++k) { - matrix(j, k) = value; - } - }); -} - -/** - * @brief Fills the kokkos scratch view - * - * @param value The value to be populated - * @param team The team member - * @param vector The scratch vector - * - */ -KOKKOS_INLINE_FUNCTION -void fill(double value, member_type team, ScratchVecView vector) -{ - - int size = vector.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), - [=](int j) { vector(j) = value; }); -} - /** * @brief Creates a vandermonde matrix * @@ -333,28 +299,6 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } } -KOKKOS_INLINE_FUNCTION -void eval_row_scaling(member_type team, int i, ScratchVecView diagonal_entries, - ScratchMatView matrix) -{ - - int row = matrix.extent(0); - int column = matrix.extent(1); - int vector_size = diagonal_entries.size(); - - OMEGA_H_CHECK_PRINTF( - vector_size == row, - "[ERROR]: for row scaling the size of diagonal entries vector should be " - "equal to the row of the matrix which is to be scaled\n" - "size of vector = %d, row of a matrix = %d\n", - vector_size, row); - ScratchVecView matrix_row = Kokkos::subview(matrix, i, Kokkos::ALL()); - for (int j = 0; j < column; ++j) { - matrix(i, j) *= - diagonal_entries(i); // scales each row of a matrix with corresponding - // entry in digonal entries vector - } -} /** * @struct ResultConvertNormal * @brief Stores the results of matrix and vector transformations. @@ -484,328 +428,407 @@ void solve_matrix(const ScratchMatView& square_matrix, } KOKKOS_INLINE_FUNCTION -void solve_weighted_matrix_serial_qr(member_type team, int league_rank, - ScratchVecView& weight, - ScratchMatView& matrix, - ScratchVecView& rhs_values) +void solve_matrix_serial_svd(member_type team, int league_rank, + const ScratchVecView& rhs_values, + ScratchMatView& matrix, + ScratchMatView& solution_vector) { int row = matrix.extent(0); int column = matrix.extent(1); - int weight_size = weight.size(); + int weight_size = solution.size(); - find_sq_root_each(team, weight); + // find_sq_root_each(team, weight); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { - eval_row_scaling(team, league_rank, weight, matrix); - }); + // Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + // eval_row_scaling(team, league_rank, weight, matrix); + // }); + // + // A = UEV^T + // initilize U (orthogonal matrices) array + ScratchMatView U(team.team_scratch(0), row, row); + fill(-5.0, team, U); - // initilize tau array - ScratchVecView tau(team.team_scratch(0), column); - fill(0.0, team, tau); + // initialize Vt + ScratchMatView Vt(team.team_scratch(0), column, column); + fill(-5.0, team, Vt); - // initialize work array - ScratchVecView work(team.team_scratch(0), column); - fill(0.0, team, work); + int maxrank = std::min(row, column); - // for serial run - if (league_rank == 0) { + ScratchVecView sigma(team.team_scratch(0), maxrank); + fill(-5.0, team, sigma); - // compute QR factorization of matrix and stores the R in A and - // tau is used to recover Q - KokkosBatched::SerialQR::invoke(matrix, - tau, work); + ScratchVecView work(team.team_scratch(0), std::max(row, column)); + fill(-5.0, team, work); - // b = Q^{T}b + // sigma_mat_inv + ScratchMatView temp_matrix(team.team_scratch(0), column, row); + fill(0, team, temp_matrix); - KokkosBatched::SerialApplyQ< - KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, - KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, - work); + ScratchMatView sigmaInvUtMul(team.team_scratch(0), column, row); + fill(0, team, sigmaInvUtMul); - // b = R^{-1}b - KokkosBatched::SerialTrsv< - KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Diag::NonUnit, - KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); + // for serial run + if (league_rank == 0) { + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, + sigma, Vt, work); + find_inverse_each(team, sigma); + auto Ut = find_transpose(team, U); + scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T + + // performing (S^-1 U^T) + // KokkosBatched::SerialGemm< + // KokkosBatched::Trans::NoTranspose, + // KokkosBatched::Trans::Transpose, + // KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, temp_matrix, U, + // 0.0, sigmaInvUtMul); + // } + + // perform V (S^-1 U^T) + + KokkosBatched::SerialGemm< + KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0, + sigmaInvUtMul); + + KokkosBlas::SerialGemv< + KokkosBlas::Trans::NoTranspose, + KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, sigmaInvUtMul, rhs_values, + 0.0, solution_vector); } -} -/** - * @brief Maps the data from source mesh to target mesh - * - * @param source_values Scalar array view of source field values - * @param source_coordinates Scalar array view of the coordinates of control - * points of source field - * @param target_coordinates Scalar array view of the coordinates of control - * points of target field - * @param support The object that enpasulates support info - * @param dim The dimension of the simulations - * @param degree The degree of the interpolation order - * @param rbf_func The radial basis function choice - * @param[in/out] Scalar array view of the interpolated field in target mesh - * - * - */ -template -void mls_interpolation(RealConstDefaultScalarArrayView source_values, - RealConstDefaultScalarArrayView source_coordinates, - RealConstDefaultScalarArrayView target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func, - RealDefaultScalarArrayView approx_target_values) -{ - PCMS_FUNCTION_TIMER; - static_assert(std::is_invocable_r_v, - "rbf_func, takes radius and cutoff, returns weight"); - static_assert(!std::is_pointer_v, - "function pointer will fail in GPU execution context"); - - int nsources = source_coordinates.size() / dim; - - OMEGA_H_CHECK_PRINTF( - source_values.size() == nsources, - "[ERROR] The size of the source values and source coordinates is not same. " - "The current sizes are :\n" - "source_values_size = %d, source_coordinates_size = %d\n", - source_values.size(), nsources); - - const auto ntargets = target_coordinates.size() / dim; - - OMEGA_H_CHECK_PRINTF( - approx_target_values.size() == ntargets, - "[ERROR] The size of the approx target values and the number of targets is " - "not same. The current numbers are :\n" - "approx_target_values = %d, ntargets = %d\n", - approx_target_values.size(), ntargets); - - IntHostMatView host_slice_length( - "stores slice length of polynomial basis in host", degree, dim); - - Kokkos::deep_copy(host_slice_length, 0); - - calculate_basis_slice_lengths(host_slice_length); - - auto basis_size = calculate_basis_vector_size(host_slice_length); - - IntDeviceMatView slice_length( - "stores slice length of polynomial basis in device", degree, dim); - - auto slice_length_hd = Kokkos::create_mirror_view(slice_length); - Kokkos::deep_copy(slice_length_hd, host_slice_length); - Kokkos::deep_copy(slice_length, slice_length_hd); - - int shared_size = - calculate_scratch_shared_size(support, ntargets, basis_size); - - team_policy tp(ntargets, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - - PCMS_ALWAYS_ASSERT(scratch_size > shared_size); - - // calculates the interpolated values - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int league_rank = team.league_rank(); - int start_ptr = support.supports_ptr[league_rank]; - int end_ptr = support.supports_ptr[league_rank + 1]; - int nsupports = end_ptr - start_ptr; - - // local_source_point stores the coordinates of source supports of a given - // target - ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - - // rbf function values of source supports Phi(n,n) - ScratchVecView phi_vector(team.team_scratch(0), nsupports); - - // vondermonde matrix P from the vectors of basis vector of supports - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - basis_size); - // stores known vector (b) - ScratchVecView support_values(team.team_scratch(0), nsupports); - - // basis of target - ScratchVecView target_basis_vector(team.team_scratch(0), nsupports); - - // Initialize the scratch matrices and vectors - fill(0.0, team, local_source_points); - fill(0.0, team, vandermonde_matrix); - fill(0.0, team, phi_vector); - fill(0.0, team, support_values); - fill(0.0, team, target_basis_vector); - - // storing the coords of local supports - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } - } + KOKKOS_INLINE_FUNCTION + void solve_weighted_matrix_serial_qr( + member_type team, int league_rank, ScratchVecView& weight, + ScratchMatView& matrix, ScratchVecView& rhs_values) + { - // evaluates the basis vector of a given target point - Coord target_point; - target_point.x = target_coordinates[league_rank * dim]; - target_point.y = target_coordinates[league_rank * dim + 1]; + int row = matrix.extent(0); - if (dim == 3) { - target_point.z = target_coordinates[league_rank * dim + 2]; - } + int column = matrix.extent(1); - /** phi(nsupports) is the array of rbf functions evaluated at the - * source supports In the actual implementation, Phi(nsupports, nsupports) - * is the diagonal matrix & each diagonal element is the phi evaluated at - * each source points - * - * step 1: evaluate the phi vector with the original dimension - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - compute_phi_vector(target_point, local_source_points, j, - support.radii2[league_rank], rbf_func, phi_vector); - }); - - team.team_barrier(); - - /** support_values(nsupports) (or known rhs vector b) is the vector of the - * quantity that we want interpolate - * - * - * step 4: find local supports function values - */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { - support_values(j) = - source_values[support.supports_idx[start_ptr + j]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), - "ERROR: NaN found: at support %d\n", j); - }); - - team.team_barrier(); - - /** - * - * the local_source_points is of the type ScratchMatView with - * coordinates information; row is number of local supports - * & column is dim - * - * step 2: normalize local source supports and target point - */ - - // normalize_supports(team, target_point, local_source_points); - - // team.team_barrier(); - /** - * - * this can evaluate monomial basis vector for any degree of polynomial - * step 3: call basis vector evaluation function (eval_basis_vector); - */ - eval_basis_vector(slice_length, target_point, target_basis_vector); - - /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is - * created with the basis vector of source supports stacking on top of - * each other - * - * step 4: create vandermonde matrix - */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - create_vandermonde_matrix(local_source_points, j, slice_length, - vandermonde_matrix); - }); - - team.team_barrier(); - - OMEGA_H_CHECK_PRINTF( - support.radii2[league_rank] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - support.radii2[league_rank]); - - // convert normal equation to Ax = b where A is a square matrix - // auto result = convert_normal_equation(vandermonde_matrix, - // phi_vector, - // support_values, team); - - // team.team_barrier(); - - // It stores the solution in rhs vector itself - // solve_matrix(result.square_matrix, result.transformed_rhs, team); - - solve_weighted_matrix_serial_qr(team, league_rank, phi_vector, - vandermonde_matrix, support_values); - - team.team_barrier(); - - double target_value = KokkosBlas::Experimental::dot(team, support_values, - target_basis_vector); - - if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", - league_rank); - approx_target_values[league_rank] = target_value; - } + int weight_size = weight.size(); + + find_sq_root_each(team, weight); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + eval_row_scaling(team, league_rank, weight, matrix); }); -} -/** - * @brief \overload - * Maps the data from source mesh to target mesh - * - * @param source_values Read array of Source field values - * @param source_coordinates Read array of the coordinates of control points of - * source field - * @param target_coordinates Read array of the coordinates of control points of - * target field - * @param support The object that enpasulates support info - * @param dim The dimension of the simulations - * @param degree The degree of the interpolation order - * @param rbf_func The radial basis function choice - * @return Write array of the interpolated field in target mesh - * - * - */ -template -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func) -{ - const auto nsources = source_coordinates.size() / dim; + // initilize tau array + ScratchVecView tau(team.team_scratch(0), column); + fill(0.0, team, tau); + + // initialize work array + ScratchVecView work(team.team_scratch(0), column); + fill(0.0, team, work); - const auto ntargets = target_coordinates.size() / dim; + // for serial run + if (league_rank == 0) { - RealConstDefaultScalarArrayView source_values_array_view( - source_values.data(), source_values.size()); + // compute QR factorization of matrix and stores the R in A and + // tau is used to recover Q + KokkosBatched::SerialQR::invoke( + matrix, tau, work); - RealConstDefaultScalarArrayView source_coordinates_array_view( - source_coordinates.data(), source_coordinates.size()); + // b = Q^{T}b - RealConstDefaultScalarArrayView target_coordinates_array_view( - target_coordinates.data(), target_coordinates.size()); + KokkosBatched::SerialApplyQ< + KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, + KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, + work); + + // b = R^{-1}b + KokkosBatched::SerialTrsv< + KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Diag::NonUnit, + KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); + } + } - RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), - support.radii2.size()); + /** + * @brief Maps the data from source mesh to target mesh + * + * @param source_values Scalar array view of source field values + * @param source_coordinates Scalar array view of the coordinates of control + * points of source field + * @param target_coordinates Scalar array view of the coordinates of control + * points of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param rbf_func The radial basis function choice + * @param[in/out] Scalar array view of the interpolated field in target mesh + * + * + */ + template + void mls_interpolation(RealConstDefaultScalarArrayView source_values, + RealConstDefaultScalarArrayView source_coordinates, + RealConstDefaultScalarArrayView target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Func rbf_func, + RealDefaultScalarArrayView approx_target_values) + { + PCMS_FUNCTION_TIMER; + static_assert(std::is_invocable_r_v, + "rbf_func, takes radius and cutoff, returns weight"); + static_assert(!std::is_pointer_v, + "function pointer will fail in GPU execution context"); + + int nsources = source_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF( + source_values.size() == nsources, + "[ERROR] The size of the source values and source coordinates is not " + "same. " + "The current sizes are :\n" + "source_values_size = %d, source_coordinates_size = %d\n", + source_values.size(), nsources); + + const auto ntargets = target_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets, + "[ERROR] The size of the approx target values and the " + "number of targets is " + "not same. The current numbers are :\n" + "approx_target_values = %d, ntargets = %d\n", + approx_target_values.size(), ntargets); + + IntHostMatView host_slice_length( + "stores slice length of polynomial basis in host", degree, dim); + + Kokkos::deep_copy(host_slice_length, 0); + + calculate_basis_slice_lengths(host_slice_length); + + auto basis_size = calculate_basis_vector_size(host_slice_length); + + IntDeviceMatView slice_length( + "stores slice length of polynomial basis in device", degree, dim); + + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + + int shared_size = + calculate_scratch_shared_size(support, ntargets, basis_size, dim); + + team_policy tp(ntargets, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + printf("Scratch Size = %d\n", scratch_size); + printf("Shared Size = %d\n", shared_size); + PCMS_ALWAYS_ASSERT(scratch_size > shared_size); + + printf("does it fail or below\n"); + // calculates the interpolated values + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int league_rank = team.league_rank(); + int start_ptr = support.supports_ptr[league_rank]; + int end_ptr = support.supports_ptr[league_rank + 1]; + int nsupports = end_ptr - start_ptr; + + // local_source_point stores the coordinates of source supports of a + // given target + ScratchMatView local_source_points(team.team_scratch(0), nsupports, + dim); + + // rbf function values of source supports Phi(n,n) + ScratchVecView phi_vector(team.team_scratch(0), nsupports); + + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + basis_size); + // stores known vector (b) + ScratchVecView support_values(team.team_scratch(0), nsupports); + + // basis of target + ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); + + // Initialize the scratch matrices and vectors + fill(0.0, team, local_source_points); + fill(0.0, team, vandermonde_matrix); + fill(0.0, team, phi_vector); + fill(0.0, team, support_values); + fill(0.0, team, target_basis_vector); + + // storing the coords of local supports + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; + } + } - Write interpolated_values(ntargets, 0, "approximated target values"); + // evaluates the basis vector of a given target point + Coord target_point; + target_point.x = target_coordinates[league_rank * dim]; + target_point.y = target_coordinates[league_rank * dim + 1]; - RealDefaultScalarArrayView interpolated_values_array_view( - interpolated_values.data(), interpolated_values.size()); + if (dim == 3) { + target_point.z = target_coordinates[league_rank * dim + 2]; + } - mls_interpolation(source_values_array_view, source_coordinates_array_view, - target_coordinates_array_view, support, dim, degree, - rbf_func, interpolated_values_array_view); + /** phi(nsupports) is the array of rbf functions evaluated at the + * source supports In the actual implementation, Phi(nsupports, + * nsupports) is the diagonal matrix & each diagonal element is the phi + * evaluated at each source points + * + * step 1: evaluate the phi vector with the original dimension + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + compute_phi_vector(target_point, local_source_points, j, + support.radii2[league_rank], rbf_func, + phi_vector); + }); + + team.team_barrier(); + + /** support_values(nsupports) (or known rhs vector b) is the vector of + * the quantity that we want interpolate + * + * + * step 4: find local supports function values + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { + support_values(j) = + source_values[support.supports_idx[start_ptr + j]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), + "ERROR: NaN found: at support %d\n", j); + }); + + team.team_barrier(); + + /** + * + * the local_source_points is of the type ScratchMatView with + * coordinates information; row is number of local supports + * & column is dim + * + * step 2: normalize local source supports and target point + */ + + // normalize_supports(team, target_point, local_source_points); + + // team.team_barrier(); + /** + * + * this can evaluate monomial basis vector for any degree of polynomial + * step 3: call basis vector evaluation function (eval_basis_vector); + */ + eval_basis_vector(slice_length, target_point, target_basis_vector); + + /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is + * created with the basis vector of source supports stacking on top of + * each other + * + * step 4: create vandermonde matrix + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + create_vandermonde_matrix(local_source_points, j, slice_length, + vandermonde_matrix); + }); + + team.team_barrier(); + + OMEGA_H_CHECK_PRINTF( + support.radii2[league_rank] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + support.radii2[league_rank]); + + // convert normal equation to Ax = b where A is a square matrix + // auto result = convert_normal_equation(vandermonde_matrix, + // phi_vector, + // support_values, team); + + // team.team_barrier(); + + // It stores the solution in rhs vector itself + // solve_matrix(result.square_matrix, result.transformed_rhs, + // team); + + // solve_weighted_matrix_serial_qr(team, league_rank, phi_vector, + // vandermonde_matrix, + // support_values); + + solve_matrix_serial_svd(team, league_rank, phi_vector, + vandermonde_matrix, support_values); + team.team_barrier(); + + double target_value = KokkosBlas::Experimental::dot( + team, solution_coefficients, target_basis_vector); + + if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", + league_rank); + approx_target_values[league_rank] = target_value; + } + }); + } - return interpolated_values; -} + /** + * @brief \overload + * Maps the data from source mesh to target mesh + * + * @param source_values Read array of Source field values + * @param source_coordinates Read array of the coordinates of control points + * of source field + * @param target_coordinates Read array of the coordinates of control points + * of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param rbf_func The radial basis function choice + * @return Write array of the interpolated field in target mesh + * + * + */ + template + Write mls_interpolation( + const Reals source_values, const Reals source_coordinates, + const Reals target_coordinates, const SupportResults& support, + const LO& dim, const LO& degree, Func rbf_func) + { + const auto nsources = source_coordinates.size() / dim; + + const auto ntargets = target_coordinates.size() / dim; + + RealConstDefaultScalarArrayView source_values_array_view( + source_values.data(), source_values.size()); + + RealConstDefaultScalarArrayView source_coordinates_array_view( + source_coordinates.data(), source_coordinates.size()); + + RealConstDefaultScalarArrayView target_coordinates_array_view( + target_coordinates.data(), target_coordinates.size()); + + RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), + support.radii2.size()); + + Write interpolated_values(ntargets, 0, "approximated target values"); + + RealDefaultScalarArrayView interpolated_values_array_view( + interpolated_values.data(), interpolated_values.size()); + + mls_interpolation(source_values_array_view, source_coordinates_array_view, + target_coordinates_array_view, support, dim, degree, + rbf_func, interpolated_values_array_view); + + return interpolated_values; + } } // namespace detail } // namespace pcms diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.hpp b/src/pcms/interpolator/pcms_interpolator_array_ops.hpp deleted file mode 100644 index 67291dc4..00000000 --- a/src/pcms/interpolator/pcms_interpolator_array_ops.hpp +++ /dev/null @@ -1,28 +0,0 @@ -#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP -#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP - -#include "pcms_interpolator_aliases.hpp" -#include -#include -#include - -namespace pcms -{ -KOKKOS_INLINE_FUNCTION -void find_sq_root_each(member_type team, ScratchVecView& array) -{ - int size = array.size(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { - OMEGA_H_CHECK_PRINTF( - array(i) >= 0, - "[Error:] Square root of a negative number is invalid!\n" - "value is %12.6f\n", - array(i)); - array(i) = sqrt(array(i)); - }); -} - -} // namespace pcms - -#endif diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp new file mode 100644 index 00000000..cd0fdc1f --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -0,0 +1,148 @@ +#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP +#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP + +#include "pcms_interpolator_aliases.hpp" +#include +#include +#include + +namespace pcms +{ +namespace detail +{ +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param matrix The scratch matrix + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchMatView matrix) +{ + + int row = matrix.extent(0); + int col = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { + for (int k = 0; k < col; ++k) { + matrix(j, k) = value; + } + }); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param vector The scratch vector + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchVecView vector) +{ + + int size = vector.extent(0); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { vector(j) = value; }); +} + +KOKKOS_INLINE_FUNCTION +void find_sq_root_each(member_type team, ScratchVecView& array) +{ + int size = array.size(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + OMEGA_H_CHECK_PRINTF( + array(i) >= 0, + "[Error:] Square root of a negative number is invalid!\n" + "value is %12.6f\n", + array(i)); + array(i) = sqrt(array(i)); + }); +} + +KOKKOS_INLINE_FUNCTION +void find_inverse_each(member_type team, ScratchVecView& array) +{ + int size = array.size(); + double eps = 1e-8; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + if (array(i) > eps) { + array(i) = 1 / array(i); + } else { + array(i) = 0.0; + } + }); +} + +KOKKOS_INLINE_FUNCTION +ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) +{ + int row = matrix.extent(0); + int column = matrix.extent(1); + + ScratchMatView transMatrix(team.team_scratch(0), column, row); + fill(0.0, team, transMatrix); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + transMatrix(j, i) = matrix(j, i); + } + }); + return transMatrix; +} + +KOKKOS_INLINE_FUNCTION +void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries, + ScratchMatView& matrix) +{ + int size = diagonal_entries.size(); + OMEGA_H_CHECK(size == matrix.extent(0) || size == matrix.extent(1)); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int i) { matrix(i, i) = diagonal_entries(i); }); +} + +KOKKOS_INLINE_FUNCTION +void eval_row_scaling(member_type team, const ScratchVecView& diagonal_entries, + ScratchMatView& matrix) +{ + + int row = matrix.extent(0); + int column = matrix.extent(1); + int vector_size = diagonal_entries.size(); + + OMEGA_H_CHECK_PRINTF( + vector_size <= row, + "[ERROR]: for row scaling the size of diagonal entries vector should be " + "equal or less than the row of the matrix which is to be scaled\n" + "size of vector = %d, row of a matrix = %d\n", + vector_size, row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + matrix(i, j) *= + diagonal_entries(i); // scales each row of a matrix with a corresponding + // value in a digonal entries + }); + +} + +KOKKOS_INLINE_FUNCTION +void scale_and_adjust(member_type team, const ScratchVecView& diagonal_entries, ScratchMatView& matrixToScale, ScratchMatView& adjustedMatrix){ + size_t rowA = matrixToScale.extent(0); + size_t columnA = matrixToScale.extent(1); + + size_t rowB = adjustedMatrix.extent(0); + size_t columnB = adjustedMatrix.extent(0); + + OMEGA_CHECK(columnB == rowA) + eval_row_scaling(team, diagonal_entries, matrixtoScale); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + for (int j = 0; j < columnB; == j) { + adjustedMatrix(i, j) = matrixtoScale(i, j); + } + }); +} +} // end namespace pcms +} // end namespace detail +#endif From 8ec71556071ba4565c57626ebea9169059773bfc Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 11 Mar 2025 16:47:05 -0400 Subject: [PATCH 20/73] implement serial svd --- src/pcms/interpolator/mls_interpolation.cpp | 22 +- .../interpolator/mls_interpolation_impl.hpp | 658 +++++++++--------- .../pcms_interpolator_view_utils.hpp | 64 +- test/CMakeLists.txt | 1 + 4 files changed, 385 insertions(+), 360 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 417e2bc4..d19e9fc6 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -197,13 +197,21 @@ int calculate_scratch_shared_size(const SupportResults& support, min_size = nsupports; } size_t total_shared_size = 0; - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); - total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); - total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); - total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports); - total_shared_size += ScratchMatView::shmem_size(nsupports, dim); - total_shared_size += ScratchMatView::shmem_size(nsupports, nsupports); + total_shared_size += + ScratchMatView::shmem_size(basis_size, basis_size); // Vt + total_shared_size += + ScratchMatView::shmem_size(basis_size, nsupports) * 2; // temp_matrix + total_shared_size += + ScratchMatView::shmem_size(nsupports, basis_size); // vandermonde matrix + total_shared_size += + ScratchVecView::shmem_size(basis_size) * + 3; // sigma, target_basis_vector, solution_coefficients + total_shared_size += ScratchVecView::shmem_size(nsupports) * + 3; // work, phi_vector, support_values + total_shared_size += + ScratchMatView::shmem_size(nsupports, dim); // local_source_points + total_shared_size += + ScratchMatView::shmem_size(nsupports, nsupports) * 2; // U, Ut // total_shared_size += ScratchVecView::shmem_size(max_size); // total_shared_size += ScratchVecView::shmem_size(min_size); shmem_each_team(i) = total_shared_size; diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 210b4dfb..f4eb6c26 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -357,11 +357,11 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, int m = matrix.extent(0); int n = matrix.extent(1); - ScratchMatView scaled_matrix(team.team_scratch(0), n, m); + ScratchMatView scaled_matrix(team.team_scratch(1), n, m); - ScratchMatView square_matrix(team.team_scratch(0), n, n); + ScratchMatView square_matrix(team.team_scratch(1), n, n); - ScratchVecView transformed_rhs(team.team_scratch(0), n); + ScratchVecView transformed_rhs(team.team_scratch(1), n); // performing P^T Q Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { @@ -428,50 +428,55 @@ void solve_matrix(const ScratchMatView& square_matrix, } KOKKOS_INLINE_FUNCTION -void solve_matrix_serial_svd(member_type team, int league_rank, - const ScratchVecView& rhs_values, - ScratchMatView& matrix, - ScratchMatView& solution_vector) +void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight, + ScratchVecView rhs_values, ScratchMatView matrix, + ScratchVecView solution_vector) { int row = matrix.extent(0); int column = matrix.extent(1); - int weight_size = solution.size(); + int weight_size = weight.size(); + OMEGA_H_CHECK_PRINTF( + weight_size == row, + "the size of the weight vector should be equal to the row of the matrix\n" + "weight vector size = %d, row of matrix = %d\n", + weight_size, row); // find_sq_root_each(team, weight); - // Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { - // eval_row_scaling(team, league_rank, weight, matrix); - // }); - // + eval_row_scaling(team, weight, matrix); + + team.team_barrier(); + + eval_rhs_scaling(team, weight, rhs_values); // A = UEV^T // initilize U (orthogonal matrices) array - ScratchMatView U(team.team_scratch(0), row, row); + ScratchMatView U(team.team_scratch(1), row, row); fill(-5.0, team, U); // initialize Vt - ScratchMatView Vt(team.team_scratch(0), column, column); + ScratchMatView Vt(team.team_scratch(1), column, column); fill(-5.0, team, Vt); - int maxrank = std::min(row, column); + // int maxrank = std::min(row, column); - ScratchVecView sigma(team.team_scratch(0), maxrank); + ScratchVecView sigma(team.team_scratch(1), column); fill(-5.0, team, sigma); - ScratchVecView work(team.team_scratch(0), std::max(row, column)); + ScratchVecView work(team.team_scratch(1), row); fill(-5.0, team, work); // sigma_mat_inv - ScratchMatView temp_matrix(team.team_scratch(0), column, row); + ScratchMatView temp_matrix(team.team_scratch(1), column, row); fill(0, team, temp_matrix); - ScratchMatView sigmaInvUtMul(team.team_scratch(0), column, row); + ScratchMatView sigmaInvUtMul(team.team_scratch(1), column, row); fill(0, team, sigmaInvUtMul); // for serial run - if (league_rank == 0) { + if (team.league_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, sigma, Vt, work); find_inverse_each(team, sigma); @@ -498,337 +503,326 @@ void solve_matrix_serial_svd(member_type team, int league_rank, KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, sigmaInvUtMul, rhs_values, 0.0, solution_vector); } +} +KOKKOS_INLINE_FUNCTION +void solve_weighted_matrix_serial_qr(const member_type& team, + const int league_rank, + ScratchVecView weight, + ScratchMatView& matrix, + ScratchVecView& rhs_values) +{ - KOKKOS_INLINE_FUNCTION - void solve_weighted_matrix_serial_qr( - member_type team, int league_rank, ScratchVecView& weight, - ScratchMatView& matrix, ScratchVecView& rhs_values) - { - - int row = matrix.extent(0); + int row = matrix.extent(0); - int column = matrix.extent(1); + int column = matrix.extent(1); - int weight_size = weight.size(); + int weight_size = weight.size(); - find_sq_root_each(team, weight); + find_sq_root_each(team, weight); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { - eval_row_scaling(team, league_rank, weight, matrix); - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), + [=](int i) { eval_row_scaling(team, weight, matrix); }); - // initilize tau array - ScratchVecView tau(team.team_scratch(0), column); - fill(0.0, team, tau); + // initilize tau array + ScratchVecView tau(team.team_scratch(1), column); + fill(0.0, team, tau); - // initialize work array - ScratchVecView work(team.team_scratch(0), column); - fill(0.0, team, work); + // initialize work array + ScratchVecView work(team.team_scratch(1), column); + fill(0.0, team, work); - // for serial run - if (league_rank == 0) { + // for serial run + if (league_rank == 0) { - // compute QR factorization of matrix and stores the R in A and - // tau is used to recover Q - KokkosBatched::SerialQR::invoke( - matrix, tau, work); + // compute QR factorization of matrix and stores the R in A and + // tau is used to recover Q + KokkosBatched::SerialQR::invoke(matrix, + tau, work); - // b = Q^{T}b + // b = Q^{T}b - KokkosBatched::SerialApplyQ< - KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, - KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, - work); + KokkosBatched::SerialApplyQ< + KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, + KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, + work); - // b = R^{-1}b - KokkosBatched::SerialTrsv< - KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Diag::NonUnit, - KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); - } + // b = R^{-1}b + KokkosBatched::SerialTrsv< + KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Diag::NonUnit, + KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); } +} - /** - * @brief Maps the data from source mesh to target mesh - * - * @param source_values Scalar array view of source field values - * @param source_coordinates Scalar array view of the coordinates of control - * points of source field - * @param target_coordinates Scalar array view of the coordinates of control - * points of target field - * @param support The object that enpasulates support info - * @param dim The dimension of the simulations - * @param degree The degree of the interpolation order - * @param rbf_func The radial basis function choice - * @param[in/out] Scalar array view of the interpolated field in target mesh - * - * - */ - template - void mls_interpolation(RealConstDefaultScalarArrayView source_values, - RealConstDefaultScalarArrayView source_coordinates, - RealConstDefaultScalarArrayView target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func, - RealDefaultScalarArrayView approx_target_values) - { - PCMS_FUNCTION_TIMER; - static_assert(std::is_invocable_r_v, - "rbf_func, takes radius and cutoff, returns weight"); - static_assert(!std::is_pointer_v, - "function pointer will fail in GPU execution context"); - - int nsources = source_coordinates.size() / dim; - - OMEGA_H_CHECK_PRINTF( - source_values.size() == nsources, - "[ERROR] The size of the source values and source coordinates is not " - "same. " - "The current sizes are :\n" - "source_values_size = %d, source_coordinates_size = %d\n", - source_values.size(), nsources); - - const auto ntargets = target_coordinates.size() / dim; - - OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets, - "[ERROR] The size of the approx target values and the " - "number of targets is " - "not same. The current numbers are :\n" - "approx_target_values = %d, ntargets = %d\n", - approx_target_values.size(), ntargets); - - IntHostMatView host_slice_length( - "stores slice length of polynomial basis in host", degree, dim); - - Kokkos::deep_copy(host_slice_length, 0); - - calculate_basis_slice_lengths(host_slice_length); - - auto basis_size = calculate_basis_vector_size(host_slice_length); - - IntDeviceMatView slice_length( - "stores slice length of polynomial basis in device", degree, dim); - - auto slice_length_hd = Kokkos::create_mirror_view(slice_length); - Kokkos::deep_copy(slice_length_hd, host_slice_length); - Kokkos::deep_copy(slice_length, slice_length_hd); - - int shared_size = - calculate_scratch_shared_size(support, ntargets, basis_size, dim); - - team_policy tp(ntargets, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - printf("Scratch Size = %d\n", scratch_size); - printf("Shared Size = %d\n", shared_size); - PCMS_ALWAYS_ASSERT(scratch_size > shared_size); - - printf("does it fail or below\n"); - // calculates the interpolated values - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int league_rank = team.league_rank(); - int start_ptr = support.supports_ptr[league_rank]; - int end_ptr = support.supports_ptr[league_rank + 1]; - int nsupports = end_ptr - start_ptr; - - // local_source_point stores the coordinates of source supports of a - // given target - ScratchMatView local_source_points(team.team_scratch(0), nsupports, - dim); - - // rbf function values of source supports Phi(n,n) - ScratchVecView phi_vector(team.team_scratch(0), nsupports); - - // vondermonde matrix P from the vectors of basis vector of supports - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - basis_size); - // stores known vector (b) - ScratchVecView support_values(team.team_scratch(0), nsupports); - - // basis of target - ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); - - // Initialize the scratch matrices and vectors - fill(0.0, team, local_source_points); - fill(0.0, team, vandermonde_matrix); - fill(0.0, team, phi_vector); - fill(0.0, team, support_values); - fill(0.0, team, target_basis_vector); - - // storing the coords of local supports - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } +/** + * @brief Maps the data from source mesh to target mesh + * + * @param source_values Scalar array view of source field values + * @param source_coordinates Scalar array view of the coordinates of control + * points of source field + * @param target_coordinates Scalar array view of the coordinates of control + * points of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param rbf_func The radial basis function choice + * @param[in/out] Scalar array view of the interpolated field in target mesh + * + * + */ +template +void mls_interpolation(RealConstDefaultScalarArrayView source_values, + RealConstDefaultScalarArrayView source_coordinates, + RealConstDefaultScalarArrayView target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Func rbf_func, + RealDefaultScalarArrayView approx_target_values) +{ + PCMS_FUNCTION_TIMER; + static_assert(std::is_invocable_r_v, + "rbf_func, takes radius and cutoff, returns weight"); + static_assert(!std::is_pointer_v, + "function pointer will fail in GPU execution context"); + + int nsources = source_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF( + source_values.size() == nsources, + "[ERROR] The size of the source values and source coordinates is not " + "same. " + "The current sizes are :\n" + "source_values_size = %d, source_coordinates_size = %d\n", + source_values.size(), nsources); + + const auto ntargets = target_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets, + "[ERROR] The size of the approx target values and the " + "number of targets is " + "not same. The current numbers are :\n" + "approx_target_values = %d, ntargets = %d\n", + approx_target_values.size(), ntargets); + + IntHostMatView host_slice_length( + "stores slice length of polynomial basis in host", degree, dim); + + Kokkos::deep_copy(host_slice_length, 0); + + calculate_basis_slice_lengths(host_slice_length); + + auto basis_size = calculate_basis_vector_size(host_slice_length); + + IntDeviceMatView slice_length( + "stores slice length of polynomial basis in device", degree, dim); + + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + + int shared_size = + calculate_scratch_shared_size(support, ntargets, basis_size, dim); + + team_policy tp(ntargets, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(1); + printf("Scratch Size = %d\n", scratch_size); + printf("Shared Size = %d\n", shared_size); + PCMS_ALWAYS_ASSERT(scratch_size > shared_size); + + printf("does it fail or below\n"); + // calculates the interpolated values + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int league_rank = team.league_rank(); + int start_ptr = support.supports_ptr[league_rank]; + int end_ptr = support.supports_ptr[league_rank + 1]; + int nsupports = end_ptr - start_ptr; + + // local_source_point stores the coordinates of source supports of a + // given target + ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim); + + // rbf function values of source supports Phi(n,n) + ScratchVecView phi_vector(team.team_scratch(1), nsupports); + + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(1), nsupports, + basis_size); + // stores known vector (b) + ScratchVecView support_values(team.team_scratch(1), nsupports); + + // basis of target + ScratchVecView target_basis_vector(team.team_scratch(1), basis_size); + + // solution coefficients (solution vector) + // + ScratchVecView solution_coefficients(team.team_scratch(1), basis_size); + // Initialize the scratch matrices and vectors + fill(0.0, team, local_source_points); + fill(0.0, team, vandermonde_matrix); + fill(0.0, team, phi_vector); + fill(0.0, team, support_values); + fill(0.0, team, target_basis_vector); + fill(0.0, team, solution_coefficients); + + // storing the coords of local supports + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; } + } - // evaluates the basis vector of a given target point - Coord target_point; - target_point.x = target_coordinates[league_rank * dim]; - target_point.y = target_coordinates[league_rank * dim + 1]; + // evaluates the basis vector of a given target point + Coord target_point; + target_point.x = target_coordinates[league_rank * dim]; + target_point.y = target_coordinates[league_rank * dim + 1]; - if (dim == 3) { - target_point.z = target_coordinates[league_rank * dim + 2]; - } + if (dim == 3) { + target_point.z = target_coordinates[league_rank * dim + 2]; + } - /** phi(nsupports) is the array of rbf functions evaluated at the - * source supports In the actual implementation, Phi(nsupports, - * nsupports) is the diagonal matrix & each diagonal element is the phi - * evaluated at each source points - * - * step 1: evaluate the phi vector with the original dimension - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - compute_phi_vector(target_point, local_source_points, j, - support.radii2[league_rank], rbf_func, - phi_vector); - }); - - team.team_barrier(); - - /** support_values(nsupports) (or known rhs vector b) is the vector of - * the quantity that we want interpolate - * - * - * step 4: find local supports function values - */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { - support_values(j) = - source_values[support.supports_idx[start_ptr + j]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), - "ERROR: NaN found: at support %d\n", j); - }); - - team.team_barrier(); - - /** - * - * the local_source_points is of the type ScratchMatView with - * coordinates information; row is number of local supports - * & column is dim - * - * step 2: normalize local source supports and target point - */ - - // normalize_supports(team, target_point, local_source_points); - - // team.team_barrier(); - /** - * - * this can evaluate monomial basis vector for any degree of polynomial - * step 3: call basis vector evaluation function (eval_basis_vector); - */ - eval_basis_vector(slice_length, target_point, target_basis_vector); - - /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is - * created with the basis vector of source supports stacking on top of - * each other - * - * step 4: create vandermonde matrix - */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - create_vandermonde_matrix(local_source_points, j, slice_length, - vandermonde_matrix); - }); - - team.team_barrier(); - - OMEGA_H_CHECK_PRINTF( - support.radii2[league_rank] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - support.radii2[league_rank]); - - // convert normal equation to Ax = b where A is a square matrix - // auto result = convert_normal_equation(vandermonde_matrix, - // phi_vector, - // support_values, team); - - // team.team_barrier(); - - // It stores the solution in rhs vector itself - // solve_matrix(result.square_matrix, result.transformed_rhs, - // team); - - // solve_weighted_matrix_serial_qr(team, league_rank, phi_vector, - // vandermonde_matrix, - // support_values); - - solve_matrix_serial_svd(team, league_rank, phi_vector, - vandermonde_matrix, support_values); - team.team_barrier(); - - double target_value = KokkosBlas::Experimental::dot( - team, solution_coefficients, target_basis_vector); - - if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", - league_rank); - approx_target_values[league_rank] = target_value; - } - }); - } + /** phi(nsupports) is the array of rbf functions evaluated at the + * source supports In the actual implementation, Phi(nsupports, + * nsupports) is the diagonal matrix & each diagonal element is the phi + * evaluated at each source points + * + * step 1: evaluate the phi vector with the original dimension + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + compute_phi_vector(target_point, local_source_points, j, + support.radii2[league_rank], rbf_func, phi_vector); + }); + + team.team_barrier(); + + /** support_values(nsupports) (or known rhs vector b) is the vector of + * the quantity that we want interpolate + * + * + * step 4: find local supports function values + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { + support_values(j) = + source_values[support.supports_idx[start_ptr + j]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), + "ERROR: NaN found: at support %d\n", j); + }); + + team.team_barrier(); + + /** + * + * the local_source_points is of the type ScratchMatView with + * coordinates information; row is number of local supports + * & column is dim + * + * step 2: normalize local source supports and target point + */ + + normalize_supports(team, target_point, local_source_points); + + team.team_barrier(); + /** + * + * this can evaluate monomial basis vector for any degree of polynomial + * step 3: call basis vector evaluation function (eval_basis_vector); + */ + eval_basis_vector(slice_length, target_point, target_basis_vector); + + /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is + * created with the basis vector of source supports stacking on top of + * each other + * + * step 4: create vandermonde matrix + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + create_vandermonde_matrix(local_source_points, j, slice_length, + vandermonde_matrix); + }); + + team.team_barrier(); + + OMEGA_H_CHECK_PRINTF( + support.radii2[league_rank] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + support.radii2[league_rank]); + + solve_matrix_serial_svd(team, phi_vector, support_values, + vandermonde_matrix, solution_coefficients); + team.team_barrier(); + + double target_value = KokkosBlas::Experimental::dot( + team, solution_coefficients, target_basis_vector); + + if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", + league_rank); + approx_target_values[league_rank] = target_value; + } + }); +} - /** - * @brief \overload - * Maps the data from source mesh to target mesh - * - * @param source_values Read array of Source field values - * @param source_coordinates Read array of the coordinates of control points - * of source field - * @param target_coordinates Read array of the coordinates of control points - * of target field - * @param support The object that enpasulates support info - * @param dim The dimension of the simulations - * @param degree The degree of the interpolation order - * @param rbf_func The radial basis function choice - * @return Write array of the interpolated field in target mesh - * - * - */ - template - Write mls_interpolation( - const Reals source_values, const Reals source_coordinates, - const Reals target_coordinates, const SupportResults& support, - const LO& dim, const LO& degree, Func rbf_func) - { - const auto nsources = source_coordinates.size() / dim; - - const auto ntargets = target_coordinates.size() / dim; - - RealConstDefaultScalarArrayView source_values_array_view( - source_values.data(), source_values.size()); - - RealConstDefaultScalarArrayView source_coordinates_array_view( - source_coordinates.data(), source_coordinates.size()); - - RealConstDefaultScalarArrayView target_coordinates_array_view( - target_coordinates.data(), target_coordinates.size()); - - RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), - support.radii2.size()); - - Write interpolated_values(ntargets, 0, "approximated target values"); - - RealDefaultScalarArrayView interpolated_values_array_view( - interpolated_values.data(), interpolated_values.size()); - - mls_interpolation(source_values_array_view, source_coordinates_array_view, - target_coordinates_array_view, support, dim, degree, - rbf_func, interpolated_values_array_view); - - return interpolated_values; - } +/** + * @brief \overload + * Maps the data from source mesh to target mesh + * + * @param source_values Read array of Source field values + * @param source_coordinates Read array of the coordinates of control points + * of source field + * @param target_coordinates Read array of the coordinates of control points + * of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param rbf_func The radial basis function choice + * @return Write array of the interpolated field in target mesh + * + * + */ +template +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Func rbf_func) +{ + const auto nsources = source_coordinates.size() / dim; + + const auto ntargets = target_coordinates.size() / dim; + + RealConstDefaultScalarArrayView source_values_array_view( + source_values.data(), source_values.size()); + + RealConstDefaultScalarArrayView source_coordinates_array_view( + source_coordinates.data(), source_coordinates.size()); + + RealConstDefaultScalarArrayView target_coordinates_array_view( + target_coordinates.data(), target_coordinates.size()); + + RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), + support.radii2.size()); + + Write interpolated_values(ntargets, 0, "approximated target values"); + + RealDefaultScalarArrayView interpolated_values_array_view( + interpolated_values.data(), interpolated_values.size()); + + mls_interpolation(source_values_array_view, source_coordinates_array_view, + target_coordinates_array_view, support, dim, degree, + rbf_func, interpolated_values_array_view); + + return interpolated_values; +} } // namespace detail } // namespace pcms diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index cd0fdc1f..202f7ec8 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -87,7 +87,7 @@ ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) fill(0.0, team, transMatrix); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { - transMatrix(j, i) = matrix(j, i); + transMatrix(i, j) = matrix(j, i); } }); return transMatrix; @@ -104,8 +104,8 @@ void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries, } KOKKOS_INLINE_FUNCTION -void eval_row_scaling(member_type team, const ScratchVecView& diagonal_entries, - ScratchMatView& matrix) +void eval_row_scaling(member_type team, ScratchVecView diagonal_entries, + ScratchMatView matrix) { int row = matrix.extent(0); @@ -122,27 +122,49 @@ void eval_row_scaling(member_type team, const ScratchVecView& diagonal_entries, for (int j = 0; j < column; ++j) { matrix(i, j) *= diagonal_entries(i); // scales each row of a matrix with a corresponding - // value in a digonal entries - }); + } // value in a digonal entries + }); +} + +KOKKOS_INLINE_FUNCTION +void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries, + ScratchVecView rhs_values) +{ + + int weight_size = diagonal_entries.size(); + int rhs_size = rhs_values.size(); + OMEGA_H_CHECK_PRINTF( + weight_size == rhs_size, + "[ERROR]: for row scaling the size of diagonal entries vector should be " + "equal or less than the row of the matrix which is to be scaled\n" + "size of weight vector = %d, size of a rhs vector = %d\n", + weight_size, rhs_size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, weight_size), [=](int i) { + rhs_values(i) *= diagonal_entries( + i); // scales each element of a rhs with a corresponding weights + }); } KOKKOS_INLINE_FUNCTION -void scale_and_adjust(member_type team, const ScratchVecView& diagonal_entries, ScratchMatView& matrixToScale, ScratchMatView& adjustedMatrix){ - size_t rowA = matrixToScale.extent(0); - size_t columnA = matrixToScale.extent(1); - - size_t rowB = adjustedMatrix.extent(0); - size_t columnB = adjustedMatrix.extent(0); - - OMEGA_CHECK(columnB == rowA) - eval_row_scaling(team, diagonal_entries, matrixtoScale); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { - for (int j = 0; j < columnB; == j) { - adjustedMatrix(i, j) = matrixtoScale(i, j); - } - }); +void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries, + ScratchMatView& matrixToScale, + ScratchMatView& adjustedMatrix) +{ + size_t rowA = matrixToScale.extent(0); + size_t colA = matrixToScale.extent(1); + + size_t rowB = adjustedMatrix.extent(0); + size_t colB = adjustedMatrix.extent(1); + + OMEGA_H_CHECK(colB == rowA); + eval_row_scaling(team, diagonal_entries, matrixToScale); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + for (int j = 0; j < colB; ++j) { + adjustedMatrix(i, j) = matrixToScale(i, j); + } + }); } -} // end namespace pcms -} // end namespace detail +} // namespace detail +} // namespace pcms #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 4ea80c35..188dc7ec 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -114,6 +114,7 @@ if(Catch2_FOUND) test_normalisation.cpp test_spr_meshfields.cpp test_serial_qr.cpp + test_svd_serial.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) From 646d89c9afa6c7ece908950f512d89b573e04340 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 11 Mar 2025 16:47:44 -0400 Subject: [PATCH 21/73] add test for svd --- test/test_svd_serial.cpp | 290 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 290 insertions(+) create mode 100644 test/test_svd_serial.cpp diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp new file mode 100644 index 00000000..b86a9e4a --- /dev/null +++ b/test/test_svd_serial.cpp @@ -0,0 +1,290 @@ +#include +#include +#include +#include +#include +#include +#include "KokkosBatched_SVD_Decl.hpp" +#include "KokkosBatched_SVD_Serial_Impl.hpp" +#include +#include +#include +#include + +using namespace std; +using namespace Omega_h; +using namespace pcms; + +TEST_CASE("test_serial_svd") +{ + + constexpr int row = 3; + constexpr int column = 3; + + double tolerance = 1e-4; + double A_data[row][column] = { + {4.0, 2.0, 3.0}, {3.0, 5.0, 1.0}, {2.0, 3.0, 6.0}}; + + double rhs_data[row] = {1.28571, 1.42857, 3.1428}; + + double expected_solution[column] = {-0.142857, 0.285714, + 0.428571}; // Approximate values + + SECTION("test_svd_factorization") + { + Kokkos::View result("result", row, column); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchMatView U(team.team_scratch(1), row, row); + ScratchMatView Vt(team.team_scratch(1), column, column); + ScratchVecView sigma(team.team_scratch(1), column); + ScratchVecView work(team.team_scratch(1), row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data[i][j]; + } + }); + + if (team.league_rank() == 0) { + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, + sigma, Vt, work); + } + team.team_barrier(); + ScratchMatView sigma_mat(team.team_scratch(1), row, column); + ScratchMatView Usigma(team.team_scratch(1), row, column); + detail::fill(0.0, team, Usigma); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + sigma_mat(i, j) = (i == j) ? sigma(i) : 0.0; + } + }); + team.team_barrier(); + + ScratchMatView reconstructedA(team.team_scratch(1), row, column); + if (team.league_rank() == 0) { + KokkosBatched::SerialGemm< + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat, + 0.0, Usigma); + + KokkosBatched::SerialGemm< + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0, + reconstructedA); + } + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + result(i, j) = reconstructedA(i, j); + } + }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < column; ++j) { + CHECK_THAT(host_result(i, j), + Catch::Matchers::WithinAbs(A_data[i][j], tolerance)); + } + } + } // end section + + SECTION("test_transpose_function") + { + + Kokkos::View result("result", row, row); + Kokkos::View transpose_expected("result", row, row); + Kokkos::deep_copy(result, 0.0); + Kokkos::deep_copy(transpose_expected, 0.0); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchMatView U(team.team_scratch(1), row, row); + ScratchMatView Vt(team.team_scratch(1), column, column); + ScratchVecView sigma(team.team_scratch(1), column); + ScratchVecView work(team.team_scratch(1), row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data[i][j]; + } + }); + + if (team.league_rank() == 0) { + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, + sigma, Vt, work); + } + team.team_barrier(); + // detail::fill(0.0, team, sigma_inv_expected); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < row; ++j) { + transpose_expected(i, j) = U(j, i); + } + }); + auto transposedU = detail::find_transpose(team, U); + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < row; ++j) { + result(i, j) = transposedU(i, j); + } + }); + }); + auto host_result = Kokkos::create_mirror_view(result); + auto host_expected = Kokkos::create_mirror_view(transpose_expected); + Kokkos::deep_copy(host_result, result); + Kokkos::deep_copy(host_expected, transpose_expected); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < row; ++j) { + CHECK_THAT(host_result(i, j), + Catch::Matchers::WithinAbs(host_expected(i, j), tolerance)); + } + } + + } // end section + + SECTION("test_row_scaling_function") + { + double diagonal_entries[row] = {2.0, 0.5, -1.0}; + double expected_result[row][column] = { + {8.0, 4.0, 6.0}, {1.5, 2.5, 0.5}, {-2.0, -3.0, -6.0}}; + + Kokkos::View result("result", row, column); + Kokkos::deep_copy(result, 0.0); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchVecView weight(team.team_scratch(1), row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data[i][j]; + } + weight(i) = diagonal_entries[i]; + }); + + team.team_barrier(); + + detail::eval_row_scaling(team, weight, A); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + result(i, j) = A(i, j); + } + }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < row; ++j) { + CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs( + expected_result[i][j], tolerance)); + } + } + + } // end section + + SECTION("test_scale_adjust_function") + { + constexpr int rowA = 6; // Matrix to scale (6x6) + constexpr int colA = 6; + constexpr int rowB = 3; // Adjusted matrix (3x6) + constexpr int colB = 6; + + double A_data[rowA][colA] = { + {4.0, 2.0, 3.0, 1.0, 0.0, 5.0}, {3.0, 5.0, 1.0, 4.0, 2.0, 6.0}, + {2.0, 3.0, 6.0, 5.0, 1.0, 7.0}, {7.0, 8.0, 9.0, 3.0, 2.0, 1.0}, + {1.0, 3.0, 2.0, 6.0, 4.0, 5.0}, {5.0, 7.0, 3.0, 8.0, 9.0, 6.0}}; + + double diagonal_entries_data[rowB] = {2.0, 0.5, -1.0}; + + double expected_result[rowB][colB] = {{8.0, 4.0, 6.0, 2.0, 0.0, 10.0}, + {1.5, 2.5, 0.5, 2.0, 1.0, 3.0}, + {-2.0, -3.0, -6.0, -5.0, -1.0, -7.0}}; + + Kokkos::View result("result", rowB, colB); + Kokkos::deep_copy(result, 0.0); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + tp.set_scratch_size(1, Kokkos::PerTeam(500)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), rowA, colA); + ScratchMatView result_matrix(team.team_scratch(1), rowB, colB); + ScratchVecView weight(team.team_scratch(1), rowB); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) { + for (int j = 0; j < colA; ++j) { + A(i, j) = A_data[i][j]; + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + weight(i) = diagonal_entries_data[i]; + }); + + team.team_barrier(); + + detail::scale_and_adjust(team, weight, A, result_matrix); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + for (int j = 0; j < colB; ++j) { + result(i, j) = result_matrix(i, j); + } + }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < row; ++j) { + CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs( + expected_result[i][j], tolerance)); + } + } + + } // end section + + SECTION("test_svd_solver") + { + Kokkos::View result("result", column); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(800)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchVecView rhs(team.team_scratch(1), row); + ScratchVecView x(team.team_scratch(1), column); + detail::fill(0.0, team, x); + + ScratchVecView weight(team.team_scratch(1), row); + detail::fill(1.0, team, weight); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data[i][j]; + } + rhs(i) = rhs_data[i]; + }); + + detail::solve_matrix_serial_svd(team, weight, rhs, A, x); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column), + [=](int i) { result(i) = x(i); }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < column; ++i) { + CHECK_THAT(host_result(i), + Catch::Matchers::WithinAbs(expected_solution[i], tolerance)); + } + } // end section +} From 3496b63b37a33568ddb46ce0dfff6acd212330fd Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 11 Mar 2025 16:49:47 -0400 Subject: [PATCH 22/73] add logger --- .../interpolator/pcms_interpolator_logger.hpp | 126 ++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 src/pcms/interpolator/pcms_interpolator_logger.hpp diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp new file mode 100644 index 00000000..c0bbf66a --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp @@ -0,0 +1,126 @@ +#ifndef PCMS_INTERPOLATOR_LOGGER_HPP +#define PCMS_INTERPOLATOR_LOGGER_HPP + +#include "pcms_interpolator_aliases.hpp" + +namespace pcms +{ +enum class LogLevel +{ + INFO, + WARNING, + ERROR, + DEBUG +}; + +class Logger +{ +private: + size_t selected_league_rank_; + +public: + KOKKOS_INLINE_FUNCTION + explicit Logger(size_t selected_league_rank) + : selected_league_rank_(selected_league_rank) + { + } + + // log level info + template + KOKKOS_INLINE_FUNCTION void log(const member_type& team, const LogLevel level, + const char* fmt, Args... args) + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + printf("[%s] (League %d) ", logLevelToString(level), + team.league_rank()); + printf(fmt, args...); + printf("\n"); + }); + } + } + + KOKKOS_INLINE_FUNCTION + void logStruct(const member_type& team, const LogLevel level, const Coord& p, + const char* name) + { + + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + printf("[%s] (League %d) %s: ", logLevelToString(level), + team.league_rank(), name); + printf("(%12.6f, %12.6f)", p.x, p.y); + printf("\n"); + }); + } + } + // log scratch vector + KOKKOS_INLINE_FUNCTION + void logVector(const member_type& team, const LogLevel level, + const ScratchVecView& vector, const char* name) const + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + printf("[%s] (League %d) %s: ", logLevelToString(level), + team.league_rank(), name); + for (int i = 0; i < vector.size(); ++i) { + printf("%12.6f", vector(i)); + } + printf("\n"); + }); + } + } + + // log scratch vector + KOKKOS_INLINE_FUNCTION + void logMatrix(const member_type& team, const LogLevel level, + const ScratchMatView& matrix, const char* name) const + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + for (int i = 0; i < matrix.extent(0); ++i) { + for (int j = 0; j < matrix.extent(1); ++j) { + printf("%12.6f", matrix(i, j)); + } + printf("\n"); + } + + printf("\n"); + }); + } + } + + // log scalar + KOKKOS_INLINE_FUNCTION + void logScalar(const member_type& team, const LogLevel level, + const double value, const char* name) const + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + printf("[%s] (League %d) %s: ", logLevelToString(level), + team.league_rank(), name); + printf("%12.6f", value); + printf("\n"); + }); + } + } + +private: + KOKKOS_INLINE_FUNCTION + const char* logLevelToString(LogLevel loglevel) const + { + switch (loglevel) { + case LogLevel::INFO: return "INFO"; + + case LogLevel::WARNING: return "WARNING"; + + case LogLevel::ERROR: return "ERROR"; + + case LogLevel::DEBUG: return "DEBUG"; + } + } +}; +} // end namespace pcms +#endif From d5a331e9de84a7b2effa84dd5bf460dbe7b09313 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 11 Mar 2025 16:51:31 -0400 Subject: [PATCH 23/73] add logger file in CMakeList --- src/pcms/interpolator/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 10dd51a1..7d1d0178 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -9,6 +9,7 @@ set(PCMS_FIELD_TRANSFER_HEADERS multidimarray.hpp mls_interpolation.hpp pcms_interpolator_view_utils.hpp + pcms_interpolator_logger.hpp ) set(PCMS_FIELD_TRANSFER_SOURCES From 861d8b35b91f1e83cf5f9641109c359b599e064e Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 14 Mar 2025 14:58:08 -0400 Subject: [PATCH 24/73] svd implementation but broken --- src/pcms/interpolator/mls_interpolation.cpp | 14 +- src/pcms/interpolator/mls_interpolation.hpp | 3 +- .../interpolator/mls_interpolation_impl.hpp | 203 +++++++++++------- .../interpolator/pcms_interpolator_logger.hpp | 3 +- .../pcms_interpolator_view_utils.hpp | 9 + 5 files changed, 149 insertions(+), 83 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index d19e9fc6..ce472b34 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -24,7 +24,7 @@ struct RBF_GAUSSIAN // 'a' is a spreading factor/decay factor // the value of 'a' is higher if the data is localized // the value of 'a' is smaller if the data is farther - int a = 20; + int a = 2; double r = sqrt(r_sq); double rho = sqrt(rho_sq); @@ -101,6 +101,12 @@ struct RBF_CONST } }; +struct NoOp +{ + OMEGA_H_INLINE + double operator()(double, double) const { return 1.0; } +}; + Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, @@ -130,6 +136,12 @@ Write mls_interpolation(const Reals source_values, source_values, source_coordinates, target_coordinates, support, dim, degree, RBF_CONST{}); break; + + case RadialBasisFunction::NO_OP: + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, NoOp{}); + break; } return interpolated_values; diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 17b085aa..8e8218aa 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -11,7 +11,8 @@ enum class RadialBasisFunction : LO { RBF_GAUSSIAN = 0, RBF_C4, - RBF_CONST + RBF_CONST, + NO_OP }; diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index f4eb6c26..16ee43d3 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -18,11 +18,12 @@ #include "../assert.h" #include "../profile.h" #include "pcms_interpolator_view_utils.hpp" +#include "pcms_interpolator_logger.hpp" -#include //KokkosBlas::ApplyQ -#include //KokkosBlas::QR -#include //KokkosBlas::Trsv -#include //KokkosBlas::gemv +// #include //KokkosBlas::ApplyQ +// #include //KokkosBlas::QR +// #include //KokkosBlas::Trsv +#include //KokkosBlas::gemv #include "KokkosBatched_SVD_Decl.hpp" #include "KokkosBatched_SVD_Serial_Impl.hpp" #include @@ -428,9 +429,9 @@ void solve_matrix(const ScratchMatView& square_matrix, } KOKKOS_INLINE_FUNCTION -void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight, - ScratchVecView rhs_values, ScratchMatView matrix, - ScratchVecView solution_vector) +void solve_matrix_svd(member_type team, const ScratchVecView& weight, + ScratchVecView rhs_values, ScratchMatView matrix, + ScratchVecView solution_vector, double lambda = 0) { int row = matrix.extent(0); @@ -439,6 +440,8 @@ void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight, int weight_size = weight.size(); + printf("inside svd solver \n"); + Logger logger(21); OMEGA_H_CHECK_PRINTF( weight_size == row, "the size of the weight vector should be equal to the row of the matrix\n" @@ -447,15 +450,17 @@ void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight, // find_sq_root_each(team, weight); eval_row_scaling(team, weight, matrix); + logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : "); team.team_barrier(); eval_rhs_scaling(team, weight, rhs_values); + logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : "); // A = UEV^T // initilize U (orthogonal matrices) array ScratchMatView U(team.team_scratch(1), row, row); fill(-5.0, team, U); - + logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : "); // initialize Vt ScratchMatView Vt(team.team_scratch(1), column, column); fill(-5.0, team, Vt); @@ -472,87 +477,105 @@ void solve_matrix_serial_svd(member_type team, const ScratchVecView& weight, ScratchMatView temp_matrix(team.team_scratch(1), column, row); fill(0, team, temp_matrix); - ScratchMatView sigmaInvUtMul(team.team_scratch(1), column, row); - fill(0, team, sigmaInvUtMul); + ScratchMatView vsigmaInvUtMul(team.team_scratch(1), column, row); + fill(0, team, vsigmaInvUtMul); // for serial run - if (team.league_rank() == 0) { + // if (team.league_rank() == 0) { + if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, - sigma, Vt, work); - find_inverse_each(team, sigma); - auto Ut = find_transpose(team, U); - scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T + sigma, Vt, work, 1e-6); - // performing (S^-1 U^T) - // KokkosBatched::SerialGemm< - // KokkosBatched::Trans::NoTranspose, - // KokkosBatched::Trans::Transpose, - // KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, temp_matrix, U, - // 0.0, sigmaInvUtMul); - // } + logger.logMatrix(team, LogLevel::DEBUG, U, "U : "); + logger.logVector(team, LogLevel::DEBUG, sigma, "S : "); + logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : "); + // A = UEV^T + // find_inverse_each(team, sigma); - // perform V (S^-1 U^T) + calculate_shrinkage_factor(team, lambda, sigma); - KokkosBatched::SerialGemm< - KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0, - sigmaInvUtMul); + logger.logVector(team, LogLevel::DEBUG, sigma, + "S/(S^2 + gamma) (basis_size x nsupports) : "); - KokkosBlas::SerialGemv< - KokkosBlas::Trans::NoTranspose, - KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, sigmaInvUtMul, rhs_values, - 0.0, solution_vector); - } -} -KOKKOS_INLINE_FUNCTION -void solve_weighted_matrix_serial_qr(const member_type& team, - const int league_rank, - ScratchVecView weight, - ScratchMatView& matrix, - ScratchVecView& rhs_values) -{ - - int row = matrix.extent(0); - - int column = matrix.extent(1); + // A = UEV^T - int weight_size = weight.size(); - - find_sq_root_each(team, weight); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), - [=](int i) { eval_row_scaling(team, weight, matrix); }); - - // initilize tau array - ScratchVecView tau(team.team_scratch(1), column); - fill(0.0, team, tau); - - // initialize work array - ScratchVecView work(team.team_scratch(1), column); - fill(0.0, team, work); + auto Ut = find_transpose(team, U); - // for serial run - if (league_rank == 0) { + logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) : "); - // compute QR factorization of matrix and stores the R in A and - // tau is used to recover Q - KokkosBatched::SerialQR::invoke(matrix, - tau, work); + scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T - // b = Q^{T}b + logger.logMatrix(team, LogLevel::DEBUG, temp_matrix, + "S^-1Ut (basis_size x nsupports) : "); - KokkosBatched::SerialApplyQ< - KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, - KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, - work); + KokkosBatched::SerialGemm< + KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0, + vsigmaInvUtMul); - // b = R^{-1}b - KokkosBatched::SerialTrsv< - KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Diag::NonUnit, - KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); + logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul, + "VS^-1Ut (basis_size x nsupports) : "); + KokkosBlas::SerialGemv< + KokkosBlas::Trans::NoTranspose, + KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul, + rhs_values, 0.0, + solution_vector); + logger.logVector(team, LogLevel::DEBUG, solution_vector, + "VS^-1Ut (basis_size x nsupports) : "); } + printf("coming out form svd solver..\n"); } +// KOKKOS_INLINE_FUNCTION +// void solve_weighted_matrix_serial_qr(const member_type& team, +// const int league_rank, +// ScratchVecView weight, +// ScratchMatView& matrix, +// ScratchVecView& rhs_values) +//{ +// +// int row = matrix.extent(0); +// +// int column = matrix.extent(1); +// +// int weight_size = weight.size(); +// +// find_sq_root_each(team, weight); +// +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), +// [=](int i) { eval_row_scaling(team, weight, matrix); +// }); +// +// // initilize tau array +// ScratchVecView tau(team.team_scratch(1), column); +// fill(0.0, team, tau); +// +// // initialize work array +// ScratchVecView work(team.team_scratch(1), column); +// fill(0.0, team, work); +// +// // for serial run +// if (league_rank == 0) { +// +// // compute QR factorization of matrix and stores the R in A and +// // tau is used to recover Q +// KokkosBatched::SerialQR::invoke(matrix, +// tau, +// work); +// +// // b = Q^{T}b +// +// KokkosBatched::SerialApplyQ< +// KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, +// KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, +// work); +// +// // b = R^{-1}b +// KokkosBatched::SerialTrsv< +// KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, +// KokkosBatched::Diag::NonUnit, +// KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); +// } +// } /** * @brief Maps the data from source mesh to target mesh @@ -629,7 +652,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, printf("Shared Size = %d\n", shared_size); PCMS_ALWAYS_ASSERT(scratch_size > shared_size); - printf("does it fail or below\n"); // calculates the interpolated values Kokkos::parallel_for( "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)), @@ -638,7 +660,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, int start_ptr = support.supports_ptr[league_rank]; int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; - + printf("calculating for %d target point\n", league_rank); + Logger logger(21); + // logger.log(team, LogLevel::INFO, "The search starts from here....\n"); // local_source_point stores the coordinates of source supports of a // given target ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim); @@ -666,6 +690,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, fill(0.0, team, target_basis_vector); fill(0.0, team, solution_coefficients); + // logger.log(team, LogLevel::INFO, "The search starts from here....\n"); // storing the coords of local supports int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { @@ -678,7 +703,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, } } - // evaluates the basis vector of a given target point + logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + "The local source points (x, y, z):\n "); + Coord target_point; target_point.x = target_coordinates[league_rank * dim]; target_point.y = target_coordinates[league_rank * dim + 1]; @@ -687,6 +714,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, target_point.z = target_coordinates[league_rank * dim + 2]; } + logger.logStruct(team, LogLevel::DEBUG, target_point, + "Target Point before pivoting :\n"); /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, * nsupports) is the diagonal matrix & each diagonal element is the phi @@ -701,6 +730,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, support.radii2[league_rank], rbf_func, phi_vector); }); + logger.logVector(team, LogLevel::DEBUG, phi_vector, + "Phi Values: (phi)\n "); team.team_barrier(); /** support_values(nsupports) (or known rhs vector b) is the vector of @@ -718,6 +749,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, }); team.team_barrier(); + logger.logVector(team, LogLevel::DEBUG, support_values, + "Support Values (b):\n "); /** * @@ -729,15 +762,19 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, */ normalize_supports(team, target_point, local_source_points); - + logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + "Local Source points after pivoting"); team.team_barrier(); /** * + * evaluates the basis vector of a given target point Coord target_point; * this can evaluate monomial basis vector for any degree of polynomial * step 3: call basis vector evaluation function (eval_basis_vector); */ eval_basis_vector(slice_length, target_point, target_basis_vector); + logger.logVector(team, LogLevel::DEBUG, target_basis_vector, + "target basis vector after pivot : "); /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is * created with the basis vector of source supports stacking on top of * each other @@ -752,18 +789,24 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); + logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, + "Vandermonde matrix : "); + OMEGA_H_CHECK_PRINTF( support.radii2[league_rank] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", support.radii2[league_rank]); - solve_matrix_serial_svd(team, phi_vector, support_values, - vandermonde_matrix, solution_coefficients); + solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, + solution_coefficients); team.team_barrier(); + logger.logVector(team, LogLevel::DEBUG, solution_coefficients, + "Solution Vector : "); double target_value = KokkosBlas::Experimental::dot( team, solution_coefficients, target_basis_vector); + logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value : "); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp index c0bbf66a..d919ad58 100644 --- a/src/pcms/interpolator/pcms_interpolator_logger.hpp +++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp @@ -64,7 +64,7 @@ class Logger printf("[%s] (League %d) %s: ", logLevelToString(level), team.league_rank(), name); for (int i = 0; i < vector.size(); ++i) { - printf("%12.6f", vector(i)); + printf("%12.6f\n", vector(i)); } printf("\n"); }); @@ -119,6 +119,7 @@ class Logger case LogLevel::ERROR: return "ERROR"; case LogLevel::DEBUG: return "DEBUG"; + default: return "UNKNOWN"; } } }; diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index 202f7ec8..5dd419e8 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -77,6 +77,15 @@ void find_inverse_each(member_type team, ScratchVecView& array) }); } +KOKKOS_INLINE_FUNCTION +void calculate_shrinkage_factor(member_type team, double lambda, + ScratchVecView& sigma) +{ + int size = sigma.size(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + sigma(i) /= (sigma(i) * sigma(i) + lambda); + }); +} KOKKOS_INLINE_FUNCTION ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) { From 7d6976bd60978fdbac8f89b538db87d3ffe78848 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 14 Mar 2025 14:59:07 -0400 Subject: [PATCH 25/73] svd implementation but broken --- test/CMakeLists.txt | 4 ++-- test/test_rbf_interp.cpp | 3 ++- test/test_spr_meshfields.cpp | 20 ++++++++------------ test/test_svd_serial.cpp | 2 +- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 188dc7ec..f538c4af 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -113,8 +113,8 @@ if(Catch2_FOUND) test_linear_solver.cpp test_normalisation.cpp test_spr_meshfields.cpp - test_serial_qr.cpp - test_svd_serial.cpp + test_svd_serial.cpp + # test_serial_qr.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 1a92780a..e76091c6 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -41,9 +41,10 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, { int dim = mesh.dim(); - Real tolerance = 0.0005; + Real tolerance = 5e-4; std::vector rbf_types = { + // RadialBasisFunction::RBF_GAUSSIAN RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4, RadialBasisFunction::RBF_CONST diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp index c8757a36..16768b5a 100644 --- a/test/test_spr_meshfields.cpp +++ b/test/test_spr_meshfields.cpp @@ -65,14 +65,14 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values, { int dim = mesh.dim(); - Real tolerance = 1e-3; + Real tolerance = 0.0005; - Omega_h::Write ignored(patches.a2ab.size(), 1); + Omega_h::Write ignored(patches.a2ab.size() - 1, 1); SupportResults support{patches.a2ab, patches.ab2b, ignored}; auto approx_target_values = pcms::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, degree, - pcms::RadialBasisFunction::RBF_CONST); + pcms::RadialBasisFunction::NO_OP); const auto delta_abs = Omega_h::fabs_each( Omega_h::subtract_each(exact_target_values, read(approx_target_values))); @@ -92,7 +92,6 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values, REQUIRE(m == n); for (size_t i = 0; i < m; ++i) { - // std::cout << "vtx " << i << "\n"; // std::cout << "vtx " << i << "\n"; CHECK_THAT( host_exact_target_values[i], @@ -111,20 +110,18 @@ TEST_CASE("meshfields_spr_test") auto rank = lib.world()->rank(); const auto boxSize = 1.0; const auto nElms = 6; - // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms, - // nElms, 0, false); + // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms, + // nElms, 0, false); + // + Omega_h::Mesh mesh(&lib); const auto thwaitesMeshFile = "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh"; - Omega_h::Mesh mesh(&lib); Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh); - // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms, - // nElms, 0, false); std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts() << "\n"; const auto dim = mesh.dim(); - // const auto& target_coordinates = mesh.coords(); const auto& target_coordinates = mesh.coords(); const auto& nfaces = mesh.nfaces(); @@ -184,7 +181,6 @@ TEST_CASE("meshfields_spr_test") std::cerr << "minPatchSize " << minPatchSize << "\n"; auto patches = mesh.get_vtx_patches(minPatchSize); // print_patches(patches); - // print_patches(patches); Write source_values(nfaces, 0, "exact target values"); @@ -204,7 +200,7 @@ TEST_CASE("meshfields_spr_test") }); mesh.add_tag(OMEGA_H_VERT, "target_values", 1, read(exact_target_values)); - + std::cerr << "I am here above test\n"; test(mesh, patches, interp_degree, Reals(source_values), Reals(exact_target_values), Reals(source_coordinates), Reals(target_coordinates)); diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp index b86a9e4a..862ce412 100644 --- a/test/test_svd_serial.cpp +++ b/test/test_svd_serial.cpp @@ -275,7 +275,7 @@ TEST_CASE("test_serial_svd") rhs(i) = rhs_data[i]; }); - detail::solve_matrix_serial_svd(team, weight, rhs, A, x); + detail::solve_matrix_svd(team, weight, rhs, A, x); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column), [=](int i) { result(i) = x(i); }); }); From 826e82ccc9d8cf9998ac05252e47f222ad73dd41 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 17 Mar 2025 21:19:42 -0400 Subject: [PATCH 26/73] add ridge regularization --- .../interpolator/mls_interpolation_impl.hpp | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 5a79f64a..8ae936ce 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -231,6 +231,16 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } } +KOKKOS_INLINE_FUNCTION +void add_regularization(const member_type& team, ScratchMatView& square_matrix, + Real lambda_factor) +{ + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, square_matrix.extent(0)), + [=](int i) { + square_matrix(i, i) += lambda_factor; + }); +} + /** * @struct ResultConvertNormal * @brief Stores the results of matrix and vector transformations. @@ -283,7 +293,8 @@ KOKKOS_INLINE_FUNCTION ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, const ScratchVecView& weight_vector, const ScratchVecView& rhs, - member_type team) + member_type team, + double lambda_factor) { int m = matrix.extent(0); @@ -319,6 +330,10 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, team.team_barrier(); + add_regularization(team, square_matrix, lambda_factor); + + team.team_barrier(); + KokkosBlas::Experimental:: Gemv::invoke( team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs); @@ -419,7 +434,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, RealConstDefaultScalarArrayView target_coordinates, const SupportResults& support, const LO& dim, const LO& degree, Func rbf_func, - RealDefaultScalarArrayView approx_target_values) + RealDefaultScalarArrayView approx_target_values, + double lambda_factor) { PCMS_FUNCTION_TIMER; static_assert(std::is_invocable_r_v, @@ -550,8 +566,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - auto result = convert_normal_equation(vandermonde_matrix, phi_vector, - support_values, team); + auto result = convert_normal_equation( + vandermonde_matrix, phi_vector, support_values, team, lambda_factor); team.team_barrier(); @@ -593,7 +609,8 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func) + const LO& degree, Func rbf_func, + double lambda_factor) { const auto nvertices_source = source_coordinates.size() / dim; From c5de3e52237614abf4c3fdce6c6bc77cf7349ee0 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 17 Mar 2025 21:20:34 -0400 Subject: [PATCH 27/73] update lambda parameter in function argument --- src/pcms/interpolator/mls_interpolation.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 113986a1..3bbf9767 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -105,7 +105,8 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf) + const LO& degree, RadialBasisFunction bf, + double lambda_factor) { const auto nvertices_target = target_coordinates.size() / dim; @@ -116,19 +117,19 @@ Write mls_interpolation(const Reals source_values, case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_GAUSSIAN{}); + degree, RBF_GAUSSIAN{}, lambda_factor); break; case RadialBasisFunction::RBF_C4: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_C4{}); + degree, RBF_C4{}, lambda_factor); break; case RadialBasisFunction::RBF_CONST: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_CONST{}); + degree, RBF_CONST{}, lambda_factor); break; } From 7dc46693a531c4c94ae7fb41eae6d9cce22c4994 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 17 Mar 2025 21:21:20 -0400 Subject: [PATCH 28/73] add defaualt argument for lambda parameter --- src/pcms/interpolator/mls_interpolation.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 17b085aa..4a3a5503 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -19,6 +19,7 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf); + const LO& degree, RadialBasisFunction bf, + double lambda_factor = 0); } // namespace pcms #endif From 1b7388451e597576ea124067076c0fb6d0310c29 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 17 Mar 2025 22:37:04 -0400 Subject: [PATCH 29/73] update lambda --- src/pcms/interpolator/mls_interpolation_impl.hpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 8ae936ce..b8bd2553 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -236,9 +236,7 @@ void add_regularization(const member_type& team, ScratchMatView& square_matrix, Real lambda_factor) { Kokkos::parallel_for(Kokkos::TeamThreadRange(team, square_matrix.extent(0)), - [=](int i) { - square_matrix(i, i) += lambda_factor; - }); + [=](int i) { square_matrix(i, i) += lambda_factor; }); } /** @@ -636,7 +634,7 @@ Write mls_interpolation(const Reals source_values, mls_interpolation(source_values_array_view, source_coordinates_array_view, target_coordinates_array_view, support, dim, degree, - rbf_func, interpolated_values_array_view); + rbf_func, interpolated_values_array_view, lambda_factor); return interpolated_values; } From beb74d7de9f2f182c3d73e4cf5dc4021c04e16f9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 17 Mar 2025 22:37:35 -0400 Subject: [PATCH 30/73] update expected solution --- test/test_linear_solver.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp index 2f8664ec..e11e8fbb 100644 --- a/test/test_linear_solver.cpp +++ b/test/test_linear_solver.cpp @@ -103,8 +103,9 @@ TEST_CASE("solver test") team.team_barrier(); + double lambda = 0.5; auto result = convert_normal_equation(vandermonde_matrix, phi, - support_values, team); + support_values, team, lambda); team.team_barrier(); @@ -159,10 +160,10 @@ TEST_CASE("solver test") Kokkos::View expected_solution( "expected solution", nvertices_target, size); - expected_lhs_matrix(0, 0, 0) = 35.0; + expected_lhs_matrix(0, 0, 0) = 35.5; expected_lhs_matrix(0, 0, 1) = 44.0; expected_lhs_matrix(0, 1, 0) = 44.0; - expected_lhs_matrix(0, 1, 1) = 56.0; + expected_lhs_matrix(0, 1, 1) = 56.5; expected_rhs_vector(0, 0) = 76.0; expected_rhs_vector(0, 1) = 100.0; @@ -174,13 +175,13 @@ TEST_CASE("solver test") expected_scaled_matrix(0, 1, 1) = 4.0; expected_scaled_matrix(0, 1, 2) = 6.0; - expected_solution(0, 0) = -6.0; - expected_solution(0, 1) = 6.5; + expected_solution(0, 0) = -1.519713; + expected_solution(0, 1) = 2.953405; - expected_lhs_matrix(1, 0, 0) = 30.0; + expected_lhs_matrix(1, 0, 0) = 30.5; expected_lhs_matrix(1, 0, 1) = 20.0; expected_lhs_matrix(1, 1, 0) = 20.0; - expected_lhs_matrix(1, 1, 1) = 30.0; + expected_lhs_matrix(1, 1, 1) = 30.5; expected_rhs_vector(1, 0) = 70.0; expected_rhs_vector(1, 1) = 40.0; @@ -192,8 +193,8 @@ TEST_CASE("solver test") expected_scaled_matrix(1, 1, 1) = -2.0; expected_scaled_matrix(1, 1, 2) = 1.0; - expected_solution(1, 0) = 2.6; - expected_solution(1, 1) = -0.4; + expected_solution(1, 0) = 2.517680; + expected_solution(1, 1) = -0.339463; for (int i = 0; i < nvertices_target; ++i) { for (int j = 0; j < size; ++j) { @@ -203,7 +204,7 @@ TEST_CASE("solver test") i, j, l); REQUIRE_THAT( expected_scaled_matrix(i, j, l), - Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-10)); + Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-6)); } } for (int j = 0; j < size; ++j) { @@ -213,14 +214,14 @@ TEST_CASE("solver test") k); REQUIRE_THAT( expected_lhs_matrix(i, j, k), - Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-10)); + Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-6)); } } for (int j = 0; j < size; ++j) { printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j), host_result_rhs(i, j), i, j); REQUIRE_THAT(expected_rhs_vector(i, j), - Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-10)); + Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-6)); } for (int j = 0; j < size; ++j) { @@ -228,7 +229,7 @@ TEST_CASE("solver test") host_result_solution(i, j), i, j); REQUIRE_THAT( expected_solution(i, j), - Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-10)); + Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-6)); } } } From 8fae10b38d632ed8997483ed99df39a723b7925b Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 16:32:52 -0400 Subject: [PATCH 31/73] remove commented lines --- .../interpolator/mls_interpolation_impl.hpp | 91 +++++++++---------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 16ee43d3..600eee1f 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -20,9 +20,6 @@ #include "pcms_interpolator_view_utils.hpp" #include "pcms_interpolator_logger.hpp" -// #include //KokkosBlas::ApplyQ -// #include //KokkosBlas::QR -// #include //KokkosBlas::Trsv #include //KokkosBlas::gemv #include "KokkosBatched_SVD_Decl.hpp" #include "KokkosBatched_SVD_Serial_Impl.hpp" @@ -431,7 +428,7 @@ void solve_matrix(const ScratchMatView& square_matrix, KOKKOS_INLINE_FUNCTION void solve_matrix_svd(member_type team, const ScratchVecView& weight, ScratchVecView rhs_values, ScratchMatView matrix, - ScratchVecView solution_vector, double lambda = 0) + ScratchVecView solution_vector, double lambda = 1e-2) { int row = matrix.extent(0); @@ -440,8 +437,8 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, int weight_size = weight.size(); - printf("inside svd solver \n"); - Logger logger(21); + // printf("inside svd solver \n"); + // Logger logger(21); OMEGA_H_CHECK_PRINTF( weight_size == row, "the size of the weight vector should be equal to the row of the matrix\n" @@ -450,17 +447,17 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, // find_sq_root_each(team, weight); eval_row_scaling(team, weight, matrix); - logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : "); + // logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : "); team.team_barrier(); eval_rhs_scaling(team, weight, rhs_values); - logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : "); + // logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : "); // A = UEV^T // initilize U (orthogonal matrices) array ScratchMatView U(team.team_scratch(1), row, row); fill(-5.0, team, U); - logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : "); + // logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : "); // initialize Vt ScratchMatView Vt(team.team_scratch(1), column, column); fill(-5.0, team, Vt); @@ -486,44 +483,45 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, sigma, Vt, work, 1e-6); - logger.logMatrix(team, LogLevel::DEBUG, U, "U : "); - logger.logVector(team, LogLevel::DEBUG, sigma, "S : "); - logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : "); - // A = UEV^T - // find_inverse_each(team, sigma); + // logger.logMatrix(team, LogLevel::DEBUG, U, "U : "); + // logger.logVector(team, LogLevel::DEBUG, sigma, "S : "); + // logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : "); + // A = UEV^T + // find_inverse_each(team, sigma); calculate_shrinkage_factor(team, lambda, sigma); - logger.logVector(team, LogLevel::DEBUG, sigma, - "S/(S^2 + gamma) (basis_size x nsupports) : "); + // logger.logVector(team, LogLevel::DEBUG, sigma, + // "S/(S^2 + gamma) (basis_size x nsupports) : "); // A = UEV^T auto Ut = find_transpose(team, U); - logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) : "); + // logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) : + // "); scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T - logger.logMatrix(team, LogLevel::DEBUG, temp_matrix, - "S^-1Ut (basis_size x nsupports) : "); + // logger.logMatrix(team, LogLevel::DEBUG, temp_matrix, + // "S^-1Ut (basis_size x nsupports) : "); KokkosBatched::SerialGemm< KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0, vsigmaInvUtMul); - logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul, - "VS^-1Ut (basis_size x nsupports) : "); + // logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul, + // "VS^-1Ut (basis_size x nsupports) : "); KokkosBlas::SerialGemv< KokkosBlas::Trans::NoTranspose, KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul, rhs_values, 0.0, solution_vector); - logger.logVector(team, LogLevel::DEBUG, solution_vector, - "VS^-1Ut (basis_size x nsupports) : "); + // logger.logVector(team, LogLevel::DEBUG, solution_vector, + // "VS^-1Ut (basis_size x nsupports) : "); } - printf("coming out form svd solver..\n"); + // printf("coming out form svd solver..\n"); } // KOKKOS_INLINE_FUNCTION // void solve_weighted_matrix_serial_qr(const member_type& team, @@ -654,17 +652,17 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, // calculates the interpolated values Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(shared_size)), + "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(scratch_size)), KOKKOS_LAMBDA(const member_type& team) { int league_rank = team.league_rank(); int start_ptr = support.supports_ptr[league_rank]; int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; printf("calculating for %d target point\n", league_rank); - Logger logger(21); - // logger.log(team, LogLevel::INFO, "The search starts from here....\n"); - // local_source_point stores the coordinates of source supports of a - // given target + // Logger logger(21); + // logger.log(team, LogLevel::INFO, "The search starts from here....\n"); + // local_source_point stores the coordinates of source supports of a + // given target ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim); // rbf function values of source supports Phi(n,n) @@ -703,8 +701,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, } } - logger.logMatrix(team, LogLevel::DEBUG, local_source_points, - "The local source points (x, y, z):\n "); + // logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + // "The local source points (x, y, z):\n "); Coord target_point; target_point.x = target_coordinates[league_rank * dim]; @@ -714,8 +712,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, target_point.z = target_coordinates[league_rank * dim + 2]; } - logger.logStruct(team, LogLevel::DEBUG, target_point, - "Target Point before pivoting :\n"); + // logger.logStruct(team, LogLevel::DEBUG, target_point, + // "Target Point before pivoting :\n"); /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, * nsupports) is the diagonal matrix & each diagonal element is the phi @@ -730,8 +728,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, support.radii2[league_rank], rbf_func, phi_vector); }); - logger.logVector(team, LogLevel::DEBUG, phi_vector, - "Phi Values: (phi)\n "); + // logger.logVector(team, LogLevel::DEBUG, phi_vector, + // "Phi Values: (phi)\n "); team.team_barrier(); /** support_values(nsupports) (or known rhs vector b) is the vector of @@ -749,8 +747,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, }); team.team_barrier(); - logger.logVector(team, LogLevel::DEBUG, support_values, - "Support Values (b):\n "); + // logger.logVector(team, LogLevel::DEBUG, support_values, + // "Support Values (b):\n "); /** * @@ -762,8 +760,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, */ normalize_supports(team, target_point, local_source_points); - logger.logMatrix(team, LogLevel::DEBUG, local_source_points, - "Local Source points after pivoting"); + // logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + // "Local Source points after pivoting"); team.team_barrier(); /** * @@ -773,8 +771,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, */ eval_basis_vector(slice_length, target_point, target_basis_vector); - logger.logVector(team, LogLevel::DEBUG, target_basis_vector, - "target basis vector after pivot : "); + // logger.logVector(team, LogLevel::DEBUG, target_basis_vector, + // "target basis vector after pivot : "); /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is * created with the basis vector of source supports stacking on top of * each other @@ -789,8 +787,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, - "Vandermonde matrix : "); + // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, + // "Vandermonde matrix : "); OMEGA_H_CHECK_PRINTF( support.radii2[league_rank] > 0, @@ -800,13 +798,14 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, solution_coefficients); team.team_barrier(); - logger.logVector(team, LogLevel::DEBUG, solution_coefficients, - "Solution Vector : "); + // logger.logVector(team, LogLevel::DEBUG, solution_coefficients, + // "Solution Vector : "); double target_value = KokkosBlas::Experimental::dot( team, solution_coefficients, target_basis_vector); - logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value : "); + // logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value : + // "); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); From 11bd9017d997967b4144682d798878ca1c0c152d Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 16:43:08 -0400 Subject: [PATCH 32/73] remove print statements --- src/pcms/interpolator/mls_interpolation.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index ce472b34..f99515a8 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -24,7 +24,7 @@ struct RBF_GAUSSIAN // 'a' is a spreading factor/decay factor // the value of 'a' is higher if the data is localized // the value of 'a' is smaller if the data is farther - int a = 2; + int a = 5; double r = sqrt(r_sq); double rho = sqrt(rho_sq); @@ -272,12 +272,9 @@ Reals min_max_normalization(Reals& coordinates, int dim = 2) const auto max_x = Omega_h::get_max(read(x_coordinates)); const auto max_y = Omega_h::get_max(read(y_coordinates)); - printf(" [min_x, max_x] : [%12.6f , %12.6f]\n", min_x, max_x); - printf(" [min_y, max_y] : [%12.6f , %12.6f]\n", min_y, max_y); const auto del_x = max_x - min_x; const auto del_y = max_y - min_y; - printf(" [delx, dely] : [%12.6f , %12.6f]\n", del_x, del_y); Write normalized_coordinates(coords_size, 0, "stores scaled coordinates"); From 81dc8d4274340048d81d40292b0a092b4dd56eab Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 16:45:47 -0400 Subject: [PATCH 33/73] remove commented code --- src/pcms/interpolator/mls_interpolation_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 600eee1f..97153e61 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -15,8 +15,8 @@ #include #include "pcms_interpolator_aliases.hpp" #include "adj_search.hpp" -#include "../assert.h" -#include "../profile.h" +#include "pcms/assert.h" +#include "pcms/profile.h" #include "pcms_interpolator_view_utils.hpp" #include "pcms_interpolator_logger.hpp" From a6f3a075a48124ee406cf4244f89d4dd2ea468c4 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 16:47:59 -0400 Subject: [PATCH 34/73] remove note not required for svd --- src/pcms/interpolator/mls_interpolation_impl.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 97153e61..0a4be4a1 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -116,8 +116,6 @@ Reals min_max_normalization(Reals& coordinates, int dim); * @param[in] p A reference to the coordinate struct * @param[in,out] basis_vector The polynomial basis vector * - * @note the basis_vector size is more than basis_size - * to accomodate with the output from qr * */ KOKKOS_INLINE_FUNCTION From 5419b86dfbedd98776f249036b826b8904a4b5f7 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 16:48:36 -0400 Subject: [PATCH 35/73] update From 97c2f4d725add9c7f031f234cdc8c4fd089b3655 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 17:09:03 -0400 Subject: [PATCH 36/73] remove commented --- .../interpolator/mls_interpolation_impl.hpp | 122 ++---------------- test/test_spr_meshfields.cpp | 6 +- 2 files changed, 14 insertions(+), 114 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 0a4be4a1..742ba25a 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -435,143 +435,65 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, int weight_size = weight.size(); - // printf("inside svd solver \n"); - // Logger logger(21); OMEGA_H_CHECK_PRINTF( weight_size == row, "the size of the weight vector should be equal to the row of the matrix\n" "weight vector size = %d, row of matrix = %d\n", weight_size, row); - // find_sq_root_each(team, weight); eval_row_scaling(team, weight, matrix); - // logger.logMatrix(team, LogLevel::DEBUG, matrix, "QA : "); team.team_barrier(); eval_rhs_scaling(team, weight, rhs_values); - // logger.logVector(team, LogLevel::DEBUG, rhs_values, "Qb : "); - // A = UEV^T + // initilize U (orthogonal matrices) array ScratchMatView U(team.team_scratch(1), row, row); fill(-5.0, team, U); - // logger.logMatrix(team, LogLevel::DEBUG, U, "initialised U : "); + // initialize Vt ScratchMatView Vt(team.team_scratch(1), column, column); fill(-5.0, team, Vt); - // int maxrank = std::min(row, column); - + // initialize sigma ScratchVecView sigma(team.team_scratch(1), column); fill(-5.0, team, sigma); + // initialize work ScratchVecView work(team.team_scratch(1), row); fill(-5.0, team, work); - // sigma_mat_inv + // initialise sigma_mat_inv S^-1 ScratchMatView temp_matrix(team.team_scratch(1), column, row); fill(0, team, temp_matrix); + // initialise V S^-1 U^T ScratchMatView vsigmaInvUtMul(team.team_scratch(1), column, row); fill(0, team, vsigmaInvUtMul); - // for serial run - // if (team.league_rank() == 0) { if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, sigma, Vt, work, 1e-6); - // logger.logMatrix(team, LogLevel::DEBUG, U, "U : "); - // logger.logVector(team, LogLevel::DEBUG, sigma, "S : "); - // logger.logMatrix(team, LogLevel::DEBUG, Vt, "Vt : "); - // A = UEV^T - // find_inverse_each(team, sigma); - + // calculate sigma(i) / (sigma(i)^2 + lambda) calculate_shrinkage_factor(team, lambda, sigma); - // logger.logVector(team, LogLevel::DEBUG, sigma, - // "S/(S^2 + gamma) (basis_size x nsupports) : "); - - // A = UEV^T - auto Ut = find_transpose(team, U); - // logger.logMatrix(team, LogLevel::DEBUG, Ut, "Ut (nsupprts x nsupports) : - // "); - scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T - // logger.logMatrix(team, LogLevel::DEBUG, temp_matrix, - // "S^-1Ut (basis_size x nsupports) : "); - KokkosBatched::SerialGemm< KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0, vsigmaInvUtMul); - // logger.logMatrix(team, LogLevel::DEBUG, vsigmaInvUtMul, - // "VS^-1Ut (basis_size x nsupports) : "); KokkosBlas::SerialGemv< KokkosBlas::Trans::NoTranspose, KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul, rhs_values, 0.0, solution_vector); - // logger.logVector(team, LogLevel::DEBUG, solution_vector, - // "VS^-1Ut (basis_size x nsupports) : "); } - // printf("coming out form svd solver..\n"); } -// KOKKOS_INLINE_FUNCTION -// void solve_weighted_matrix_serial_qr(const member_type& team, -// const int league_rank, -// ScratchVecView weight, -// ScratchMatView& matrix, -// ScratchVecView& rhs_values) -//{ -// -// int row = matrix.extent(0); -// -// int column = matrix.extent(1); -// -// int weight_size = weight.size(); -// -// find_sq_root_each(team, weight); -// -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), -// [=](int i) { eval_row_scaling(team, weight, matrix); -// }); -// -// // initilize tau array -// ScratchVecView tau(team.team_scratch(1), column); -// fill(0.0, team, tau); -// -// // initialize work array -// ScratchVecView work(team.team_scratch(1), column); -// fill(0.0, team, work); -// -// // for serial run -// if (league_rank == 0) { -// -// // compute QR factorization of matrix and stores the R in A and -// // tau is used to recover Q -// KokkosBatched::SerialQR::invoke(matrix, -// tau, -// work); -// -// // b = Q^{T}b -// -// KokkosBatched::SerialApplyQ< -// KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, -// KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(matrix, tau, rhs_values, -// work); -// -// // b = R^{-1}b -// KokkosBatched::SerialTrsv< -// KokkosBatched::Uplo::Upper, KokkosBatched::Trans::NoTranspose, -// KokkosBatched::Diag::NonUnit, -// KokkosBlas::Algo::Trsv::Unblocked>::invoke(1.0, matrix, rhs_values); -// } -// } /** * @brief Maps the data from source mesh to target mesh @@ -581,7 +503,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, * points of source field * @param target_coordinates Scalar array view of the coordinates of control * points of target field - * @param support The object that enpasulates support info + * @param support The object that encapsulates support info * @param dim The dimension of the simulations * @param degree The degree of the interpolation order * @param rbf_func The radial basis function choice @@ -656,9 +578,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, int start_ptr = support.supports_ptr[league_rank]; int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; - printf("calculating for %d target point\n", league_rank); - // Logger logger(21); - // logger.log(team, LogLevel::INFO, "The search starts from here....\n"); + // local_source_point stores the coordinates of source supports of a // given target ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim); @@ -676,8 +596,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, ScratchVecView target_basis_vector(team.team_scratch(1), basis_size); // solution coefficients (solution vector) - // ScratchVecView solution_coefficients(team.team_scratch(1), basis_size); + // Initialize the scratch matrices and vectors fill(0.0, team, local_source_points); fill(0.0, team, vandermonde_matrix); @@ -686,7 +606,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, fill(0.0, team, target_basis_vector); fill(0.0, team, solution_coefficients); - // logger.log(team, LogLevel::INFO, "The search starts from here....\n"); // storing the coords of local supports int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { @@ -699,9 +618,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, } } - // logger.logMatrix(team, LogLevel::DEBUG, local_source_points, - // "The local source points (x, y, z):\n "); - + // target_point Coord target_point; target_point.x = target_coordinates[league_rank * dim]; target_point.y = target_coordinates[league_rank * dim + 1]; @@ -710,8 +627,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, target_point.z = target_coordinates[league_rank * dim + 2]; } - // logger.logStruct(team, LogLevel::DEBUG, target_point, - // "Target Point before pivoting :\n"); /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, * nsupports) is the diagonal matrix & each diagonal element is the phi @@ -726,8 +641,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, support.radii2[league_rank], rbf_func, phi_vector); }); - // logger.logVector(team, LogLevel::DEBUG, phi_vector, - // "Phi Values: (phi)\n "); team.team_barrier(); /** support_values(nsupports) (or known rhs vector b) is the vector of @@ -745,8 +658,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, }); team.team_barrier(); - // logger.logVector(team, LogLevel::DEBUG, support_values, - // "Support Values (b):\n "); /** * @@ -758,8 +669,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, */ normalize_supports(team, target_point, local_source_points); - // logger.logMatrix(team, LogLevel::DEBUG, local_source_points, - // "Local Source points after pivoting"); team.team_barrier(); /** * @@ -769,8 +678,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, */ eval_basis_vector(slice_length, target_point, target_basis_vector); - // logger.logVector(team, LogLevel::DEBUG, target_basis_vector, - // "target basis vector after pivot : "); /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is * created with the basis vector of source supports stacking on top of * each other @@ -785,9 +692,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, - // "Vandermonde matrix : "); - OMEGA_H_CHECK_PRINTF( support.radii2[league_rank] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", @@ -796,14 +700,10 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, solution_coefficients); team.team_barrier(); - // logger.logVector(team, LogLevel::DEBUG, solution_coefficients, - // "Solution Vector : "); double target_value = KokkosBlas::Experimental::dot( team, solution_coefficients, target_basis_vector); - // logger.logScalar(team, LogLevel::DEBUG, target_value, "target_value : - // "); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp index 16768b5a..535a9010 100644 --- a/test/test_spr_meshfields.cpp +++ b/test/test_spr_meshfields.cpp @@ -173,11 +173,11 @@ TEST_CASE("meshfields_spr_test") std::cerr << "start " << interp_degree << ", " << func_degree << " \n"; int minPatchSize; if (interp_degree == 1) - minPatchSize = 3; + minPatchSize = 4; if (interp_degree == 2) - minPatchSize = 8; // why so large? - if (interp_degree == 3) minPatchSize = 10; // why so large? + if (interp_degree == 3) + minPatchSize = 12; // why so large? std::cerr << "minPatchSize " << minPatchSize << "\n"; auto patches = mesh.get_vtx_patches(minPatchSize); // print_patches(patches); From 0a3cf0c195a45d0f369f0519ab830c3fee6e9ea0 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 21:26:55 -0400 Subject: [PATCH 37/73] generalize for any dimension --- .../interpolator/mls_interpolation_impl.hpp | 65 ++++++++----------- 1 file changed, 27 insertions(+), 38 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 742ba25a..3bcd8831 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -119,7 +119,7 @@ Reals min_max_normalization(Reals& coordinates, int dim); * */ KOKKOS_INLINE_FUNCTION -void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, +void eval_basis_vector(const IntDeviceMatView& slice_length, const double* p, ScratchVecView& basis_vector) { basis_vector(0) = 1; @@ -130,11 +130,9 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, int curr_col = 1; double point[MAX_DIM]; - point[0] = p.x; - point[1] = p.y; - if (dim == 3) { - point[2] = p.z; + for (int i = 0; i < dim; ++i) { + point[i] = p[i]; } for (int i = 0; i < degree; ++i) { @@ -166,7 +164,7 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, ** */ KOKKOS_INLINE_FUNCTION -void normalize_supports(member_type team, Coord& pivot, +void normalize_supports(member_type team, double* target_point, ScratchMatView& support_coordinates) { int nsupports = support_coordinates.extent(0); @@ -175,23 +173,13 @@ void normalize_supports(member_type team, Coord& pivot, Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int i) { Real pivot_point[MAX_DIM]; - pivot_point[0] = pivot.x; - pivot_point[1] = pivot.y; - - if (dim == 3) { - pivot_point[2] = pivot.z; - } - for (int j = 0; j < dim; ++j) { - support_coordinates(i, j) -= pivot_point[j]; + support_coordinates(i, j) -= target_point[j]; } }); - pivot.x = 0; - pivot.y = 0; - - if (dim == 3) { - pivot.z = 0; + for (int j = 0; j < dim; ++j) { + target_point[j] = 0.0; } } @@ -212,12 +200,11 @@ void create_vandermonde_matrix(const ScratchMatView& local_source_points, int j, int N = local_source_points.extent(0); int dim = local_source_points.extent(1); - Coord source_point; - source_point.x = local_source_points(j, 0); - source_point.y = local_source_points(j, 1); - if (dim == 3) { - source_point.z = local_source_points(j, 2); + double source_point[MAX_DIM] = {}; + for (int i = 0; i < dim; ++i) { + source_point[i] = local_source_points(j, i); } + ScratchVecView basis_vector_supports = Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL()); eval_basis_vector(slice_length, source_point, basis_vector_supports); @@ -243,13 +230,19 @@ template , bool> = true> KOKKOS_INLINE_FUNCTION void compute_phi_vector( - const Coord& target_point, const ScratchMatView& local_source_points, int j, + const double* target_point, const ScratchMatView& local_source_points, int j, double cuttoff_dis_sq, Func rbf_func, ScratchVecView phi) { int N = local_source_points.extent(0); - double dx = target_point.x - local_source_points(j, 0); - double dy = target_point.y - local_source_points(j, 1); - double ds_sq = dx * dx + dy * dy; + int dim = local_source_points.extent(1); + + double ds_sq = 0; + + double ds_sq = 0; + for (int i = 0; i < dim; ++i) { + double temp = target_point[i] - local_source_points(j, i); + ds_sq += temp * temp; + } phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); OMEGA_H_CHECK_PRINTF(!std::isnan(phi(j)), "ERROR: Phi(j) in compute_phi_vector is NaN for j = %d " @@ -611,20 +604,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, for (int j = start_ptr; j < end_ptr; ++j) { count++; auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; + + for (int i = 0; i < dim; ++i) { + local_source_points(count, i) = source_coordinates[index * dim + i]; } } - // target_point - Coord target_point; - target_point.x = target_coordinates[league_rank * dim]; - target_point.y = target_coordinates[league_rank * dim + 1]; + double target_point[MAX_DIM] = {}; - if (dim == 3) { - target_point.z = target_coordinates[league_rank * dim + 2]; + for (int i = 0; i < dim; ++i) { + target_point[i] = target_coordinates[league_rank * dim + i]; } /** phi(nsupports) is the array of rbf functions evaluated at the From 3c8d4921f9d6fc156f1c23527e87064b9a1e4ea3 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 21:34:48 -0400 Subject: [PATCH 38/73] remove min max normalization function --- src/pcms/interpolator/mls_interpolation.cpp | 48 --------------------- 1 file changed, 48 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index f99515a8..6bda3b86 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -224,14 +224,9 @@ int calculate_scratch_shared_size(const SupportResults& support, ScratchMatView::shmem_size(nsupports, dim); // local_source_points total_shared_size += ScratchMatView::shmem_size(nsupports, nsupports) * 2; // U, Ut - // total_shared_size += ScratchVecView::shmem_size(max_size); - // total_shared_size += ScratchVecView::shmem_size(min_size); shmem_each_team(i) = total_shared_size; }); - // namespace KE = Kokkos::Experimental; - // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(), - // shmem_each_team); printf("shared size = %d", shared_size); int shared_size = 0; Kokkos::parallel_reduce( "find_max", nvertices_target, @@ -245,48 +240,5 @@ int calculate_scratch_shared_size(const SupportResults& support, return shared_size; } -Reals min_max_normalization(Reals& coordinates, int dim = 2) -{ - int num_points = coordinates.size() / dim; - - int coords_size = coordinates.size(); - - Write x_coordinates(num_points, 0, "x coordinates"); - - Write y_coordinates(num_points, 0, "y coordinates"); - - parallel_for( - "separates x and y coordinates", num_points, KOKKOS_LAMBDA(const int id) { - int index = id * dim; - - x_coordinates[id] = coordinates[index]; - y_coordinates[id] = coordinates[index + 1]; - }); - - HostRead host_x(read(x_coordinates)); - HostRead host_y(read(y_coordinates)); - - const auto min_x = Omega_h::get_min(read(x_coordinates)); - const auto min_y = Omega_h::get_min(read(y_coordinates)); - - const auto max_x = Omega_h::get_max(read(x_coordinates)); - const auto max_y = Omega_h::get_max(read(y_coordinates)); - - const auto del_x = max_x - min_x; - const auto del_y = max_y - min_y; - - Write normalized_coordinates(coords_size, 0, - "stores scaled coordinates"); - - parallel_for( - "scale coordinates", num_points, KOKKOS_LAMBDA(const int id) { - int index = id * dim; - normalized_coordinates[index] = (x_coordinates[id] - min_x) / del_x; - normalized_coordinates[index + 1] = (y_coordinates[id] - min_y) / del_y; - }); - - return read(normalized_coordinates); -} - } // namespace detail } // namespace pcms From 1a80098b7447c264865adf913f6cf3e7022ccb44 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 23:00:21 -0400 Subject: [PATCH 39/73] add new argument --- src/pcms/interpolator/mls_interpolation.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 6bda3b86..a98ed14e 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -111,7 +111,8 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf) + const LO& degree, RadialBasisFunction bf, + double lambda) { const auto nvertices_target = target_coordinates.size() / dim; @@ -122,25 +123,25 @@ Write mls_interpolation(const Reals source_values, case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_GAUSSIAN{}); + degree, RBF_GAUSSIAN{}, lambda); break; case RadialBasisFunction::RBF_C4: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_C4{}); + degree, RBF_C4{}, lambda); break; case RadialBasisFunction::RBF_CONST: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_CONST{}); + degree, RBF_CONST{}, lambda); break; case RadialBasisFunction::NO_OP: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, NoOp{}); + degree, NoOp{}, lambda); break; } From 4ce25f56bd5ad466efefd2f87a6c6071927ed228 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 23:00:54 -0400 Subject: [PATCH 40/73] add new argument lambda --- src/pcms/interpolator/mls_interpolation.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 8e8218aa..8169d453 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -20,6 +20,7 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf); + const LO& degree, RadialBasisFunction bf, + double lambda = 0); } // namespace pcms #endif From 1a2511143386848ff7bdf62084f83e8e15d6360a Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 23:01:16 -0400 Subject: [PATCH 41/73] add new argument lambda --- src/pcms/interpolator/mls_interpolation_impl.hpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 3bcd8831..90ba7555 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -238,7 +238,6 @@ KOKKOS_INLINE_FUNCTION void compute_phi_vector( double ds_sq = 0; - double ds_sq = 0; for (int i = 0; i < dim; ++i) { double temp = target_point[i] - local_source_points(j, i); ds_sq += temp * temp; @@ -419,7 +418,7 @@ void solve_matrix(const ScratchMatView& square_matrix, KOKKOS_INLINE_FUNCTION void solve_matrix_svd(member_type team, const ScratchVecView& weight, ScratchVecView rhs_values, ScratchMatView matrix, - ScratchVecView solution_vector, double lambda = 1e-2) + ScratchVecView solution_vector, double lambda) { int row = matrix.extent(0); @@ -466,7 +465,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, - sigma, Vt, work, 1e-6); + sigma, Vt, work, 1e-8); // calculate sigma(i) / (sigma(i)^2 + lambda) calculate_shrinkage_factor(team, lambda, sigma); @@ -510,7 +509,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, RealConstDefaultScalarArrayView target_coordinates, const SupportResults& support, const LO& dim, const LO& degree, Func rbf_func, - RealDefaultScalarArrayView approx_target_values) + RealDefaultScalarArrayView approx_target_values, + double lambda) { PCMS_FUNCTION_TIMER; static_assert(std::is_invocable_r_v, @@ -687,7 +687,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, support.radii2[league_rank]); solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, - solution_coefficients); + solution_coefficients, lambda); team.team_barrier(); double target_value = KokkosBlas::Experimental::dot( @@ -723,7 +723,7 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func) + const LO& degree, Func rbf_func, double lambda) { const auto nsources = source_coordinates.size() / dim; @@ -748,7 +748,7 @@ Write mls_interpolation(const Reals source_values, mls_interpolation(source_values_array_view, source_coordinates_array_view, target_coordinates_array_view, support, dim, degree, - rbf_func, interpolated_values_array_view); + rbf_func, interpolated_values_array_view, lambda); return interpolated_values; } From 7b6e325421c8110f5100674a3ec2acc815d57997 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 23:03:27 -0400 Subject: [PATCH 42/73] add lambda --- test/test_mls_basis.cpp | 21 ++++++------ test/test_normalisation.cpp | 66 ++++-------------------------------- test/test_spr_meshfields.cpp | 2 +- test/test_svd_serial.cpp | 2 +- 4 files changed, 19 insertions(+), 72 deletions(-) diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp index 5664508d..d50ed4f8 100644 --- a/test/test_mls_basis.cpp +++ b/test/test_mls_basis.cpp @@ -47,13 +47,13 @@ TEST_CASE("basis test") KOKKOS_LAMBDA(const member_type& team) { int i = team.league_rank(); ScratchVecView basis_vector(team.team_scratch(0), size); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), - [=](int j) { basis_vector(j) = 0; }); + pcms::detail::fill(0, team, basis_vector); + + double target_point[MAX_DIM] = {}; - Coord target_point; - target_point.x = coords[i][0]; - target_point.y = coords[i][1]; - target_point.z = coords[i][2]; + for (int j = 0; j < dim; ++j) { + target_point[j] = coords[i][j]; + } detail::eval_basis_vector(d_array, target_point, basis_vector); @@ -145,10 +145,11 @@ TEST_CASE("basis test") Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { basis_vector(j) = 0; }); - Coord target_point; - target_point.x = coords[i][0]; - target_point.y = coords[i][1]; - target_point.z = coords[i][2]; + double target_point[MAX_DIM] = {}; + + for (int j = 0; j < dim; ++j) { + target_point[j] = coords[i][j]; + } detail::eval_basis_vector(d_array, target_point, basis_vector); diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp index 31e5727a..88773567 100644 --- a/test/test_normalisation.cpp +++ b/test/test_normalisation.cpp @@ -31,55 +31,6 @@ Reals fillFromMatrix(const Matrix<2, 10>& matrix) TEST_CASE("test_normalization_routine") { - SECTION("test_minmax_normalization") - { - - printf("-------------the test for min_max starts-----------\n"); - - Real tolerance = 1E-5; - int num_points = 10; - - Omega_h::Matrix<2, 10> matrix{{0.45, 3.18}, {0.12, -1.2}, {-2.04, -1.45}, - {1.52, -0.98}, {-0.22, 1.45}, {0.96, 3.62}, - {-0.26, -1.62}, {0.82, 4.22}, {5.6, 3.62}, - {1.2, 1.2}}; - - auto coordinates = fillFromMatrix(matrix); - - auto results = pcms::detail::min_max_normalization(coordinates, 2); - - printf("results after normalization"); - HostRead host_results(results); - - double expected_results[10][2] = { - {0.325916, 0.821918}, {0.282723, 0.071918}, {0.000000, 0.029110}, - {0.465969, 0.109589}, {0.238220, 0.525685}, {0.392670, 0.897260}, - {0.232984, 0.000000}, {0.374346, 1.000000}, {1.000000, 0.897260}, - {0.424084, 0.482877}}; - - printf("\n"); - - for (int i = 0; i < 20; ++i) { - printf("%12.6f\n", host_results[i]); - } - - for (int i = 0; i < 10; ++i) { - - int index = i * 2; - - CAPTURE(i, host_results[index], expected_results[i][0], tolerance); - CHECK_THAT(host_results[index], - Catch::Matchers::WithinAbs(expected_results[i][0], tolerance)); - - CAPTURE(i, host_results[index + 1], expected_results[i][1], tolerance); - CHECK_THAT(host_results[index + 1], - Catch::Matchers::WithinAbs(expected_results[i][1], tolerance)); - - printf("-------------the test for min_max starts-----------\n"); - } - - } // end SECTION - SECTION("test_normalization_relative_distance") { Real tolerance = 1E-5; @@ -129,10 +80,11 @@ TEST_CASE("test_normalization_routine") ScratchMatView local_supports(team.team_scratch(0), num_supports, dim); detail::fill(0, team, local_supports); - Coord target_point; - target_point.x = target_coordinates[league_rank][0]; - target_point.y = target_coordinates[league_rank][1]; - target_point.z = target_coordinates[league_rank][2]; + double target_point[MAX_DIM] = {}; + + for (int i = 0; i < dim; ++i) { + target_point[i] = target_coordinates[league_rank][i]; + } Kokkos::parallel_for( Kokkos::TeamThreadRange(team, num_supports), [=](int i) { @@ -162,13 +114,7 @@ TEST_CASE("test_normalization_routine") for (int j = 0; j < dim; ++j) { int index = league_rank * num_supports * dim + i * dim + j; normalized_support_coordinates[index] = local_supports(i, j); - if (j == 0) { - normalized_target_coordinates[index] = target_point.x; - } else if (j == 1) { - normalized_target_coordinates[index] = target_point.y; - } else { - normalized_target_coordinates[index] = target_point.z; - } + normalized_target_coordinates[index] = target_point[j]; } }); }); diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp index 535a9010..5b81bd7f 100644 --- a/test/test_spr_meshfields.cpp +++ b/test/test_spr_meshfields.cpp @@ -72,7 +72,7 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values, auto approx_target_values = pcms::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, degree, - pcms::RadialBasisFunction::NO_OP); + pcms::RadialBasisFunction::NO_OP, 1e-5); const auto delta_abs = Omega_h::fabs_each( Omega_h::subtract_each(exact_target_values, read(approx_target_values))); diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp index 862ce412..8d02de0e 100644 --- a/test/test_svd_serial.cpp +++ b/test/test_svd_serial.cpp @@ -275,7 +275,7 @@ TEST_CASE("test_serial_svd") rhs(i) = rhs_data[i]; }); - detail::solve_matrix_svd(team, weight, rhs, A, x); + detail::solve_matrix_svd(team, weight, rhs, A, x, 0); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column), [=](int i) { result(i) = x(i); }); }); From 7d46e8d22aabd64f365126f9a4290e81d6eaa79d Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 25 Mar 2025 23:21:00 -0400 Subject: [PATCH 43/73] delete file --- .../pcms_interpolator_array_ops.cpp | 20 ------------------- 1 file changed, 20 deletions(-) delete mode 100644 src/pcms/interpolator/pcms_interpolator_array_ops.cpp diff --git a/src/pcms/interpolator/pcms_interpolator_array_ops.cpp b/src/pcms/interpolator/pcms_interpolator_array_ops.cpp deleted file mode 100644 index 2233002a..00000000 --- a/src/pcms/interpolator/pcms_interpolator_array_ops.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP -#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP - -#include "pcms_interpolator_aliases.hpp" -#include -#include - -KOKKOS_INLINE_FUNCTION -void find_sq_root_each(ScratchVecView& array) -{ - int size = array.size(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { - OMEGA_CHECK_PRINTF(array(i) < 0, - "[Error:] Square root of a negative number is invalid!\n" - "value is %12.6f\d", - array(i)); - array(i) = sqrt(array(i)); - }); -} From 43c42efb610c52fc641c152a1b3d610ca795aef7 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:37:20 -0400 Subject: [PATCH 44/73] change MAX_DIM to 6 --- src/pcms/interpolator/mls_interpolation_impl.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 90ba7555..d451ca5d 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -26,7 +26,7 @@ #include #include -static constexpr int MAX_DIM = 3; +static constexpr int MAX_DIM = 6; /** * calculate_basis_slice_lengths, calculate_basis_vector_size and @@ -164,7 +164,7 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const double* p, ** */ KOKKOS_INLINE_FUNCTION -void normalize_supports(member_type team, double* target_point, +void normalize_supports(const member_type& team, double* target_point, ScratchMatView& support_coordinates) { int nsupports = support_coordinates.extent(0); @@ -465,7 +465,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, - sigma, Vt, work, 1e-8); + sigma, Vt, work, 1e-6); // calculate sigma(i) / (sigma(i)^2 + lambda) calculate_shrinkage_factor(team, lambda, sigma); From e8bb7fad35487d7d00036157b658dd190cbda73f Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:38:06 -0400 Subject: [PATCH 45/73] remove Omega_h_array_ops --- src/pcms/interpolator/pcms_interpolator_aliases.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp index 55a5f187..fd482371 100644 --- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -2,8 +2,8 @@ #define PCMS_INTERPOLATOR_ALIASES_HPP #include -#include "../arrays.h" -#include +#include "pcms/arrays.h" + namespace pcms { From ceae062bcf085e285bba4c661b593fc80c45c344 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:38:56 -0400 Subject: [PATCH 46/73] use kokkos sqrt function --- src/pcms/interpolator/pcms_interpolator_view_utils.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index 5dd419e8..8edd50f4 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -5,6 +5,7 @@ #include #include #include +#include namespace pcms { @@ -59,7 +60,7 @@ void find_sq_root_each(member_type team, ScratchVecView& array) "[Error:] Square root of a negative number is invalid!\n" "value is %12.6f\n", array(i)); - array(i) = sqrt(array(i)); + array(i) = Kokkos::sqrt(array(i)); }); } From 5337afac6b1e91a9340185ab20f01cb699b4354a Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:47:36 -0400 Subject: [PATCH 47/73] remove commented line --- test/CMakeLists.txt | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f538c4af..0510c846 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -104,18 +104,16 @@ if(Catch2_FOUND) add_executable(field_transfer_example field_transfer_example.cpp) target_link_libraries(field_transfer_example PUBLIC pcms::core pcms_interpolator) list(APPEND PCMS_UNIT_TEST_SOURCES - test_field_transfer.cpp - test_uniform_grid.cpp - test_omega_h_copy.cpp - test_point_search.cpp - test_mls_basis.cpp - test_rbf_interp.cpp - test_linear_solver.cpp - test_normalisation.cpp + test_field_transfer.cpp + test_uniform_grid.cpp + test_omega_h_copy.cpp + test_point_search.cpp + test_mls_basis.cpp + test_rbf_interp.cpp + test_linear_solver.cpp + test_normalisation.cpp test_spr_meshfields.cpp - test_svd_serial.cpp - # test_serial_qr.cpp - ) + test_svd_serial.cpp) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core pcms_interpolator) From 0b3b7d5615b8a7cd9ad6b0bb5209b3ca392f4af3 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:54:41 -0400 Subject: [PATCH 48/73] remove using namespace --- test/test_normalisation.cpp | 42 ++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp index 88773567..c2612e21 100644 --- a/test/test_normalisation.cpp +++ b/test/test_normalisation.cpp @@ -10,14 +10,11 @@ #include #include #include -using namespace std; -using namespace Omega_h; -using namespace pcms; -Reals fillFromMatrix(const Matrix<2, 10>& matrix) +Omega_h::Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix) { int num_points = 10; - Write array(20, 0, "array to store coordinates"); + Omega_h::Write array(20, 0, "array to store coordinates"); parallel_for( "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) { int index = i * 2; @@ -25,7 +22,7 @@ Reals fillFromMatrix(const Matrix<2, 10>& matrix) array[index] = matrix[i][0]; array[index + 1] = matrix[i][1]; }); - return read(array); + return Omega_h::read(array); } TEST_CASE("test_normalization_routine") @@ -33,7 +30,7 @@ TEST_CASE("test_normalization_routine") SECTION("test_normalization_relative_distance") { - Real tolerance = 1E-5; + Omega_h::Real tolerance = 1E-5; int nvertices_target = 3; Omega_h::Matrix<3, 3> target_coordinates{ @@ -59,16 +56,16 @@ TEST_CASE("test_normalization_routine") auto total_coordinates = Omega_h::get_sum(nsupports) * dim; REQUIRE(total_coordinates == 54); - Write normalized_support_coordinates( + Omega_h::Write normalized_support_coordinates( total_coordinates, 0, "coordinates after normalization"); - Write normalized_target_coordinates( + Omega_h::Write normalized_target_coordinates( total_coordinates, 0, "coordinates after normalization"); - Write all_support_coordinates(total_coordinates, 0, - "support coordinates all"); - Write all_target_coordinates(total_coordinates, 0, - "target coordinates all"); + Omega_h::Write all_support_coordinates( + total_coordinates, 0, "support coordinates all"); + Omega_h::Write all_target_coordinates( + total_coordinates, 0, "target coordinates all"); team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( @@ -77,7 +74,8 @@ TEST_CASE("test_normalization_routine") int league_rank = team.league_rank(); int num_supports = nsupports[league_rank]; - ScratchMatView local_supports(team.team_scratch(0), num_supports, dim); + pcms::ScratchMatView local_supports(team.team_scratch(0), num_supports, + dim); detail::fill(0, team, local_supports); double target_point[MAX_DIM] = {}; @@ -87,7 +85,7 @@ TEST_CASE("test_normalization_routine") } Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, num_supports), [=](int i) { + Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) { for (int j = 0; j < dim; ++j) { int index = league_rank * num_supports * dim + i * dim + j; @@ -110,7 +108,7 @@ TEST_CASE("test_normalization_routine") detail::normalize_supports(team, target_point, local_supports); Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, num_supports), [=](int i) { + Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) { for (int j = 0; j < dim; ++j) { int index = league_rank * num_supports * dim + i * dim + j; normalized_support_coordinates[index] = local_supports(i, j); @@ -120,16 +118,16 @@ TEST_CASE("test_normalization_routine") }); auto host_all_support_coordinates = - HostRead(read(all_support_coordinates)); + Omega_h::HostRead(Omega_h::read(all_support_coordinates)); auto host_all_target_coordinates = - HostRead(read(all_target_coordinates)); + Omega_h::HostRead(Omega_h::read(all_target_coordinates)); - auto host_normalized_support_coordinates = - HostRead(read(normalized_support_coordinates)); + auto host_normalized_support_coordinates = Omega_h::HostRead( + Omega_h::read(normalized_support_coordinates)); - auto host_normalized_target_coordinates = - HostRead(read(normalized_target_coordinates)); + auto host_normalized_target_coordinates = Omega_h::HostRead( + Omega_h::read(normalized_target_coordinates)); for (int i = 0; i < total_coordinates; ++i) { auto result_support = From c1d10f7e101381d6f0300bef690b08926d2597f6 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:55:26 -0400 Subject: [PATCH 49/73] remove commented line --- test/test_rbf_interp.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index e76091c6..85d9bef9 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -44,7 +44,6 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, Real tolerance = 5e-4; std::vector rbf_types = { - // RadialBasisFunction::RBF_GAUSSIAN RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4, RadialBasisFunction::RBF_CONST From f8cd5585d93f8b75eedfe5826722045979765174 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 14:56:30 -0400 Subject: [PATCH 50/73] remove test_serial_qr.cpp --- test/test_serial_qr.cpp | 147 ---------------------------------------- 1 file changed, 147 deletions(-) delete mode 100644 test/test_serial_qr.cpp diff --git a/test/test_serial_qr.cpp b/test/test_serial_qr.cpp deleted file mode 100644 index 10e8bffc..00000000 --- a/test/test_serial_qr.cpp +++ /dev/null @@ -1,147 +0,0 @@ -#include //KokkosBlas::ApplyQ -#include //KokkosBlas::QR -#include //KokkosBlas::Trsv -#include //KokkosBlas::Algo, KokkosBatched::Diag -#include //KokkosBlas::gemv -#include -#include -#include -#include -#include -#include -#include - -TEST_CASE("test_qr_serial") -{ - /* here is a test case run with Octave */ - static double const a_data[16][10] = { - {1.0000000e+00, -4.9112749e-02, -1.5629814e+00, -8.2662407e-02, - 7.6762316e-02, 1.2919981e-01, 4.0597781e-03, 2.4120621e-03, 2.4429110e+00, - 6.8330735e-03}, - {1.0000000e+00, -7.4718700e-01, 1.1447982e+00, -6.1608208e-01, - -8.5537834e-01, -7.0528966e-01, 4.6032852e-01, 5.5828842e-01, - 1.3105629e+00, 3.7955713e-01}, - {1.0000000e+00, -4.8564839e-01, -7.2143765e-01, -5.3574860e-02, - 3.5036503e-01, 3.8650921e-02, 2.6018545e-02, 2.3585436e-01, 5.2047228e-01, - 2.8702657e-03}, - {1.0000000e+00, -8.1013248e-01, -1.0234062e+00, 3.4345012e-01, - 8.2909460e-01, -3.5148898e-01, -2.7824010e-01, 6.5631463e-01, - 1.0473602e+00, 1.1795799e-01}, - {1.0000000e+00, -1.0508609e+00, -8.6973926e-01, 1.4570417e+00, - 9.1397495e-01, -1.2672464e+00, -1.5311481e+00, 1.1043086e+00, - 7.5644638e-01, 2.1229706e+00}, - {1.0000000e+00, -1.7802012e-01, 1.8697947e+00, 8.3576559e-01, - -3.3286107e-01, 1.5627100e+00, -1.4878309e-01, 3.1691163e-02, - 3.4961320e+00, 6.9850413e-01}, - {1.0000000e+00, -6.4594368e-01, -5.5999878e-01, 2.5848452e+00, - 3.6172768e-01, -1.4475102e+00, -1.6696645e+00, 4.1724324e-01, - 3.1359864e-01, 6.6814250e+00}, - {1.0000000e+00, -3.0007589e-01, -5.6481843e-01, -3.0670467e-01, - 1.6948839e-01, 1.7323245e-01, 9.2034678e-02, 9.0045542e-02, 3.1901985e-01, - 9.4067754e-02}, - {1.0000000e+00, 1.8896909e-01, 2.7899549e-01, -2.5907897e-01, 5.2721524e-02, - -7.2281865e-02, -4.8957918e-02, 3.5709317e-02, 7.7838484e-02, - 6.7121914e-02}, - {1.0000000e+00, 1.8586762e+00, -4.5760801e-01, -9.5892373e-02, - -8.5054510e-01, 4.3881118e-02, -1.7823287e-01, 3.4546770e+00, - 2.0940510e-01, 9.1953472e-03}, - {1.0000000e+00, -1.0915266e-01, 1.9050743e+00, 3.2218623e-01, - -2.0794394e-01, 6.1378871e-01, -3.5167484e-02, 1.1914304e-02, - 3.6293082e+00, 1.0380396e-01}, - {1.0000000e+00, -3.2178312e-01, -6.9392454e-01, -6.1112393e-01, - 2.2329320e-01, 4.2407389e-01, 1.9664936e-01, 1.0354437e-01, 4.8153127e-01, - 3.7347245e-01}, - {1.0000000e+00, 1.4229431e+00, 3.8000845e-01, -1.5989849e-01, 5.4073041e-01, - -6.0762776e-02, -2.2752645e-01, 2.0247671e+00, 1.4440643e-01, - 2.5567526e-02}, - {1.0000000e+00, 9.0145077e-01, -9.6991898e-01, 7.9086551e-01, - -8.7433421e-01, -7.6707547e-01, 7.1292632e-01, 8.1261349e-01, - 9.4074284e-01, 6.2546825e-01}, - {1.0000000e+00, -7.2533533e-01, 1.4790603e-01, 7.7147564e-01, - -1.0728147e-01, 1.1410590e-01, -5.5957854e-01, 5.2611134e-01, - 2.1876193e-02, 5.9517466e-01}, - {1.0000000e+00, 5.3974943e-01, -7.7853625e-01, -1.2196455e+00, - -4.2021450e-01, 9.4953820e-01, -6.5830294e-01, 2.9132945e-01, - 6.0611869e-01, 1.4875350e+00}, - }; - - static double const x_data[10] = { - 4.4283983e-01, -2.9416715e-01, 2.5654724e-01, 3.3493879e-01, - -6.3165447e-01, -5.1360652e-01, -2.4971659e-01, -2.5776427e-01, - -8.8432424e-02, -5.2756825e-01}; - - using ExecutionSpace = Kokkos::DefaultExecutionSpace; - using MemorySpace = Kokkos::DefaultExecutionSpace::memory_space; - using DeviceType = ExecutionSpace::device_type; - - // based on pumi (github.com/SCOREC/core) test/qr.cc - typedef Kokkos::View - MatrixViewType; - typedef Kokkos::View - RowVectorViewType; - typedef Kokkos::View - ColVectorViewType; - typedef Kokkos::View - ColWorkViewType; - const auto one = 1.0; - const auto zero = 0.0; - - MatrixViewType A("A"); - typename MatrixViewType::HostMirror A_host = Kokkos::create_mirror_view(A); - for (int i = 0; i < 16; i++) - for (int j = 0; j < 10; j++) - A_host(i, j) = a_data[i][j]; - Kokkos::deep_copy(A, A_host); - - ColVectorViewType kx("kx"); - typename ColVectorViewType::HostMirror kx_host = - Kokkos::create_mirror_view(kx); - for (int j = 0; j < 10; j++) - kx_host(j) = x_data[j]; - Kokkos::deep_copy(kx, kx_host); - // b = A*kx - RowVectorViewType b("b"); - KokkosBlas::gemv("N", one, A, kx, zero, b); - - // x = b - RowVectorViewType x("x"); - Kokkos::deep_copy(x, b); - - // t: tau (see SerialQR call below) - ColVectorViewType t("t"); - - // w: working space for SerialQR - ColWorkViewType w("w"); - - // roughly following - // kokkos-kernels/batched/dense/unit_test/Test_Batched_TeamVectorQR.hpp - typedef KokkosBlas::Algo::QR::Unblocked AlgoTagType; - Kokkos::parallel_for( - "solveQR", 1, KOKKOS_LAMBDA(int) { - // compute the QR factorization of A and store the results in A and t - // (tau) - see the lapack dgeqp3(...) documentation: - // www.netlib.org/lapack/explore-html-3.6.1/dd/d9a/group__double_g_ecomputational_ga1b0500f49e03d2771b797c6e88adabbb.html - KokkosBatched::SerialQR::invoke(A, t, w); - // x = Q^{T}x - KokkosBatched::SerialApplyQ< - KokkosBatched::Side::Left, KokkosBatched::Trans::Transpose, - KokkosBlas::Algo::ApplyQ::Unblocked>::invoke(A, t, x, w); - // x = R^{-1}x - KokkosBatched::SerialTrsv::invoke(one, - A, - x); - }); - - typename RowVectorViewType::HostMirror sol_host = - Kokkos::create_mirror_view(x); - Kokkos::deep_copy(sol_host, x); - - for (int i = 0; i < kx_host.size(); ++i) { - REQUIRE(sol_host.size() == 16); - CAPTURE(sol_host(i), kx_host(i), 1e-15); - CHECK_THAT(sol_host(i), Catch::Matchers::WithinAbs(kx_host[i], 1e-15)); - } -} From af76eb320fb42a2357368072e7f28db816cceea5 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 15:03:52 -0400 Subject: [PATCH 51/73] remove using namespace --- test/test_spr_meshfields.cpp | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp index 5b81bd7f..946dfd1c 100644 --- a/test/test_spr_meshfields.cpp +++ b/test/test_spr_meshfields.cpp @@ -12,16 +12,13 @@ #include #include -using namespace std; -using namespace Omega_h; - namespace { void print_patches(Omega_h::Graph& patches) { - auto offsets = HostRead(patches.a2ab); - auto values = HostRead(patches.ab2b); + auto offsets = Omega_h::HostRead(patches.a2ab); + auto values = Omega_h::HostRead(patches.ab2b); std::cout << "num patches " << patches.nnodes() << "\n"; for (int patch = 0; patch < patches.nnodes(); patch++) { std::cout << "patch " << patch << " patchElms "; @@ -59,32 +56,34 @@ double func(pcms::Coord& p, int degree) return -1; } -void test(Mesh& mesh, Omega_h::Graph& patches, int degree, Reals source_values, - Reals exact_target_values, Reals source_coordinates, - Reals target_coordinates) +void test(Mesh& mesh, Omega_h::Graph& patches, int degree, + Omega_h::Reals source_values, Omega_h::Reals exact_target_values, + Omega_h::Reals source_coordinates, Omega_h::Reals target_coordinates) { int dim = mesh.dim(); - Real tolerance = 0.0005; + Omega_h::Realtolerance = 0.0005; - Omega_h::Write ignored(patches.a2ab.size() - 1, 1); + Omega_h::Write ignored(patches.a2ab.size() - 1, 1); SupportResults support{patches.a2ab, patches.ab2b, ignored}; auto approx_target_values = pcms::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, degree, pcms::RadialBasisFunction::NO_OP, 1e-5); - const auto delta_abs = Omega_h::fabs_each( - Omega_h::subtract_each(exact_target_values, read(approx_target_values))); + const auto delta_abs = Omega_h::fabs_each(Omega_h::subtract_each( + exact_target_values, Omega_h::read(approx_target_values))); const auto max_delta_abs = Omega_h::get_max(delta_abs); std::cout << "max_delta_abs " << max_delta_abs << "\n"; - auto host_approx_target_values = HostRead(approx_target_values); + auto host_approx_target_values = + Omega_h::HostRead(approx_target_values); mesh.add_tag(OMEGA_H_VERT, "approx_target_values", 1, - read(approx_target_values)); + Omega_h::read(approx_target_values)); Omega_h::vtk::write_parallel("box.vtk", &mesh); - auto host_exact_target_values = HostRead(exact_target_values); + auto host_exact_target_values = + Omega_h::HostRead(exact_target_values); int m = exact_target_values.size(); int n = approx_target_values.size(); @@ -116,7 +115,7 @@ TEST_CASE("meshfields_spr_test") Omega_h::Mesh mesh(&lib); const auto thwaitesMeshFile = "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh"; - Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh); + Omega_h::binary::Omega_h::read(thwaitesMeshFile, lib.world(), &mesh); std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts() << "\n"; @@ -128,7 +127,7 @@ TEST_CASE("meshfields_spr_test") const auto& ntargets = mesh.nverts(); - Write source_coordinates( + Write source_coordinates( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; @@ -182,16 +181,17 @@ TEST_CASE("meshfields_spr_test") auto patches = mesh.get_vtx_patches(minPatchSize); // print_patches(patches); - Write source_values(nfaces, 0, "exact target values"); + Write source_values(nfaces, 0, "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), func_degree); }); - mesh.add_tag(mesh.dim(), "source_values", 1, read(source_values)); + mesh.add_tag(mesh.dim(), "source_values", 1, + Omega_h::read(source_values)); - Write exact_target_values(mesh.nverts(), 0, - "exact target values"); + Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { @@ -199,11 +199,12 @@ TEST_CASE("meshfields_spr_test") func(target_points.coordinates(i), func_degree); }); mesh.add_tag(OMEGA_H_VERT, "target_values", 1, - read(exact_target_values)); + Omega_h::read(exact_target_values)); std::cerr << "I am here above test\n"; - test(mesh, patches, interp_degree, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, patches, interp_degree, Omega_h::Reals(source_values), + Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cerr << "done " << interp_degree << ", " << func_degree << " \n"; } // end SECTION } From 64b150098742c8ecd4b9c67ec5f74c338bab16bf Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 15:06:16 -0400 Subject: [PATCH 52/73] remove print statement --- test/test_spr_meshfields.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp index 946dfd1c..7c27af5c 100644 --- a/test/test_spr_meshfields.cpp +++ b/test/test_spr_meshfields.cpp @@ -179,7 +179,6 @@ TEST_CASE("meshfields_spr_test") minPatchSize = 12; // why so large? std::cerr << "minPatchSize " << minPatchSize << "\n"; auto patches = mesh.get_vtx_patches(minPatchSize); - // print_patches(patches); Write source_values(nfaces, 0, "exact target values"); From a34d804f8be0ea23baaf3b13311764ab0a407ff3 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 15:07:02 -0400 Subject: [PATCH 53/73] fix indentation --- src/pcms/interpolator/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 7d1d0178..5e3a5589 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -8,8 +8,8 @@ set(PCMS_FIELD_TRANSFER_HEADERS linear_interpolant.hpp multidimarray.hpp mls_interpolation.hpp - pcms_interpolator_view_utils.hpp - pcms_interpolator_logger.hpp + pcms_interpolator_view_utils.hpp + pcms_interpolator_logger.hpp ) set(PCMS_FIELD_TRANSFER_SOURCES From 2a7077fe0f8797c1ce1e74518eac91262b3d7ab7 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 16:13:33 -0400 Subject: [PATCH 54/73] remove using namespace --- test/test_linear_solver.cpp | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp index 2f8664ec..97b43692 100644 --- a/test/test_linear_solver.cpp +++ b/test/test_linear_solver.cpp @@ -7,10 +7,7 @@ #include #include -using namespace pcms; -using namespace pcms::detail; - -TEST_CASE("solver test") +TEST_CASE("LU solver test") { // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal @@ -82,15 +79,15 @@ TEST_CASE("solver test") Kokkos::deep_copy(b, host_b); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - size); - ScratchVecView phi(team.team_scratch(0), nsupports); - ScratchVecView support_values(team.team_scratch(0), nsupports); + pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); + pcms::ScratchVecView phi(team.team_scratch(0), nsupports); + pcms::ScratchVecView support_values(team.team_scratch(0), nsupports); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int j) { @@ -103,8 +100,8 @@ TEST_CASE("solver test") team.team_barrier(); - auto result = convert_normal_equation(vandermonde_matrix, phi, - support_values, team); + auto result = pcms::detail::convert_normal_equation( + vandermonde_matrix, phi, support_values, team); team.team_barrier(); @@ -126,7 +123,8 @@ TEST_CASE("solver test") team.team_barrier(); - solve_matrix(result.square_matrix, result.transformed_rhs, team); + pcms::detail::solve_matrix(result.square_matrix, result.transformed_rhs, + team); team.team_barrier(); From 8bfb6a682adcaf98b2d8419d8a251b66dc5d043f Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 28 Mar 2025 16:14:22 -0400 Subject: [PATCH 55/73] remove using namespace --- test/test_mls_basis.cpp | 51 +++++++------- test/test_normalisation.cpp | 14 ++-- test/test_rbf_interp.cpp | 130 ++++++++++++++++++++--------------- test/test_spr_meshfields.cpp | 5 +- 4 files changed, 109 insertions(+), 91 deletions(-) diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp index d50ed4f8..d35092b0 100644 --- a/test/test_mls_basis.cpp +++ b/test/test_mls_basis.cpp @@ -5,8 +5,6 @@ #include #include -using namespace pcms; - TEST_CASE("basis test") { SECTION("check basis slice lengths, size of the basis vector and basis " @@ -17,8 +15,8 @@ TEST_CASE("basis test") const int degree = 3; Kokkos::View array("array", degree, dim); Kokkos::deep_copy(array, 0); - detail::calculate_basis_slice_lengths(array); - auto size = detail::calculate_basis_vector_size(array); + pcms::detail::calculate_basis_slice_lengths(array); + auto size = pcms::detail::calculate_basis_vector_size(array); int expected[degree][dim] = {{1, 1, 1}, {1, 2, 3}, {1, 3, 6}}; @@ -30,7 +28,7 @@ TEST_CASE("basis test") REQUIRE(size == 20); - IntDeviceMatView d_array("array in device", degree, dim); + pcms::IntDeviceMatView d_array("array in device", degree, dim); auto array_hd = Kokkos::create_mirror_view(d_array); Kokkos::deep_copy(array_hd, array); Kokkos::deep_copy(d_array, array_hd); @@ -41,12 +39,12 @@ TEST_CASE("basis test") auto host_results = Kokkos::create_mirror_view(results); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchVecView basis_vector(team.team_scratch(0), size); + pcms::ScratchVecView basis_vector(team.team_scratch(0), size); pcms::detail::fill(0, team, basis_vector); double target_point[MAX_DIM] = {}; @@ -55,7 +53,7 @@ TEST_CASE("basis test") target_point[j] = coords[i][j]; } - detail::eval_basis_vector(d_array, target_point, basis_vector); + pcms::detail::eval_basis_vector(d_array, target_point, basis_vector); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { results(i, j) = basis_vector(j); }); @@ -109,9 +107,9 @@ TEST_CASE("basis test") const int degree = 1; Kokkos::View array("array", degree, dim); Kokkos::deep_copy(array, 0); - detail::calculate_basis_slice_lengths(array); + pcms::detail::calculate_basis_slice_lengths(array); - auto size = detail::calculate_basis_vector_size(array); + auto size = pcms::detail::calculate_basis_vector_size(array); int expected[degree][dim] = { {1, 1, 1}, @@ -125,7 +123,7 @@ TEST_CASE("basis test") REQUIRE(size == 4); - IntDeviceMatView d_array("array in device", degree, dim); + pcms::IntDeviceMatView d_array("array in device", degree, dim); auto array_hd = Kokkos::create_mirror_view(d_array); Kokkos::deep_copy(array_hd, array); Kokkos::deep_copy(d_array, array_hd); @@ -136,12 +134,12 @@ TEST_CASE("basis test") auto host_results = Kokkos::create_mirror_view(results); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchVecView basis_vector(team.team_scratch(0), size); + pcms::ScratchVecView basis_vector(team.team_scratch(0), size); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { basis_vector(j) = 0; }); @@ -151,7 +149,7 @@ TEST_CASE("basis test") target_point[j] = coords[i][j]; } - detail::eval_basis_vector(d_array, target_point, basis_vector); + pcms::detail::eval_basis_vector(d_array, target_point, basis_vector); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { results(i, j) = basis_vector(j); }); @@ -187,13 +185,13 @@ TEST_CASE("basis test") Kokkos::View host_slice_length("array", degree, dim); Kokkos::deep_copy(host_slice_length, 0); - detail::calculate_basis_slice_lengths(host_slice_length); + pcms::detail::calculate_basis_slice_lengths(host_slice_length); - IntDeviceMatView slice_length("slice array in device", degree, dim); + pcms::IntDeviceMatView slice_length("slice array in device", degree, dim); auto slice_length_hd = Kokkos::create_mirror_view(slice_length); Kokkos::deep_copy(slice_length_hd, host_slice_length); Kokkos::deep_copy(slice_length, slice_length_hd); - auto size = detail::calculate_basis_vector_size(host_slice_length); + auto size = pcms::detail::calculate_basis_vector_size(host_slice_length); int nvertices_target = 2; int nsupports = 5; Kokkos::View points("vandermonde matrix", nvertices_target, @@ -220,15 +218,16 @@ TEST_CASE("basis test") Kokkos::deep_copy(points, host_points); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - size); + pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); - ScratchMatView local_source_points(team.team_scratch(0), nsupports, 1); + pcms::ScratchMatView local_source_points(team.team_scratch(0), + nsupports, 1); for (int j = 0; j < nsupports; ++j) { local_source_points(j, 0) = points(i, j); } @@ -244,8 +243,8 @@ TEST_CASE("basis test") Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - detail::create_vandermonde_matrix(local_source_points, j, - slice_length, vandermonde_matrix); + pcms::detail::create_vandermonde_matrix( + local_source_points, j, slice_length, vandermonde_matrix); }); Kokkos::parallel_for( diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp index c2612e21..4c6c38d9 100644 --- a/test/test_normalisation.cpp +++ b/test/test_normalisation.cpp @@ -11,7 +11,7 @@ #include #include -Omega_h::Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix) +Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix) { int num_points = 10; Omega_h::Write array(20, 0, "array to store coordinates"); @@ -67,16 +67,16 @@ TEST_CASE("test_normalization_routine") Omega_h::Write all_target_coordinates( total_coordinates, 0, "target coordinates all"); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int league_rank = team.league_rank(); int num_supports = nsupports[league_rank]; pcms::ScratchMatView local_supports(team.team_scratch(0), num_supports, dim); - detail::fill(0, team, local_supports); + pcms::detail::fill(0, team, local_supports); double target_point[MAX_DIM] = {}; @@ -85,7 +85,7 @@ TEST_CASE("test_normalization_routine") } Kokkos::parallel_for( - Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) { + Kokkos::TeamThreadRange(team, num_supports), [=](int i) { for (int j = 0; j < dim; ++j) { int index = league_rank * num_supports * dim + i * dim + j; @@ -105,10 +105,10 @@ TEST_CASE("test_normalization_routine") }); team.team_barrier(); - detail::normalize_supports(team, target_point, local_supports); + pcms::detail::normalize_supports(team, target_point, local_supports); Kokkos::parallel_for( - Kokkos::TeamThOmega_h::readRange(team, num_supports), [=](int i) { + Kokkos::TeamThreadRange(team, num_supports), [=](int i) { for (int j = 0; j < dim; ++j) { int index = league_rank * num_supports * dim + i * dim + j; normalized_support_coordinates[index] = local_supports(i, j); diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 85d9bef9..cdafea62 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -11,12 +11,8 @@ #include #include -using namespace std; -using namespace Omega_h; -using namespace pcms; - KOKKOS_INLINE_FUNCTION -double func(Coord& p, int degree) +double func(pcms::Coord& p, int degree) { [[maybe_unused]] auto x = p.x; [[maybe_unused]] auto y = p.y; @@ -35,17 +31,18 @@ double func(Coord& p, int degree) return -1; } -void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, - Reals source_values, Reals exact_target_values, - Reals source_coordinates, Reals target_coordinates) +void test(Mesh& mesh, Omega_h::Real cutoffDistance, int degree, + LO min_num_supports, Omega_h::Reals source_values, + Omega_h::Reals exact_target_values, Omega_h::Reals source_coordinates, + Omega_h::Reals target_coordinates) { int dim = mesh.dim(); - Real tolerance = 5e-4; + Omega_h::Real tolerance = 5e-4; - std::vector rbf_types = { - RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4, - RadialBasisFunction::RBF_CONST + std::vector rbf_types = { + pcms::RadialBasisFunction::RBF_GAUSSIAN, pcms::RadialBasisFunction::RBF_C4, + pcms::RadialBasisFunction::RBF_CONST }; @@ -57,9 +54,11 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, mls_interpolation(source_values, source_coordinates, target_coordinates, support, dim, degree, rbf); - auto host_approx_target_values = HostRead(approx_target_values); + auto host_approx_target_values = + Omega_h::HostRead(approx_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + auto host_exact_target_values = + Omega_h::HostRead(exact_target_values); int m = exact_target_values.size(); int n = approx_target_values.size(); @@ -83,7 +82,7 @@ TEST_CASE("test_mls_interpolation") auto rank = lib.world()->rank(); auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); - Real cutoffDistance = 0.3; + Omega_h::Real cutoffDistance = 0.3; cutoffDistance = cutoffDistance * cutoffDistance; const auto dim = mesh.dim(); @@ -94,7 +93,7 @@ TEST_CASE("test_mls_interpolation") const auto& ntargets = mesh.nverts(); - Write source_coordinates( + Omega_h::Write source_coordinates( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; @@ -111,19 +110,19 @@ TEST_CASE("test_mls_interpolation") source_coordinates[index + 1] = centroid[1]; }); - Points source_points; + pcms::Points source_points; source_points.coordinates = - PointsViewType("Number of local source supports", nfaces); + pcms::PointsViewType("Number of local source supports", nfaces); Kokkos::parallel_for( "target points", nfaces, KOKKOS_LAMBDA(int j) { source_points.coordinates(j).x = source_coordinates[j * dim]; source_points.coordinates(j).y = source_coordinates[j * dim + 1]; }); - Points target_points; + pcms::Points target_points; target_points.coordinates = - PointsViewType("Number of local source supports", mesh.nverts()); + pcms::PointsViewType("Number of local source supports", mesh.nverts()); Kokkos::parallel_for( "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { target_points.coordinates(j).x = target_coordinates[j * dim]; @@ -136,23 +135,26 @@ TEST_CASE("test_mls_interpolation") int degree = 1; LO min_num_supports = 10; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 1); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=0, p=1, passed " << "\n"; std::cout << "----------finishing test: d0p1-------" << "\n"; @@ -165,23 +167,26 @@ TEST_CASE("test_mls_interpolation") int degree = 1; LO min_num_supports = 10; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=1, p=1, passed " << "\n"; std::cout << "------------finishing test: d1p1--------" << "\n"; @@ -194,23 +199,26 @@ TEST_CASE("test_mls_interpolation") int degree = 2; LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 2); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 2); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=0, p=2, passed " << "\n"; std::cout << "-------------finishing test: d0p2------------" << "\n"; @@ -223,23 +231,26 @@ TEST_CASE("test_mls_interpolation") int degree = 2; LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 1); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=1, p=2, passed " << "\n"; std::cout << "----------------finishing test: d1p2--------------" << "\n"; @@ -252,23 +263,26 @@ TEST_CASE("test_mls_interpolation") int degree = 2; LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=2, p=2, passed " << "\n"; std::cout << "-------------finishing test: d2p2--------------------" << "\n"; @@ -281,23 +295,26 @@ TEST_CASE("test_mls_interpolation") int degree = 3; LO min_num_supports = 20; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 1); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=2, p=3, passed " << "\n"; std::cout << "-------------finishing test: d2p3--------------------" << "\n"; @@ -310,23 +327,26 @@ TEST_CASE("test_mls_interpolation") int degree = 3; LO min_num_supports = 20; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); std::cout << " The test for d=3, p=3, passed " << "\n"; std::cout << "-------------finishing test: d3p3--------------------" diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp index 7c27af5c..3d58e21f 100644 --- a/test/test_spr_meshfields.cpp +++ b/test/test_spr_meshfields.cpp @@ -62,7 +62,7 @@ void test(Mesh& mesh, Omega_h::Graph& patches, int degree, { int dim = mesh.dim(); - Omega_h::Realtolerance = 0.0005; + Omega_h::Real tolerance = 0.0005; Omega_h::Write ignored(patches.a2ab.size() - 1, 1); SupportResults support{patches.a2ab, patches.ab2b, ignored}; @@ -115,7 +115,7 @@ TEST_CASE("meshfields_spr_test") Omega_h::Mesh mesh(&lib); const auto thwaitesMeshFile = "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh"; - Omega_h::binary::Omega_h::read(thwaitesMeshFile, lib.world(), &mesh); + Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh); std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts() << "\n"; @@ -199,7 +199,6 @@ TEST_CASE("meshfields_spr_test") }); mesh.add_tag(OMEGA_H_VERT, "target_values", 1, Omega_h::read(exact_target_values)); - std::cerr << "I am here above test\n"; test(mesh, patches, interp_degree, Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), Omega_h::Reals(source_coordinates), From 0d7f5a9abf9953ef0b492a658eb8e3183e842384 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 7 Apr 2025 04:44:58 -0400 Subject: [PATCH 56/73] works for gtc-gnet coupling --- .../interpolator/mls_interpolation_impl.hpp | 17 +- .../interpolator/pcms_interpolator_logger.hpp | 26 ++- .../pcms_interpolator_view_utils.hpp | 1 + test/test_svd_serial.cpp | 171 ++++++++++++++---- 4 files changed, 177 insertions(+), 38 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index d451ca5d..8effb6ea 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include #include @@ -467,7 +466,6 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, sigma, Vt, work, 1e-6); - // calculate sigma(i) / (sigma(i)^2 + lambda) calculate_shrinkage_factor(team, lambda, sigma); auto Ut = find_transpose(team, U); @@ -599,6 +597,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, fill(0.0, team, target_basis_vector); fill(0.0, team, solution_coefficients); + Logger logger(10); // storing the coords of local supports int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { @@ -610,12 +609,17 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, } } + logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + "Support Coordinates"); double target_point[MAX_DIM] = {}; for (int i = 0; i < dim; ++i) { target_point[i] = target_coordinates[league_rank * dim + i]; } + logger.logArray(team, LogLevel::DEBUG, target_point, dim, + "Target points"); + /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, * nsupports) is the diagonal matrix & each diagonal element is the phi @@ -648,6 +652,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); + logger.log(team, LogLevel::DEBUG, "The search starts"); + logger.logVector(team, LogLevel::DEBUG, support_values, "Support values"); + /** * * the local_source_points is of the type ScratchMatView with @@ -681,6 +688,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); + logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, + "vandermonde matrix"); + OMEGA_H_CHECK_PRINTF( support.radii2[league_rank] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", @@ -692,7 +702,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, double target_value = KokkosBlas::Experimental::dot( team, solution_coefficients, target_basis_vector); - + logger.logScalar(team, LogLevel::DEBUG, target_value, + "interpolated value"); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp index d919ad58..e825f72a 100644 --- a/src/pcms/interpolator/pcms_interpolator_logger.hpp +++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp @@ -47,13 +47,31 @@ class Logger if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: ", logLevelToString(level), + printf("[%s] (League %d) %s: \n", logLevelToString(level), team.league_rank(), name); printf("(%12.6f, %12.6f)", p.x, p.y); printf("\n"); }); } } + + KOKKOS_INLINE_FUNCTION + void logArray(const member_type& team, const LogLevel level, + const double* array, const int size, const char* name) + { + + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + + for (int i = 0; i < size; ++i) { + printf("%12.6f\n", array[i]); + } + printf("\n"); + }); + } + } // log scratch vector KOKKOS_INLINE_FUNCTION void logVector(const member_type& team, const LogLevel level, @@ -61,7 +79,7 @@ class Logger { if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: ", logLevelToString(level), + printf("[%s] (League %d) %s: \n", logLevelToString(level), team.league_rank(), name); for (int i = 0; i < vector.size(); ++i) { printf("%12.6f\n", vector(i)); @@ -82,7 +100,7 @@ class Logger team.league_rank(), name); for (int i = 0; i < matrix.extent(0); ++i) { for (int j = 0; j < matrix.extent(1); ++j) { - printf("%12.6f", matrix(i, j)); + printf("%20.8f", matrix(i, j)); } printf("\n"); } @@ -99,7 +117,7 @@ class Logger { if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: ", logLevelToString(level), + printf("[%s] (League %d) %s: \n", logLevelToString(level), team.league_rank(), name); printf("%12.6f", value); printf("\n"); diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index 8edd50f4..5e1f2b88 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -175,6 +175,7 @@ void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries, } }); } + } // namespace detail } // namespace pcms #endif diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp index 8d02de0e..e7feb253 100644 --- a/test/test_svd_serial.cpp +++ b/test/test_svd_serial.cpp @@ -22,10 +22,30 @@ TEST_CASE("test_serial_svd") constexpr int column = 3; double tolerance = 1e-4; - double A_data[row][column] = { - {4.0, 2.0, 3.0}, {3.0, 5.0, 1.0}, {2.0, 3.0, 6.0}}; - double rhs_data[row] = {1.28571, 1.42857, 3.1428}; + Kokkos::View A_data("Device A data", row, column); + auto host_A_data = Kokkos::create_mirror_view(A_data); + + host_A_data(0, 0) = 4.0; + host_A_data(0, 1) = 2.0; + host_A_data(0, 2) = 3.0; + host_A_data(1, 0) = 3.0; + host_A_data(1, 1) = 5.0; + host_A_data(1, 2) = 1.0; + host_A_data(2, 0) = 2.0; + host_A_data(2, 1) = 3.0; + host_A_data(2, 2) = 6.0; + + Kokkos::deep_copy(A_data, host_A_data); + + Kokkos::View rhs_data("Device rhs data", row, column); + auto host_rhs_data = Kokkos::create_mirror_view(rhs_data); + + host_rhs_data(0) = 1.28571; + host_rhs_data(1) = 1.42857; + host_rhs_data(2) = 3.1428; + + Kokkos::deep_copy(rhs_data, host_rhs_data); double expected_solution[column] = {-0.142857, 0.285714, 0.428571}; // Approximate values @@ -42,17 +62,13 @@ TEST_CASE("test_serial_svd") ScratchMatView Vt(team.team_scratch(1), column, column); ScratchVecView sigma(team.team_scratch(1), column); ScratchVecView work(team.team_scratch(1), row); + ScratchMatView reconstructedA(team.team_scratch(1), row, column); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { - A(i, j) = A_data[i][j]; + A(i, j) = A_data(i, j); } }); - if (team.league_rank() == 0) { - KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, - sigma, Vt, work); - } - team.team_barrier(); ScratchMatView sigma_mat(team.team_scratch(1), row, column); ScratchMatView Usigma(team.team_scratch(1), row, column); detail::fill(0.0, team, Usigma); @@ -61,36 +77,76 @@ TEST_CASE("test_serial_svd") sigma_mat(i, j) = (i == j) ? sigma(i) : 0.0; } }); - team.team_barrier(); - ScratchMatView reconstructedA(team.team_scratch(1), row, column); - if (team.league_rank() == 0) { - KokkosBatched::SerialGemm< + if (team.team_rank() == 0) { + printf("I am above svd solver function"); + printf("A before SVD:\n"); + for (int i = 0; i < row; ++i) { + for (int j = 0; j < column; ++j) { + printf("%f ", A(i, j)); + } + printf("\n"); + } + + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, + sigma, Vt, work, 1e-6); + + printf("=== Sigma Values ===\n"); + for (int i = 0; i < column; ++i) { + printf("sigma[%d] = %f\n", i, sigma(i)); + } + + printf("\n=== First few entries of U ===\n"); + for (int i = 0; i < row; ++i) { + for (int j = 0; j < row; ++j) { + printf("%f ", U(i, j)); + } + printf("\n"); + } + + printf("\n=== First few entries of Vt ===\n"); + for (int i = 0; i < column; ++i) { + for (int j = 0; j < column; ++j) { + printf("%f ", Vt(i, j)); + } + printf("\n"); + } + + printf("=============================\n"); + + printf("I am below svd solver function"); + KokkosBatched::TeamGemm< KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat, 0.0, Usigma); - KokkosBatched::SerialGemm< + printf("I am below serialgemm"); + KokkosBatched::TeamGemm< KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0, reconstructedA); + printf("I am below another serialgemm"); } - + team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { result(i, j) = reconstructedA(i, j); + printf("Copied reconstructedA(%d,%d) = %f\n", i, j, + reconstructedA(i, j)); } }); }); + + Kokkos::fence(); auto host_result = Kokkos::create_mirror_view(result); Kokkos::deep_copy(host_result, result); for (int i = 0; i < row; ++i) { for (int j = 0; j < column; ++j) { CHECK_THAT(host_result(i, j), - Catch::Matchers::WithinAbs(A_data[i][j], tolerance)); + Catch::Matchers::WithinAbs(host_A_data(i, j), tolerance)); } } } // end section @@ -113,11 +169,11 @@ TEST_CASE("test_serial_svd") ScratchVecView work(team.team_scratch(1), row); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { - A(i, j) = A_data[i][j]; + A(i, j) = A_data(i, j); } }); - if (team.league_rank() == 0) { + if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, sigma, Vt, work); } @@ -153,7 +209,15 @@ TEST_CASE("test_serial_svd") SECTION("test_row_scaling_function") { - double diagonal_entries[row] = {2.0, 0.5, -1.0}; + Kokkos::View diagonal_entries("Device diagonal enetries", row); + auto host_diagonal_entries = Kokkos::create_mirror_view(diagonal_entries); + + host_diagonal_entries(0) = 2.0; + host_diagonal_entries(1) = 0.5; + host_diagonal_entries(2) = -1.0; + + Kokkos::deep_copy(diagonal_entries, host_diagonal_entries); + double expected_result[row][column] = { {8.0, 4.0, 6.0}, {1.5, 2.5, 0.5}, {-2.0, -3.0, -6.0}}; @@ -167,9 +231,9 @@ TEST_CASE("test_serial_svd") ScratchVecView weight(team.team_scratch(1), row); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { - A(i, j) = A_data[i][j]; + A(i, j) = A_data(i, j); } - weight(i) = diagonal_entries[i]; + weight(i) = diagonal_entries(i); }); team.team_barrier(); @@ -200,12 +264,57 @@ TEST_CASE("test_serial_svd") constexpr int rowB = 3; // Adjusted matrix (3x6) constexpr int colB = 6; - double A_data[rowA][colA] = { - {4.0, 2.0, 3.0, 1.0, 0.0, 5.0}, {3.0, 5.0, 1.0, 4.0, 2.0, 6.0}, - {2.0, 3.0, 6.0, 5.0, 1.0, 7.0}, {7.0, 8.0, 9.0, 3.0, 2.0, 1.0}, - {1.0, 3.0, 2.0, 6.0, 4.0, 5.0}, {5.0, 7.0, 3.0, 8.0, 9.0, 6.0}}; - - double diagonal_entries_data[rowB] = {2.0, 0.5, -1.0}; + Kokkos::View A_data("Device A data", row, column); + auto host_A_data = Kokkos::create_mirror_view(A_data); + + host_A_data(0, 0) = 4.0; + host_A_data(0, 1) = 2.0; + host_A_data(0, 2) = 3.0; + host_A_data(0, 3) = 1.0; + host_A_data(0, 4) = 0.0; + host_A_data(0, 5) = 5.0; + host_A_data(1, 0) = 3.0; + host_A_data(1, 1) = 5.0; + host_A_data(1, 2) = 1.0; + host_A_data(1, 3) = 4.0; + host_A_data(1, 4) = 2.0; + host_A_data(1, 5) = 6.0; + host_A_data(2, 0) = 2.0; + host_A_data(2, 1) = 3.0; + host_A_data(2, 2) = 6.0; + host_A_data(2, 3) = 5.0; + host_A_data(2, 4) = 1.0; + host_A_data(2, 5) = 7.0; + host_A_data(3, 0) = 7.0; + host_A_data(3, 1) = 8.0; + host_A_data(3, 2) = 9.0; + host_A_data(3, 3) = 3.0; + host_A_data(3, 4) = 2.0; + host_A_data(3, 5) = 1.0; + host_A_data(4, 0) = 1.0; + host_A_data(4, 1) = 3.0; + host_A_data(4, 2) = 2.0; + host_A_data(4, 3) = 6.0; + host_A_data(4, 4) = 4.0; + host_A_data(4, 5) = 5.0; + host_A_data(5, 0) = 5.0; + host_A_data(5, 1) = 7.0; + host_A_data(5, 2) = 3.0; + host_A_data(5, 3) = 8.0; + host_A_data(5, 4) = 9.0; + host_A_data(5, 5) = 6.0; + + Kokkos::deep_copy(A_data, host_A_data); + + Kokkos::View diagonal_entries_data("Device rhs data"); + auto host_diagonal_entries_data = + Kokkos::create_mirror_view(diagonal_entries_data); + + host_diagonal_entries_data(0) = 2.0; + host_diagonal_entries_data(1) = 0.5; + host_diagonal_entries_data(2) = -1.0; + + Kokkos::deep_copy(diagonal_entries_data, host_diagonal_entries_data); double expected_result[rowB][colB] = {{8.0, 4.0, 6.0, 2.0, 0.0, 10.0}, {1.5, 2.5, 0.5, 2.0, 1.0, 3.0}, @@ -222,14 +331,14 @@ TEST_CASE("test_serial_svd") ScratchVecView weight(team.team_scratch(1), rowB); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) { for (int j = 0; j < colA; ++j) { - A(i, j) = A_data[i][j]; + A(i, j) = A_data(i, j); } }); team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { - weight(i) = diagonal_entries_data[i]; + weight(i) = diagonal_entries_data(i); }); team.team_barrier(); @@ -270,9 +379,9 @@ TEST_CASE("test_serial_svd") detail::fill(1.0, team, weight); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { - A(i, j) = A_data[i][j]; + A(i, j) = A_data(i, j); } - rhs(i) = rhs_data[i]; + rhs(i) = rhs_data(i); }); detail::solve_matrix_svd(team, weight, rhs, A, x, 0); From c7ff6c4d9952b5eed590ab7df90493e199e4de29 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 8 Apr 2025 13:25:12 -0400 Subject: [PATCH 57/73] CUDA backend works --- .../interpolator/mls_interpolation_impl.hpp | 31 ++--- .../pcms_interpolator_view_utils.hpp | 3 +- test/test_svd_serial.cpp | 110 +++++++++--------- 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 8effb6ea..effae5dc 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -437,6 +437,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, team.team_barrier(); eval_rhs_scaling(team, weight, rhs_values); + team.team_barrier(); // initilize U (orthogonal matrices) array ScratchMatView U(team.team_scratch(1), row, row); @@ -465,24 +466,26 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, sigma, Vt, work, 1e-6); + } + team.team_barrier(); - calculate_shrinkage_factor(team, lambda, sigma); - - auto Ut = find_transpose(team, U); + calculate_shrinkage_factor(team, lambda, sigma); - scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T + auto Ut = find_transpose(team, U); - KokkosBatched::SerialGemm< - KokkosBatched::Trans::Transpose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Vt, temp_matrix, 0.0, - vsigmaInvUtMul); + scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T - KokkosBlas::SerialGemv< - KokkosBlas::Trans::NoTranspose, - KokkosBlas::Algo::Gemv::Unblocked>::invoke(1.0, vsigmaInvUtMul, - rhs_values, 0.0, - solution_vector); - } + KokkosBatched::TeamGemm< + member_type, KokkosBatched::Trans::Transpose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, Vt, temp_matrix, + 0.0, vsigmaInvUtMul); + + KokkosBlas::TeamGemv< + member_type, KokkosBlas::Trans::NoTranspose, + KokkosBlas::Algo::Gemv::Unblocked>::invoke(team, 1.0, vsigmaInvUtMul, + rhs_values, 0.0, + solution_vector); } /** diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index 5e1f2b88..f07c9a89 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -93,7 +93,7 @@ ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) int row = matrix.extent(0); int column = matrix.extent(1); - ScratchMatView transMatrix(team.team_scratch(0), column, row); + ScratchMatView transMatrix(team.team_scratch(1), column, row); fill(0.0, team, transMatrix); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { @@ -167,6 +167,7 @@ void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries, size_t rowB = adjustedMatrix.extent(0); size_t colB = adjustedMatrix.extent(1); + size_t nWeights = diagonal_entries.extent(0); OMEGA_H_CHECK(colB == rowA); eval_row_scaling(team, diagonal_entries, matrixToScale); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp index e7feb253..9087e233 100644 --- a/test/test_svd_serial.cpp +++ b/test/test_svd_serial.cpp @@ -55,7 +55,7 @@ TEST_CASE("test_serial_svd") Kokkos::View result("result", row, column); team_policy tp(1, Kokkos::AUTO); Kokkos::parallel_for( - "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)), + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(2000)), KOKKOS_LAMBDA(const member_type& team) { ScratchMatView A(team.team_scratch(1), row, column); ScratchMatView U(team.team_scratch(1), row, row); @@ -70,71 +70,35 @@ TEST_CASE("test_serial_svd") }); ScratchMatView sigma_mat(team.team_scratch(1), row, column); + detail::fill(0.0, team, sigma_mat); ScratchMatView Usigma(team.team_scratch(1), row, column); detail::fill(0.0, team, Usigma); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { - for (int j = 0; j < column; ++j) { - sigma_mat(i, j) = (i == j) ? sigma(i) : 0.0; - } - }); if (team.team_rank() == 0) { - printf("I am above svd solver function"); - printf("A before SVD:\n"); - for (int i = 0; i < row; ++i) { - for (int j = 0; j < column; ++j) { - printf("%f ", A(i, j)); - } - printf("\n"); - } KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, sigma, Vt, work, 1e-6); - printf("=== Sigma Values ===\n"); for (int i = 0; i < column; ++i) { - printf("sigma[%d] = %f\n", i, sigma(i)); - } - - printf("\n=== First few entries of U ===\n"); - for (int i = 0; i < row; ++i) { - for (int j = 0; j < row; ++j) { - printf("%f ", U(i, j)); - } - printf("\n"); + sigma_mat(i, i) = sigma(i); } - printf("\n=== First few entries of Vt ===\n"); - for (int i = 0; i < column; ++i) { - for (int j = 0; j < column; ++j) { - printf("%f ", Vt(i, j)); - } - printf("\n"); - } - - printf("=============================\n"); - - printf("I am below svd solver function"); - KokkosBatched::TeamGemm< + KokkosBatched::SerialGemm< KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat, 0.0, Usigma); - printf("I am below serialgemm"); - KokkosBatched::TeamGemm< + KokkosBatched::SerialGemm< KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0, reconstructedA); - printf("I am below another serialgemm"); } team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < column; ++j) { result(i, j) = reconstructedA(i, j); - printf("Copied reconstructedA(%d,%d) = %f\n", i, j, - reconstructedA(i, j)); } }); }); @@ -160,7 +124,7 @@ TEST_CASE("test_serial_svd") Kokkos::deep_copy(transpose_expected, 0.0); team_policy tp(1, Kokkos::AUTO); Kokkos::parallel_for( - "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)), + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)), KOKKOS_LAMBDA(const member_type& team) { ScratchMatView A(team.team_scratch(1), row, column); ScratchMatView U(team.team_scratch(1), row, row); @@ -178,21 +142,23 @@ TEST_CASE("test_serial_svd") sigma, Vt, work); } team.team_barrier(); - // detail::fill(0.0, team, sigma_inv_expected); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < row; ++j) { transpose_expected(i, j) = U(j, i); } }); - auto transposedU = detail::find_transpose(team, U); + team.team_barrier(); + auto transposedU = detail::find_transpose(team, U); + team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { for (int j = 0; j < row; ++j) { result(i, j) = transposedU(i, j); } }); }); + auto host_result = Kokkos::create_mirror_view(result); auto host_expected = Kokkos::create_mirror_view(transpose_expected); Kokkos::deep_copy(host_result, result); @@ -225,7 +191,7 @@ TEST_CASE("test_serial_svd") Kokkos::deep_copy(result, 0.0); team_policy tp(1, Kokkos::AUTO); Kokkos::parallel_for( - "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(500)), + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)), KOKKOS_LAMBDA(const member_type& team) { ScratchMatView A(team.team_scratch(1), row, column); ScratchVecView weight(team.team_scratch(1), row); @@ -264,7 +230,7 @@ TEST_CASE("test_serial_svd") constexpr int rowB = 3; // Adjusted matrix (3x6) constexpr int colB = 6; - Kokkos::View A_data("Device A data", row, column); + Kokkos::View A_data("Device A data", rowA, colA); auto host_A_data = Kokkos::create_mirror_view(A_data); host_A_data(0, 0) = 4.0; @@ -306,7 +272,8 @@ TEST_CASE("test_serial_svd") Kokkos::deep_copy(A_data, host_A_data); - Kokkos::View diagonal_entries_data("Device rhs data"); + Kokkos::View diagonal_entries_data("Device diagonal entries data", + rowB); auto host_diagonal_entries_data = Kokkos::create_mirror_view(diagonal_entries_data); @@ -324,9 +291,10 @@ TEST_CASE("test_serial_svd") Kokkos::deep_copy(result, 0.0); team_policy tp(1, Kokkos::AUTO); Kokkos::parallel_for( - tp.set_scratch_size(1, Kokkos::PerTeam(500)), + tp.set_scratch_size(1, Kokkos::PerTeam(1000)), KOKKOS_LAMBDA(const member_type& team) { ScratchMatView A(team.team_scratch(1), rowA, colA); + detail::fill(0.0, team, A); ScratchMatView result_matrix(team.team_scratch(1), rowB, colB); ScratchVecView weight(team.team_scratch(1), rowB); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) { @@ -342,6 +310,17 @@ TEST_CASE("test_serial_svd") }); team.team_barrier(); + if (team.team_rank() == 0) { + for (int i = 0; i < rowA; ++i) { + for (int j = 0; j < colA; ++j) { + printf("A(%d,%d) = %f\n", i, j, A(i, j)); + } + } + + for (int i = 0; i < rowB; ++i) { + printf("weight(%d) = %f\n", i, weight(i)); + } + } detail::scale_and_adjust(team, weight, A, result_matrix); team.team_barrier(); @@ -354,8 +333,9 @@ TEST_CASE("test_serial_svd") auto host_result = Kokkos::create_mirror_view(result); Kokkos::deep_copy(host_result, result); - for (int i = 0; i < row; ++i) { - for (int j = 0; j < row; ++j) { + for (int i = 0; i < rowB; ++i) { + for (int j = 0; j < colB; ++j) { + CAPTURE(i, j, host_result(i, j), expected_result[i][j]); CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs( expected_result[i][j], tolerance)); } @@ -368,7 +348,7 @@ TEST_CASE("test_serial_svd") Kokkos::View result("result", column); team_policy tp(1, Kokkos::AUTO); Kokkos::parallel_for( - "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(800)), + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)), KOKKOS_LAMBDA(const member_type& team) { ScratchMatView A(team.team_scratch(1), row, column); ScratchVecView rhs(team.team_scratch(1), row); @@ -384,7 +364,32 @@ TEST_CASE("test_serial_svd") rhs(i) = rhs_data(i); }); - detail::solve_matrix_svd(team, weight, rhs, A, x, 0); + team.team_barrier(); + + if (team.team_rank() == 0) { + printf("\n[A matrix before solve_matrix_svd]\n"); + for (int i = 0; i < row; ++i) { + for (int j = 0; j < column; ++j) { + printf("A(%d, %d) = %f\t", i, j, A(i, j)); + } + printf("\n"); + } + + printf("[rhs vector before solve_matrix_svd]\n"); + for (int i = 0; i < row; ++i) { + printf("rhs[%d] = %f\n", i, rhs(i)); + } + } + + team.team_barrier(); + detail::solve_matrix_svd(team, weight, rhs, A, x, 1e-12); + team.team_barrier(); + if (team.team_rank() == 0) { + printf("[solution vector x after solve_matrix_svd]\n"); + for (int i = 0; i < column; ++i) { + printf("x[%d] = %f\n", i, x(i)); + } + } Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column), [=](int i) { result(i) = x(i); }); }); @@ -392,6 +397,7 @@ TEST_CASE("test_serial_svd") Kokkos::deep_copy(host_result, result); for (int i = 0; i < column; ++i) { + CAPTURE(i, host_result(i), expected_solution[i]); CHECK_THAT(host_result(i), Catch::Matchers::WithinAbs(expected_solution[i], tolerance)); } From c038fceab8c496de15400b109f81830568d2f7df Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Wed, 9 Apr 2025 10:42:23 -0400 Subject: [PATCH 58/73] change lambda factor to lambda --- src/pcms/interpolator/mls_interpolation_impl.hpp | 6 ++---- test/test_linear_solver.cpp | 2 +- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index bd36d17f..95be580d 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -346,8 +346,7 @@ KOKKOS_INLINE_FUNCTION ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, const ScratchVecView& weight_vector, const ScratchVecView& rhs, - member_type team, - double lambda_factor) + member_type team, double lambda) { int m = matrix.extent(0); @@ -383,7 +382,7 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, team.team_barrier(); - add_regularization(team, square_matrix, lambda_factor); + add_regularization(team, square_matrix, lambda); team.team_barrier(); @@ -701,7 +700,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, create_vandermonde_matrix(local_source_points, j, slice_length, vandermonde_matrix); }); ->>>>>>> mls_interpolation_svd_solver team.team_barrier(); diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp index a4fe9df2..d63477dd 100644 --- a/test/test_linear_solver.cpp +++ b/test/test_linear_solver.cpp @@ -101,7 +101,7 @@ TEST_CASE("LU solver test") team.team_barrier(); auto result = pcms::detail::convert_normal_equation( - vandermonde_matrix, phi, support_values, team); + vandermonde_matrix, phi, support_values, team, 0.0); team.team_barrier(); From fe5b7d1d7e155620dedc39f487501d316d892518 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 11:10:33 -0400 Subject: [PATCH 59/73] clean up --- src/pcms/interpolator/mls_interpolation_impl.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 95be580d..59486cd3 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -612,7 +612,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, fill(0.0, team, target_basis_vector); fill(0.0, team, solution_coefficients); - Logger logger(10); + Logger logger(15); // storing the coords of local supports int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { @@ -707,6 +707,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, "vandermonde matrix"); OMEGA_H_CHECK_PRINTF( + support.radii2[league_rank] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", support.radii2[league_rank]); @@ -717,6 +718,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, double target_value = KokkosBlas::Experimental::dot( team, solution_coefficients, target_basis_vector); + + // printf("Target Point : %d \t\t Value: %5.6f\n", league_rank, + // target_value); logger.logScalar(team, LogLevel::DEBUG, target_value, "interpolated value"); if (team.team_rank() == 0) { From 8d6d705f256724e138e7d6d1404d3ddf1bf47e43 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 11:15:10 -0400 Subject: [PATCH 60/73] comment out logger --- .../interpolator/mls_interpolation_impl.hpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 59486cd3..d60aec97 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -612,8 +612,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, fill(0.0, team, target_basis_vector); fill(0.0, team, solution_coefficients); - Logger logger(15); - // storing the coords of local supports + // Logger logger(15); + // storing the coords of local supports int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { count++; @@ -624,16 +624,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, } } - logger.logMatrix(team, LogLevel::DEBUG, local_source_points, - "Support Coordinates"); + // logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + // "Support Coordinates"); double target_point[MAX_DIM] = {}; for (int i = 0; i < dim; ++i) { target_point[i] = target_coordinates[league_rank * dim + i]; } - logger.logArray(team, LogLevel::DEBUG, target_point, dim, - "Target points"); + // logger.logArray(team, LogLevel::DEBUG, target_point, dim, + // "Target points"); /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, @@ -667,8 +667,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - logger.log(team, LogLevel::DEBUG, "The search starts"); - logger.logVector(team, LogLevel::DEBUG, support_values, "Support values"); + // logger.log(team, LogLevel::DEBUG, "The search starts"); + // logger.logVector(team, LogLevel::DEBUG, support_values, "Support + // values"); /** * @@ -703,8 +704,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, - "vandermonde matrix"); + // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, + // "vandermonde matrix"); OMEGA_H_CHECK_PRINTF( @@ -721,8 +722,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, // printf("Target Point : %d \t\t Value: %5.6f\n", league_rank, // target_value); - logger.logScalar(team, LogLevel::DEBUG, target_value, - "interpolated value"); + // logger.logScalar(team, LogLevel::DEBUG, target_value, + // "interpolated value"); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); From abae37a592144a19dd164486991d61013a9de399 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 11:44:56 -0400 Subject: [PATCH 61/73] bring tol to user level --- src/pcms/interpolator/mls_interpolation.cpp | 10 ++--- src/pcms/interpolator/mls_interpolation.hpp | 2 +- .../interpolator/mls_interpolation_impl.hpp | 43 +++++++++++++++---- 3 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index a98ed14e..65af2b5e 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -112,7 +112,7 @@ Write mls_interpolation(const Reals source_values, const Reals target_coordinates, const SupportResults& support, const LO& dim, const LO& degree, RadialBasisFunction bf, - double lambda) + double lambda, double tol) { const auto nvertices_target = target_coordinates.size() / dim; @@ -123,25 +123,25 @@ Write mls_interpolation(const Reals source_values, case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_GAUSSIAN{}, lambda); + degree, RBF_GAUSSIAN{}, lambda, tol); break; case RadialBasisFunction::RBF_C4: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_C4{}, lambda); + degree, RBF_C4{}, lambda, tol); break; case RadialBasisFunction::RBF_CONST: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_CONST{}, lambda); + degree, RBF_CONST{}, lambda, tol); break; case RadialBasisFunction::NO_OP: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, NoOp{}, lambda); + degree, NoOp{}, lambda, tol); break; } diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 8169d453..842e2015 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -21,6 +21,6 @@ Write mls_interpolation(const Reals source_values, const Reals target_coordinates, const SupportResults& support, const LO& dim, const LO& degree, RadialBasisFunction bf, - double lambda = 0); + double lambda = 0, double tol = 1e-6); } // namespace pcms #endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index d60aec97..f80d0a59 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -409,8 +409,8 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, * @return None. The solution is directly written to the `rhs` vector. */ KOKKOS_INLINE_FUNCTION -void solve_matrix(const ScratchMatView& square_matrix, - const ScratchVecView& rhs, member_type team) +void solve_matrix_lu(const ScratchMatView& square_matrix, + const ScratchVecView& rhs, member_type team) { // Perform LU decomposition @@ -426,10 +426,36 @@ void solve_matrix(const ScratchMatView& square_matrix, KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); } +/** + * @brief Solves a regularized linear system Ax = b using SVD decomposition + * within a Kokkos parallel team. + * + * This function solves a (possibly overdetermined) linear system using Singular + * Value Decomposition (SVD), The regularization term (lambda) helps stabilize + * the solution especially in ill-conditioned cases. + * + * + * @param team The Kokkos team member executing this function. + * @param weight A scratch-space vector of weights (RBF weights). + * @param rhs_values A scratch-space vector representing the right-hand side + * vector b (m). It may be modified during the computation. + * @param matrix A scratch-space matrix representing the system matrix A (m x + * n). + * @param solution_vector The output vector x (n). It will contain the computed + * solution after the function completes. + * @param lambda The regularization coefficient. Set to 0.0 for pure SVD; use a + * positive value for Tikhonov regularization. + * @param tol The tolerance threshold used to discard small singular values in + * the decomposition + * + * + * @note This function does not return a value; the solution is written directly + * to `solution_vector`. + */ KOKKOS_INLINE_FUNCTION void solve_matrix_svd(member_type team, const ScratchVecView& weight, ScratchVecView rhs_values, ScratchMatView matrix, - ScratchVecView solution_vector, double lambda) + ScratchVecView solution_vector, double lambda, double tol) { int row = matrix.extent(0); @@ -477,7 +503,7 @@ void solve_matrix_svd(member_type team, const ScratchVecView& weight, if (team.team_rank() == 0) { KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, - sigma, Vt, work, 1e-6); + sigma, Vt, work, tol); } team.team_barrier(); @@ -523,7 +549,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, const SupportResults& support, const LO& dim, const LO& degree, Func rbf_func, RealDefaultScalarArrayView approx_target_values, - double lambda) + double lambda, double tol) { PCMS_FUNCTION_TIMER; static_assert(std::is_invocable_r_v, @@ -714,7 +740,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, support.radii2[league_rank]); solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, - solution_coefficients, lambda); + solution_coefficients, lambda, tol); team.team_barrier(); double target_value = KokkosBlas::Experimental::dot( @@ -754,7 +780,8 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func, double lambda) + const LO& degree, Func rbf_func, double lambda, + double tol) { const auto nsources = source_coordinates.size() / dim; @@ -779,7 +806,7 @@ Write mls_interpolation(const Reals source_values, mls_interpolation(source_values_array_view, source_coordinates_array_view, target_coordinates_array_view, support, dim, degree, - rbf_func, interpolated_values_array_view, lambda); + rbf_func, interpolated_values_array_view, lambda, tol); return interpolated_values; } From 16776a8f5be7fec3a00707536407c0ac60a314a8 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 12:01:19 -0400 Subject: [PATCH 62/73] add comment --- src/pcms/interpolator/mls_interpolation.hpp | 62 +++++++++++++++++++++ 1 file changed, 62 insertions(+) diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 842e2015..0bbbc74a 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -7,6 +7,33 @@ using namespace Omega_h; namespace pcms { + +/** + * @brief Enumeration of supported radial basis functions (RBF) for MLS + *interpolation. + * + * This enum specifies the type of radial basis function used to weight source + *points in the Moving Least Squares (MLS) interpolation process. + * + * Members: + * - RBF_GAUSSIAN: + * A Gaussian RBF: `exp(- a^2 * r^2)`, smooth and commonly used. Good for + *localized support. `a` is a spreading/decay factor + * + * - RBF_C4: + * A compactly supported C4-continuous function. Useful for bounded support + *and efficiency. + * + * - RBF_CONST: + * Constant basis function. Effectively uses uniform weights. + * + * - NO_OP: + * No operation. Disables RBF weighting — typically used when weights + * are externally defined or not needed. + * + * @note These are intended to be passed into function `mls_interpolation` to + *control weighting behavior. + */ enum class RadialBasisFunction : LO { RBF_GAUSSIAN = 0, @@ -16,6 +43,41 @@ enum class RadialBasisFunction : LO }; +/** + * @brief Performs Moving Least Squares (MLS) interpolation at target points. + * + * This function computes interpolated values at a set of target coordinates + * using Moving Least Squares (MLS) based on the provided source values and + * coordinates. It supports different radial basis functions (RBFs), polynomial + * degrees, dimension, optional regularization and tolerance + * + * @param source_values A flat array of source data values. Length should + * be `num_sources`. + * @param source_coordinates A flat array of source point coordinates. Length + * should be `num_sources * dim`. + * @param target_coordinates A flat array of target point coordinates. Length + * should be `num_targets * dim`. + * @param support A data structure holding neighbor information for + * each target (in CSR format). + * @param dim Dimension + * @param degree Polynomial degree + * @param bf The radial basis function used for weighting + * (e.g., Gaussian, C4). + * @param lambda Optional regularization parameter (default is + * 0.0). Helps with stability in ill-conditioned systems. + * @param tol Optional solver tolerance (default is 1e-6). + * Small singular values below this are discarded. + * + * @return A Write array containing the interpolated values at each target + * point. + * + * @note + * - All input arrays are expected to reside in device memory (e.g., Kokkos + * device views). + * - Ensure consistency in dimensions: coordinate arrays must be sized as + * `num_points * dim`. + * - The result array length will match the number of target points. + */ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, From 86b1458e0a97ada45e1bb66a47b77b316930fe6e Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 12:35:17 -0400 Subject: [PATCH 63/73] update the arguments --- test/test_svd_serial.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp index 9087e233..c638d048 100644 --- a/test/test_svd_serial.cpp +++ b/test/test_svd_serial.cpp @@ -382,7 +382,7 @@ TEST_CASE("test_serial_svd") } team.team_barrier(); - detail::solve_matrix_svd(team, weight, rhs, A, x, 1e-12); + detail::solve_matrix_svd(team, weight, rhs, A, x, 0, 1e-12); team.team_barrier(); if (team.team_rank() == 0) { printf("[solution vector x after solve_matrix_svd]\n"); From 2d867fb1040d52efb7b3c7dc60bbc7dfc869c920 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 12:35:49 -0400 Subject: [PATCH 64/73] delete test_linear_solver.cpp --- test/test_linear_solver.cpp | 233 ------------------------------------ 1 file changed, 233 deletions(-) delete mode 100644 test/test_linear_solver.cpp diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp deleted file mode 100644 index d63477dd..00000000 --- a/test/test_linear_solver.cpp +++ /dev/null @@ -1,233 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -TEST_CASE("LU solver test") -{ - // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ - // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal - // matrix with diagonal elements stored as vector and b(n,1) is a vector - SECTION("check convert normal equation ") - { - - int nvertices_target = 2; - - int nsupports = 3; - - int size = 2; - - Kokkos::View Q("phi vector", nvertices_target, nsupports); - - Kokkos::View b("rhs vector", nvertices_target, nsupports); - - Kokkos::View P("vandermonde matrix", nvertices_target, nsupports, - size); - - Kokkos::View TransPQP("moment matrix", nvertices_target, size, - size); - - Kokkos::View TransPQ("scaled matrix", nvertices_target, size, - nsupports); - - Kokkos::View TransPQb("transformed rhs", nvertices_target, size); - - Kokkos::View solution("solution unknown vector", nvertices_target, - size); - - auto host_Q = Kokkos::create_mirror_view(Q); - auto host_P = Kokkos::create_mirror_view(P); - auto host_b = Kokkos::create_mirror_view(b); - - host_Q(0, 0) = 1.0; - host_Q(0, 1) = 1.0; - host_Q(0, 2) = 1.0; - - host_Q(1, 0) = 3.0; - host_Q(1, 1) = 2.0; - host_Q(1, 2) = 1.0; - - Kokkos::deep_copy(Q, host_Q); - - host_P(0, 0, 0) = 1.0; - host_P(0, 0, 1) = 2.0; - host_P(0, 1, 0) = 3.0; - host_P(0, 1, 1) = 4.0; - host_P(0, 2, 0) = 5.0; - host_P(0, 2, 1) = 6.0; - - host_P(1, 0, 0) = 2.0; - host_P(1, 0, 1) = 3.0; - host_P(1, 1, 0) = 1.0; - host_P(1, 1, 1) = -1.0; - host_P(1, 2, 0) = 4.0; - host_P(1, 2, 1) = 1.0; - - Kokkos::deep_copy(P, host_P); - - host_b(0, 0) = 7.0; - host_b(0, 1) = 8.0; - host_b(0, 2) = 9.0; - - host_b(1, 0) = 5.0; - host_b(1, 1) = 6.0; - host_b(1, 2) = 7.0; - - Kokkos::deep_copy(b, host_b); - - pcms::team_policy tp(nvertices_target, Kokkos::AUTO); - Kokkos::parallel_for( - "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const pcms::member_type& team) { - int i = team.league_rank(); - pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - size); - pcms::ScratchVecView phi(team.team_scratch(0), nsupports); - pcms::ScratchVecView support_values(team.team_scratch(0), nsupports); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - phi(j) = Q(i, j); - support_values(j) = b(i, j); - for (int k = 0; k < size; ++k) { - vandermonde_matrix(j, k) = P(i, j, k); - } - }); - - team.team_barrier(); - - auto result = pcms::detail::convert_normal_equation( - vandermonde_matrix, phi, support_values, team, 0.0); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { - TransPQb(i, j) = result.transformed_rhs(j); - for (int k = 0; k < size; ++k) { - TransPQP(i, j, k) = result.square_matrix(j, k); - } - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < size; ++k) { - TransPQ(i, k, j) = result.scaled_matrix(k, j); - } - }); - - team.team_barrier(); - - pcms::detail::solve_matrix(result.square_matrix, result.transformed_rhs, - team); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { - solution(i, j) = result.transformed_rhs(j); - }); - - team.team_barrier(); - }); - - auto host_result_lhs = Kokkos::create_mirror_view(TransPQP); - auto host_result_rhs = Kokkos::create_mirror_view(TransPQb); - auto host_result_scaled = Kokkos::create_mirror_view(TransPQ); - auto host_result_solution = Kokkos::create_mirror_view(solution); - - Kokkos::deep_copy(host_result_lhs, TransPQP); - Kokkos::deep_copy(host_result_rhs, TransPQb); - Kokkos::deep_copy(host_result_scaled, TransPQ); - Kokkos::deep_copy(host_result_solution, solution); - - Kokkos::View expected_lhs_matrix( - "expected lhs matrix", nvertices_target, size, size); - - Kokkos::View expected_rhs_vector( - "expected rhs vector", nvertices_target, size); - - Kokkos::View expected_scaled_matrix( - "expected scaled matrix", nvertices_target, size, nsupports); - - Kokkos::View expected_solution( - "expected solution", nvertices_target, size); - - expected_lhs_matrix(0, 0, 0) = 35.5; - expected_lhs_matrix(0, 0, 1) = 44.0; - expected_lhs_matrix(0, 1, 0) = 44.0; - expected_lhs_matrix(0, 1, 1) = 56.5; - - expected_rhs_vector(0, 0) = 76.0; - expected_rhs_vector(0, 1) = 100.0; - - expected_scaled_matrix(0, 0, 0) = 1.0; - expected_scaled_matrix(0, 0, 1) = 3.0; - expected_scaled_matrix(0, 0, 2) = 5.0; - expected_scaled_matrix(0, 1, 0) = 2.0; - expected_scaled_matrix(0, 1, 1) = 4.0; - expected_scaled_matrix(0, 1, 2) = 6.0; - - expected_solution(0, 0) = -1.519713; - expected_solution(0, 1) = 2.953405; - - expected_lhs_matrix(1, 0, 0) = 30.5; - expected_lhs_matrix(1, 0, 1) = 20.0; - expected_lhs_matrix(1, 1, 0) = 20.0; - expected_lhs_matrix(1, 1, 1) = 30.5; - - expected_rhs_vector(1, 0) = 70.0; - expected_rhs_vector(1, 1) = 40.0; - - expected_scaled_matrix(1, 0, 0) = 6.0; - expected_scaled_matrix(1, 0, 1) = 2.0; - expected_scaled_matrix(1, 0, 2) = 4.0; - expected_scaled_matrix(1, 1, 0) = 9.0; - expected_scaled_matrix(1, 1, 1) = -2.0; - expected_scaled_matrix(1, 1, 2) = 1.0; - - expected_solution(1, 0) = 2.517680; - expected_solution(1, 1) = -0.339463; - - for (int i = 0; i < nvertices_target; ++i) { - for (int j = 0; j < size; ++j) { - for (int l = 0; l < nsupports; ++l) { - printf("scaled_matrix (A^T Q) (%f, %f) at (%d,%d,%d)\n", - expected_scaled_matrix(i, j, l), host_result_scaled(i, j, l), - i, j, l); - REQUIRE_THAT( - expected_scaled_matrix(i, j, l), - Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-6)); - } - } - for (int j = 0; j < size; ++j) { - for (int k = 0; k < size; ++k) { - printf("A^T Q A (%f, %f) at (%d,%d,%d)\n", - expected_lhs_matrix(i, j, k), host_result_lhs(i, j, k), i, j, - k); - REQUIRE_THAT( - expected_lhs_matrix(i, j, k), - Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-6)); - } - } - for (int j = 0; j < size; ++j) { - printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j), - host_result_rhs(i, j), i, j); - REQUIRE_THAT(expected_rhs_vector(i, j), - Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-6)); - } - - for (int j = 0; j < size; ++j) { - printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_solution(i, j), - host_result_solution(i, j), i, j); - REQUIRE_THAT( - expected_solution(i, j), - Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-6)); - } - } - } -} From 0f6b255b2b861e513412a5ff893174836261b11f Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 12:40:24 -0400 Subject: [PATCH 65/73] remove test_linear_solver --- test/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ad3a7b25..5f9662cc 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -110,7 +110,6 @@ if(Catch2_FOUND) test_point_search.cpp test_mls_basis.cpp test_rbf_interp.cpp - test_linear_solver.cpp test_normalisation.cpp test_spr_meshfields.cpp test_svd_serial.cpp) From cfd8c8bbd8fef29b0b7e9a3bd48e6e25d6c513f9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 6 May 2025 19:05:08 -0400 Subject: [PATCH 66/73] remmove lu solver and add svd solver Previously we implemented a normal equation based solver using LU decomposition. The solver didn't work well for ill conditioned systems. The current implementation is based on requalrized SVD decomposition --- .../interpolator/mls_interpolation_impl.hpp | 151 ++---------------- 1 file changed, 10 insertions(+), 141 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index f80d0a59..4bc8ba59 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -286,149 +286,11 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } } -KOKKOS_INLINE_FUNCTION -void add_regularization(const member_type& team, ScratchMatView& square_matrix, - Real lambda_factor) -{ - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, square_matrix.extent(0)), - [=](int i) { square_matrix(i, i) += lambda_factor; }); } /** - * @struct ResultConvertNormal - * @brief Stores the results of matrix and vector transformations. - * - * This struct represents the results of the following operations: - * - `P^T Q` (scaled matrix): The matrix obtained after scaling the columns of - * the given matrix. - * - `P^T Q P` (square matrix): The matrix obtained after performing the `P^T Q - * P` operation. - * - `P^T b` (transformed RHS): The vector obtained after applying the `P^T b` - * operation. - * - * The struct is used to store: - * - The scaled matrix (`scaled_matrix`). - * - The square matrix (`square_matrix`). - * - The transformed right-hand side vector (`transformed_rhs`). - */ -struct ResultConvertNormal -{ - ScratchMatView scaled_matrix; - ScratchMatView square_matrix; - ScratchVecView transformed_rhs; -}; - -/** - * @brief Converts a normal equation into a simplified form. - * - * This function takes a matrix, a basis/weight vector, and a right-hand side - * (RHS) vector and computes the scaled matrix, square matrix, and transformed - * RHS. - * - * For example, the normal equation `P^T Q P x = P^T Q b` is converted into the - * form `Ax = b'`, where: - * - `A` (square matrix) = `P^T Q P` - * - `b'` (transformed RHS) = `P^T Q b` - * - * @param matrix The input matrix. - * @param weight_vector The weight or basis vector. - * @param rhs The right-hand side vector of the equation. - * @param team The team member executing the task (optional, if applicable). - * - * @return The result of the normal equation conversion as a struct containing: - * - The scaled matrix. - * - The square matrix. - * - The transformed RHS. - * - * @todo Implement QR decomposition to solve the linear system. - */ -KOKKOS_INLINE_FUNCTION -ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, - const ScratchVecView& weight_vector, - const ScratchVecView& rhs, - member_type team, double lambda) -{ - - int m = matrix.extent(0); - int n = matrix.extent(1); - - ScratchMatView scaled_matrix(team.team_scratch(1), n, m); - - ScratchMatView square_matrix(team.team_scratch(1), n, n); - - ScratchVecView transformed_rhs(team.team_scratch(1), n); - - // performing P^T Q - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { - for (int k = 0; k < m; ++k) { - scaled_matrix(j, k) = 0; - } - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { - scale_column_trans_matrix(matrix, weight_vector, team, j, scaled_matrix); - }); - - team.team_barrier(); - - // performing (P^T Q P) - KokkosBatched::TeamGemm< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, scaled_matrix, - matrix, 0.0, square_matrix); - - team.team_barrier(); - - add_regularization(team, square_matrix, lambda); - - team.team_barrier(); - - KokkosBlas::Experimental:: - Gemv::invoke( - team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs); - - team.team_barrier(); - - return ResultConvertNormal{scaled_matrix, square_matrix, transformed_rhs}; -} - -/** - * @brief Solves the matrix equation Ax = b' using LU decomposition. - * - * This function solves the given matrix equation using LU decomposition. - * The solution vector `x'` overwrites the input `rhs` vector. - * - * @param square_matrix The input matrix A (must be square). - * @param rhs The right-hand side vector b. After execution, it is overwritten - * with the solution vector x. - * @param team The team member executing the task. - * - * @return None. The solution is directly written to the `rhs` vector. - */ -KOKKOS_INLINE_FUNCTION -void solve_matrix_lu(const ScratchMatView& square_matrix, - const ScratchVecView& rhs, member_type team) -{ - - // Perform LU decomposition - KokkosBatched::TeamLU< - member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team, - square_matrix); - - team.team_barrier(); - - // Solve the equation with forward and backward solves - KokkosBatched::TeamSolveLU< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); -} - -/** - * @brief Solves a regularized linear system Ax = b using SVD decomposition - * within a Kokkos parallel team. + * @brief Solves a regularized weighted linear system WAx = Wb using SVD + * decomposition within a Kokkos parallel team. * * This function solves a (possibly overdetermined) linear system using Singular * Value Decomposition (SVD), The regularization term (lambda) helps stabilize @@ -436,15 +298,22 @@ void solve_matrix_lu(const ScratchMatView& square_matrix, * * * @param team The Kokkos team member executing this function. - * @param weight A scratch-space vector of weights (RBF weights). + * + * @param weight A scratch-space vector of RBF weights (diagonal elements of + * weight matrix W). size is m. + * * @param rhs_values A scratch-space vector representing the right-hand side * vector b (m). It may be modified during the computation. + * * @param matrix A scratch-space matrix representing the system matrix A (m x * n). + * * @param solution_vector The output vector x (n). It will contain the computed * solution after the function completes. + * * @param lambda The regularization coefficient. Set to 0.0 for pure SVD; use a * positive value for Tikhonov regularization. + * * @param tol The tolerance threshold used to discard small singular values in * the decomposition * From cb74274b42d2983fb11e9447a121fb7e31262765 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 8 May 2025 12:31:12 -0400 Subject: [PATCH 67/73] make decay factor user defined parameter --- src/pcms/interpolator/mls_interpolation.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 0bbbc74a..94480802 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -3,8 +3,6 @@ #include "mls_interpolation_impl.hpp" -using namespace Omega_h; - namespace pcms { @@ -58,7 +56,8 @@ enum class RadialBasisFunction : LO * @param target_coordinates A flat array of target point coordinates. Length * should be `num_targets * dim`. * @param support A data structure holding neighbor information for - * each target (in CSR format). + * each target (in + * CSR format). * @param dim Dimension * @param degree Polynomial degree * @param bf The radial basis function used for weighting @@ -78,11 +77,12 @@ enum class RadialBasisFunction : LO * `num_points * dim`. * - The result array length will match the number of target points. */ -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf, - double lambda = 0, double tol = 1e-6); +Write mls_interpolation(const Omega_h::Reals source_values, + const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, + const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, + RadialBasisFunction bf, double lambda = 0, + double tol = 1e-6); } // namespace pcms #endif From bf1229f37ad680ea108b682eac876b3e9395ca53 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 8 May 2025 12:32:13 -0400 Subject: [PATCH 68/73] make decay factor user defined parameter --- src/pcms/interpolator/mls_interpolation.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 65af2b5e..c0e59179 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -7,6 +7,14 @@ namespace pcms // RBF_GAUSSIAN Functor struct RBF_GAUSSIAN { + // 'a' is a spreading factor/decay factor + // the value of 'a' is higher if the data is localized + // the value of 'a' is smaller if the data is farther + + double a; + + RBF_GAUSSIAN(double a_val) : a(a_val) {} + OMEGA_H_INLINE double operator()(double r_sq, double rho_sq) const { @@ -21,11 +29,6 @@ struct RBF_GAUSSIAN "but the value is %.16f\n", r_sq); - // 'a' is a spreading factor/decay factor - // the value of 'a' is higher if the data is localized - // the value of 'a' is smaller if the data is farther - int a = 5; - double r = sqrt(r_sq); double rho = sqrt(rho_sq); double ratio = r / rho; @@ -45,6 +48,7 @@ struct RBF_GAUSSIAN // RBF_C4 Functor struct RBF_C4 { + OMEGA_H_INLINE double operator()(double r_sq, double rho_sq) const { @@ -76,6 +80,7 @@ struct RBF_C4 // struct RBF_CONST { + OMEGA_H_INLINE double operator()(double r_sq, double rho_sq) const { @@ -112,7 +117,8 @@ Write mls_interpolation(const Reals source_values, const Reals target_coordinates, const SupportResults& support, const LO& dim, const LO& degree, RadialBasisFunction bf, - double lambda, double tol) + double lambda, double tol, + double decay_factor = 5.0) { const auto nvertices_target = target_coordinates.size() / dim; @@ -123,7 +129,7 @@ Write mls_interpolation(const Reals source_values, case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_GAUSSIAN{}, lambda, tol); + degree, RBF_GAUSSIAN{decay_factor}, lambda, tol); break; case RadialBasisFunction::RBF_C4: From ef56be9070170f2591950c1694db8cbfcfab8a18 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 8 May 2025 12:34:30 -0400 Subject: [PATCH 69/73] make decay factor user defined parameter --- src/pcms/interpolator/mls_interpolation.cpp | 3 +-- src/pcms/interpolator/mls_interpolation.hpp | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index c0e59179..904c83ae 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -117,8 +117,7 @@ Write mls_interpolation(const Reals source_values, const Reals target_coordinates, const SupportResults& support, const LO& dim, const LO& degree, RadialBasisFunction bf, - double lambda, double tol, - double decay_factor = 5.0) + double lambda, double tol, double decay_factor) { const auto nvertices_target = target_coordinates.size() / dim; diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 94480802..2a490614 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -83,6 +83,6 @@ Write mls_interpolation(const Omega_h::Reals source_values, const SupportResults& support, const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf, double lambda = 0, - double tol = 1e-6); + double tol = 1e-6, double decay_factor = 5.0); } // namespace pcms #endif From 0c76714d471e99f764657093342b9d4b67284549 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 9 May 2025 09:22:04 -0400 Subject: [PATCH 70/73] compilation error --- src/pcms/interpolator/adj_search.hpp | 325 +++++++++--------- src/pcms/interpolator/mls_interpolation.cpp | 15 +- .../interpolator/mls_interpolation_impl.hpp | 66 ++-- .../interpolator/pcms_interpolator_logger.hpp | 3 +- .../pcms_interpolator_view_utils.hpp | 91 +++++ src/pcms/interpolator/queue_visited.hpp | 6 +- 6 files changed, 309 insertions(+), 197 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index a5a20b78..aca256fb 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -5,15 +5,14 @@ #include "queue_visited.hpp" -using namespace Omega_h; - static constexpr int max_dim = 3; // TODO change this into span/mdspan OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) +Omega_h::Real calculateDistance(const Omega_h::Real* p1, + const Omega_h::Real* p2, const int dim) { - Real dx, dy, dz; + Omega_h::Real dx, dy, dz; dx = p1[0] - p2[0]; dy = p1[1] - p2[1]; if (dim != 3) { @@ -29,7 +28,7 @@ inline void checkTargetPoints( { Kokkos::fence(); printf("INFO: Checking target points...\n"); - auto check_target_points = OMEGA_H_LAMBDA(LO i) + auto check_target_points = OMEGA_H_LAMBDA(Omega_h::LO i) { if (results(i).tri_id < 0) { OMEGA_H_CHECK_PRINTF(results(i).tri_id >= 0, @@ -38,25 +37,26 @@ inline void checkTargetPoints( printf("%d, ", i); } }; - parallel_for(results.size(), check_target_points, "check_target_points"); + Omega_h::parallel_for(results.size(), check_target_points, + "check_target_points"); Kokkos::fence(); printf("\n"); } -inline void printSupportsForTarget(const LO target_id, - const Write& supports_ptr, - const Write& nSupports, - const Write& support_idx) +inline void printSupportsForTarget( + const Omega_h::LO target_id, const Omega_h::Write& supports_ptr, + const Omega_h::Write& nSupports, + const Omega_h::Write& support_idx) { - parallel_for( - nSupports.size(), OMEGA_H_LAMBDA(const LO id) { + Omega_h::parallel_for( + nSupports.size(), OMEGA_H_LAMBDA(const Omega_h::LO id) { if (id == target_id) { - LO start_ptr = supports_ptr[id]; - LO end_ptr = supports_ptr[id + 1]; + Omega_h::LO start_ptr = supports_ptr[id]; + Omega_h::LO end_ptr = supports_ptr[id + 1]; printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, end_ptr - start_ptr, nSupports[id]); - for (LO i = start_ptr; i < end_ptr; ++i) { - LO cell_id = support_idx[i]; + for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) { + Omega_h::LO cell_id = support_idx[i]; printf(", %d", cell_id); } printf("\n"); @@ -67,29 +67,34 @@ inline void printSupportsForTarget(const LO target_id, class FindSupports { private: - Mesh& source_mesh; - Mesh& target_mesh; // TODO it's null when one mesh is used + Omega_h::Mesh& source_mesh; + Omega_h::Mesh& target_mesh; // TODO it's null when one mesh is used public: - FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) + FindSupports(Omega_h::Mesh& source_mesh_, Omega_h::Mesh& target_mesh_) : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - FindSupports(Mesh& mesh_) : source_mesh(mesh_), target_mesh(mesh_) {}; + FindSupports(Omega_h::Mesh& mesh_) + : source_mesh(mesh_), target_mesh(mesh_) {}; - void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, + void adjBasedSearch(Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, bool is_build_csr_call); - void adjBasedSearchCentroidNodes(Write& supports_ptr, - Write& nSupports, Write& support_idx, - Write& radii2, bool is_build_csr_call); + void adjBasedSearchCentroidNodes(Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, + bool is_build_csr_call); }; -inline void FindSupports::adjBasedSearch(Write& supports_ptr, - Write& nSupports, - Write& support_idx, - Write& radii2, - bool is_build_csr_call) +inline void FindSupports::adjBasedSearch( + Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, bool is_build_csr_call) { const auto& sourcePoints_coords = source_mesh.coords(); @@ -100,14 +105,15 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, const auto nvertices_target = target_mesh.nverts(); OMEGA_H_CHECK(radii2.size() == nvertices_target); - const auto& vert2vert = source_mesh.ask_star(VERT); + const auto& vert2vert = source_mesh.ask_star(Omega_h::VERT); const auto& v2v_ptr = vert2vert.a2ab; const auto& v2v_data = vert2vert.ab2b; const auto& cells2verts = source_mesh.ask_verts_of(dim); - Kokkos::View target_points("test_points", nvertices_target); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { + Kokkos::View target_points("test_points", + nvertices_target); + Omega_h::parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) { target_points(i, 0) = targetPoints_coords[i * dim]; target_points(i, 1) = targetPoints_coords[i * dim + 1]; }); @@ -117,30 +123,30 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, auto results = search_cell(target_points); checkTargetPoints(results); - parallel_for( + Omega_h::parallel_for( nvertices_target, - OMEGA_H_LAMBDA(const LO id) { + OMEGA_H_LAMBDA(const Omega_h::LO id) { Queue queue; Track visited; - Real cutoffDistance = radii2[id]; + Omega_h::Real cutoffDistance = radii2[id]; - LO source_cell_id = results(id).tri_id; + Omega_h::LO source_cell_id = results(id).tri_id; OMEGA_H_CHECK_PRINTF( source_cell_id >= 0, "ERROR: Source cell id not found for target %d (%f,%f)\n", id, target_points(id, 0), target_points(id, 1)); - const LO num_verts_in_dim = dim + 1; - LO start_ptr = source_cell_id * num_verts_in_dim; - LO end_ptr = start_ptr + num_verts_in_dim; - Real target_coords[max_dim]; - Real support_coords[max_dim]; + const Omega_h::LO num_verts_in_dim = dim + 1; + Omega_h::LO start_ptr = source_cell_id * num_verts_in_dim; + Omega_h::LO end_ptr = start_ptr + num_verts_in_dim; + Omega_h::Real target_coords[max_dim]; + Omega_h::Real support_coords[max_dim]; - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { target_coords[k] = target_points(id, k); } - LO start_counter; + Omega_h::LO start_counter; if (!is_build_csr_call) { start_counter = supports_ptr[id]; } @@ -156,15 +162,16 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, // * Method int count = 0; - for (LO i = start_ptr; i < end_ptr; ++i) { - LO vert_id = cells2verts[i]; + for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) { + Omega_h::LO vert_id = cells2verts[i]; visited.push_back(vert_id); - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { support_coords[k] = sourcePoints_coords[vert_id * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; if (count >= 500) { @@ -178,19 +185,19 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, } queue.push_back(vert_id); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = vert_id; } } } while (!queue.isEmpty()) { - LO currentVertex = queue.front(); + Omega_h::LO currentVertex = queue.front(); queue.pop_front(); - LO start = v2v_ptr[currentVertex]; - LO end = v2v_ptr[currentVertex + 1]; + Omega_h::LO start = v2v_ptr[currentVertex]; + Omega_h::LO end = v2v_ptr[currentVertex + 1]; - for (LO i = start; i < end; ++i) { + for (Omega_h::LO i = start; i < end; ++i) { auto neighborIndex = v2v_data[i]; // check if neighbor index is already in the queue to be checked @@ -202,7 +209,8 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, support_coords[k] = sourcePoints_coords[neighborIndex * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; @@ -213,7 +221,7 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, } queue.push_back(neighborIndex); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = neighborIndex; } } @@ -229,11 +237,11 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, } } -inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, - Write& nSupports, - Write& support_idx, - Write& radii2, - bool is_build_csr_call) +inline void FindSupports::adjBasedSearchCentroidNodes( + Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, bool is_build_csr_call) { // Mesh Info const auto& mesh_coords = source_mesh.coords(); @@ -241,81 +249,83 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, const auto& dim = source_mesh.dim(); const auto& nfaces = source_mesh.nfaces(); - const auto& nodes2faces = source_mesh.ask_up(VERT, FACE); + const auto& nodes2faces = source_mesh.ask_up(Omega_h::VERT, Omega_h::FACE); const auto& n2f_ptr = nodes2faces.a2ab; const auto& n2f_data = nodes2faces.ab2b; - const auto& faces2nodes = source_mesh.ask_down(FACE, VERT).ab2b; + const auto& faces2nodes = + source_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b; - Write cell_centroids( + Omega_h::Write cell_centroids( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); - parallel_for( + Omega_h::parallel_for( "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); + OMEGA_H_LAMBDA(const Omega_h::LO id) { + const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id); const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(mesh_coords, current_el_verts); - auto centroid = average(current_el_vert_coords); + Omega_h::gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = Omega_h::average(current_el_vert_coords); int index = dim * id; cell_centroids[index] = centroid[0]; cell_centroids[index + 1] = centroid[1]; }); // * Got the adj data and cell centroids - parallel_for( + Omega_h::parallel_for( nvertices, - OMEGA_H_LAMBDA(const LO id) { + OMEGA_H_LAMBDA(const Omega_h::LO id) { Queue queue; Track visited; - const LO num_verts_in_dim = dim + 1; - Real target_coords[max_dim]; - Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; + const Omega_h::LO num_verts_in_dim = dim + 1; + Omega_h::Real target_coords[max_dim]; + Omega_h::Real support_coords[max_dim]; + Omega_h::Real cutoffDistance = radii2[id]; //? copying the target vertex coordinates - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { target_coords[k] = mesh_coords[id * dim + k]; } - LO start_counter; + Omega_h::LO start_counter; if (!is_build_csr_call) { start_counter = supports_ptr[id]; } - LO start_ptr = n2f_ptr[id]; - LO end_ptr = n2f_ptr[id + 1]; + Omega_h::LO start_ptr = n2f_ptr[id]; + Omega_h::LO end_ptr = n2f_ptr[id + 1]; int count = 0; - for (LO i = start_ptr; i < end_ptr; ++i) { - LO cell_id = n2f_data[i]; + for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) { + Omega_h::LO cell_id = n2f_data[i]; visited.push_back(cell_id); - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { support_coords[k] = cell_centroids[cell_id * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; queue.push_back(cell_id); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = cell_id; } } } while (!queue.isEmpty()) { // ? can queue be empty? - LO currentCell = queue.front(); + Omega_h::LO currentCell = queue.front(); queue.pop_front(); - LO start = currentCell * num_verts_in_dim; - LO end = start + num_verts_in_dim; - - for (LO i = start; i < end; ++i) { - LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = n2f_ptr[current_vert_id]; - LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1]; - for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { + Omega_h::LO start = currentCell * num_verts_in_dim; + Omega_h::LO end = start + num_verts_in_dim; + + for (Omega_h::LO i = start; i < end; ++i) { + Omega_h::LO current_vert_id = faces2nodes[i]; + Omega_h::LO start_ptr_current_vert = n2f_ptr[current_vert_id]; + Omega_h::LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1]; + for (Omega_h::LO j = start_ptr_current_vert; + j < end_ptr_vert_current_vert; ++j) { auto neighbor_cell_index = n2f_data[j]; // check if neighbor index is already in the queue to be checked @@ -328,13 +338,14 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, cell_centroids[neighbor_cell_index * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; queue.push_back(neighbor_cell_index); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = neighbor_cell_index; } // end of support_idx check } // end of distance check @@ -355,28 +366,29 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, struct SupportResults { - LOs supports_ptr; - LOs supports_idx; - Write radii2; + Omega_h::LOs supports_ptr; + Omega_h::LOs supports_idx; + Omega_h::Write radii2; }; -inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, - Real& cutoffDistance, - LO min_req_support = 12, +inline SupportResults searchNeighbors(Omega_h::Mesh& source_mesh, + Omega_h::Mesh& target_mesh, + Omega_h::Real& cutoffDistance, + Omega_h::LO min_req_support = 12, bool adapt_radius = true) { FindSupports search(source_mesh, target_mesh); - LO nvertices_source = source_mesh.nverts(); - LO nvertices_target = target_mesh.nverts(); + Omega_h::LO nvertices_source = source_mesh.nverts(); + Omega_h::LO nvertices_target = target_mesh.nverts(); - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); + Omega_h::Write nSupports( + nvertices_target, 0, "number of supports in each target vertex"); printf("INFO: Cut off distance: %f\n", cutoffDistance); - Write radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); + Omega_h::Write radii2 = Omega_h::Write( + nvertices_target, cutoffDistance, "squared radii of the supports"); - Write supports_ptr; - Write supports_idx; + Omega_h::Write supports_ptr; + Omega_h::Write supports_idx; if (!adapt_radius) { printf("INFO: Fixed radius search *(disregarding required minimum " @@ -386,31 +398,31 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; while (true) { - nSupports = Write(nvertices_target, 0, - "number of supports in each target vertex"); + nSupports = Omega_h::Write( + nvertices_target, 0, "number of supports in each target vertex"); - Real max_radius = 0.0; + Omega_h::Real max_radius = 0.0; Kokkos::parallel_reduce( "find max radius", nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::Real& local_max) { local_max = (radii2[i] > local_max) ? radii2[i] : local_max; }, - Kokkos::Max(max_radius)); + Kokkos::Max(max_radius)); printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); // create storage every time to avoid complexity - Write supports_ptr; - Write supports_idx; + Omega_h::Write supports_ptr; + Omega_h::Write supports_idx; Kokkos::fence(); search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); Kokkos::fence(); - LO min_supports_found = 0; - Kokkos::Min min_reducer(min_supports_found); + Omega_h::LO min_supports_found = 0; + Kokkos::Min min_reducer(min_supports_found); Kokkos::parallel_reduce( "find min number of supports", nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::LO& local_min) { min_reducer.join(local_min, nSupports[i]); }, min_reducer); @@ -424,10 +436,11 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, } Kokkos::fence(); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { + Omega_h::parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) { if (nSupports[i] < min_req_support) { - Real factor = Real(min_req_support) / Real(nSupports[i]); + Omega_h::Real factor = + Omega_h::Real(min_req_support) / Omega_h::Real(nSupports[i]); factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor; radii2[i] = radii2[i] * factor; } @@ -438,10 +451,10 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); } - supports_ptr = Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); + supports_ptr = Omega_h::Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); - LO total_supports = 0; + Omega_h::LO total_supports = 0; Kokkos::parallel_scan( nvertices_target, @@ -455,29 +468,30 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Kokkos::fence(); - supports_idx = Write(total_supports, 0, - "index of source supports of each target node"); + supports_idx = Omega_h::Write( + total_supports, 0, "index of source supports of each target node"); search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, false); - target_mesh.add_tag(VERT, "radii2", 1, radii2); + target_mesh.add_tag(Omega_h::VERT, "radii2", 1, radii2); return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } -inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, - LO min_support = 12, +inline SupportResults searchNeighbors(Omega_h::Mesh& mesh, + Omega_h::Real cutoffDistance, + Omega_h::LO min_support = 12, bool adapt_radius = true) { - Write supports_ptr; - Write supports_idx; + Omega_h::Write supports_ptr; + Omega_h::Write supports_idx; FindSupports search(mesh); - LO nvertices_target = mesh.nverts(); - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); + Omega_h::LO nvertices_target = mesh.nverts(); + Omega_h::Write nSupports( + nvertices_target, 0, "number of supports in each target vertex"); printf("INFO: Inside searchNeighbors 1\n"); - Write radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); + Omega_h::Write radii2 = Omega_h::Write( + nvertices_target, cutoffDistance, "squared radii of the supports"); printf("INFO: Cutoff distance: %f\n", cutoffDistance); if (!adapt_radius) { @@ -488,29 +502,29 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; while (true) { // until the number of minimum support is met - Real max_radius = 0.0; + Omega_h::Real max_radius = 0.0; Kokkos::parallel_reduce( "find max radius", nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::Real& local_max) { local_max = (radii2[i] > local_max) ? radii2[i] : local_max; }, - Kokkos::Max(max_radius)); + Kokkos::Max(max_radius)); printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - nSupports = Write(nvertices_target, 0, - "number of supports in each target vertex"); + nSupports = Omega_h::Write( + nvertices_target, 0, "number of supports in each target vertex"); SupportResults support; // create support every time to avoid complexity search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, radii2, true); Kokkos::fence(); - LO min_nSupports = 0; + Omega_h::LO min_nSupports = 0; Kokkos::parallel_reduce( "find min number of supports", nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::LO& local_min) { local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; }, - Kokkos::Min(min_nSupports)); + Kokkos::Min(min_nSupports)); printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, r_adjust_loop, max_radius); @@ -521,10 +535,11 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } Kokkos::fence(); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { + Omega_h::parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) { if (nSupports[i] < min_support) { - Real factor = Real(min_support) / Real(nSupports[i]); + Omega_h::Real factor = + Omega_h::Real(min_support) / Omega_h::Real(nSupports[i]); factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; radii2[i] *= factor; } @@ -535,10 +550,10 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } // adaptive radius search // offset array for the supports of each target vertex - supports_ptr = Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); + supports_ptr = Omega_h::Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); - LO total_supports = 0; + Omega_h::LO total_supports = 0; Kokkos::parallel_scan( nvertices_target, OMEGA_H_LAMBDA(int j, int& update, bool final) { @@ -552,14 +567,14 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Inside searchNeighbors 3\n"); Kokkos::fence(); - supports_idx = Write(total_supports, 0, - "index of source supports of each target node"); + supports_idx = Omega_h::Write( + total_supports, 0, "index of source supports of each target node"); printf("INFO: Total_supports: %d\n", total_supports); search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, radii2, false); - mesh.add_tag(VERT, "support_radius", 1, radii2); + mesh.add_tag(Omega_h::VERT, "support_radius", 1, radii2); return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 904c83ae..51005fc2 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -112,18 +112,17 @@ struct NoOp double operator()(double, double) const { return 1.0; } }; -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf, - double lambda, double tol, double decay_factor) +Omega_h::Write mls_interpolation( + const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf, + double lambda, double tol, double decay_factor) { const auto nvertices_target = target_coordinates.size() / dim; - Write interpolated_values(nvertices_target, 0, - "approximated target values"); + Omega_h::Write interpolated_values( + nvertices_target, 0, "approximated target values"); switch (bf) { case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 4bc8ba59..27a239fb 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -12,16 +12,16 @@ #include #include #include -#include "pcms_interpolator_aliases.hpp" -#include "adj_search.hpp" -#include "pcms/assert.h" -#include "pcms/profile.h" -#include "pcms_interpolator_view_utils.hpp" -#include "pcms_interpolator_logger.hpp" +#include +#include +#include +#include +#include +#include #include //KokkosBlas::gemv -#include "KokkosBatched_SVD_Decl.hpp" -#include "KokkosBatched_SVD_Serial_Impl.hpp" +#include +#include #include #include @@ -103,7 +103,7 @@ int calculate_scratch_shared_size(const SupportResults& support, * * @return normalized coordinates The normlaised coordinates */ -Reals min_max_normalization(Reals& coordinates, int dim); +Omega_h::Reals min_max_normalization(Omega_h::Reals& coordinates, int dim); /** * @brief Evaluates the polynomial basis @@ -415,8 +415,8 @@ template void mls_interpolation(RealConstDefaultScalarArrayView source_values, RealConstDefaultScalarArrayView source_coordinates, RealConstDefaultScalarArrayView target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func, + const SupportResults& support, const Omega_h::LO& dim, + const Omega_h::LO& degree, Func rbf_func, RealDefaultScalarArrayView approx_target_values, double lambda, double tol) { @@ -508,7 +508,14 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, fill(0.0, team, solution_coefficients); // Logger logger(15); - // storing the coords of local supports + + /** + * + * the local_source_points is of the type ScratchMatView with + * coordinates information; row is number of local supports + * & column is dim + */ + int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { count++; @@ -550,7 +557,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * the quantity that we want interpolate * * - * step 4: find local supports function values + * step 2: find local supports function values */ Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { @@ -567,12 +574,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, // values"); /** - * - * the local_source_points is of the type ScratchMatView with - * coordinates information; row is number of local supports - * & column is dim - * - * step 2: normalize local source supports and target point + * step 3: normalize local source supports and target point */ normalize_supports(team, target_point, local_source_points); @@ -581,7 +583,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * * evaluates the basis vector of a given target point Coord target_point; * this can evaluate monomial basis vector for any degree of polynomial - * step 3: call basis vector evaluation function (eval_basis_vector); + * + * step 4: call basis vector evaluation function (eval_basis_vector); */ eval_basis_vector(slice_length, target_point, target_basis_vector); @@ -589,7 +592,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * created with the basis vector of source supports stacking on top of * each other * - * step 4: create vandermonde matrix + * step 5: create vandermonde matrix */ Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { @@ -608,6 +611,10 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, "ERROR: radius2 has to be positive but found to be %.16f\n", support.radii2[league_rank]); + /** + * step 6: solve overdetermined system using SVD + * + */ solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, solution_coefficients, lambda, tol); team.team_barrier(); @@ -640,17 +647,16 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * @param dim The dimension of the simulations * @param degree The degree of the interpolation order * @param rbf_func The radial basis function choice - * @return Write array of the interpolated field in target mesh + * @return Omega_h::Write array of the interpolated field in target mesh * * */ template -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func, double lambda, - double tol) +Omega_h::Write mls_interpolation( + const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, Func rbf_func, + double lambda, double tol) { const auto nsources = source_coordinates.size() / dim; @@ -668,7 +674,8 @@ Write mls_interpolation(const Reals source_values, RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), support.radii2.size()); - Write interpolated_values(ntargets, 0, "approximated target values"); + Omega_h::Write interpolated_values( + ntargets, 0, "approximated target values"); RealDefaultScalarArrayView interpolated_values_array_view( interpolated_values.data(), interpolated_values.size()); @@ -682,4 +689,5 @@ Write mls_interpolation(const Reals source_values, } // namespace detail } // namespace pcms + #endif diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp index e825f72a..1a43da6d 100644 --- a/src/pcms/interpolator/pcms_interpolator_logger.hpp +++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp @@ -55,6 +55,7 @@ class Logger } } + // log array KOKKOS_INLINE_FUNCTION void logArray(const member_type& team, const LogLevel level, const double* array, const int size, const char* name) @@ -89,7 +90,7 @@ class Logger } } - // log scratch vector + // log scratch matrix KOKKOS_INLINE_FUNCTION void logMatrix(const member_type& team, const LogLevel level, const ScratchMatView& matrix, const char* name) const diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index f07c9a89..bf803f11 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -64,6 +64,14 @@ void find_sq_root_each(member_type team, ScratchVecView& array) }); } +/** + * + *@brief Finds the inverse of each element in an array + * + *@param team The team member + *@param array The scratch vector view + * + */ KOKKOS_INLINE_FUNCTION void find_inverse_each(member_type team, ScratchVecView& array) { @@ -78,6 +86,14 @@ void find_inverse_each(member_type team, ScratchVecView& array) }); } +/** + *@brief Evaluates the shrinkage factor in SVD (S/(S^2 + lambda) + * + *@param team The team member + *@param lambda The regularization parameter + *@param sigma The scratch vector view of singular values + * + */ KOKKOS_INLINE_FUNCTION void calculate_shrinkage_factor(member_type team, double lambda, ScratchVecView& sigma) @@ -87,6 +103,15 @@ void calculate_shrinkage_factor(member_type team, double lambda, sigma(i) /= (sigma(i) * sigma(i) + lambda); }); } + +/** + * + *@brief Finds the transpose of the scratch matrix + * + *@param team The team member + *@param array The scratch matrix view + * + */ KOKKOS_INLINE_FUNCTION ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) { @@ -103,6 +128,16 @@ ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) return transMatrix; } +/** + * + *@brief Fills the diagonal entries of a matrix + * + *@param team The team member + *@param diagonal_entries The scratch vector view of the entries thats to be + *filled in a matrix diagonal + *@param array The scratch matrix whose digonal entries to be filled + * + */ KOKKOS_INLINE_FUNCTION void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries, ScratchMatView& matrix) @@ -113,6 +148,24 @@ void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries, [=](int i) { matrix(i, i) = diagonal_entries(i); }); } +/** + * @brief Scales each row of the input matrix by corresponding diagonal values. + * + * This function performs **row scaling** of the matrix, meaning each row `i` + * of the matrix is multiplied by the scalar value `diagonal_entries(i)`. + * + * Mathematically, if the matrix is `A` and the diagonal scaling matrix is `D_A + * = diag(diagonal_entries)`, the operation performed is: + * + * A_scaled = D_A * A + * + * where `D_A` is a diagonal matrix that scales rows of `A`. + * + * @param team Kokkos team member used for team-level parallelism + * @param diagonal_entries A vector of diagonal values used for scaling (should + * match or be smaller than the number of rows) + * @param matrix The 2D matrix whose rows will be scaled + */ KOKKOS_INLINE_FUNCTION void eval_row_scaling(member_type team, ScratchVecView diagonal_entries, ScratchMatView matrix) @@ -136,6 +189,21 @@ void eval_row_scaling(member_type team, ScratchVecView diagonal_entries, }); } +/** + * @brief Scales the right-hand-side (RHS) vector using the given diagonal + * weights. + * + * This function performs element-wise scaling of the RHS vector. Each entry + * `rhs_values(i)` is multiplied by the corresponding entry + * `diagonal_entries(i)`. + * + * Mathematically: + * rhs_scaled(i) = diagonal_entries(i) * rhs_values(i) + * + * @param team Kokkos team member used for team-level parallelism + * @param diagonal_entries A vector of scaling weights (same size as rhs_values) + * @param rhs_values The RHS vector to be scaled + */ KOKKOS_INLINE_FUNCTION void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries, ScratchVecView rhs_values) @@ -156,6 +224,28 @@ void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries, }); } +/** + * @brief Scales the input matrix row-wise and writes the result into an + * adjusted matrix. + * + * This function first scales the rows of `matrixToScale` using the + * corresponding entries from `diagonal_entries` (i.e., performs D * A, where D + * is a diagonal matrix), and then copies the scaled values into + * `adjustedMatrix`. + * + * Preconditions: + * - The number of columns in `adjustedMatrix` must match the number of rows in + * `matrixToScale` (colB == rowA) + * - `diagonal_entries` must have size ≤ rowA (validated inside + * `eval_row_scaling`) + * + * Typical use case: used in evaluating SV in U^TSV for SVD solver + * + * @param team Kokkos team member used for team-level parallelism + * @param diagonal_entries A vector of scaling values for each row + * @param matrixToScale The matrix to be row-scaled (modified in place) + * @param adjustedMatrix The output matrix to store the scaled version + */ KOKKOS_INLINE_FUNCTION void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries, ScratchMatView& matrixToScale, @@ -179,4 +269,5 @@ void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries, } // namespace detail } // namespace pcms + #endif diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp index f09d38af..5a366c8f 100644 --- a/src/pcms/interpolator/queue_visited.hpp +++ b/src/pcms/interpolator/queue_visited.hpp @@ -9,13 +9,11 @@ #include #define MAX_SIZE_QUEUE 500 #define MAX_SIZE_TRACK 800 -using namespace std; -using namespace Omega_h; class Queue { private: - LO queue_array[MAX_SIZE_QUEUE]; + Omega_h::LO queue_array[MAX_SIZE_QUEUE]; int first = 0, last = -1, count = 0; public: @@ -44,7 +42,7 @@ class Queue class Track { private: - LO tracking_array[MAX_SIZE_TRACK]; + Omega_h::LO tracking_array[MAX_SIZE_TRACK]; int first = 0, last = -1, count = 0; public: From ab7e764b30b06bde413f2cae1f38784076970b98 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 12 May 2025 09:54:23 -0400 Subject: [PATCH 71/73] update parameters --- src/pcms/interpolator/mls_interpolation.hpp | 12 +++---- .../interpolator/mls_interpolation_impl.hpp | 2 -- test/CMakeLists.txt | 1 - test/test_normalisation.cpp | 4 +-- test/test_rbf_interp.cpp | 33 ++++++++++--------- 5 files changed, 24 insertions(+), 28 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 2a490614..dfb387b1 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -77,12 +77,10 @@ enum class RadialBasisFunction : LO * `num_points * dim`. * - The result array length will match the number of target points. */ -Write mls_interpolation(const Omega_h::Reals source_values, - const Omega_h::Reals source_coordinates, - const Omega_h::Reals target_coordinates, - const SupportResults& support, - const Omega_h::LO& dim, const Omega_h::LO& degree, - RadialBasisFunction bf, double lambda = 0, - double tol = 1e-6, double decay_factor = 5.0); +Omega_h::Write mls_interpolation( + const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf, + double lambda = 0, double tol = 1e-6, double decay_factor = 5.0); } // namespace pcms #endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 27a239fb..ad91047b 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -286,8 +286,6 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } } -} - /** * @brief Solves a regularized weighted linear system WAx = Wb using SVD * decomposition within a Kokkos parallel team. diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 5f9662cc..27b0a3c0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -111,7 +111,6 @@ if(Catch2_FOUND) test_mls_basis.cpp test_rbf_interp.cpp test_normalisation.cpp - test_spr_meshfields.cpp test_svd_serial.cpp) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp index 4c6c38d9..ae45ad35 100644 --- a/test/test_normalisation.cpp +++ b/test/test_normalisation.cpp @@ -15,7 +15,7 @@ Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix) { int num_points = 10; Omega_h::Write array(20, 0, "array to store coordinates"); - parallel_for( + Kokkos::parallel_for( "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) { int index = i * 2; @@ -50,7 +50,7 @@ TEST_CASE("test_normalization_routine") {0.12, 4.29, 1.98}, {1.24, 1.54, 4.2}}; - LOs nsupports({6, 8, 4}, "number of supports"); + Omega_h::LOs nsupports({6, 8, 4}, "number of supports"); int dim = 3; auto total_coordinates = Omega_h::get_sum(nsupports) * dim; diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index cdafea62..24ac9a18 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -31,8 +31,8 @@ double func(pcms::Coord& p, int degree) return -1; } -void test(Mesh& mesh, Omega_h::Real cutoffDistance, int degree, - LO min_num_supports, Omega_h::Reals source_values, +void test(Omega_h::Mesh& mesh, Omega_h::Real cutoffDistance, int degree, + Omega_h::LO min_num_supports, Omega_h::Reals source_values, Omega_h::Reals exact_target_values, Omega_h::Reals source_coordinates, Omega_h::Reals target_coordinates) { @@ -77,10 +77,11 @@ void test(Mesh& mesh, Omega_h::Real cutoffDistance, int degree, TEST_CASE("test_mls_interpolation") { - auto lib = Library{}; + auto lib = Omega_h::Library{}; auto world = lib.world(); auto rank = lib.world()->rank(); - auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); + auto mesh = + Omega_h::build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); Omega_h::Real cutoffDistance = 0.3; cutoffDistance = cutoffDistance * cutoffDistance; @@ -96,15 +97,15 @@ TEST_CASE("test_mls_interpolation") Omega_h::Write source_coordinates( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); - const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + const auto& faces2nodes = mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b; Kokkos::parallel_for( "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); + OMEGA_H_LAMBDA(const Omega_h::LO id) { + const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id); const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(target_coordinates, current_el_verts); - auto centroid = average(current_el_vert_coords); + Omega_h::gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = Omega_h::average(current_el_vert_coords); int index = 2 * id; source_coordinates[index] = centroid[0]; source_coordinates[index + 1] = centroid[1]; @@ -133,7 +134,7 @@ TEST_CASE("test_mls_interpolation") { std::cout << "-------starting test: d0p1------------" << "\n"; int degree = 1; - LO min_num_supports = 10; + Omega_h::LO min_num_supports = 10; Omega_h::Write source_values(nfaces, 0, "exact target values"); @@ -165,7 +166,7 @@ TEST_CASE("test_mls_interpolation") std::cout << "--------starting test: d1p1---------" << "\n"; int degree = 1; - LO min_num_supports = 10; + Omega_h::LO min_num_supports = 10; Omega_h::Write source_values(nfaces, 0, "exact target values"); @@ -197,7 +198,7 @@ TEST_CASE("test_mls_interpolation") std::cout << "-------------starting test: d0p2------------" << "\n"; int degree = 2; - LO min_num_supports = 16; + Omega_h::LO min_num_supports = 16; Omega_h::Write source_values(nfaces, 0, "exact target values"); @@ -229,7 +230,7 @@ TEST_CASE("test_mls_interpolation") std::cout << "----------------satrting test: d1p2--------------" << "\n"; int degree = 2; - LO min_num_supports = 16; + Omega_h::LO min_num_supports = 16; Omega_h::Write source_values(nfaces, 0, "exact target values"); @@ -261,7 +262,7 @@ TEST_CASE("test_mls_interpolation") std::cout << "-------------starting test: d2p2--------------------" << "\n"; int degree = 2; - LO min_num_supports = 16; + Omega_h::LO min_num_supports = 16; Omega_h::Write source_values(nfaces, 0, "exact target values"); @@ -293,7 +294,7 @@ TEST_CASE("test_mls_interpolation") std::cout << "-------------starting test: d2p3--------------------" << "\n"; int degree = 3; - LO min_num_supports = 20; + Omega_h::LO min_num_supports = 20; Omega_h::Write source_values(nfaces, 0, "exact target values"); @@ -325,7 +326,7 @@ TEST_CASE("test_mls_interpolation") std::cout << "-------------starting test: d3p3--------------------" << "\n"; int degree = 3; - LO min_num_supports = 20; + Omega_h::LO min_num_supports = 20; Omega_h::Write source_values(nfaces, 0, "exact target values"); From 88d49d267d72dde4c290c56c9b7f571eb3269c81 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 12 May 2025 09:55:02 -0400 Subject: [PATCH 72/73] update parameter --- src/pcms/interpolator/pcms_interpolator_view_utils.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp index bf803f11..1c52752f 100644 --- a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -1,7 +1,7 @@ #ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP #define PCMS_INTERPOLATOR_ARRAY_OPS_HPP -#include "pcms_interpolator_aliases.hpp" +#include #include #include #include From 2e0de9d3d7b365b5547493a972316f547303a671 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 12 May 2025 10:36:15 -0400 Subject: [PATCH 73/73] replace printf with Kokkos::printf --- src/pcms/interpolator/mls_interpolation.cpp | 17 +++--- .../interpolator/pcms_interpolator_logger.hpp | 53 ++++++++++--------- 2 files changed, 36 insertions(+), 34 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 51005fc2..00140f8e 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -1,5 +1,6 @@ +#include +#include #include -#include "mls_interpolation.hpp" namespace pcms { @@ -29,8 +30,8 @@ struct RBF_GAUSSIAN "but the value is %.16f\n", r_sq); - double r = sqrt(r_sq); - double rho = sqrt(rho_sq); + double r = Kokkos::sqrt(r_sq); + double rho = Kokkos::sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; @@ -38,7 +39,7 @@ struct RBF_GAUSSIAN phi = 0; } else { - phi = exp(-a * a * r * r); + phi = Kokkos::exp(-a * a * r * r); } return phi; @@ -53,11 +54,11 @@ struct RBF_C4 double operator()(double r_sq, double rho_sq) const { double phi; - double r = sqrt(r_sq); + double r = Kokkos::sqrt(r_sq); OMEGA_H_CHECK_PRINTF( rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", rho_sq); - double rho = sqrt(rho_sq); + double rho = Kokkos::sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; if (limit < 0) { @@ -85,11 +86,11 @@ struct RBF_CONST double operator()(double r_sq, double rho_sq) const { double phi; - double r = sqrt(r_sq); + double r = Kokkos::sqrt(r_sq); OMEGA_H_CHECK_PRINTF( rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", rho_sq); - double rho = sqrt(rho_sq); + double rho = Kokkos::sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; if (limit < 0) { diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp index 1a43da6d..60c7310f 100644 --- a/src/pcms/interpolator/pcms_interpolator_logger.hpp +++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp @@ -1,7 +1,8 @@ #ifndef PCMS_INTERPOLATOR_LOGGER_HPP #define PCMS_INTERPOLATOR_LOGGER_HPP -#include "pcms_interpolator_aliases.hpp" +#include +#include namespace pcms { @@ -32,10 +33,10 @@ class Logger { if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) ", logLevelToString(level), - team.league_rank()); - printf(fmt, args...); - printf("\n"); + Kokkos::printf("[%s] (League %d) ", logLevelToString(level), + team.league_rank()); + Kokkos::printf(fmt, args...); + Kokkos::printf("\n"); }); } } @@ -47,10 +48,10 @@ class Logger if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: \n", logLevelToString(level), - team.league_rank(), name); - printf("(%12.6f, %12.6f)", p.x, p.y); - printf("\n"); + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + Kokkos::printf("(%12.6f, %12.6f)", p.x, p.y); + Kokkos::printf("\n"); }); } } @@ -63,13 +64,13 @@ class Logger if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: \n", logLevelToString(level), - team.league_rank(), name); + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); for (int i = 0; i < size; ++i) { - printf("%12.6f\n", array[i]); + Kokkos::printf("%12.6f\n", array[i]); } - printf("\n"); + Kokkos::printf("\n"); }); } } @@ -80,12 +81,12 @@ class Logger { if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: \n", logLevelToString(level), - team.league_rank(), name); + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); for (int i = 0; i < vector.size(); ++i) { - printf("%12.6f\n", vector(i)); + Kokkos::printf("%12.6f\n", vector(i)); } - printf("\n"); + Kokkos::printf("\n"); }); } } @@ -97,16 +98,16 @@ class Logger { if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: \n", logLevelToString(level), - team.league_rank(), name); + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); for (int i = 0; i < matrix.extent(0); ++i) { for (int j = 0; j < matrix.extent(1); ++j) { - printf("%20.8f", matrix(i, j)); + Kokkos::printf("%20.8f", matrix(i, j)); } - printf("\n"); + Kokkos::printf("\n"); } - printf("\n"); + Kokkos::printf("\n"); }); } } @@ -118,10 +119,10 @@ class Logger { if (team.league_rank() == selected_league_rank_) { Kokkos::single(Kokkos::PerTeam(team), [&]() { - printf("[%s] (League %d) %s: \n", logLevelToString(level), - team.league_rank(), name); - printf("%12.6f", value); - printf("\n"); + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + Kokkos::printf("%12.6f", value); + Kokkos::printf("\n"); }); } }