diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 08728df7..5e3a5589 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -8,6 +8,8 @@ set(PCMS_FIELD_TRANSFER_HEADERS linear_interpolant.hpp multidimarray.hpp mls_interpolation.hpp + pcms_interpolator_view_utils.hpp + pcms_interpolator_logger.hpp ) set(PCMS_FIELD_TRANSFER_SOURCES diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index a5a20b78..aca256fb 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -5,15 +5,14 @@ #include "queue_visited.hpp" -using namespace Omega_h; - static constexpr int max_dim = 3; // TODO change this into span/mdspan OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) +Omega_h::Real calculateDistance(const Omega_h::Real* p1, + const Omega_h::Real* p2, const int dim) { - Real dx, dy, dz; + Omega_h::Real dx, dy, dz; dx = p1[0] - p2[0]; dy = p1[1] - p2[1]; if (dim != 3) { @@ -29,7 +28,7 @@ inline void checkTargetPoints( { Kokkos::fence(); printf("INFO: Checking target points...\n"); - auto check_target_points = OMEGA_H_LAMBDA(LO i) + auto check_target_points = OMEGA_H_LAMBDA(Omega_h::LO i) { if (results(i).tri_id < 0) { OMEGA_H_CHECK_PRINTF(results(i).tri_id >= 0, @@ -38,25 +37,26 @@ inline void checkTargetPoints( printf("%d, ", i); } }; - parallel_for(results.size(), check_target_points, "check_target_points"); + Omega_h::parallel_for(results.size(), check_target_points, + "check_target_points"); Kokkos::fence(); printf("\n"); } -inline void printSupportsForTarget(const LO target_id, - const Write& supports_ptr, - const Write& nSupports, - const Write& support_idx) +inline void printSupportsForTarget( + const Omega_h::LO target_id, const Omega_h::Write& supports_ptr, + const Omega_h::Write& nSupports, + const Omega_h::Write& support_idx) { - parallel_for( - nSupports.size(), OMEGA_H_LAMBDA(const LO id) { + Omega_h::parallel_for( + nSupports.size(), OMEGA_H_LAMBDA(const Omega_h::LO id) { if (id == target_id) { - LO start_ptr = supports_ptr[id]; - LO end_ptr = supports_ptr[id + 1]; + Omega_h::LO start_ptr = supports_ptr[id]; + Omega_h::LO end_ptr = supports_ptr[id + 1]; printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, end_ptr - start_ptr, nSupports[id]); - for (LO i = start_ptr; i < end_ptr; ++i) { - LO cell_id = support_idx[i]; + for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) { + Omega_h::LO cell_id = support_idx[i]; printf(", %d", cell_id); } printf("\n"); @@ -67,29 +67,34 @@ inline void printSupportsForTarget(const LO target_id, class FindSupports { private: - Mesh& source_mesh; - Mesh& target_mesh; // TODO it's null when one mesh is used + Omega_h::Mesh& source_mesh; + Omega_h::Mesh& target_mesh; // TODO it's null when one mesh is used public: - FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) + FindSupports(Omega_h::Mesh& source_mesh_, Omega_h::Mesh& target_mesh_) : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - FindSupports(Mesh& mesh_) : source_mesh(mesh_), target_mesh(mesh_) {}; + FindSupports(Omega_h::Mesh& mesh_) + : source_mesh(mesh_), target_mesh(mesh_) {}; - void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, + void adjBasedSearch(Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, bool is_build_csr_call); - void adjBasedSearchCentroidNodes(Write& supports_ptr, - Write& nSupports, Write& support_idx, - Write& radii2, bool is_build_csr_call); + void adjBasedSearchCentroidNodes(Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, + bool is_build_csr_call); }; -inline void FindSupports::adjBasedSearch(Write& supports_ptr, - Write& nSupports, - Write& support_idx, - Write& radii2, - bool is_build_csr_call) +inline void FindSupports::adjBasedSearch( + Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, bool is_build_csr_call) { const auto& sourcePoints_coords = source_mesh.coords(); @@ -100,14 +105,15 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, const auto nvertices_target = target_mesh.nverts(); OMEGA_H_CHECK(radii2.size() == nvertices_target); - const auto& vert2vert = source_mesh.ask_star(VERT); + const auto& vert2vert = source_mesh.ask_star(Omega_h::VERT); const auto& v2v_ptr = vert2vert.a2ab; const auto& v2v_data = vert2vert.ab2b; const auto& cells2verts = source_mesh.ask_verts_of(dim); - Kokkos::View target_points("test_points", nvertices_target); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { + Kokkos::View target_points("test_points", + nvertices_target); + Omega_h::parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) { target_points(i, 0) = targetPoints_coords[i * dim]; target_points(i, 1) = targetPoints_coords[i * dim + 1]; }); @@ -117,30 +123,30 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, auto results = search_cell(target_points); checkTargetPoints(results); - parallel_for( + Omega_h::parallel_for( nvertices_target, - OMEGA_H_LAMBDA(const LO id) { + OMEGA_H_LAMBDA(const Omega_h::LO id) { Queue queue; Track visited; - Real cutoffDistance = radii2[id]; + Omega_h::Real cutoffDistance = radii2[id]; - LO source_cell_id = results(id).tri_id; + Omega_h::LO source_cell_id = results(id).tri_id; OMEGA_H_CHECK_PRINTF( source_cell_id >= 0, "ERROR: Source cell id not found for target %d (%f,%f)\n", id, target_points(id, 0), target_points(id, 1)); - const LO num_verts_in_dim = dim + 1; - LO start_ptr = source_cell_id * num_verts_in_dim; - LO end_ptr = start_ptr + num_verts_in_dim; - Real target_coords[max_dim]; - Real support_coords[max_dim]; + const Omega_h::LO num_verts_in_dim = dim + 1; + Omega_h::LO start_ptr = source_cell_id * num_verts_in_dim; + Omega_h::LO end_ptr = start_ptr + num_verts_in_dim; + Omega_h::Real target_coords[max_dim]; + Omega_h::Real support_coords[max_dim]; - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { target_coords[k] = target_points(id, k); } - LO start_counter; + Omega_h::LO start_counter; if (!is_build_csr_call) { start_counter = supports_ptr[id]; } @@ -156,15 +162,16 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, // * Method int count = 0; - for (LO i = start_ptr; i < end_ptr; ++i) { - LO vert_id = cells2verts[i]; + for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) { + Omega_h::LO vert_id = cells2verts[i]; visited.push_back(vert_id); - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { support_coords[k] = sourcePoints_coords[vert_id * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; if (count >= 500) { @@ -178,19 +185,19 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, } queue.push_back(vert_id); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = vert_id; } } } while (!queue.isEmpty()) { - LO currentVertex = queue.front(); + Omega_h::LO currentVertex = queue.front(); queue.pop_front(); - LO start = v2v_ptr[currentVertex]; - LO end = v2v_ptr[currentVertex + 1]; + Omega_h::LO start = v2v_ptr[currentVertex]; + Omega_h::LO end = v2v_ptr[currentVertex + 1]; - for (LO i = start; i < end; ++i) { + for (Omega_h::LO i = start; i < end; ++i) { auto neighborIndex = v2v_data[i]; // check if neighbor index is already in the queue to be checked @@ -202,7 +209,8 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, support_coords[k] = sourcePoints_coords[neighborIndex * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; @@ -213,7 +221,7 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, } queue.push_back(neighborIndex); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = neighborIndex; } } @@ -229,11 +237,11 @@ inline void FindSupports::adjBasedSearch(Write& supports_ptr, } } -inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, - Write& nSupports, - Write& support_idx, - Write& radii2, - bool is_build_csr_call) +inline void FindSupports::adjBasedSearchCentroidNodes( + Omega_h::Write& supports_ptr, + Omega_h::Write& nSupports, + Omega_h::Write& support_idx, + Omega_h::Write& radii2, bool is_build_csr_call) { // Mesh Info const auto& mesh_coords = source_mesh.coords(); @@ -241,81 +249,83 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, const auto& dim = source_mesh.dim(); const auto& nfaces = source_mesh.nfaces(); - const auto& nodes2faces = source_mesh.ask_up(VERT, FACE); + const auto& nodes2faces = source_mesh.ask_up(Omega_h::VERT, Omega_h::FACE); const auto& n2f_ptr = nodes2faces.a2ab; const auto& n2f_data = nodes2faces.ab2b; - const auto& faces2nodes = source_mesh.ask_down(FACE, VERT).ab2b; + const auto& faces2nodes = + source_mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b; - Write cell_centroids( + Omega_h::Write cell_centroids( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); - parallel_for( + Omega_h::parallel_for( "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); + OMEGA_H_LAMBDA(const Omega_h::LO id) { + const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id); const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(mesh_coords, current_el_verts); - auto centroid = average(current_el_vert_coords); + Omega_h::gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = Omega_h::average(current_el_vert_coords); int index = dim * id; cell_centroids[index] = centroid[0]; cell_centroids[index + 1] = centroid[1]; }); // * Got the adj data and cell centroids - parallel_for( + Omega_h::parallel_for( nvertices, - OMEGA_H_LAMBDA(const LO id) { + OMEGA_H_LAMBDA(const Omega_h::LO id) { Queue queue; Track visited; - const LO num_verts_in_dim = dim + 1; - Real target_coords[max_dim]; - Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; + const Omega_h::LO num_verts_in_dim = dim + 1; + Omega_h::Real target_coords[max_dim]; + Omega_h::Real support_coords[max_dim]; + Omega_h::Real cutoffDistance = radii2[id]; //? copying the target vertex coordinates - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { target_coords[k] = mesh_coords[id * dim + k]; } - LO start_counter; + Omega_h::LO start_counter; if (!is_build_csr_call) { start_counter = supports_ptr[id]; } - LO start_ptr = n2f_ptr[id]; - LO end_ptr = n2f_ptr[id + 1]; + Omega_h::LO start_ptr = n2f_ptr[id]; + Omega_h::LO end_ptr = n2f_ptr[id + 1]; int count = 0; - for (LO i = start_ptr; i < end_ptr; ++i) { - LO cell_id = n2f_data[i]; + for (Omega_h::LO i = start_ptr; i < end_ptr; ++i) { + Omega_h::LO cell_id = n2f_data[i]; visited.push_back(cell_id); - for (LO k = 0; k < dim; ++k) { + for (Omega_h::LO k = 0; k < dim; ++k) { support_coords[k] = cell_centroids[cell_id * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; queue.push_back(cell_id); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = cell_id; } } } while (!queue.isEmpty()) { // ? can queue be empty? - LO currentCell = queue.front(); + Omega_h::LO currentCell = queue.front(); queue.pop_front(); - LO start = currentCell * num_verts_in_dim; - LO end = start + num_verts_in_dim; - - for (LO i = start; i < end; ++i) { - LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = n2f_ptr[current_vert_id]; - LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1]; - for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { + Omega_h::LO start = currentCell * num_verts_in_dim; + Omega_h::LO end = start + num_verts_in_dim; + + for (Omega_h::LO i = start; i < end; ++i) { + Omega_h::LO current_vert_id = faces2nodes[i]; + Omega_h::LO start_ptr_current_vert = n2f_ptr[current_vert_id]; + Omega_h::LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1]; + for (Omega_h::LO j = start_ptr_current_vert; + j < end_ptr_vert_current_vert; ++j) { auto neighbor_cell_index = n2f_data[j]; // check if neighbor index is already in the queue to be checked @@ -328,13 +338,14 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, cell_centroids[neighbor_cell_index * dim + k]; } - Real dist = calculateDistance(target_coords, support_coords, dim); + Omega_h::Real dist = + calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; queue.push_back(neighbor_cell_index); if (!is_build_csr_call) { - LO idx_count = count - 1; + Omega_h::LO idx_count = count - 1; support_idx[start_counter + idx_count] = neighbor_cell_index; } // end of support_idx check } // end of distance check @@ -355,28 +366,29 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, struct SupportResults { - LOs supports_ptr; - LOs supports_idx; - Write radii2; + Omega_h::LOs supports_ptr; + Omega_h::LOs supports_idx; + Omega_h::Write radii2; }; -inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, - Real& cutoffDistance, - LO min_req_support = 12, +inline SupportResults searchNeighbors(Omega_h::Mesh& source_mesh, + Omega_h::Mesh& target_mesh, + Omega_h::Real& cutoffDistance, + Omega_h::LO min_req_support = 12, bool adapt_radius = true) { FindSupports search(source_mesh, target_mesh); - LO nvertices_source = source_mesh.nverts(); - LO nvertices_target = target_mesh.nverts(); + Omega_h::LO nvertices_source = source_mesh.nverts(); + Omega_h::LO nvertices_target = target_mesh.nverts(); - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); + Omega_h::Write nSupports( + nvertices_target, 0, "number of supports in each target vertex"); printf("INFO: Cut off distance: %f\n", cutoffDistance); - Write radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); + Omega_h::Write radii2 = Omega_h::Write( + nvertices_target, cutoffDistance, "squared radii of the supports"); - Write supports_ptr; - Write supports_idx; + Omega_h::Write supports_ptr; + Omega_h::Write supports_idx; if (!adapt_radius) { printf("INFO: Fixed radius search *(disregarding required minimum " @@ -386,31 +398,31 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; while (true) { - nSupports = Write(nvertices_target, 0, - "number of supports in each target vertex"); + nSupports = Omega_h::Write( + nvertices_target, 0, "number of supports in each target vertex"); - Real max_radius = 0.0; + Omega_h::Real max_radius = 0.0; Kokkos::parallel_reduce( "find max radius", nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::Real& local_max) { local_max = (radii2[i] > local_max) ? radii2[i] : local_max; }, - Kokkos::Max(max_radius)); + Kokkos::Max(max_radius)); printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); // create storage every time to avoid complexity - Write supports_ptr; - Write supports_idx; + Omega_h::Write supports_ptr; + Omega_h::Write supports_idx; Kokkos::fence(); search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); Kokkos::fence(); - LO min_supports_found = 0; - Kokkos::Min min_reducer(min_supports_found); + Omega_h::LO min_supports_found = 0; + Kokkos::Min min_reducer(min_supports_found); Kokkos::parallel_reduce( "find min number of supports", nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::LO& local_min) { min_reducer.join(local_min, nSupports[i]); }, min_reducer); @@ -424,10 +436,11 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, } Kokkos::fence(); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { + Omega_h::parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) { if (nSupports[i] < min_req_support) { - Real factor = Real(min_req_support) / Real(nSupports[i]); + Omega_h::Real factor = + Omega_h::Real(min_req_support) / Omega_h::Real(nSupports[i]); factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor; radii2[i] = radii2[i] * factor; } @@ -438,10 +451,10 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); } - supports_ptr = Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); + supports_ptr = Omega_h::Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); - LO total_supports = 0; + Omega_h::LO total_supports = 0; Kokkos::parallel_scan( nvertices_target, @@ -455,29 +468,30 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Kokkos::fence(); - supports_idx = Write(total_supports, 0, - "index of source supports of each target node"); + supports_idx = Omega_h::Write( + total_supports, 0, "index of source supports of each target node"); search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, false); - target_mesh.add_tag(VERT, "radii2", 1, radii2); + target_mesh.add_tag(Omega_h::VERT, "radii2", 1, radii2); return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } -inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, - LO min_support = 12, +inline SupportResults searchNeighbors(Omega_h::Mesh& mesh, + Omega_h::Real cutoffDistance, + Omega_h::LO min_support = 12, bool adapt_radius = true) { - Write supports_ptr; - Write supports_idx; + Omega_h::Write supports_ptr; + Omega_h::Write supports_idx; FindSupports search(mesh); - LO nvertices_target = mesh.nverts(); - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); + Omega_h::LO nvertices_target = mesh.nverts(); + Omega_h::Write nSupports( + nvertices_target, 0, "number of supports in each target vertex"); printf("INFO: Inside searchNeighbors 1\n"); - Write radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); + Omega_h::Write radii2 = Omega_h::Write( + nvertices_target, cutoffDistance, "squared radii of the supports"); printf("INFO: Cutoff distance: %f\n", cutoffDistance); if (!adapt_radius) { @@ -488,29 +502,29 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; while (true) { // until the number of minimum support is met - Real max_radius = 0.0; + Omega_h::Real max_radius = 0.0; Kokkos::parallel_reduce( "find max radius", nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::Real& local_max) { local_max = (radii2[i] > local_max) ? radii2[i] : local_max; }, - Kokkos::Max(max_radius)); + Kokkos::Max(max_radius)); printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - nSupports = Write(nvertices_target, 0, - "number of supports in each target vertex"); + nSupports = Omega_h::Write( + nvertices_target, 0, "number of supports in each target vertex"); SupportResults support; // create support every time to avoid complexity search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, radii2, true); Kokkos::fence(); - LO min_nSupports = 0; + Omega_h::LO min_nSupports = 0; Kokkos::parallel_reduce( "find min number of supports", nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { + OMEGA_H_LAMBDA(const Omega_h::LO i, Omega_h::LO& local_min) { local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; }, - Kokkos::Min(min_nSupports)); + Kokkos::Min(min_nSupports)); printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, r_adjust_loop, max_radius); @@ -521,10 +535,11 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } Kokkos::fence(); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { + Omega_h::parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const Omega_h::LO i) { if (nSupports[i] < min_support) { - Real factor = Real(min_support) / Real(nSupports[i]); + Omega_h::Real factor = + Omega_h::Real(min_support) / Omega_h::Real(nSupports[i]); factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; radii2[i] *= factor; } @@ -535,10 +550,10 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } // adaptive radius search // offset array for the supports of each target vertex - supports_ptr = Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); + supports_ptr = Omega_h::Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); - LO total_supports = 0; + Omega_h::LO total_supports = 0; Kokkos::parallel_scan( nvertices_target, OMEGA_H_LAMBDA(int j, int& update, bool final) { @@ -552,14 +567,14 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Inside searchNeighbors 3\n"); Kokkos::fence(); - supports_idx = Write(total_supports, 0, - "index of source supports of each target node"); + supports_idx = Omega_h::Write( + total_supports, 0, "index of source supports of each target node"); printf("INFO: Total_supports: %d\n", total_supports); search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, radii2, false); - mesh.add_tag(VERT, "support_radius", 1, radii2); + mesh.add_tag(Omega_h::VERT, "support_radius", 1, radii2); return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 113986a1..00140f8e 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -1,5 +1,6 @@ +#include +#include #include -#include "mls_interpolation.hpp" namespace pcms { @@ -7,27 +8,30 @@ namespace pcms // RBF_GAUSSIAN Functor struct RBF_GAUSSIAN { + // 'a' is a spreading factor/decay factor + // the value of 'a' is higher if the data is localized + // the value of 'a' is smaller if the data is farther + + double a; + + RBF_GAUSSIAN(double a_val) : a(a_val) {} + OMEGA_H_INLINE double operator()(double r_sq, double rho_sq) const { double phi; - OMEGA_H_CHECK_PRINTF(rho_sq > 0, + OMEGA_H_CHECK_PRINTF(rho_sq >= 0, "ERROR: square of cutoff distance should always be " "positive but the value is %.16f\n", rho_sq); - OMEGA_H_CHECK_PRINTF(r_sq > 0, + OMEGA_H_CHECK_PRINTF(r_sq >= 0, "ERROR: square of distance should always be positive " "but the value is %.16f\n", r_sq); - // 'a' is a spreading factor/decay factor - // the value of 'a' is higher if the data is localized - // the value of 'a' is smaller if the data is farther - int a = 20; - - double r = sqrt(r_sq); - double rho = sqrt(rho_sq); + double r = Kokkos::sqrt(r_sq); + double rho = Kokkos::sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; @@ -35,7 +39,7 @@ struct RBF_GAUSSIAN phi = 0; } else { - phi = exp(-a * a * r * r); + phi = Kokkos::exp(-a * a * r * r); } return phi; @@ -45,15 +49,16 @@ struct RBF_GAUSSIAN // RBF_C4 Functor struct RBF_C4 { + OMEGA_H_INLINE double operator()(double r_sq, double rho_sq) const { double phi; - double r = sqrt(r_sq); + double r = Kokkos::sqrt(r_sq); OMEGA_H_CHECK_PRINTF( rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", rho_sq); - double rho = sqrt(rho_sq); + double rho = Kokkos::sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; if (limit < 0) { @@ -76,15 +81,16 @@ struct RBF_C4 // struct RBF_CONST { + OMEGA_H_INLINE double operator()(double r_sq, double rho_sq) const { double phi; - double r = sqrt(r_sq); + double r = Kokkos::sqrt(r_sq); OMEGA_H_CHECK_PRINTF( rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", rho_sq); - double rho = sqrt(rho_sq); + double rho = Kokkos::sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; if (limit < 0) { @@ -101,34 +107,46 @@ struct RBF_CONST } }; -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf) +struct NoOp +{ + OMEGA_H_INLINE + double operator()(double, double) const { return 1.0; } +}; + +Omega_h::Write mls_interpolation( + const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf, + double lambda, double tol, double decay_factor) { const auto nvertices_target = target_coordinates.size() / dim; - Write interpolated_values(nvertices_target, 0, - "approximated target values"); + Omega_h::Write interpolated_values( + nvertices_target, 0, "approximated target values"); switch (bf) { case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_GAUSSIAN{}); + degree, RBF_GAUSSIAN{decay_factor}, lambda, tol); break; case RadialBasisFunction::RBF_C4: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_C4{}); + degree, RBF_C4{}, lambda, tol); break; case RadialBasisFunction::RBF_CONST: interpolated_values = detail::mls_interpolation( source_values, source_coordinates, target_coordinates, support, dim, - degree, RBF_CONST{}); + degree, RBF_CONST{}, lambda, tol); + break; + + case RadialBasisFunction::NO_OP: + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, NoOp{}, lambda, tol); break; } @@ -174,7 +192,8 @@ int calculate_basis_vector_size(const IntHostMatView& array) } int calculate_scratch_shared_size(const SupportResults& support, - const int nvertices_target, int basis_size) + const int nvertices_target, int basis_size, + int dim) { IntDeviceVecView shmem_each_team("stores the size required for each team", @@ -186,21 +205,34 @@ int calculate_scratch_shared_size(const SupportResults& support, int end_ptr = support.supports_ptr[i + 1]; int nsupports = end_ptr - start_ptr; + int max_size; + int min_size; + if (nsupports > basis_size) { + max_size = nsupports; + min_size = basis_size; + } else { + max_size = basis_size; + min_size = nsupports; + } size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); - total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); - total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); - total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - total_shared_size += ScratchMatView::shmem_size(nsupports, 1); + total_shared_size += + ScratchMatView::shmem_size(basis_size, basis_size); // Vt + total_shared_size += + ScratchMatView::shmem_size(basis_size, nsupports) * 2; // temp_matrix + total_shared_size += + ScratchMatView::shmem_size(nsupports, basis_size); // vandermonde matrix + total_shared_size += + ScratchVecView::shmem_size(basis_size) * + 3; // sigma, target_basis_vector, solution_coefficients + total_shared_size += ScratchVecView::shmem_size(nsupports) * + 3; // work, phi_vector, support_values + total_shared_size += + ScratchMatView::shmem_size(nsupports, dim); // local_source_points + total_shared_size += + ScratchMatView::shmem_size(nsupports, nsupports) * 2; // U, Ut shmem_each_team(i) = total_shared_size; }); - // namespace KE = Kokkos::Experimental; - // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(), - // shmem_each_team); printf("shared size = %d", shared_size); int shared_size = 0; Kokkos::parallel_reduce( "find_max", nvertices_target, @@ -213,5 +245,6 @@ int calculate_scratch_shared_size(const SupportResults& support, return shared_size; } + } // namespace detail } // namespace pcms diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 17b085aa..dfb387b1 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -3,22 +3,84 @@ #include "mls_interpolation_impl.hpp" -using namespace Omega_h; - namespace pcms { + +/** + * @brief Enumeration of supported radial basis functions (RBF) for MLS + *interpolation. + * + * This enum specifies the type of radial basis function used to weight source + *points in the Moving Least Squares (MLS) interpolation process. + * + * Members: + * - RBF_GAUSSIAN: + * A Gaussian RBF: `exp(- a^2 * r^2)`, smooth and commonly used. Good for + *localized support. `a` is a spreading/decay factor + * + * - RBF_C4: + * A compactly supported C4-continuous function. Useful for bounded support + *and efficiency. + * + * - RBF_CONST: + * Constant basis function. Effectively uses uniform weights. + * + * - NO_OP: + * No operation. Disables RBF weighting — typically used when weights + * are externally defined or not needed. + * + * @note These are intended to be passed into function `mls_interpolation` to + *control weighting behavior. + */ enum class RadialBasisFunction : LO { RBF_GAUSSIAN = 0, RBF_C4, - RBF_CONST + RBF_CONST, + NO_OP }; -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, RadialBasisFunction bf); +/** + * @brief Performs Moving Least Squares (MLS) interpolation at target points. + * + * This function computes interpolated values at a set of target coordinates + * using Moving Least Squares (MLS) based on the provided source values and + * coordinates. It supports different radial basis functions (RBFs), polynomial + * degrees, dimension, optional regularization and tolerance + * + * @param source_values A flat array of source data values. Length should + * be `num_sources`. + * @param source_coordinates A flat array of source point coordinates. Length + * should be `num_sources * dim`. + * @param target_coordinates A flat array of target point coordinates. Length + * should be `num_targets * dim`. + * @param support A data structure holding neighbor information for + * each target (in + * CSR format). + * @param dim Dimension + * @param degree Polynomial degree + * @param bf The radial basis function used for weighting + * (e.g., Gaussian, C4). + * @param lambda Optional regularization parameter (default is + * 0.0). Helps with stability in ill-conditioned systems. + * @param tol Optional solver tolerance (default is 1e-6). + * Small singular values below this are discarded. + * + * @return A Write array containing the interpolated values at each target + * point. + * + * @note + * - All input arrays are expected to reside in device memory (e.g., Kokkos + * device views). + * - Ensure consistency in dimensions: coordinate arrays must be sized as + * `num_points * dim`. + * - The result array length will match the number of target points. + */ +Omega_h::Write mls_interpolation( + const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, RadialBasisFunction bf, + double lambda = 0, double tol = 1e-6, double decay_factor = 5.0); } // namespace pcms #endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 5a79f64a..ad91047b 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -9,17 +9,23 @@ #include #include #include -#include #include #include #include +#include +#include +#include +#include +#include +#include + +#include //KokkosBlas::gemv +#include +#include +#include +#include -#include "pcms_interpolator_aliases.hpp" -#include "adj_search.hpp" -#include "../assert.h" -#include "../profile.h" - -static constexpr int MAX_DIM = 3; +static constexpr int MAX_DIM = 6; /** * calculate_basis_slice_lengths, calculate_basis_vector_size and @@ -84,7 +90,20 @@ int calculate_basis_vector_size(const IntHostMatView& array); * @return The scratch size */ int calculate_scratch_shared_size(const SupportResults& support, - const int nvertices_target, int basis_size); + const int ntargets, int basis_size, int dim); +/** + * + * @brief Performs minmax normalization + * + * This function takes coordinates and dimension + * and outputs the normalized coordinates + * + * @param coordinates The Coordinates + * @param dim The Dimension + * + * @return normalized coordinates The normlaised coordinates + */ +Omega_h::Reals min_max_normalization(Omega_h::Reals& coordinates, int dim); /** * @brief Evaluates the polynomial basis @@ -96,9 +115,10 @@ int calculate_scratch_shared_size(const SupportResults& support, * @param[in] p A reference to the coordinate struct * @param[in,out] basis_vector The polynomial basis vector * + * */ KOKKOS_INLINE_FUNCTION -void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, +void eval_basis_vector(const IntDeviceMatView& slice_length, const double* p, ScratchVecView& basis_vector) { basis_vector(0) = 1; @@ -109,11 +129,9 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, int curr_col = 1; double point[MAX_DIM]; - point[0] = p.x; - point[1] = p.y; - if (dim == 3) { - point[2] = p.z; + for (int i = 0; i < dim; ++i) { + point[i] = p[i]; } for (int i = 0; i < degree; ++i) { @@ -131,6 +149,39 @@ void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, } } +/** + * @Brief Normalizes the support coordinates about the given target point + * + * This function takes the pivot point Coord object + * and support coordinates as input and gives normalized coordinates + * + * @param[in] team The team member + * @param[in, out] pivot The Coord object that stores the coordinate of the + *target pivot + * @param[in,out] support_coordinates The orginal coordinates when in, when out + * normalized coordinates + ** + */ +KOKKOS_INLINE_FUNCTION +void normalize_supports(const member_type& team, double* target_point, + ScratchMatView& support_coordinates) +{ + int nsupports = support_coordinates.extent(0); + int dim = support_coordinates.extent(1); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int i) { + Real pivot_point[MAX_DIM]; + + for (int j = 0; j < dim; ++j) { + support_coordinates(i, j) -= target_point[j]; + } + }); + + for (int j = 0; j < dim; ++j) { + target_point[j] = 0.0; + } +} + /** * @brief Creates a vandermonde matrix * @@ -148,12 +199,11 @@ void create_vandermonde_matrix(const ScratchMatView& local_source_points, int j, int N = local_source_points.extent(0); int dim = local_source_points.extent(1); - Coord source_point; - source_point.x = local_source_points(j, 0); - source_point.y = local_source_points(j, 1); - if (dim == 3) { - source_point.z = local_source_points(j, 2); + double source_point[MAX_DIM] = {}; + for (int i = 0; i < dim; ++i) { + source_point[i] = local_source_points(j, i); } + ScratchVecView basis_vector_supports = Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL()); eval_basis_vector(slice_length, source_point, basis_vector_supports); @@ -179,13 +229,18 @@ template , bool> = true> KOKKOS_INLINE_FUNCTION void compute_phi_vector( - const Coord& target_point, const ScratchMatView& local_source_points, int j, + const double* target_point, const ScratchMatView& local_source_points, int j, double cuttoff_dis_sq, Func rbf_func, ScratchVecView phi) { int N = local_source_points.extent(0); - double dx = target_point.x - local_source_points(j, 0); - double dy = target_point.y - local_source_points(j, 1); - double ds_sq = dx * dx + dy * dy; + int dim = local_source_points.extent(1); + + double ds_sq = 0; + + for (int i = 0; i < dim; ++i) { + double temp = target_point[i] - local_source_points(j, i); + ds_sq += temp * temp; + } phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); OMEGA_H_CHECK_PRINTF(!std::isnan(phi(j)), "ERROR: Phi(j) in compute_phi_vector is NaN for j = %d " @@ -232,169 +287,110 @@ void scale_column_trans_matrix(const ScratchMatView& matrix, } /** - * @struct ResultConvertNormal - * @brief Stores the results of matrix and vector transformations. - * - * This struct represents the results of the following operations: - * - `P^T Q` (scaled matrix): The matrix obtained after scaling the columns of - * the given matrix. - * - `P^T Q P` (square matrix): The matrix obtained after performing the `P^T Q - * P` operation. - * - `P^T b` (transformed RHS): The vector obtained after applying the `P^T b` - * operation. - * - * The struct is used to store: - * - The scaled matrix (`scaled_matrix`). - * - The square matrix (`square_matrix`). - * - The transformed right-hand side vector (`transformed_rhs`). - */ -struct ResultConvertNormal -{ - ScratchMatView scaled_matrix; - ScratchMatView square_matrix; - ScratchVecView transformed_rhs; -}; - -/** - * @brief Converts a normal equation into a simplified form. + * @brief Solves a regularized weighted linear system WAx = Wb using SVD + * decomposition within a Kokkos parallel team. + * + * This function solves a (possibly overdetermined) linear system using Singular + * Value Decomposition (SVD), The regularization term (lambda) helps stabilize + * the solution especially in ill-conditioned cases. + * + * + * @param team The Kokkos team member executing this function. * - * This function takes a matrix, a basis/weight vector, and a right-hand side - * (RHS) vector and computes the scaled matrix, square matrix, and transformed - * RHS. + * @param weight A scratch-space vector of RBF weights (diagonal elements of + * weight matrix W). size is m. * - * For example, the normal equation `P^T Q P x = P^T Q b` is converted into the - * form `Ax = b'`, where: - * - `A` (square matrix) = `P^T Q P` - * - `b'` (transformed RHS) = `P^T Q b` + * @param rhs_values A scratch-space vector representing the right-hand side + * vector b (m). It may be modified during the computation. * - * @param matrix The input matrix. - * @param weight_vector The weight or basis vector. - * @param rhs The right-hand side vector of the equation. - * @param team The team member executing the task (optional, if applicable). + * @param matrix A scratch-space matrix representing the system matrix A (m x + * n). * - * @return The result of the normal equation conversion as a struct containing: - * - The scaled matrix. - * - The square matrix. - * - The transformed RHS. + * @param solution_vector The output vector x (n). It will contain the computed + * solution after the function completes. * - * @todo Implement QR decomposition to solve the linear system. + * @param lambda The regularization coefficient. Set to 0.0 for pure SVD; use a + * positive value for Tikhonov regularization. + * + * @param tol The tolerance threshold used to discard small singular values in + * the decomposition + * + * + * @note This function does not return a value; the solution is written directly + * to `solution_vector`. */ KOKKOS_INLINE_FUNCTION -ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, - const ScratchVecView& weight_vector, - const ScratchVecView& rhs, - member_type team) +void solve_matrix_svd(member_type team, const ScratchVecView& weight, + ScratchVecView rhs_values, ScratchMatView matrix, + ScratchVecView solution_vector, double lambda, double tol) { - int m = matrix.extent(0); - int n = matrix.extent(1); + int row = matrix.extent(0); - ScratchMatView scaled_matrix(team.team_scratch(0), n, m); + int column = matrix.extent(1); - ScratchMatView square_matrix(team.team_scratch(0), n, n); + int weight_size = weight.size(); - ScratchVecView transformed_rhs(team.team_scratch(0), n); + OMEGA_H_CHECK_PRINTF( + weight_size == row, + "the size of the weight vector should be equal to the row of the matrix\n" + "weight vector size = %d, row of matrix = %d\n", + weight_size, row); - // performing P^T Q - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { - for (int k = 0; k < m; ++k) { - scaled_matrix(j, k) = 0; - } - }); + eval_row_scaling(team, weight, matrix); team.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { - scale_column_trans_matrix(matrix, weight_vector, team, j, scaled_matrix); - }); - + eval_rhs_scaling(team, weight, rhs_values); team.team_barrier(); - // performing (P^T Q P) - KokkosBatched::TeamGemm< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, scaled_matrix, - matrix, 0.0, square_matrix); - - team.team_barrier(); + // initilize U (orthogonal matrices) array + ScratchMatView U(team.team_scratch(1), row, row); + fill(-5.0, team, U); - KokkosBlas::Experimental:: - Gemv::invoke( - team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs); + // initialize Vt + ScratchMatView Vt(team.team_scratch(1), column, column); + fill(-5.0, team, Vt); - team.team_barrier(); + // initialize sigma + ScratchVecView sigma(team.team_scratch(1), column); + fill(-5.0, team, sigma); - return ResultConvertNormal{scaled_matrix, square_matrix, transformed_rhs}; -} + // initialize work + ScratchVecView work(team.team_scratch(1), row); + fill(-5.0, team, work); -/** - * @brief Solves the matrix equation Ax = b' using LU decomposition. - * - * This function solves the given matrix equation using LU decomposition. - * The solution vector `x'` overwrites the input `rhs` vector. - * - * @param square_matrix The input matrix A (must be square). - * @param rhs The right-hand side vector b. After execution, it is overwritten - * with the solution vector x. - * @param team The team member executing the task. - * - * @return None. The solution is directly written to the `rhs` vector. - */ -KOKKOS_INLINE_FUNCTION -void solve_matrix(const ScratchMatView& square_matrix, - const ScratchVecView& rhs, member_type team) -{ + // initialise sigma_mat_inv S^-1 + ScratchMatView temp_matrix(team.team_scratch(1), column, row); + fill(0, team, temp_matrix); - // Perform LU decomposition - KokkosBatched::TeamLU< - member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team, - square_matrix); + // initialise V S^-1 U^T + ScratchMatView vsigmaInvUtMul(team.team_scratch(1), column, row); + fill(0, team, vsigmaInvUtMul); + if (team.team_rank() == 0) { + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), matrix, U, + sigma, Vt, work, tol); + } team.team_barrier(); - // Solve the equation with forward and backward solves - KokkosBatched::TeamSolveLU< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); -} + calculate_shrinkage_factor(team, lambda, sigma); -/** - * @brief Fills the kokkos scratch view - * - * @param value The value to be populated - * @param team The team member - * @param matrix The scratch matrix - * - */ -KOKKOS_INLINE_FUNCTION -void fill(double value, member_type team, ScratchMatView matrix) -{ + auto Ut = find_transpose(team, U); - int row = matrix.extent(0); - int col = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { - for (int k = 0; k < col; ++k) { - matrix(j, k) = value; - } - }); -} + scale_and_adjust(team, sigma, Ut, temp_matrix); // S^-1 U^T -/** - * @brief Fills the kokkos scratch view - * - * @param value The value to be populated - * @param team The team member - * @param vector The scratch vector - * - */ -KOKKOS_INLINE_FUNCTION -void fill(double value, member_type team, ScratchVecView vector) -{ - - int size = vector.extent(0); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), - [=](int j) { vector(j) = value; }); + KokkosBatched::TeamGemm< + member_type, KokkosBatched::Trans::Transpose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, Vt, temp_matrix, + 0.0, vsigmaInvUtMul); + + KokkosBlas::TeamGemv< + member_type, KokkosBlas::Trans::NoTranspose, + KokkosBlas::Algo::Gemv::Unblocked>::invoke(team, 1.0, vsigmaInvUtMul, + rhs_values, 0.0, + solution_vector); } /** @@ -405,7 +401,7 @@ void fill(double value, member_type team, ScratchVecView vector) * points of source field * @param target_coordinates Scalar array view of the coordinates of control * points of target field - * @param support The object that enpasulates support info + * @param support The object that encapsulates support info * @param dim The dimension of the simulations * @param degree The degree of the interpolation order * @param rbf_func The radial basis function choice @@ -417,9 +413,10 @@ template void mls_interpolation(RealConstDefaultScalarArrayView source_values, RealConstDefaultScalarArrayView source_coordinates, RealConstDefaultScalarArrayView target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func, - RealDefaultScalarArrayView approx_target_values) + const SupportResults& support, const Omega_h::LO& dim, + const Omega_h::LO& degree, Func rbf_func, + RealDefaultScalarArrayView approx_target_values, + double lambda, double tol) { PCMS_FUNCTION_TIMER; static_assert(std::is_invocable_r_v, @@ -427,8 +424,24 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, static_assert(!std::is_pointer_v, "function pointer will fail in GPU execution context"); - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; + int nsources = source_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF( + source_values.size() == nsources, + "[ERROR] The size of the source values and source coordinates is not " + "same. " + "The current sizes are :\n" + "source_values_size = %d, source_coordinates_size = %d\n", + source_values.size(), nsources); + + const auto ntargets = target_coordinates.size() / dim; + + OMEGA_H_CHECK_PRINTF(approx_target_values.size() == ntargets, + "[ERROR] The size of the approx target values and the " + "number of targets is " + "not same. The current numbers are :\n" + "approx_target_values = %d, ntargets = %d\n", + approx_target_values.size(), ntargets); IntHostMatView host_slice_length( "stores slice length of polynomial basis in host", degree, dim); @@ -447,86 +460,87 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, Kokkos::deep_copy(slice_length, slice_length_hd); int shared_size = - calculate_scratch_shared_size(support, nvertices_target, basis_size); - - team_policy tp(nvertices_target, Kokkos::AUTO); + calculate_scratch_shared_size(support, ntargets, basis_size, dim); - int scratch_size = tp.scratch_size_max(0); + team_policy tp(ntargets, Kokkos::AUTO); + int scratch_size = tp.scratch_size_max(1); + printf("Scratch Size = %d\n", scratch_size); + printf("Shared Size = %d\n", shared_size); PCMS_ALWAYS_ASSERT(scratch_size > shared_size); // calculates the interpolated values Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + "MLS coefficients", tp.set_scratch_size(1, Kokkos::PerTeam(scratch_size)), KOKKOS_LAMBDA(const member_type& team) { int league_rank = team.league_rank(); int start_ptr = support.supports_ptr[league_rank]; int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; - ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } - } - - // vondermonde matrix P from the vectors of basis vector of supports - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - basis_size); + // local_source_point stores the coordinates of source supports of a + // given target + ScratchMatView local_source_points(team.team_scratch(1), nsupports, dim); // rbf function values of source supports Phi(n,n) - ScratchVecView phi_vector(team.team_scratch(0), nsupports); + ScratchVecView phi_vector(team.team_scratch(1), nsupports); + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(1), nsupports, + basis_size); // stores known vector (b) - ScratchVecView support_values(team.team_scratch(0), nsupports); + ScratchVecView support_values(team.team_scratch(1), nsupports); // basis of target - ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); + ScratchVecView target_basis_vector(team.team_scratch(1), basis_size); + + // solution coefficients (solution vector) + ScratchVecView solution_coefficients(team.team_scratch(1), basis_size); // Initialize the scratch matrices and vectors + fill(0.0, team, local_source_points); fill(0.0, team, vandermonde_matrix); fill(0.0, team, phi_vector); fill(0.0, team, support_values); fill(0.0, team, target_basis_vector); + fill(0.0, team, solution_coefficients); - // evaluates the basis vector of a given target point - Coord target_point; - target_point.x = target_coordinates[league_rank * dim]; - target_point.y = target_coordinates[league_rank * dim + 1]; + // Logger logger(15); - if (dim == 3) { - target_point.z = target_coordinates[league_rank * dim + 2]; - } - eval_basis_vector(slice_length, target_point, target_basis_vector); - - /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is - * created with the basis vector of source supports stacking on top of - * each other + /** + * + * the local_source_points is of the type ScratchMatView with + * coordinates information; row is number of local supports + * & column is dim */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - create_vandermonde_matrix(local_source_points, j, slice_length, - vandermonde_matrix); - }); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; - team.team_barrier(); + for (int i = 0; i < dim; ++i) { + local_source_points(count, i) = source_coordinates[index * dim + i]; + } + } - OMEGA_H_CHECK_PRINTF( - support.radii2[league_rank] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - support.radii2[league_rank]); + // logger.logMatrix(team, LogLevel::DEBUG, local_source_points, + // "Support Coordinates"); + double target_point[MAX_DIM] = {}; + + for (int i = 0; i < dim; ++i) { + target_point[i] = target_coordinates[league_rank * dim + i]; + } + + // logger.logArray(team, LogLevel::DEBUG, target_point, dim, + // "Target points"); /** phi(nsupports) is the array of rbf functions evaluated at the - * source supports In the actual implementation, Phi(nsupports, nsupports) - * is the diagonal matrix & each diagonal element is the phi evaluated at - * each source points + * source supports In the actual implementation, Phi(nsupports, + * nsupports) is the diagonal matrix & each diagonal element is the phi + * evaluated at each source points + * + * step 1: evaluate the phi vector with the original dimension */ Kokkos::parallel_for( @@ -537,8 +551,11 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - /** support_values(nsupports) (or known rhs vector b) is the vector of the - * quantity that we want interpolate + /** support_values(nsupports) (or known rhs vector b) is the vector of + * the quantity that we want interpolate + * + * + * step 2: find local supports function values */ Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { @@ -550,19 +567,63 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); - auto result = convert_normal_equation(vandermonde_matrix, phi_vector, - support_values, team); + // logger.log(team, LogLevel::DEBUG, "The search starts"); + // logger.logVector(team, LogLevel::DEBUG, support_values, "Support + // values"); + + /** + * step 3: normalize local source supports and target point + */ + + normalize_supports(team, target_point, local_source_points); + team.team_barrier(); + /** + * + * evaluates the basis vector of a given target point Coord target_point; + * this can evaluate monomial basis vector for any degree of polynomial + * + * step 4: call basis vector evaluation function (eval_basis_vector); + */ + eval_basis_vector(slice_length, target_point, target_basis_vector); + + /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is + * created with the basis vector of source supports stacking on top of + * each other + * + * step 5: create vandermonde matrix + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + create_vandermonde_matrix(local_source_points, j, slice_length, + vandermonde_matrix); + }); team.team_barrier(); - // It stores the solution in rhs vector itself - solve_matrix(result.square_matrix, result.transformed_rhs, team); + // logger.logMatrix(team, LogLevel::DEBUG, vandermonde_matrix, + // "vandermonde matrix"); + + OMEGA_H_CHECK_PRINTF( + support.radii2[league_rank] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + support.radii2[league_rank]); + + /** + * step 6: solve overdetermined system using SVD + * + */ + solve_matrix_svd(team, phi_vector, support_values, vandermonde_matrix, + solution_coefficients, lambda, tol); team.team_barrier(); double target_value = KokkosBlas::Experimental::dot( - team, result.transformed_rhs, target_basis_vector); + team, solution_coefficients, target_basis_vector); + // printf("Target Point : %d \t\t Value: %5.6f\n", league_rank, + // target_value); + // logger.logScalar(team, LogLevel::DEBUG, target_value, + // "interpolated value"); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); @@ -576,28 +637,28 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * Maps the data from source mesh to target mesh * * @param source_values Read array of Source field values - * @param source_coordinates Read array of the coordinates of control points of - * source field - * @param target_coordinates Read array of the coordinates of control points of - * target field + * @param source_coordinates Read array of the coordinates of control points + * of source field + * @param target_coordinates Read array of the coordinates of control points + * of target field * @param support The object that enpasulates support info * @param dim The dimension of the simulations * @param degree The degree of the interpolation order * @param rbf_func The radial basis function choice - * @return Write array of the interpolated field in target mesh + * @return Omega_h::Write array of the interpolated field in target mesh * * */ template -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func) +Omega_h::Write mls_interpolation( + const Omega_h::Reals source_values, const Omega_h::Reals source_coordinates, + const Omega_h::Reals target_coordinates, const SupportResults& support, + const Omega_h::LO& dim, const Omega_h::LO& degree, Func rbf_func, + double lambda, double tol) { - const auto nvertices_source = source_coordinates.size() / dim; + const auto nsources = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; + const auto ntargets = target_coordinates.size() / dim; RealConstDefaultScalarArrayView source_values_array_view( source_values.data(), source_values.size()); @@ -611,19 +672,20 @@ Write mls_interpolation(const Reals source_values, RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), support.radii2.size()); - Write interpolated_values(nvertices_target, 0, - "approximated target values"); + Omega_h::Write interpolated_values( + ntargets, 0, "approximated target values"); RealDefaultScalarArrayView interpolated_values_array_view( interpolated_values.data(), interpolated_values.size()); mls_interpolation(source_values_array_view, source_coordinates_array_view, target_coordinates_array_view, support, dim, degree, - rbf_func, interpolated_values_array_view); + rbf_func, interpolated_values_array_view, lambda, tol); return interpolated_values; } } // namespace detail } // namespace pcms + #endif diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp index 06d83640..fd482371 100644 --- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -2,7 +2,7 @@ #define PCMS_INTERPOLATOR_ALIASES_HPP #include -#include "../arrays.h" +#include "pcms/arrays.h" namespace pcms { @@ -17,6 +17,7 @@ using team_policy = typename Kokkos::TeamPolicy<>; using member_type = typename Kokkos::TeamPolicy<>::member_type; // alias for scratch view + using ScratchSpace = typename Kokkos::DefaultExecutionSpace::scratch_memory_space; diff --git a/src/pcms/interpolator/pcms_interpolator_logger.hpp b/src/pcms/interpolator/pcms_interpolator_logger.hpp new file mode 100644 index 00000000..60c7310f --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_logger.hpp @@ -0,0 +1,147 @@ +#ifndef PCMS_INTERPOLATOR_LOGGER_HPP +#define PCMS_INTERPOLATOR_LOGGER_HPP + +#include +#include + +namespace pcms +{ +enum class LogLevel +{ + INFO, + WARNING, + ERROR, + DEBUG +}; + +class Logger +{ +private: + size_t selected_league_rank_; + +public: + KOKKOS_INLINE_FUNCTION + explicit Logger(size_t selected_league_rank) + : selected_league_rank_(selected_league_rank) + { + } + + // log level info + template + KOKKOS_INLINE_FUNCTION void log(const member_type& team, const LogLevel level, + const char* fmt, Args... args) + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + Kokkos::printf("[%s] (League %d) ", logLevelToString(level), + team.league_rank()); + Kokkos::printf(fmt, args...); + Kokkos::printf("\n"); + }); + } + } + + KOKKOS_INLINE_FUNCTION + void logStruct(const member_type& team, const LogLevel level, const Coord& p, + const char* name) + { + + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + Kokkos::printf("(%12.6f, %12.6f)", p.x, p.y); + Kokkos::printf("\n"); + }); + } + } + + // log array + KOKKOS_INLINE_FUNCTION + void logArray(const member_type& team, const LogLevel level, + const double* array, const int size, const char* name) + { + + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + + for (int i = 0; i < size; ++i) { + Kokkos::printf("%12.6f\n", array[i]); + } + Kokkos::printf("\n"); + }); + } + } + // log scratch vector + KOKKOS_INLINE_FUNCTION + void logVector(const member_type& team, const LogLevel level, + const ScratchVecView& vector, const char* name) const + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + for (int i = 0; i < vector.size(); ++i) { + Kokkos::printf("%12.6f\n", vector(i)); + } + Kokkos::printf("\n"); + }); + } + } + + // log scratch matrix + KOKKOS_INLINE_FUNCTION + void logMatrix(const member_type& team, const LogLevel level, + const ScratchMatView& matrix, const char* name) const + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + for (int i = 0; i < matrix.extent(0); ++i) { + for (int j = 0; j < matrix.extent(1); ++j) { + Kokkos::printf("%20.8f", matrix(i, j)); + } + Kokkos::printf("\n"); + } + + Kokkos::printf("\n"); + }); + } + } + + // log scalar + KOKKOS_INLINE_FUNCTION + void logScalar(const member_type& team, const LogLevel level, + const double value, const char* name) const + { + if (team.league_rank() == selected_league_rank_) { + Kokkos::single(Kokkos::PerTeam(team), [&]() { + Kokkos::printf("[%s] (League %d) %s: \n", logLevelToString(level), + team.league_rank(), name); + Kokkos::printf("%12.6f", value); + Kokkos::printf("\n"); + }); + } + } + +private: + KOKKOS_INLINE_FUNCTION + const char* logLevelToString(LogLevel loglevel) const + { + switch (loglevel) { + case LogLevel::INFO: return "INFO"; + + case LogLevel::WARNING: return "WARNING"; + + case LogLevel::ERROR: return "ERROR"; + + case LogLevel::DEBUG: return "DEBUG"; + default: return "UNKNOWN"; + } + } +}; +} // end namespace pcms +#endif diff --git a/src/pcms/interpolator/pcms_interpolator_view_utils.hpp b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp new file mode 100644 index 00000000..1c52752f --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_view_utils.hpp @@ -0,0 +1,273 @@ +#ifndef PCMS_INTERPOLATOR_ARRAY_OPS_HPP +#define PCMS_INTERPOLATOR_ARRAY_OPS_HPP + +#include +#include +#include +#include +#include + +namespace pcms +{ +namespace detail +{ +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param matrix The scratch matrix + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchMatView matrix) +{ + + int row = matrix.extent(0); + int col = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { + for (int k = 0; k < col; ++k) { + matrix(j, k) = value; + } + }); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param vector The scratch vector + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchVecView vector) +{ + + int size = vector.extent(0); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { vector(j) = value; }); +} + +KOKKOS_INLINE_FUNCTION +void find_sq_root_each(member_type team, ScratchVecView& array) +{ + int size = array.size(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + OMEGA_H_CHECK_PRINTF( + array(i) >= 0, + "[Error:] Square root of a negative number is invalid!\n" + "value is %12.6f\n", + array(i)); + array(i) = Kokkos::sqrt(array(i)); + }); +} + +/** + * + *@brief Finds the inverse of each element in an array + * + *@param team The team member + *@param array The scratch vector view + * + */ +KOKKOS_INLINE_FUNCTION +void find_inverse_each(member_type team, ScratchVecView& array) +{ + int size = array.size(); + double eps = 1e-8; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + if (array(i) > eps) { + array(i) = 1 / array(i); + } else { + array(i) = 0.0; + } + }); +} + +/** + *@brief Evaluates the shrinkage factor in SVD (S/(S^2 + lambda) + * + *@param team The team member + *@param lambda The regularization parameter + *@param sigma The scratch vector view of singular values + * + */ +KOKKOS_INLINE_FUNCTION +void calculate_shrinkage_factor(member_type team, double lambda, + ScratchVecView& sigma) +{ + int size = sigma.size(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int i) { + sigma(i) /= (sigma(i) * sigma(i) + lambda); + }); +} + +/** + * + *@brief Finds the transpose of the scratch matrix + * + *@param team The team member + *@param array The scratch matrix view + * + */ +KOKKOS_INLINE_FUNCTION +ScratchMatView find_transpose(member_type team, const ScratchMatView& matrix) +{ + int row = matrix.extent(0); + int column = matrix.extent(1); + + ScratchMatView transMatrix(team.team_scratch(1), column, row); + fill(0.0, team, transMatrix); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + transMatrix(i, j) = matrix(j, i); + } + }); + return transMatrix; +} + +/** + * + *@brief Fills the diagonal entries of a matrix + * + *@param team The team member + *@param diagonal_entries The scratch vector view of the entries thats to be + *filled in a matrix diagonal + *@param array The scratch matrix whose digonal entries to be filled + * + */ +KOKKOS_INLINE_FUNCTION +void fill_diagonal(member_type team, const ScratchVecView& diagonal_entries, + ScratchMatView& matrix) +{ + int size = diagonal_entries.size(); + OMEGA_H_CHECK(size == matrix.extent(0) || size == matrix.extent(1)); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int i) { matrix(i, i) = diagonal_entries(i); }); +} + +/** + * @brief Scales each row of the input matrix by corresponding diagonal values. + * + * This function performs **row scaling** of the matrix, meaning each row `i` + * of the matrix is multiplied by the scalar value `diagonal_entries(i)`. + * + * Mathematically, if the matrix is `A` and the diagonal scaling matrix is `D_A + * = diag(diagonal_entries)`, the operation performed is: + * + * A_scaled = D_A * A + * + * where `D_A` is a diagonal matrix that scales rows of `A`. + * + * @param team Kokkos team member used for team-level parallelism + * @param diagonal_entries A vector of diagonal values used for scaling (should + * match or be smaller than the number of rows) + * @param matrix The 2D matrix whose rows will be scaled + */ +KOKKOS_INLINE_FUNCTION +void eval_row_scaling(member_type team, ScratchVecView diagonal_entries, + ScratchMatView matrix) +{ + + int row = matrix.extent(0); + int column = matrix.extent(1); + int vector_size = diagonal_entries.size(); + + OMEGA_H_CHECK_PRINTF( + vector_size <= row, + "[ERROR]: for row scaling the size of diagonal entries vector should be " + "equal or less than the row of the matrix which is to be scaled\n" + "size of vector = %d, row of a matrix = %d\n", + vector_size, row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + matrix(i, j) *= + diagonal_entries(i); // scales each row of a matrix with a corresponding + } // value in a digonal entries + }); +} + +/** + * @brief Scales the right-hand-side (RHS) vector using the given diagonal + * weights. + * + * This function performs element-wise scaling of the RHS vector. Each entry + * `rhs_values(i)` is multiplied by the corresponding entry + * `diagonal_entries(i)`. + * + * Mathematically: + * rhs_scaled(i) = diagonal_entries(i) * rhs_values(i) + * + * @param team Kokkos team member used for team-level parallelism + * @param diagonal_entries A vector of scaling weights (same size as rhs_values) + * @param rhs_values The RHS vector to be scaled + */ +KOKKOS_INLINE_FUNCTION +void eval_rhs_scaling(member_type team, ScratchVecView diagonal_entries, + ScratchVecView rhs_values) +{ + + int weight_size = diagonal_entries.size(); + int rhs_size = rhs_values.size(); + + OMEGA_H_CHECK_PRINTF( + weight_size == rhs_size, + "[ERROR]: for row scaling the size of diagonal entries vector should be " + "equal or less than the row of the matrix which is to be scaled\n" + "size of weight vector = %d, size of a rhs vector = %d\n", + weight_size, rhs_size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, weight_size), [=](int i) { + rhs_values(i) *= diagonal_entries( + i); // scales each element of a rhs with a corresponding weights + }); +} + +/** + * @brief Scales the input matrix row-wise and writes the result into an + * adjusted matrix. + * + * This function first scales the rows of `matrixToScale` using the + * corresponding entries from `diagonal_entries` (i.e., performs D * A, where D + * is a diagonal matrix), and then copies the scaled values into + * `adjustedMatrix`. + * + * Preconditions: + * - The number of columns in `adjustedMatrix` must match the number of rows in + * `matrixToScale` (colB == rowA) + * - `diagonal_entries` must have size ≤ rowA (validated inside + * `eval_row_scaling`) + * + * Typical use case: used in evaluating SV in U^TSV for SVD solver + * + * @param team Kokkos team member used for team-level parallelism + * @param diagonal_entries A vector of scaling values for each row + * @param matrixToScale The matrix to be row-scaled (modified in place) + * @param adjustedMatrix The output matrix to store the scaled version + */ +KOKKOS_INLINE_FUNCTION +void scale_and_adjust(member_type team, ScratchVecView& diagonal_entries, + ScratchMatView& matrixToScale, + ScratchMatView& adjustedMatrix) +{ + size_t rowA = matrixToScale.extent(0); + size_t colA = matrixToScale.extent(1); + + size_t rowB = adjustedMatrix.extent(0); + size_t colB = adjustedMatrix.extent(1); + + size_t nWeights = diagonal_entries.extent(0); + OMEGA_H_CHECK(colB == rowA); + eval_row_scaling(team, diagonal_entries, matrixToScale); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + for (int j = 0; j < colB; ++j) { + adjustedMatrix(i, j) = matrixToScale(i, j); + } + }); +} + +} // namespace detail +} // namespace pcms + +#endif diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp index f09d38af..5a366c8f 100644 --- a/src/pcms/interpolator/queue_visited.hpp +++ b/src/pcms/interpolator/queue_visited.hpp @@ -9,13 +9,11 @@ #include #define MAX_SIZE_QUEUE 500 #define MAX_SIZE_TRACK 800 -using namespace std; -using namespace Omega_h; class Queue { private: - LO queue_array[MAX_SIZE_QUEUE]; + Omega_h::LO queue_array[MAX_SIZE_QUEUE]; int first = 0, last = -1, count = 0; public: @@ -44,7 +42,7 @@ class Queue class Track { private: - LO tracking_array[MAX_SIZE_TRACK]; + Omega_h::LO tracking_array[MAX_SIZE_TRACK]; int first = 0, last = -1, count = 0; public: diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f3845fbd..27b0a3c0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -104,14 +104,14 @@ if(Catch2_FOUND) add_executable(field_transfer_example field_transfer_example.cpp) target_link_libraries(field_transfer_example PUBLIC pcms::core pcms_interpolator) list(APPEND PCMS_UNIT_TEST_SOURCES - test_field_transfer.cpp - test_uniform_grid.cpp - test_omega_h_copy.cpp - test_point_search.cpp - test_mls_basis.cpp - test_rbf_interp.cpp - test_linear_solver.cpp - ) + test_field_transfer.cpp + test_uniform_grid.cpp + test_omega_h_copy.cpp + test_point_search.cpp + test_mls_basis.cpp + test_rbf_interp.cpp + test_normalisation.cpp + test_svd_serial.cpp) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core pcms_interpolator) @@ -159,4 +159,4 @@ if(PCMS_ENABLE_Fortran) endif() add_executable(xgc_n0_server xgc_n0_coupling_server.cpp) -target_link_libraries(xgc_n0_server PUBLIC pcms::core test_support) \ No newline at end of file +target_link_libraries(xgc_n0_server PUBLIC pcms::core test_support) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp deleted file mode 100644 index 2f8664ec..00000000 --- a/test/test_linear_solver.cpp +++ /dev/null @@ -1,235 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include - -using namespace pcms; -using namespace pcms::detail; - -TEST_CASE("solver test") -{ - // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ - // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal - // matrix with diagonal elements stored as vector and b(n,1) is a vector - SECTION("check convert normal equation ") - { - - int nvertices_target = 2; - - int nsupports = 3; - - int size = 2; - - Kokkos::View Q("phi vector", nvertices_target, nsupports); - - Kokkos::View b("rhs vector", nvertices_target, nsupports); - - Kokkos::View P("vandermonde matrix", nvertices_target, nsupports, - size); - - Kokkos::View TransPQP("moment matrix", nvertices_target, size, - size); - - Kokkos::View TransPQ("scaled matrix", nvertices_target, size, - nsupports); - - Kokkos::View TransPQb("transformed rhs", nvertices_target, size); - - Kokkos::View solution("solution unknown vector", nvertices_target, - size); - - auto host_Q = Kokkos::create_mirror_view(Q); - auto host_P = Kokkos::create_mirror_view(P); - auto host_b = Kokkos::create_mirror_view(b); - - host_Q(0, 0) = 1.0; - host_Q(0, 1) = 1.0; - host_Q(0, 2) = 1.0; - - host_Q(1, 0) = 3.0; - host_Q(1, 1) = 2.0; - host_Q(1, 2) = 1.0; - - Kokkos::deep_copy(Q, host_Q); - - host_P(0, 0, 0) = 1.0; - host_P(0, 0, 1) = 2.0; - host_P(0, 1, 0) = 3.0; - host_P(0, 1, 1) = 4.0; - host_P(0, 2, 0) = 5.0; - host_P(0, 2, 1) = 6.0; - - host_P(1, 0, 0) = 2.0; - host_P(1, 0, 1) = 3.0; - host_P(1, 1, 0) = 1.0; - host_P(1, 1, 1) = -1.0; - host_P(1, 2, 0) = 4.0; - host_P(1, 2, 1) = 1.0; - - Kokkos::deep_copy(P, host_P); - - host_b(0, 0) = 7.0; - host_b(0, 1) = 8.0; - host_b(0, 2) = 9.0; - - host_b(1, 0) = 5.0; - host_b(1, 1) = 6.0; - host_b(1, 2) = 7.0; - - Kokkos::deep_copy(b, host_b); - - team_policy tp(nvertices_target, Kokkos::AUTO); - Kokkos::parallel_for( - "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - size); - ScratchVecView phi(team.team_scratch(0), nsupports); - ScratchVecView support_values(team.team_scratch(0), nsupports); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - phi(j) = Q(i, j); - support_values(j) = b(i, j); - for (int k = 0; k < size; ++k) { - vandermonde_matrix(j, k) = P(i, j, k); - } - }); - - team.team_barrier(); - - auto result = convert_normal_equation(vandermonde_matrix, phi, - support_values, team); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { - TransPQb(i, j) = result.transformed_rhs(j); - for (int k = 0; k < size; ++k) { - TransPQP(i, j, k) = result.square_matrix(j, k); - } - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < size; ++k) { - TransPQ(i, k, j) = result.scaled_matrix(k, j); - } - }); - - team.team_barrier(); - - solve_matrix(result.square_matrix, result.transformed_rhs, team); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { - solution(i, j) = result.transformed_rhs(j); - }); - - team.team_barrier(); - }); - - auto host_result_lhs = Kokkos::create_mirror_view(TransPQP); - auto host_result_rhs = Kokkos::create_mirror_view(TransPQb); - auto host_result_scaled = Kokkos::create_mirror_view(TransPQ); - auto host_result_solution = Kokkos::create_mirror_view(solution); - - Kokkos::deep_copy(host_result_lhs, TransPQP); - Kokkos::deep_copy(host_result_rhs, TransPQb); - Kokkos::deep_copy(host_result_scaled, TransPQ); - Kokkos::deep_copy(host_result_solution, solution); - - Kokkos::View expected_lhs_matrix( - "expected lhs matrix", nvertices_target, size, size); - - Kokkos::View expected_rhs_vector( - "expected rhs vector", nvertices_target, size); - - Kokkos::View expected_scaled_matrix( - "expected scaled matrix", nvertices_target, size, nsupports); - - Kokkos::View expected_solution( - "expected solution", nvertices_target, size); - - expected_lhs_matrix(0, 0, 0) = 35.0; - expected_lhs_matrix(0, 0, 1) = 44.0; - expected_lhs_matrix(0, 1, 0) = 44.0; - expected_lhs_matrix(0, 1, 1) = 56.0; - - expected_rhs_vector(0, 0) = 76.0; - expected_rhs_vector(0, 1) = 100.0; - - expected_scaled_matrix(0, 0, 0) = 1.0; - expected_scaled_matrix(0, 0, 1) = 3.0; - expected_scaled_matrix(0, 0, 2) = 5.0; - expected_scaled_matrix(0, 1, 0) = 2.0; - expected_scaled_matrix(0, 1, 1) = 4.0; - expected_scaled_matrix(0, 1, 2) = 6.0; - - expected_solution(0, 0) = -6.0; - expected_solution(0, 1) = 6.5; - - expected_lhs_matrix(1, 0, 0) = 30.0; - expected_lhs_matrix(1, 0, 1) = 20.0; - expected_lhs_matrix(1, 1, 0) = 20.0; - expected_lhs_matrix(1, 1, 1) = 30.0; - - expected_rhs_vector(1, 0) = 70.0; - expected_rhs_vector(1, 1) = 40.0; - - expected_scaled_matrix(1, 0, 0) = 6.0; - expected_scaled_matrix(1, 0, 1) = 2.0; - expected_scaled_matrix(1, 0, 2) = 4.0; - expected_scaled_matrix(1, 1, 0) = 9.0; - expected_scaled_matrix(1, 1, 1) = -2.0; - expected_scaled_matrix(1, 1, 2) = 1.0; - - expected_solution(1, 0) = 2.6; - expected_solution(1, 1) = -0.4; - - for (int i = 0; i < nvertices_target; ++i) { - for (int j = 0; j < size; ++j) { - for (int l = 0; l < nsupports; ++l) { - printf("scaled_matrix (A^T Q) (%f, %f) at (%d,%d,%d)\n", - expected_scaled_matrix(i, j, l), host_result_scaled(i, j, l), - i, j, l); - REQUIRE_THAT( - expected_scaled_matrix(i, j, l), - Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-10)); - } - } - for (int j = 0; j < size; ++j) { - for (int k = 0; k < size; ++k) { - printf("A^T Q A (%f, %f) at (%d,%d,%d)\n", - expected_lhs_matrix(i, j, k), host_result_lhs(i, j, k), i, j, - k); - REQUIRE_THAT( - expected_lhs_matrix(i, j, k), - Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-10)); - } - } - for (int j = 0; j < size; ++j) { - printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j), - host_result_rhs(i, j), i, j); - REQUIRE_THAT(expected_rhs_vector(i, j), - Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-10)); - } - - for (int j = 0; j < size; ++j) { - printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_solution(i, j), - host_result_solution(i, j), i, j); - REQUIRE_THAT( - expected_solution(i, j), - Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-10)); - } - } - } -} diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp index 1ec00eca..bb22cadc 100644 --- a/test/test_mls_basis.cpp +++ b/test/test_mls_basis.cpp @@ -5,8 +5,6 @@ #include #include -using namespace pcms; - TEST_CASE("basis test") { SECTION("check basis slice lengths, size of the basis vector and basis " @@ -17,8 +15,8 @@ TEST_CASE("basis test") const int degree = 3; Kokkos::View array("array", degree, dim); Kokkos::deep_copy(array, 0); - detail::calculate_basis_slice_lengths(array); - auto size = detail::calculate_basis_vector_size(array); + pcms::detail::calculate_basis_slice_lengths(array); + auto size = pcms::detail::calculate_basis_vector_size(array); int expected[degree][dim] = {{1, 1, 1}, {1, 2, 3}, {1, 3, 6}}; @@ -30,7 +28,7 @@ TEST_CASE("basis test") REQUIRE(size == 20); - IntDeviceMatView d_array("array in device", degree, dim); + pcms::IntDeviceMatView d_array("array in device", degree, dim); auto array_hd = Kokkos::create_mirror_view(d_array); Kokkos::deep_copy(array_hd, array); Kokkos::deep_copy(d_array, array_hd); @@ -41,21 +39,21 @@ TEST_CASE("basis test") auto host_results = Kokkos::create_mirror_view(results); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchVecView basis_vector(team.team_scratch(0), size); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), - [=](int j) { basis_vector(j) = 0; }); + pcms::ScratchVecView basis_vector(team.team_scratch(0), size); + pcms::detail::fill(0, team, basis_vector); - Coord target_point; - target_point.x = coords[i][0]; - target_point.y = coords[i][1]; - target_point.z = coords[i][2]; + double target_point[MAX_DIM] = {}; - detail::eval_basis_vector(d_array, target_point, basis_vector); + for (int j = 0; j < dim; ++j) { + target_point[j] = coords[i][j]; + } + + pcms::detail::eval_basis_vector(d_array, target_point, basis_vector); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { results(i, j) = basis_vector(j); }); @@ -109,9 +107,9 @@ TEST_CASE("basis test") const int degree = 1; Kokkos::View array("array", degree, dim); Kokkos::deep_copy(array, 0); - detail::calculate_basis_slice_lengths(array); + pcms::detail::calculate_basis_slice_lengths(array); - auto size = detail::calculate_basis_vector_size(array); + auto size = pcms::detail::calculate_basis_vector_size(array); int expected[degree][dim] = { {1, 1, 1}, @@ -125,7 +123,7 @@ TEST_CASE("basis test") REQUIRE(size == 4); - IntDeviceMatView d_array("array in device", degree, dim); + pcms::IntDeviceMatView d_array("array in device", degree, dim); auto array_hd = Kokkos::create_mirror_view(d_array); Kokkos::deep_copy(array_hd, array); Kokkos::deep_copy(d_array, array_hd); @@ -136,21 +134,22 @@ TEST_CASE("basis test") auto host_results = Kokkos::create_mirror_view(results); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchVecView basis_vector(team.team_scratch(0), size); + pcms::ScratchVecView basis_vector(team.team_scratch(0), size); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { basis_vector(j) = 0; }); - Coord target_point; - target_point.x = coords[i][0]; - target_point.y = coords[i][1]; - target_point.z = coords[i][2]; + double target_point[MAX_DIM] = {}; + + for (int j = 0; j < dim; ++j) { + target_point[j] = coords[i][j]; + } - detail::eval_basis_vector(d_array, target_point, basis_vector); + pcms::detail::eval_basis_vector(d_array, target_point, basis_vector); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { results(i, j) = basis_vector(j); }); @@ -186,13 +185,13 @@ TEST_CASE("basis test") Kokkos::View host_slice_length("array", degree, dim); Kokkos::deep_copy(host_slice_length, 0); - detail::calculate_basis_slice_lengths(host_slice_length); + pcms::detail::calculate_basis_slice_lengths(host_slice_length); - IntDeviceMatView slice_length("slice array in device", degree, dim); + pcms::IntDeviceMatView slice_length("slice array in device", degree, dim); auto slice_length_hd = Kokkos::create_mirror_view(slice_length); Kokkos::deep_copy(slice_length_hd, host_slice_length); Kokkos::deep_copy(slice_length, slice_length_hd); - auto size = detail::calculate_basis_vector_size(host_slice_length); + auto size = pcms::detail::calculate_basis_vector_size(host_slice_length); int nvertices_target = 2; int nsupports = 5; Kokkos::View points("vandermonde matrix", nvertices_target, @@ -219,15 +218,16 @@ TEST_CASE("basis test") Kokkos::deep_copy(points, host_points); - team_policy tp(nvertices_target, Kokkos::AUTO); + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); Kokkos::parallel_for( "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team) { + KOKKOS_LAMBDA(const pcms::member_type& team) { int i = team.league_rank(); - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - size); + pcms::ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); - ScratchMatView local_source_points(team.team_scratch(0), nsupports, 1); + pcms::ScratchMatView local_source_points(team.team_scratch(0), + nsupports, 1); for (int j = 0; j < nsupports; ++j) { local_source_points(j, 0) = points(i, j); } @@ -243,8 +243,8 @@ TEST_CASE("basis test") Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - detail::create_vandermonde_matrix(local_source_points, j, - slice_length, vandermonde_matrix); + pcms::detail::create_vandermonde_matrix( + local_source_points, j, slice_length, vandermonde_matrix); }); Kokkos::parallel_for( diff --git a/test/test_normalisation.cpp b/test/test_normalisation.cpp new file mode 100644 index 00000000..ae45ad35 --- /dev/null +++ b/test/test_normalisation.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +Omega_h::Reals fillFromMatrix(const Omega_h::Matrix<2, 10>& matrix) +{ + int num_points = 10; + Omega_h::Write array(20, 0, "array to store coordinates"); + Kokkos::parallel_for( + "fills the array view from matrix", num_points, KOKKOS_LAMBDA(const int i) { + int index = i * 2; + + array[index] = matrix[i][0]; + array[index + 1] = matrix[i][1]; + }); + return Omega_h::read(array); +} + +TEST_CASE("test_normalization_routine") +{ + + SECTION("test_normalization_relative_distance") + { + Omega_h::Real tolerance = 1E-5; + int nvertices_target = 3; + + Omega_h::Matrix<3, 3> target_coordinates{ + {0.5, 2.0, 1.4}, {0.1, 1.2, 0.3}, {0.9, 2.8, 2.2}}; + + Omega_h::Matrix<3, 6> supports_target_1{ + {0.45, 3.18, 0.38}, {0.12, 1.2, 1.0}, {2.04, 1.45, 3.2}, + {1.52, 0.98, 3.2}, {0.22, 1.45, 3.2}, {0.96, 3.62, 2.2}}; + + Omega_h::Matrix<3, 8> supports_target_2{ + {0.88, 0.98, -0.8}, {1.2, 0.23, -1.21}, {-1.23, 2.85, 1.45}, + {0.63, 0.98, 0.2}, {-0.5, 1.45, 0.33}, {1.4, 2.61, -0.98}, + {0.88, 1.72, 0.38}, {1.2, 0.50, 2.1}}; + + Omega_h::Matrix<3, 4> supports_target_3{{3.15, 3.12, 2.91}, + {2.24, 1.12, 1.12}, + {0.12, 4.29, 1.98}, + {1.24, 1.54, 4.2}}; + + Omega_h::LOs nsupports({6, 8, 4}, "number of supports"); + int dim = 3; + + auto total_coordinates = Omega_h::get_sum(nsupports) * dim; + + REQUIRE(total_coordinates == 54); + Omega_h::Write normalized_support_coordinates( + total_coordinates, 0, "coordinates after normalization"); + + Omega_h::Write normalized_target_coordinates( + total_coordinates, 0, "coordinates after normalization"); + + Omega_h::Write all_support_coordinates( + total_coordinates, 0, "support coordinates all"); + Omega_h::Write all_target_coordinates( + total_coordinates, 0, "target coordinates all"); + + pcms::team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const pcms::member_type& team) { + int league_rank = team.league_rank(); + + int num_supports = nsupports[league_rank]; + pcms::ScratchMatView local_supports(team.team_scratch(0), num_supports, + dim); + pcms::detail::fill(0, team, local_supports); + + double target_point[MAX_DIM] = {}; + + for (int i = 0; i < dim; ++i) { + target_point[i] = target_coordinates[league_rank][i]; + } + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, num_supports), [=](int i) { + for (int j = 0; j < dim; ++j) { + int index = league_rank * num_supports * dim + i * dim + j; + + all_target_coordinates[index] = + target_coordinates[league_rank][j]; + if (league_rank == 0) { + local_supports(i, j) = supports_target_1[i][j]; + all_support_coordinates[index] = supports_target_1[i][j]; + } else if (league_rank == 1) { + local_supports(i, j) = supports_target_2[i][j]; + all_support_coordinates[index] = supports_target_2[i][j]; + } else { + local_supports(i, j) = supports_target_3[i][j]; + all_support_coordinates[index] = supports_target_3[i][j]; + } + } + }); + + team.team_barrier(); + pcms::detail::normalize_supports(team, target_point, local_supports); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, num_supports), [=](int i) { + for (int j = 0; j < dim; ++j) { + int index = league_rank * num_supports * dim + i * dim + j; + normalized_support_coordinates[index] = local_supports(i, j); + normalized_target_coordinates[index] = target_point[j]; + } + }); + }); + + auto host_all_support_coordinates = + Omega_h::HostRead(Omega_h::read(all_support_coordinates)); + + auto host_all_target_coordinates = + Omega_h::HostRead(Omega_h::read(all_target_coordinates)); + + auto host_normalized_support_coordinates = Omega_h::HostRead( + Omega_h::read(normalized_support_coordinates)); + + auto host_normalized_target_coordinates = Omega_h::HostRead( + Omega_h::read(normalized_target_coordinates)); + + for (int i = 0; i < total_coordinates; ++i) { + auto result_support = + host_all_support_coordinates[i] - host_all_target_coordinates[i]; + + auto result_target = + host_all_target_coordinates[i] - host_all_target_coordinates[i]; + + CHECK_THAT(host_normalized_support_coordinates[i], + Catch::Matchers::WithinAbs(result_support, tolerance)); + + CHECK_THAT(host_normalized_target_coordinates[i], + Catch::Matchers::WithinAbs(result_target, tolerance)); + } + } // end SECTION + +} // end TESTCASE diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 31413597..24ac9a18 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -11,12 +11,8 @@ #include #include -using namespace std; -using namespace Omega_h; -using namespace pcms; - KOKKOS_INLINE_FUNCTION -double func(Coord& p, int degree) +double func(pcms::Coord& p, int degree) { [[maybe_unused]] auto x = p.x; [[maybe_unused]] auto y = p.y; @@ -35,17 +31,18 @@ double func(Coord& p, int degree) return -1; } -void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, - Reals source_values, Reals exact_target_values, - Reals source_coordinates, Reals target_coordinates) +void test(Omega_h::Mesh& mesh, Omega_h::Real cutoffDistance, int degree, + Omega_h::LO min_num_supports, Omega_h::Reals source_values, + Omega_h::Reals exact_target_values, Omega_h::Reals source_coordinates, + Omega_h::Reals target_coordinates) { int dim = mesh.dim(); - Real tolerance = 0.0005; + Omega_h::Real tolerance = 5e-4; - std::vector rbf_types = { - RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4, - RadialBasisFunction::RBF_CONST + std::vector rbf_types = { + pcms::RadialBasisFunction::RBF_GAUSSIAN, pcms::RadialBasisFunction::RBF_C4, + pcms::RadialBasisFunction::RBF_CONST }; @@ -57,16 +54,19 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, mls_interpolation(source_values, source_coordinates, target_coordinates, support, dim, degree, rbf); - auto host_approx_target_values = HostRead(approx_target_values); + auto host_approx_target_values = + Omega_h::HostRead(approx_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + auto host_exact_target_values = + Omega_h::HostRead(exact_target_values); int m = exact_target_values.size(); int n = approx_target_values.size(); - REQUIRE(m == n); for (size_t i = 0; i < m; ++i) { + CAPTURE(i, host_exact_target_values[i], host_approx_target_values[i], + tolerance); CHECK_THAT( host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); @@ -74,15 +74,16 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, } } // Test cases for centroid to node mapping using MLS -TEST_CASE("mls_interp_test") +TEST_CASE("test_mls_interpolation") { - auto lib = Library{}; + auto lib = Omega_h::Library{}; auto world = lib.world(); auto rank = lib.world()->rank(); - auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); + auto mesh = + Omega_h::build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); - Real cutoffDistance = 0.3; + Omega_h::Real cutoffDistance = 0.3; cutoffDistance = cutoffDistance * cutoffDistance; const auto dim = mesh.dim(); @@ -93,36 +94,36 @@ TEST_CASE("mls_interp_test") const auto& ntargets = mesh.nverts(); - Write source_coordinates( + Omega_h::Write source_coordinates( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); - const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + const auto& faces2nodes = mesh.ask_down(Omega_h::FACE, Omega_h::VERT).ab2b; Kokkos::parallel_for( "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); + OMEGA_H_LAMBDA(const Omega_h::LO id) { + const auto current_el_verts = Omega_h::gather_verts<3>(faces2nodes, id); const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(target_coordinates, current_el_verts); - auto centroid = average(current_el_vert_coords); + Omega_h::gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = Omega_h::average(current_el_vert_coords); int index = 2 * id; source_coordinates[index] = centroid[0]; source_coordinates[index + 1] = centroid[1]; }); - Points source_points; + pcms::Points source_points; source_points.coordinates = - PointsViewType("Number of local source supports", nfaces); + pcms::PointsViewType("Number of local source supports", nfaces); Kokkos::parallel_for( "target points", nfaces, KOKKOS_LAMBDA(int j) { source_points.coordinates(j).x = source_coordinates[j * dim]; source_points.coordinates(j).y = source_coordinates[j * dim + 1]; }); - Points target_points; + pcms::Points target_points; target_points.coordinates = - PointsViewType("Number of local source supports", mesh.nverts()); + pcms::PointsViewType("Number of local source supports", mesh.nverts()); Kokkos::parallel_for( "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { target_points.coordinates(j).x = target_coordinates[j * dim]; @@ -131,176 +132,225 @@ TEST_CASE("mls_interp_test") SECTION("test interpolation degree 1, function degree 0") { - + std::cout << "-------starting test: d0p1------------" << "\n"; int degree = 1; - LO min_num_supports = 10; + Omega_h::LO min_num_supports = 10; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 1); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + + std::cout << " The test for d=0, p=1, passed " << "\n"; + std::cout << "----------finishing test: d0p1-------" << "\n"; } SECTION("test interpolation degree 1, function degree 1") { + std::cout << "--------starting test: d1p1---------" << "\n"; int degree = 1; - LO min_num_supports = 10; + Omega_h::LO min_num_supports = 10; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + + std::cout << " The test for d=1, p=1, passed " << "\n"; + std::cout << "------------finishing test: d1p1--------" << "\n"; } SECTION("test interpo degree 2 poly degree 0") { + std::cout << "-------------starting test: d0p2------------" << "\n"; int degree = 2; - LO min_num_supports = 16; + Omega_h::LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 2); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 2); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + + std::cout << " The test for d=0, p=2, passed " << "\n"; + std::cout << "-------------finishing test: d0p2------------" << "\n"; } SECTION("test interpolation degree 2 poly degree 1") { + std::cout << "----------------satrting test: d1p2--------------" << "\n"; int degree = 2; - LO min_num_supports = 16; + Omega_h::LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 1); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + + std::cout << " The test for d=1, p=2, passed " << "\n"; + std::cout << "----------------finishing test: d1p2--------------" << "\n"; } SECTION("test interpolation degree 2, function degree 2") { + std::cout << "-------------starting test: d2p2--------------------" << "\n"; int degree = 2; - LO min_num_supports = 16; + Omega_h::LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + std::cout << " The test for d=2, p=2, passed " << "\n"; + std::cout << "-------------finishing test: d2p2--------------------" + << "\n"; } SECTION("test interpolation degree 3, function degree 2") { + std::cout << "-------------starting test: d2p3--------------------" << "\n"; int degree = 3; - LO min_num_supports = 20; + Omega_h::LO min_num_supports = 20; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree - 1); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + std::cout << " The test for d=2, p=3, passed " << "\n"; + std::cout << "-------------finishing test: d2p3--------------------" + << "\n"; } SECTION("test interpolation degree 3, function degree 3") { + std::cout << "-------------starting test: d3p3--------------------" << "\n"; int degree = 3; - LO min_num_supports = 20; + Omega_h::LO min_num_supports = 20; - Write source_values(nfaces, 0, "exact target values"); + Omega_h::Write source_values(nfaces, 0, + "exact target values"); Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { source_values[i] = func(source_points.coordinates(i), degree); }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Omega_h::Write exact_target_values(mesh.nverts(), 0, + "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { exact_target_values[i] = func(target_points.coordinates(i), degree); }); - test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), - Reals(exact_target_values), Reals(source_coordinates), - Reals(target_coordinates)); + test(mesh, cutoffDistance, degree, min_num_supports, + Omega_h::Reals(source_values), Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + + std::cout << " The test for d=3, p=3, passed " << "\n"; + std::cout << "-------------finishing test: d3p3--------------------" + << "\n"; } } diff --git a/test/test_spr_meshfields.cpp b/test/test_spr_meshfields.cpp new file mode 100644 index 00000000..3d58e21f --- /dev/null +++ b/test/test_spr_meshfields.cpp @@ -0,0 +1,210 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace +{ + +void print_patches(Omega_h::Graph& patches) +{ + auto offsets = Omega_h::HostRead(patches.a2ab); + auto values = Omega_h::HostRead(patches.ab2b); + std::cout << "num patches " << patches.nnodes() << "\n"; + for (int patch = 0; patch < patches.nnodes(); patch++) { + std::cout << "patch " << patch << " patchElms "; + for (auto valIdx = offsets[patch]; valIdx < offsets[patch + 1]; ++valIdx) { + auto patchElm = values[valIdx]; + std::cout << patchElm << " "; + } + std::cout << "\n"; + } +} + +std::string getTestName(size_t interp_degree, size_t func_degree) +{ + return "test interpolation degree " + std::to_string(interp_degree) + + ", polynomial degree " + std::to_string(func_degree); +} + +KOKKOS_INLINE_FUNCTION +double func(pcms::Coord& p, int degree) +{ + [[maybe_unused]] auto x = p.x; + [[maybe_unused]] auto y = p.y; + if (degree == 0) { + return 3; + } else if (degree == 1) { + return x + y; + } else if (degree == 2) { + return pow(x, 2) + pow(y, 2); + } else if (degree == 3) { + + return pow(x, 3) + pow(y, 3); + } else { + printf("No polynomials with degree = %d\n", degree); + } + return -1; +} + +void test(Mesh& mesh, Omega_h::Graph& patches, int degree, + Omega_h::Reals source_values, Omega_h::Reals exact_target_values, + Omega_h::Reals source_coordinates, Omega_h::Reals target_coordinates) +{ + + int dim = mesh.dim(); + Omega_h::Real tolerance = 0.0005; + + Omega_h::Write ignored(patches.a2ab.size() - 1, 1); + SupportResults support{patches.a2ab, patches.ab2b, ignored}; + + auto approx_target_values = pcms::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, degree, + pcms::RadialBasisFunction::NO_OP, 1e-5); + + const auto delta_abs = Omega_h::fabs_each(Omega_h::subtract_each( + exact_target_values, Omega_h::read(approx_target_values))); + const auto max_delta_abs = Omega_h::get_max(delta_abs); + std::cout << "max_delta_abs " << max_delta_abs << "\n"; + + auto host_approx_target_values = + Omega_h::HostRead(approx_target_values); + mesh.add_tag(OMEGA_H_VERT, "approx_target_values", 1, + Omega_h::read(approx_target_values)); + Omega_h::vtk::write_parallel("box.vtk", &mesh); + + auto host_exact_target_values = + Omega_h::HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + // std::cout << "vtx " << i << "\n"; + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } +} + +} // end anonymous namespace + +// Test cases for centroid to node mapping using MLS +TEST_CASE("meshfields_spr_test") +{ + + auto lib = Library{}; + auto world = lib.world(); + auto rank = lib.world()->rank(); + const auto boxSize = 1.0; + const auto nElms = 6; + // auto mesh = build_box(world, OMEGA_H_SIMPLEX, boxSize, boxSize, 0, nElms, + // nElms, 0, false); + // + Omega_h::Mesh mesh(&lib); + const auto thwaitesMeshFile = + "/lore/smithc11/projects/landice/thwaites_basal/thwaites_basalClass.osh"; + Omega_h::binary::read(thwaitesMeshFile, lib.world(), &mesh); + std::cout << "mesh: elms " << mesh.nelems() << " verts " << mesh.nverts() + << "\n"; + + const auto dim = mesh.dim(); + + const auto& target_coordinates = mesh.coords(); + + const auto& nfaces = mesh.nfaces(); + + const auto& ntargets = mesh.nverts(); + + Write source_coordinates( + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); + + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Kokkos::parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = 2 * id; + source_coordinates[index] = centroid[0]; + source_coordinates[index + 1] = centroid[1]; + }); + + pcms::Points source_points; + + source_points.coordinates = + pcms::PointsViewType("Number of local source supports", nfaces); + Kokkos::parallel_for( + "target points", nfaces, KOKKOS_LAMBDA(int j) { + source_points.coordinates(j).x = source_coordinates[j * dim]; + source_points.coordinates(j).y = source_coordinates[j * dim + 1]; + }); + + pcms::Points target_points; + + target_points.coordinates = + pcms::PointsViewType("Number of local source supports", mesh.nverts()); + Kokkos::parallel_for( + "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { + target_points.coordinates(j).x = target_coordinates[j * dim]; + target_points.coordinates(j).y = target_coordinates[j * dim + 1]; + }); + + // Define tests + for (size_t interp_degree = 1; interp_degree <= 3; interp_degree++) { + for (int func_degree = interp_degree; func_degree >= 0; func_degree--) { + SECTION(getTestName(interp_degree, func_degree).c_str()) + { + std::cerr << "start " << interp_degree << ", " << func_degree << " \n"; + int minPatchSize; + if (interp_degree == 1) + minPatchSize = 4; + if (interp_degree == 2) + minPatchSize = 10; // why so large? + if (interp_degree == 3) + minPatchSize = 12; // why so large? + std::cerr << "minPatchSize " << minPatchSize << "\n"; + auto patches = mesh.get_vtx_patches(minPatchSize); + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), func_degree); + }); + mesh.add_tag(mesh.dim(), "source_values", 1, + Omega_h::read(source_values)); + + Write exact_target_values(mesh.nverts(), 0, + "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = + func(target_points.coordinates(i), func_degree); + }); + mesh.add_tag(OMEGA_H_VERT, "target_values", 1, + Omega_h::read(exact_target_values)); + test(mesh, patches, interp_degree, Omega_h::Reals(source_values), + Omega_h::Reals(exact_target_values), + Omega_h::Reals(source_coordinates), + Omega_h::Reals(target_coordinates)); + std::cerr << "done " << interp_degree << ", " << func_degree << " \n"; + } // end SECTION + } + } +} diff --git a/test/test_svd_serial.cpp b/test/test_svd_serial.cpp new file mode 100644 index 00000000..c638d048 --- /dev/null +++ b/test/test_svd_serial.cpp @@ -0,0 +1,405 @@ +#include +#include +#include +#include +#include +#include +#include "KokkosBatched_SVD_Decl.hpp" +#include "KokkosBatched_SVD_Serial_Impl.hpp" +#include +#include +#include +#include + +using namespace std; +using namespace Omega_h; +using namespace pcms; + +TEST_CASE("test_serial_svd") +{ + + constexpr int row = 3; + constexpr int column = 3; + + double tolerance = 1e-4; + + Kokkos::View A_data("Device A data", row, column); + auto host_A_data = Kokkos::create_mirror_view(A_data); + + host_A_data(0, 0) = 4.0; + host_A_data(0, 1) = 2.0; + host_A_data(0, 2) = 3.0; + host_A_data(1, 0) = 3.0; + host_A_data(1, 1) = 5.0; + host_A_data(1, 2) = 1.0; + host_A_data(2, 0) = 2.0; + host_A_data(2, 1) = 3.0; + host_A_data(2, 2) = 6.0; + + Kokkos::deep_copy(A_data, host_A_data); + + Kokkos::View rhs_data("Device rhs data", row, column); + auto host_rhs_data = Kokkos::create_mirror_view(rhs_data); + + host_rhs_data(0) = 1.28571; + host_rhs_data(1) = 1.42857; + host_rhs_data(2) = 3.1428; + + Kokkos::deep_copy(rhs_data, host_rhs_data); + + double expected_solution[column] = {-0.142857, 0.285714, + 0.428571}; // Approximate values + + SECTION("test_svd_factorization") + { + Kokkos::View result("result", row, column); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(2000)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchMatView U(team.team_scratch(1), row, row); + ScratchMatView Vt(team.team_scratch(1), column, column); + ScratchVecView sigma(team.team_scratch(1), column); + ScratchVecView work(team.team_scratch(1), row); + ScratchMatView reconstructedA(team.team_scratch(1), row, column); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data(i, j); + } + }); + + ScratchMatView sigma_mat(team.team_scratch(1), row, column); + detail::fill(0.0, team, sigma_mat); + ScratchMatView Usigma(team.team_scratch(1), row, column); + detail::fill(0.0, team, Usigma); + + if (team.team_rank() == 0) { + + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, + sigma, Vt, work, 1e-6); + + for (int i = 0; i < column; ++i) { + sigma_mat(i, i) = sigma(i); + } + + KokkosBatched::SerialGemm< + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, U, sigma_mat, + 0.0, Usigma); + + KokkosBatched::SerialGemm< + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(1.0, Usigma, Vt, 0.0, + reconstructedA); + } + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + result(i, j) = reconstructedA(i, j); + } + }); + }); + + Kokkos::fence(); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < column; ++j) { + CHECK_THAT(host_result(i, j), + Catch::Matchers::WithinAbs(host_A_data(i, j), tolerance)); + } + } + } // end section + + SECTION("test_transpose_function") + { + + Kokkos::View result("result", row, row); + Kokkos::View transpose_expected("result", row, row); + Kokkos::deep_copy(result, 0.0); + Kokkos::deep_copy(transpose_expected, 0.0); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchMatView U(team.team_scratch(1), row, row); + ScratchMatView Vt(team.team_scratch(1), column, column); + ScratchVecView sigma(team.team_scratch(1), column); + ScratchVecView work(team.team_scratch(1), row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data(i, j); + } + }); + + if (team.team_rank() == 0) { + KokkosBatched::SerialSVD::invoke(KokkosBatched::SVD_USV_Tag(), A, U, + sigma, Vt, work); + } + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < row; ++j) { + transpose_expected(i, j) = U(j, i); + } + }); + + team.team_barrier(); + auto transposedU = detail::find_transpose(team, U); + + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < row; ++j) { + result(i, j) = transposedU(i, j); + } + }); + }); + + auto host_result = Kokkos::create_mirror_view(result); + auto host_expected = Kokkos::create_mirror_view(transpose_expected); + Kokkos::deep_copy(host_result, result); + Kokkos::deep_copy(host_expected, transpose_expected); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < row; ++j) { + CHECK_THAT(host_result(i, j), + Catch::Matchers::WithinAbs(host_expected(i, j), tolerance)); + } + } + + } // end section + + SECTION("test_row_scaling_function") + { + Kokkos::View diagonal_entries("Device diagonal enetries", row); + auto host_diagonal_entries = Kokkos::create_mirror_view(diagonal_entries); + + host_diagonal_entries(0) = 2.0; + host_diagonal_entries(1) = 0.5; + host_diagonal_entries(2) = -1.0; + + Kokkos::deep_copy(diagonal_entries, host_diagonal_entries); + + double expected_result[row][column] = { + {8.0, 4.0, 6.0}, {1.5, 2.5, 0.5}, {-2.0, -3.0, -6.0}}; + + Kokkos::View result("result", row, column); + Kokkos::deep_copy(result, 0.0); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchVecView weight(team.team_scratch(1), row); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data(i, j); + } + weight(i) = diagonal_entries(i); + }); + + team.team_barrier(); + + detail::eval_row_scaling(team, weight, A); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + result(i, j) = A(i, j); + } + }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < row; ++i) { + for (int j = 0; j < row; ++j) { + CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs( + expected_result[i][j], tolerance)); + } + } + + } // end section + + SECTION("test_scale_adjust_function") + { + constexpr int rowA = 6; // Matrix to scale (6x6) + constexpr int colA = 6; + constexpr int rowB = 3; // Adjusted matrix (3x6) + constexpr int colB = 6; + + Kokkos::View A_data("Device A data", rowA, colA); + auto host_A_data = Kokkos::create_mirror_view(A_data); + + host_A_data(0, 0) = 4.0; + host_A_data(0, 1) = 2.0; + host_A_data(0, 2) = 3.0; + host_A_data(0, 3) = 1.0; + host_A_data(0, 4) = 0.0; + host_A_data(0, 5) = 5.0; + host_A_data(1, 0) = 3.0; + host_A_data(1, 1) = 5.0; + host_A_data(1, 2) = 1.0; + host_A_data(1, 3) = 4.0; + host_A_data(1, 4) = 2.0; + host_A_data(1, 5) = 6.0; + host_A_data(2, 0) = 2.0; + host_A_data(2, 1) = 3.0; + host_A_data(2, 2) = 6.0; + host_A_data(2, 3) = 5.0; + host_A_data(2, 4) = 1.0; + host_A_data(2, 5) = 7.0; + host_A_data(3, 0) = 7.0; + host_A_data(3, 1) = 8.0; + host_A_data(3, 2) = 9.0; + host_A_data(3, 3) = 3.0; + host_A_data(3, 4) = 2.0; + host_A_data(3, 5) = 1.0; + host_A_data(4, 0) = 1.0; + host_A_data(4, 1) = 3.0; + host_A_data(4, 2) = 2.0; + host_A_data(4, 3) = 6.0; + host_A_data(4, 4) = 4.0; + host_A_data(4, 5) = 5.0; + host_A_data(5, 0) = 5.0; + host_A_data(5, 1) = 7.0; + host_A_data(5, 2) = 3.0; + host_A_data(5, 3) = 8.0; + host_A_data(5, 4) = 9.0; + host_A_data(5, 5) = 6.0; + + Kokkos::deep_copy(A_data, host_A_data); + + Kokkos::View diagonal_entries_data("Device diagonal entries data", + rowB); + auto host_diagonal_entries_data = + Kokkos::create_mirror_view(diagonal_entries_data); + + host_diagonal_entries_data(0) = 2.0; + host_diagonal_entries_data(1) = 0.5; + host_diagonal_entries_data(2) = -1.0; + + Kokkos::deep_copy(diagonal_entries_data, host_diagonal_entries_data); + + double expected_result[rowB][colB] = {{8.0, 4.0, 6.0, 2.0, 0.0, 10.0}, + {1.5, 2.5, 0.5, 2.0, 1.0, 3.0}, + {-2.0, -3.0, -6.0, -5.0, -1.0, -7.0}}; + + Kokkos::View result("result", rowB, colB); + Kokkos::deep_copy(result, 0.0); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + tp.set_scratch_size(1, Kokkos::PerTeam(1000)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), rowA, colA); + detail::fill(0.0, team, A); + ScratchMatView result_matrix(team.team_scratch(1), rowB, colB); + ScratchVecView weight(team.team_scratch(1), rowB); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowA), [=](int i) { + for (int j = 0; j < colA; ++j) { + A(i, j) = A_data(i, j); + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + weight(i) = diagonal_entries_data(i); + }); + + team.team_barrier(); + if (team.team_rank() == 0) { + for (int i = 0; i < rowA; ++i) { + for (int j = 0; j < colA; ++j) { + printf("A(%d,%d) = %f\n", i, j, A(i, j)); + } + } + + for (int i = 0; i < rowB; ++i) { + printf("weight(%d) = %f\n", i, weight(i)); + } + } + + detail::scale_and_adjust(team, weight, A, result_matrix); + team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, rowB), [=](int i) { + for (int j = 0; j < colB; ++j) { + result(i, j) = result_matrix(i, j); + } + }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < rowB; ++i) { + for (int j = 0; j < colB; ++j) { + CAPTURE(i, j, host_result(i, j), expected_result[i][j]); + CHECK_THAT(host_result(i, j), Catch::Matchers::WithinAbs( + expected_result[i][j], tolerance)); + } + } + + } // end section + + SECTION("test_svd_solver") + { + Kokkos::View result("result", column); + team_policy tp(1, Kokkos::AUTO); + Kokkos::parallel_for( + "Solve SVD", tp.set_scratch_size(1, Kokkos::PerTeam(1000)), + KOKKOS_LAMBDA(const member_type& team) { + ScratchMatView A(team.team_scratch(1), row, column); + ScratchVecView rhs(team.team_scratch(1), row); + ScratchVecView x(team.team_scratch(1), column); + detail::fill(0.0, team, x); + + ScratchVecView weight(team.team_scratch(1), row); + detail::fill(1.0, team, weight); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int i) { + for (int j = 0; j < column; ++j) { + A(i, j) = A_data(i, j); + } + rhs(i) = rhs_data(i); + }); + + team.team_barrier(); + + if (team.team_rank() == 0) { + printf("\n[A matrix before solve_matrix_svd]\n"); + for (int i = 0; i < row; ++i) { + for (int j = 0; j < column; ++j) { + printf("A(%d, %d) = %f\t", i, j, A(i, j)); + } + printf("\n"); + } + + printf("[rhs vector before solve_matrix_svd]\n"); + for (int i = 0; i < row; ++i) { + printf("rhs[%d] = %f\n", i, rhs(i)); + } + } + + team.team_barrier(); + detail::solve_matrix_svd(team, weight, rhs, A, x, 0, 1e-12); + team.team_barrier(); + if (team.team_rank() == 0) { + printf("[solution vector x after solve_matrix_svd]\n"); + for (int i = 0; i < column; ++i) { + printf("x[%d] = %f\n", i, x(i)); + } + } + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, column), + [=](int i) { result(i) = x(i); }); + }); + auto host_result = Kokkos::create_mirror_view(result); + Kokkos::deep_copy(host_result, result); + + for (int i = 0; i < column; ++i) { + CAPTURE(i, host_result(i), expected_solution[i]); + CHECK_THAT(host_result(i), + Catch::Matchers::WithinAbs(expected_solution[i], tolerance)); + } + } // end section +}