From 6aa1ca3cc4c7ac9731db84e964bf3e0d7dc05613 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sat, 21 Sep 2024 21:45:58 -0400 Subject: [PATCH 01/66] added adj_search.hpp header in MLSInterpolation.hpp file --- interpolator/MLSInterpolation.hpp | 187 ++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 interpolator/MLSInterpolation.hpp diff --git a/interpolator/MLSInterpolation.hpp b/interpolator/MLSInterpolation.hpp new file mode 100644 index 00000000..cb6a0eb5 --- /dev/null +++ b/interpolator/MLSInterpolation.hpp @@ -0,0 +1,187 @@ +#ifndef MLS_INTERPOLATION_HPP +#define MLS_INTERPOLATION_HPP + +#include "MLSCoefficients.hpp" +#include "adj_search_dega2.hpp" +#include "adj_search.hpp" +#include "points.hpp" + +using namespace Omega_h; +using namespace pcms; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + Write radii2) { + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + // Kokkos::RangePolicy range_policy(1, + // nvertices_target); + + Kokkos::View shmem_each_team( + "stores the size required for each team", nvertices_target); + // Write shmem_each_team(nvertices_target, 0, "stores the size required + // for each team member"); + + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(6, 6) * 4; + total_shared_size += ScratchMatView::shmem_size(6, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(nsupports, 6); + total_shared_size += ScratchVecView::shmem_size(6); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + shmem_each_team(i) = total_shared_size; + }); + + size_t shared_size; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + printf("shared size = %d \n", shared_size); + + Write approx_target_values(nvertices_target, 0, + "approximated target values"); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + printf("scratch size is %d \n", scratch_size); + + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, 2); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + } + + ScratchMatView lower(team.team_scratch(0), 6, 6); + + ScratchMatView forward_matrix(team.team_scratch(0), 6, 6); + + ScratchMatView moment_matrix(team.team_scratch(0), 6, 6); + + ScratchMatView inv_mat(team.team_scratch(0), 6, 6); + + ScratchMatView V(team.team_scratch(0), nsupports, 6); + + ScratchMatView Ptphi(team.team_scratch(0), 6, nsupports); + + ScratchMatView resultant_matrix(team.team_scratch(0), 6, nsupports); + + ScratchVecView targetMonomialVec(team.team_scratch(0), 6); + + ScratchVecView SupportValues(team.team_scratch(0), nsupports); + + ScratchVecView result(team.team_scratch(0), nsupports); + + ScratchVecView Phi(team.team_scratch(0), nsupports); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { + for (int k = 0; k < 6; ++k) { + lower(j, k) = 0; + forward_matrix(j, k) = 0; + moment_matrix(j, k) = 0; + inv_mat(j, k) = 0; + } + + targetMonomialVec(j) = 0; + for (int k = 0; k < nsupports; ++k) { + resultant_matrix(j, k) = 0; + + Ptphi(j, k) = 0; + } + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < 6; ++k) { + V(j, k) = 0; + } + + SupportValues(j) = 0; + result(j) = 0; + Phi(j) = 0; + }); + + Coord target_point; + + target_point.x = target_coordinates[i * dim]; + + target_point.y = target_coordinates[i * dim + 1]; + + BasisPoly(targetMonomialVec, target_point); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { VandermondeMatrix(V, local_source_points, j); }); + + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + PhiVector(Phi, target_point, local_source_points, j, radii2[i]); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); + + team.team_barrier(); + + MatMatMul(team, moment_matrix, Ptphi, V); + team.team_barrier(); + + inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); + + team.team_barrier(); + + MatMatMul(team, resultant_matrix, inv_mat, Ptphi); + team.team_barrier(); + + MatVecMul(team, targetMonomialVec, resultant_matrix, result); + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + SupportValues(i) = + source_values[support.supports_idx[start_ptr + i]]; + }); + + double tgt_value = 0; + dot_product(team, result, SupportValues, tgt_value); + if (team.team_rank() == 0) { + approx_target_values[i] = tgt_value; + } + }); + + return approx_target_values; +} + +#endif From 14f82b212753132bc56224e7ed2b8321792d53b7 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sat, 21 Sep 2024 23:09:26 -0400 Subject: [PATCH 02/66] added regular grid linear interpolator and mls interpolator --- .../linear_interpolator/interpolant.hpp | 184 +++++++++++++ .../linear_interpolator/multidimarray.hpp | 36 +++ interpolator/mls_interpolator/CMakeLists.txt | 52 ++++ .../mls_interpolator/MLSCoefficients.hpp | 212 +++++++++++++++ .../mls_interpolator/MLSInterpolation.hpp | 187 +++++++++++++ interpolator/mls_interpolator/adj_search.hpp | 218 +++++++++++++++ .../mls_interpolator/adj_search_dega2.hpp | 257 ++++++++++++++++++ .../cmake/interpolatorConfig.cmake.in | 6 + interpolator/mls_interpolator/points.hpp | 32 +++ .../mls_interpolator/queue_visited.hpp | 106 ++++++++ 10 files changed, 1290 insertions(+) create mode 100644 interpolator/linear_interpolator/interpolant.hpp create mode 100644 interpolator/linear_interpolator/multidimarray.hpp create mode 100644 interpolator/mls_interpolator/CMakeLists.txt create mode 100644 interpolator/mls_interpolator/MLSCoefficients.hpp create mode 100644 interpolator/mls_interpolator/MLSInterpolation.hpp create mode 100644 interpolator/mls_interpolator/adj_search.hpp create mode 100644 interpolator/mls_interpolator/adj_search_dega2.hpp create mode 100644 interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in create mode 100644 interpolator/mls_interpolator/points.hpp create mode 100644 interpolator/mls_interpolator/queue_visited.hpp diff --git a/interpolator/linear_interpolator/interpolant.hpp b/interpolator/linear_interpolator/interpolant.hpp new file mode 100644 index 00000000..7aeb02ef --- /dev/null +++ b/interpolator/linear_interpolator/interpolant.hpp @@ -0,0 +1,184 @@ + +#ifndef INTERPOLANT_HPP +#define INTERPOLANT_HPP + + +#include +#include +#include "multidimarray.hpp" +#define MAX_DIM 10 + + +KOKKOS_INLINE_FUNCTION +void find_indices(const IntVecView& num_bins, const RealVecView& range, + const RealVecView& point , int* indices){ + int dim = point.extent(0); + // IntVecView indices("parametric coordinate", dim); + for (int i = 0; i < dim; ++i){ + int id = i * 2; + double length = range(id + 1) - range(id); + double dlen = length/num_bins(i); + indices[i] = ((point(i) - range(id)) / dlen); + + } + + +} + + +KOKKOS_INLINE_FUNCTION +void find_limits(const IntVecView& num_bins, const RealVecView& range, const int* indices, double* limits){ + int dim = num_bins.extent(0); + // RealVecView limits("limits", 2*dim); + for (int i = 0; i < dim; ++i){ + int ptr = i * 2; + double dlen = (range(ptr+1) - range(ptr))/num_bins(i); + limits[ptr] = indices[i] * dlen; + limits[ptr+1] = limits[ptr] + dlen; + } +} + + + +KOKKOS_INLINE_FUNCTION +void evaluateParametricCoord(const RealVecView& point, const double* limits , double* parametric_coord){ + int dim = point.extent(0); + // RealVecView parametric_coord("parametric coordinate", dim); + for (int i = 0; i < dim; ++i){ + int index = i * 2; + parametric_coord[i] = (point(i) - limits[index])/(limits[index+1] - limits[index]); + + } +} + + + +KOKKOS_INLINE_FUNCTION +void basis_function(const RealVecView& parametric_coord, double* linear_basis_each_dir){ + int dim = parametric_coord.extent(0); + // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); + for (int i = 0; i < dim; ++i){ + int index = i * 2; + + linear_basis_each_dir[index] = 1 - parametric_coord(i); + + linear_basis_each_dir[index + 1] = parametric_coord(i); + + + } + +} + + +KOKKOS_INLINE_FUNCTION +double linear_interpolant(const IntVecView& dimensions, const double* linear_basis_each_dir, const IntVecView& indices, const RealVecView& values){ + int dim = dimensions.extent(0); + double sum = 0; + int ids[MAX_DIM]; + + int array_size = 1 << dim; + for (int i = 0; i < array_size; ++i){ + double temp = 1.0; + for (int j = 0; j < dim; ++j){ + int index = 2*j; + int id_left = indices(j); + int id_right = id_left + 1; + if (i & (1 << j)){ + temp *= linear_basis_each_dir[index+1]; + ids[j] = id_right; + } else { + temp *= linear_basis_each_dir[index]; + ids[j] = id_left; + } + } + + int idx = calculateIndex(dimensions, ids); + double corner_values = values(idx); + + sum += temp*corner_values; + } + return sum; +} + + + +class RegularGridInterpolator { + private: + const RealMatView parametric_coords; + const RealVecView values; + const IntMatView indices; + const IntVecView dimensions; + + public: + RegularGridInterpolator(const RealMatView& parametric_coords_, + const RealVecView& values_, const IntMatView& indices_, const IntVecView& dimensions_) + : parametric_coords(parametric_coords_), values(values_), indices(indices_), dimensions(dimensions_) {}; + +RealVecView linear_interpolation() { + int dim = dimensions.extent(0); + int N = parametric_coords.extent(0); + RealVecView interpolated_values("approximated values", N); + auto parametric_coords_ = parametric_coords; + auto dimensions_ = dimensions; + auto values_ = values; + auto indices_ = indices; + Kokkos::parallel_for("linear interpolation function",N, KOKKOS_LAMBDA(int j){ + double linear_basis_each_dir[MAX_DIM] = {0.0}; + auto parametric_coord_each_point = Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); + auto index_each_point = Kokkos::subview(indices_, j , Kokkos::ALL()); + basis_function(parametric_coord_each_point, linear_basis_each_dir); + auto approx_value = linear_interpolant(dimensions_, linear_basis_each_dir,index_each_point, values_); + interpolated_values(j) = approx_value; + }); + + return interpolated_values; + } +}; + + +struct Result{ + IntMatView indices_pts; + RealMatView parametric_coords; +}; + +Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, const RealVecView& range){ + + int dim = num_bins.extent(0); + Result result; + int N = points.extent(0); + result.indices_pts = IntMatView("indices",N,dim); + result.parametric_coords = RealMatView("parametric_coordinates",N, dim); + + Kokkos::parallel_for(N, KOKKOS_LAMBDA(int j){ + int indcs[MAX_DIM] = {0}; + double limits[MAX_DIM] = {0.0}; + double parametric_coord_each[MAX_DIM] = {0.0}; + auto point_coord = Kokkos::subview(points, j , Kokkos::ALL()); + find_indices(num_bins, range, point_coord, indcs); + find_limits(num_bins, range, indcs, limits); + evaluateParametricCoord( point_coord, limits, parametric_coord_each); + for (int i = 0; i < dim; ++i){ + result.indices_pts(j,i) = indcs[i]; + result.parametric_coords(j,i) = parametric_coord_each[i]; + } + + }); + + return result; + +} + +KOKKOS_INLINE_FUNCTION +double test_function(double* coord){ + double fun_value = 0; + int dim = 5; + for (int i = 0; i < dim; ++i){ + fun_value += coord[i]; + } + return fun_value; + } + +#endif + + + diff --git a/interpolator/linear_interpolator/multidimarray.hpp b/interpolator/linear_interpolator/multidimarray.hpp new file mode 100644 index 00000000..04b6fac1 --- /dev/null +++ b/interpolator/linear_interpolator/multidimarray.hpp @@ -0,0 +1,36 @@ +#ifndef MULTIDIMARRAY_HPP +#define MULTIDIMARRAY_HPP + +#include + + +using RealMatView = Kokkos::View; +using IntMatView = Kokkos::View; +using RealVecView = Kokkos::View; +using IntVecView = Kokkos::View; + +using HostIntVecView = Kokkos::View; + +KOKKOS_INLINE_FUNCTION +int calculateIndex(const IntVecView& dimensions, const int* indices) { + + int dim = dimensions.extent(0); + int index = 0; + int multiplier = 1; + for (int i = dim - 1; i >= 0; --i) { + index += indices[i] * multiplier; + multiplier *= dimensions(i); + } + return index; +} + + +int calculateTotalSize(const HostIntVecView& dimensions){ + int dim = dimensions.extent(0); + int size = 1; + for (int i = 0; i < dim; ++i){ + size *= dimensions(i); + } + return size; +} +#endif diff --git a/interpolator/mls_interpolator/CMakeLists.txt b/interpolator/mls_interpolator/CMakeLists.txt new file mode 100644 index 00000000..69f1d979 --- /dev/null +++ b/interpolator/mls_interpolator/CMakeLists.txt @@ -0,0 +1,52 @@ +cmake_minimum_required(VERSION 3.20) +project(Interpolator) + +set(VERSION_MAJOR 0) +set(VERSION_MINOR 1) + +set(HEADER_FILES + adj_search_dega2.hpp + MLSInterpolation.hpp + points.hpp + adj_search.hpp + MLSCoefficients.hpp + queue_visited.hpp +) + +install(FILES ${HEADER_FILES} DESTINATION include/interpolator) + +include(CMakePackageConfigHelpers) + +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfigVersion.cmake" + VERSION ${VERSION_MAJOR}.${VERSION_MINOR} + COMPATIBILITY SameMajorVersion +) + +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfigVersion.cmake" + DESTINATION lib/cmake/interpolator) + +add_library(interpolator INTERFACE) +target_include_directories(interpolator INTERFACE + $ + $) + +install(TARGETS interpolator + EXPORT interpolatorTargets + INCLUDES DESTINATION include/interpolator) + +configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/cmake/interpolatorConfig.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfig.cmake" + INSTALL_DESTINATION lib/cmake/interpolator +) + +install(FILES + "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfig.cmake" + DESTINATION lib/cmake/interpolator) + +install(EXPORT interpolatorTargets + FILE interpolatorTargets.cmake + NAMESPACE interpolator:: + DESTINATION lib/cmake/interpolator) + diff --git a/interpolator/mls_interpolator/MLSCoefficients.hpp b/interpolator/mls_interpolator/MLSCoefficients.hpp new file mode 100644 index 00000000..620c264e --- /dev/null +++ b/interpolator/mls_interpolator/MLSCoefficients.hpp @@ -0,0 +1,212 @@ +#ifndef MLS_COEFFICIENTS_HPP +#define MLS_COEFFICIENTS_HPP + +#include + +#include "points.hpp" + +#define PI_M 3.14159265358979323846 + +KOKKOS_INLINE_FUNCTION +double func(Coord& p) { + auto x = (p.x - 0.5) * PI_M * 2; + auto y = (p.y - 0.5) * PI_M * 2; + double Z = sin(x) * sin(y); + return Z; +} + +// polynomial basis vector +KOKKOS_INLINE_FUNCTION +void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { + basis_monomial(0) = 1.0; + basis_monomial(1) = p1.x; + basis_monomial(2) = p1.y; + basis_monomial(3) = p1.x * p1.x; + basis_monomial(4) = p1.x * p1.y; + basis_monomial(5) = p1.y * p1.y; +} + +// radial basis function +KOKKOS_INLINE_FUNCTION +double rbf(double r_sq, double rho_sq) { + double phi; + double r = sqrt(r_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + + 82 * pow(ratio, 2) + 36 * ratio + 6; + phi = phi * pow(limit, 6); + } + + return phi; +} + +// create vandermondeMatrix +KOKKOS_INLINE_FUNCTION +void VandermondeMatrix(ScratchMatView V, ScratchMatView local_source_points, + int j) { + int N = local_source_points.extent(0); + Coord source_point; + source_point.x = local_source_points(j, 0); + source_point.y = local_source_points(j, 1); + ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); + BasisPoly(basis_monomial, source_point); +} + +// moment matrix +KOKKOS_INLINE_FUNCTION +void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, + int j) { + int M = V.extent(0); + int N = V.extent(1); + + ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); + for (int k = 0; k < N; k++) { + pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); + } +} + +// radial basis function vector +KOKKOS_INLINE_FUNCTION +void PhiVector(ScratchVecView Phi, Coord target_point, + ScratchMatView local_source_points, int j, + double cuttoff_dis_sq) { + int N = local_source_points.extent(0); + double dx = target_point.x - local_source_points(j, 0); + double dy = target_point.y - local_source_points(j, 1); + double ds_sq = dx * dx + dy * dy; + Phi(j) = rbf(ds_sq, cuttoff_dis_sq); +} + +// matrix matrix multiplication +KOKKOS_INLINE_FUNCTION +void MatMatMul(member_type team, ScratchMatView moment_matrix, + ScratchMatView pt_phi, ScratchMatView vandermonde) { + int M = pt_phi.extent(0); + int N = vandermonde.extent(1); + int K = pt_phi.extent(1); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { + double sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); + moment_matrix(i, j) = sum; + }); + }); +} + +// Matrix vector multiplication +KOKKOS_INLINE_FUNCTION +void MatVecMul(member_type team, ScratchVecView vector, ScratchMatView matrix, + ScratchVecView result) { + int M = matrix.extent(0); + int N = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + double sum = 0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, M), + [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, + sum); + result(i) = sum; + }); + // team.team_barrier(); +} + +// dot product +KOKKOS_INLINE_FUNCTION +void dot_product(member_type team, ScratchVecView result_sub, + ScratchVecView exactSupportValues_sub, double& target_value) { + int N = result_sub.extent(0); + for (int j = 0; j < N; ++j) { + target_value += result_sub(j) * exactSupportValues_sub[j]; + } +} + +// moment matrix +KOKKOS_INLINE_FUNCTION +void PtphiPMatrix(ScratchMatView moment_matrix, member_type team, + ScratchMatView pt_phi, ScratchMatView vandermonde) { + int M = pt_phi.extent(0); + int N = vandermonde.extent(1); + int K = pt_phi.extent(1); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { + double sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); + moment_matrix(i, j) = sum; + }); + }); +} + +// inverse matrix +KOKKOS_INLINE_FUNCTION +void inverse_matrix(member_type team, ScratchMatView matrix, + ScratchMatView lower, ScratchMatView forward_matrix, + ScratchMatView solution) { + int N = matrix.extent(0); + + for (int j = 0; j < N; ++j) { + Kokkos::single(Kokkos::PerTeam(team), [=]() { + double sum = 0; + for (int k = 0; k < j; ++k) { + sum += lower(j, k) * lower(j, k); + } + lower(j, j) = sqrt(matrix(j, j) - sum); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) { + double inner_sum = 0; + for (int k = 0; k < j; ++k) { + inner_sum += lower(i, k) * lower(j, k); + } + lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); + lower(j, i) = lower(i, j); + }); + + team.team_barrier(); + } + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + forward_matrix(i, i) = 1.0 / lower(i, i); + for (int j = i + 1; j < N; ++j) { + forward_matrix(j, i) = 0.0; // Initialize to zero + for (int k = 0; k < j; ++k) { + forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); + } + forward_matrix(j, i) /= lower(j, j); + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); + for (int j = N - 2; j >= 0; --j) { + solution(j, i) = forward_matrix(j, i); + for (int k = j + 1; k < N; ++k) { + solution(j, i) -= lower(j, k) * solution(k, i); + } + solution(j, i) /= lower(j, j); + } + }); +} + +#endif diff --git a/interpolator/mls_interpolator/MLSInterpolation.hpp b/interpolator/mls_interpolator/MLSInterpolation.hpp new file mode 100644 index 00000000..cb6a0eb5 --- /dev/null +++ b/interpolator/mls_interpolator/MLSInterpolation.hpp @@ -0,0 +1,187 @@ +#ifndef MLS_INTERPOLATION_HPP +#define MLS_INTERPOLATION_HPP + +#include "MLSCoefficients.hpp" +#include "adj_search_dega2.hpp" +#include "adj_search.hpp" +#include "points.hpp" + +using namespace Omega_h; +using namespace pcms; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + Write radii2) { + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + // Kokkos::RangePolicy range_policy(1, + // nvertices_target); + + Kokkos::View shmem_each_team( + "stores the size required for each team", nvertices_target); + // Write shmem_each_team(nvertices_target, 0, "stores the size required + // for each team member"); + + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(6, 6) * 4; + total_shared_size += ScratchMatView::shmem_size(6, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(nsupports, 6); + total_shared_size += ScratchVecView::shmem_size(6); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + shmem_each_team(i) = total_shared_size; + }); + + size_t shared_size; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + printf("shared size = %d \n", shared_size); + + Write approx_target_values(nvertices_target, 0, + "approximated target values"); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + printf("scratch size is %d \n", scratch_size); + + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, 2); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + } + + ScratchMatView lower(team.team_scratch(0), 6, 6); + + ScratchMatView forward_matrix(team.team_scratch(0), 6, 6); + + ScratchMatView moment_matrix(team.team_scratch(0), 6, 6); + + ScratchMatView inv_mat(team.team_scratch(0), 6, 6); + + ScratchMatView V(team.team_scratch(0), nsupports, 6); + + ScratchMatView Ptphi(team.team_scratch(0), 6, nsupports); + + ScratchMatView resultant_matrix(team.team_scratch(0), 6, nsupports); + + ScratchVecView targetMonomialVec(team.team_scratch(0), 6); + + ScratchVecView SupportValues(team.team_scratch(0), nsupports); + + ScratchVecView result(team.team_scratch(0), nsupports); + + ScratchVecView Phi(team.team_scratch(0), nsupports); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { + for (int k = 0; k < 6; ++k) { + lower(j, k) = 0; + forward_matrix(j, k) = 0; + moment_matrix(j, k) = 0; + inv_mat(j, k) = 0; + } + + targetMonomialVec(j) = 0; + for (int k = 0; k < nsupports; ++k) { + resultant_matrix(j, k) = 0; + + Ptphi(j, k) = 0; + } + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < 6; ++k) { + V(j, k) = 0; + } + + SupportValues(j) = 0; + result(j) = 0; + Phi(j) = 0; + }); + + Coord target_point; + + target_point.x = target_coordinates[i * dim]; + + target_point.y = target_coordinates[i * dim + 1]; + + BasisPoly(targetMonomialVec, target_point); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { VandermondeMatrix(V, local_source_points, j); }); + + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + PhiVector(Phi, target_point, local_source_points, j, radii2[i]); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); + + team.team_barrier(); + + MatMatMul(team, moment_matrix, Ptphi, V); + team.team_barrier(); + + inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); + + team.team_barrier(); + + MatMatMul(team, resultant_matrix, inv_mat, Ptphi); + team.team_barrier(); + + MatVecMul(team, targetMonomialVec, resultant_matrix, result); + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + SupportValues(i) = + source_values[support.supports_idx[start_ptr + i]]; + }); + + double tgt_value = 0; + dot_product(team, result, SupportValues, tgt_value); + if (team.team_rank() == 0) { + approx_target_values[i] = tgt_value; + } + }); + + return approx_target_values; +} + +#endif diff --git a/interpolator/mls_interpolator/adj_search.hpp b/interpolator/mls_interpolator/adj_search.hpp new file mode 100644 index 00000000..fd5fe60c --- /dev/null +++ b/interpolator/mls_interpolator/adj_search.hpp @@ -0,0 +1,218 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) { + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + + return dx * dx + dy * dy + dz * dz; +} + +class FindSupports { + private: + Mesh& source_mesh; + Mesh& target_mesh; + + public: + FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) + : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; + + void adjBasedSearch(const Real& cutoffDistance, const Write& supports_ptr, + Write& nSupports, Write& support_idx); +}; + +void FindSupports::adjBasedSearch(const Real& cutoffDistance, + const Write& supports_ptr, + Write& nSupports, + Write& support_idx) { + // Source Mesh Info + + const auto& sourcePoints_coords = source_mesh.coords(); + const auto nvertices_source = source_mesh.nverts(); + const auto dim = source_mesh.dim(); + + // Target Mesh Info + const auto& targetPoints_coords = target_mesh.coords(); + const auto nvertices_target = target_mesh.nverts(); + + // CSR data structure of adjacent vertex information of each source vertex + const auto& vert2vert = source_mesh.ask_star(VERT); + const auto& v2v_ptr = vert2vert.a2ab; + const auto& v2v_data = vert2vert.ab2b; + + // CSR data structure of vertex information of each cell in source mesh + + // CSR data structure of adjacent vertex information of each source vertex + // dim == 2; ask vertices of tri (3 vertices for each tri) & if dim ==3; ask + // vertices of tetrahedron (4 vertices for each tet) + const auto& cells2verts = source_mesh.ask_verts_of(dim); + + Kokkos::View target_points("test_points", nvertices_target); + + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + target_points(i, 0) = targetPoints_coords[i * dim]; + target_points(i, 1) = targetPoints_coords[i * dim + 1]; + }); + Kokkos::fence(); + pcms::GridPointSearch search_cell(source_mesh, 10, 10); + + // get the cell id for each target point + auto results = search_cell(target_points); + + parallel_for( + nvertices_target, + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + + LO source_cell_id = results(id).tri_id; + + const LO num_verts_in_dim = dim + 1; + + LO start_ptr = source_cell_id * num_verts_in_dim; + + LO end_ptr = start_ptr + num_verts_in_dim; + + Real target_coords[max_dim]; + + Real support_coords[max_dim]; + + for (LO k = 0; k < dim; ++k) { + target_coords[k] = target_points(id, k); + } + + LO start_counter; + + if (support_idx.size() > 0) { + start_counter = supports_ptr[id]; + } + + int count = 0; + // Initialize queue by pushing the vertices in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; ++i) { + LO vert_id = cells2verts[i]; + visited.push_back(vert_id); + + for (LO k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[vert_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(vert_id); + if (support_idx.size() > 0) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = vert_id; + } + } + } + + while (!queue.isEmpty()) { + LO currentVertex = queue.front(); + queue.pop_front(); + LO start = v2v_ptr[currentVertex]; + LO end = v2v_ptr[currentVertex + 1]; + + for (LO i = start; i < end; ++i) { + auto neighborIndex = v2v_data[i]; + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighborIndex)) { + visited.push_back(neighborIndex); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + sourcePoints_coords[neighborIndex * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighborIndex); + if (support_idx.size() > 0) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighborIndex; + } + } + } + } + } // end of while loop + + nSupports[id] = count; + }, + "count the number of supports in each target point"); +} + +struct SupportResults { + Write supports_ptr; + Write supports_idx; +}; + +SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, + Real& cutoffDistance) { + SupportResults support; + + FindSupports search(source_mesh, target_mesh); + + LO nvertices_source = source_mesh.nverts(); + + LO nvertices_target = target_mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, + support.supports_idx); + + Kokkos::fence(); + + support.supports_ptr = + Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, + support.supports_idx); + + return support; +} + +#endif diff --git a/interpolator/mls_interpolator/adj_search_dega2.hpp b/interpolator/mls_interpolator/adj_search_dega2.hpp new file mode 100644 index 00000000..3a9c4075 --- /dev/null +++ b/interpolator/mls_interpolator/adj_search_dega2.hpp @@ -0,0 +1,257 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) { + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + + return dx * dx + dy * dy + dz * dz; +} + +class FindSupports { + private: + Mesh& mesh; + + public: + FindSupports(Mesh& mesh_) : mesh(mesh_) {}; + + void adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call); +}; + +void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call) { + // Mesh Info + LO min_num_supports = 20; // TODO: make this an input parameter + const auto& mesh_coords = mesh.coords(); + const auto& nvertices = mesh.nverts(); + const auto& dim = mesh.dim(); + const auto& nfaces = mesh.nfaces(); + + // CSR data structure of adjacent cell information of each vertex in a mesh + const auto& nodes2faces = mesh.ask_up(VERT, FACE); + const auto& n2f_ptr = nodes2faces.a2ab; + const auto& n2f_data = nodes2faces.ab2b; + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Write cell_centroids( + dim * nfaces, 0, + "stores coordinates of cell centroid of each tri element"); + + parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = dim * id; + cell_centroids[index] = centroid[0]; + cell_centroids[index + 1] = centroid[1]; + }); + // * Got the adj data and cell centroids + + parallel_for( + nvertices, // for each target vertex which is a node for this case + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + const LO num_verts_in_dim = dim + 1; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + Real cutoffDistance = radii2[id]; // squared radii of the supports + + //? copying the target vertex coordinates + for (LO k = 0; k < dim; ++k) { + target_coords[k] = mesh_coords[id * dim + k]; + } + + LO start_counter; // start of support idx for the current target vertex + + // not being done in the first call + // where the support_idx start for the current target vertex + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; // start support location for the + // current target vertex + } + LO start_ptr = + n2f_ptr[id]; // start loc of the adj cells of the target node + LO end_ptr = + n2f_ptr[id + 1]; // end loc of the adj cells of the target node + + int count = 0; // number of supports for the current target vertex + // Initialize queue by pushing the cells in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = n2f_data[i]; + visited.push_back(cell_id); // cell added to the visited list + + for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the + // centroid of the cell + support_coords[k] = cell_centroids[cell_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(cell_id); + if (!is_build_csr_call) { // not being done in the first call + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + cell_id; // add the support cell to the support_idx + } // end of support_idx check + } // end of distance check + } // end of loop over adj cells to the target vertex + + // loops over the queued cells from the neighborhood of the target + // vertex + while (!queue.isEmpty()) { // ? can queue be empty? + LO currentCell = queue.front(); + queue.pop_front(); + LO start = currentCell * + num_verts_in_dim; // start vert id of the current cell + LO end = start + num_verts_in_dim; // end vert id of the current cell + + for (LO i = start; i < end; + ++i) { // loop over the vertices of the current cell + LO current_vert_id = faces2nodes[i]; + LO start_ptr_current_vert = + n2f_ptr[current_vert_id]; // start loc of adj cells to current + // vertex + LO end_ptr_vert_current_vert = + n2f_ptr[current_vert_id + + 1]; // end loc of adj cells to current vertex + for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; + ++j) { // loop over adj cells to the current vertex + auto neighbor_cell_index = n2f_data[j]; // current cell + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighbor_cell_index)) { + visited.push_back(neighbor_cell_index); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + cell_centroids[neighbor_cell_index * dim + k]; + } + + Real dist = + calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighbor_cell_index); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + neighbor_cell_index; + } // end of support_idx check + } // end of distance check + } // end of not visited check + } // end of loop over adj cells to the current vertex + } // end of loop over nodes + + } // end of while loop + + nSupports[id] = count; + }, // end of lambda + "count the number of supports in each target point"); +} + +struct SupportResults { + Write supports_ptr; + Write supports_idx; + Write radii2; // squared radii of the supports +}; + +SupportResults searchNeighbors(Mesh& mesh, Real& cutoffDistance) { + LO min_support = 20; + SupportResults support; + + FindSupports search(mesh); + + LO nvertices_target = mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + printf("Inside searchNeighbors 1\n"); + support.radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + // this call gets the number of supports for each target vertex: nSupports and + // the squared radii of the supports (with might be increased in the search to + // have enough supports) + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, true); + + printf("Inside searchNeighbors 2\n"); + Kokkos::fence(); + + // * update radius if nSupport is less that min_support + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_support) { + support.radii2[i] *= float(min_support) / nSupports[i]; + nSupports[i] = 0; // ? might not be needed + } + }); + + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, true); + + // offset array for the supports of each target vertex + support.supports_ptr = + Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + // get the total number of supports and fill the offset array + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? + // OMEGA_H_CHECK(nSupports[j] >= 15); + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + printf("Inside searchNeighbors 3\n"); + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + // second call to get the actual support indices + // now sizes of support.supports_ptr and support.supports_idx are known and > + // 0 + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, false); + + return support; +} + +#endif diff --git a/interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in b/interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in new file mode 100644 index 00000000..c74108bf --- /dev/null +++ b/interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in @@ -0,0 +1,6 @@ +@PACKAGE_INIT@ + +include("${CMAKE_CURRENT_LIST_DIR}/interpolatorTargets.cmake") + +# Optional: Make the package more discoverable by exposing the include directories +set(INTERPOLATOR_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include/interpolator") diff --git a/interpolator/mls_interpolator/points.hpp b/interpolator/mls_interpolator/points.hpp new file mode 100644 index 00000000..b54e27f0 --- /dev/null +++ b/interpolator/mls_interpolator/points.hpp @@ -0,0 +1,32 @@ +#ifndef POINTS_HPP +#define POINTS_HPP + +#include +#include + +struct Coord { + double x, y; +}; + +using range_policy = typename Kokkos::RangePolicy<>; +using team_policy = typename Kokkos::TeamPolicy<>; +using member_type = typename Kokkos::TeamPolicy<>::member_type; + +// alias for scratch view +using ScratchSpace = + typename Kokkos::DefaultExecutionSpace::scratch_memory_space; + +using ScratchMatView = + typename Kokkos::View>; +using ScratchVecView = + typename Kokkos::View>; + +using PointsViewType = Kokkos::View; + +struct Points { + PointsViewType coordinates; +}; + +#endif diff --git a/interpolator/mls_interpolator/queue_visited.hpp b/interpolator/mls_interpolator/queue_visited.hpp new file mode 100644 index 00000000..82989def --- /dev/null +++ b/interpolator/mls_interpolator/queue_visited.hpp @@ -0,0 +1,106 @@ +#ifndef QUEUE_VISITED_HPP +#define QUEUE_VISITED_HPP + +#include +#include +#include +#include +#include +#include +#define MAX_SIZE 500 +using namespace std; +using namespace Omega_h; + +class queue { + private: + Real queue_array[MAX_SIZE]; + int first = 0, last = -1, count = 0; + + public: + OMEGA_H_INLINE + queue() {} + + OMEGA_H_INLINE + ~queue() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + void pop_front(); + + OMEGA_H_INLINE + int front(); + + OMEGA_H_INLINE + bool isEmpty() const; + + OMEGA_H_INLINE + bool isFull() const; +}; + +class track { + private: + Real tracking_array[MAX_SIZE]; + int first = 0, last = -1, count = 0; + + public: + OMEGA_H_INLINE + track() {} + + OMEGA_H_INLINE + ~track() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + int size(); + + OMEGA_H_INLINE + bool notVisited(const int& item); +}; + +OMEGA_H_INLINE +void queue::push_back(const int& item) { + last = (last + 1) % MAX_SIZE; + queue_array[last] = item; + count++; +} + +OMEGA_H_INLINE +void queue::pop_front() { + first = (first + 1) % MAX_SIZE; + count--; +} + +OMEGA_H_INLINE +int queue::front() { return queue_array[first]; } + +OMEGA_H_INLINE +bool queue::isEmpty() const { return count == 0; } + +OMEGA_H_INLINE +bool queue::isFull() const { return count == MAX_SIZE; } + +OMEGA_H_INLINE +void track::push_back(const int& item) { + last = (last + 1) % MAX_SIZE; + tracking_array[last] = item; + count++; +} + +OMEGA_H_INLINE +bool track::notVisited(const int& item) { + for (int i = 0; i < count; ++i) { + if (tracking_array[i] == item) { + return false; + } + } + return true; +} + +OMEGA_H_INLINE +int track::size() { return count; } + +#endif From 12a26bbff96126878196b9d1d9a9ce86bcc9c632 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 22 Sep 2024 13:24:20 -0400 Subject: [PATCH 03/66] added interpolator subdirectory --- src/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 87dfd651..9880e908 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -117,6 +117,7 @@ if(PCMS_ENABLE_Fortran) target_link_libraries(pcms_pcms INTERFACE pcms::fortranapi) endif() +add_subdirectory(interpolator) install( TARGETS pcms_pcms EXPORT pcms-targets From a3b97f3b881cc0944e734dbcfda2d91497f24efc Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 22 Sep 2024 13:25:29 -0400 Subject: [PATCH 04/66] added the interpolator folder with header files for MLS and linear methods --- src/interpolator/CMakeLists.txt | 38 ++++ src/interpolator/MLSCoefficients.hpp | 212 +++++++++++++++++++ src/interpolator/MLSInterpolation.hpp | 187 +++++++++++++++++ src/interpolator/adj_search.hpp | 218 ++++++++++++++++++++ src/interpolator/adj_search_dega2.hpp | 257 ++++++++++++++++++++++++ src/interpolator/linear_interpolant.hpp | 184 +++++++++++++++++ src/interpolator/multidimarray.hpp | 36 ++++ src/interpolator/points.hpp | 32 +++ src/interpolator/queue_visited.hpp | 106 ++++++++++ 9 files changed, 1270 insertions(+) create mode 100644 src/interpolator/CMakeLists.txt create mode 100644 src/interpolator/MLSCoefficients.hpp create mode 100644 src/interpolator/MLSInterpolation.hpp create mode 100644 src/interpolator/adj_search.hpp create mode 100644 src/interpolator/adj_search_dega2.hpp create mode 100644 src/interpolator/linear_interpolant.hpp create mode 100644 src/interpolator/multidimarray.hpp create mode 100644 src/interpolator/points.hpp create mode 100644 src/interpolator/queue_visited.hpp diff --git a/src/interpolator/CMakeLists.txt b/src/interpolator/CMakeLists.txt new file mode 100644 index 00000000..ee898661 --- /dev/null +++ b/src/interpolator/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.20) + +project(Interpolator) + +set(HEADER_FILES + adj_search_dega2.hpp + MLSInterpolation.hpp + points.hpp + adj_search.hpp + MLSCoefficients.hpp + queue_visited.hpp + linear_interpolant.hpp + multidimarray.hpp +) + +install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) + + +add_library(interpolator INTERFACE) + +target_include_directories(interpolator INTERFACE + $ + $ + $) + +target_link_libraries(interpolator INTERFACE pcms::core) + +install(TARGETS interpolator + EXPORT interpolatorTargets + INCLUDES DESTINATION include/pcms/interpolator) + + + +install(EXPORT interpolatorTargets + FILE interpolatorTargets.cmake + NAMESPACE pcms:: + DESTINATION lib/cmake/pcms) + diff --git a/src/interpolator/MLSCoefficients.hpp b/src/interpolator/MLSCoefficients.hpp new file mode 100644 index 00000000..620c264e --- /dev/null +++ b/src/interpolator/MLSCoefficients.hpp @@ -0,0 +1,212 @@ +#ifndef MLS_COEFFICIENTS_HPP +#define MLS_COEFFICIENTS_HPP + +#include + +#include "points.hpp" + +#define PI_M 3.14159265358979323846 + +KOKKOS_INLINE_FUNCTION +double func(Coord& p) { + auto x = (p.x - 0.5) * PI_M * 2; + auto y = (p.y - 0.5) * PI_M * 2; + double Z = sin(x) * sin(y); + return Z; +} + +// polynomial basis vector +KOKKOS_INLINE_FUNCTION +void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { + basis_monomial(0) = 1.0; + basis_monomial(1) = p1.x; + basis_monomial(2) = p1.y; + basis_monomial(3) = p1.x * p1.x; + basis_monomial(4) = p1.x * p1.y; + basis_monomial(5) = p1.y * p1.y; +} + +// radial basis function +KOKKOS_INLINE_FUNCTION +double rbf(double r_sq, double rho_sq) { + double phi; + double r = sqrt(r_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + + 82 * pow(ratio, 2) + 36 * ratio + 6; + phi = phi * pow(limit, 6); + } + + return phi; +} + +// create vandermondeMatrix +KOKKOS_INLINE_FUNCTION +void VandermondeMatrix(ScratchMatView V, ScratchMatView local_source_points, + int j) { + int N = local_source_points.extent(0); + Coord source_point; + source_point.x = local_source_points(j, 0); + source_point.y = local_source_points(j, 1); + ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); + BasisPoly(basis_monomial, source_point); +} + +// moment matrix +KOKKOS_INLINE_FUNCTION +void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, + int j) { + int M = V.extent(0); + int N = V.extent(1); + + ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); + for (int k = 0; k < N; k++) { + pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); + } +} + +// radial basis function vector +KOKKOS_INLINE_FUNCTION +void PhiVector(ScratchVecView Phi, Coord target_point, + ScratchMatView local_source_points, int j, + double cuttoff_dis_sq) { + int N = local_source_points.extent(0); + double dx = target_point.x - local_source_points(j, 0); + double dy = target_point.y - local_source_points(j, 1); + double ds_sq = dx * dx + dy * dy; + Phi(j) = rbf(ds_sq, cuttoff_dis_sq); +} + +// matrix matrix multiplication +KOKKOS_INLINE_FUNCTION +void MatMatMul(member_type team, ScratchMatView moment_matrix, + ScratchMatView pt_phi, ScratchMatView vandermonde) { + int M = pt_phi.extent(0); + int N = vandermonde.extent(1); + int K = pt_phi.extent(1); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { + double sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); + moment_matrix(i, j) = sum; + }); + }); +} + +// Matrix vector multiplication +KOKKOS_INLINE_FUNCTION +void MatVecMul(member_type team, ScratchVecView vector, ScratchMatView matrix, + ScratchVecView result) { + int M = matrix.extent(0); + int N = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + double sum = 0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, M), + [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, + sum); + result(i) = sum; + }); + // team.team_barrier(); +} + +// dot product +KOKKOS_INLINE_FUNCTION +void dot_product(member_type team, ScratchVecView result_sub, + ScratchVecView exactSupportValues_sub, double& target_value) { + int N = result_sub.extent(0); + for (int j = 0; j < N; ++j) { + target_value += result_sub(j) * exactSupportValues_sub[j]; + } +} + +// moment matrix +KOKKOS_INLINE_FUNCTION +void PtphiPMatrix(ScratchMatView moment_matrix, member_type team, + ScratchMatView pt_phi, ScratchMatView vandermonde) { + int M = pt_phi.extent(0); + int N = vandermonde.extent(1); + int K = pt_phi.extent(1); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { + double sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); + moment_matrix(i, j) = sum; + }); + }); +} + +// inverse matrix +KOKKOS_INLINE_FUNCTION +void inverse_matrix(member_type team, ScratchMatView matrix, + ScratchMatView lower, ScratchMatView forward_matrix, + ScratchMatView solution) { + int N = matrix.extent(0); + + for (int j = 0; j < N; ++j) { + Kokkos::single(Kokkos::PerTeam(team), [=]() { + double sum = 0; + for (int k = 0; k < j; ++k) { + sum += lower(j, k) * lower(j, k); + } + lower(j, j) = sqrt(matrix(j, j) - sum); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) { + double inner_sum = 0; + for (int k = 0; k < j; ++k) { + inner_sum += lower(i, k) * lower(j, k); + } + lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); + lower(j, i) = lower(i, j); + }); + + team.team_barrier(); + } + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + forward_matrix(i, i) = 1.0 / lower(i, i); + for (int j = i + 1; j < N; ++j) { + forward_matrix(j, i) = 0.0; // Initialize to zero + for (int k = 0; k < j; ++k) { + forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); + } + forward_matrix(j, i) /= lower(j, j); + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); + for (int j = N - 2; j >= 0; --j) { + solution(j, i) = forward_matrix(j, i); + for (int k = j + 1; k < N; ++k) { + solution(j, i) -= lower(j, k) * solution(k, i); + } + solution(j, i) /= lower(j, j); + } + }); +} + +#endif diff --git a/src/interpolator/MLSInterpolation.hpp b/src/interpolator/MLSInterpolation.hpp new file mode 100644 index 00000000..cb6a0eb5 --- /dev/null +++ b/src/interpolator/MLSInterpolation.hpp @@ -0,0 +1,187 @@ +#ifndef MLS_INTERPOLATION_HPP +#define MLS_INTERPOLATION_HPP + +#include "MLSCoefficients.hpp" +#include "adj_search_dega2.hpp" +#include "adj_search.hpp" +#include "points.hpp" + +using namespace Omega_h; +using namespace pcms; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + Write radii2) { + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + // Kokkos::RangePolicy range_policy(1, + // nvertices_target); + + Kokkos::View shmem_each_team( + "stores the size required for each team", nvertices_target); + // Write shmem_each_team(nvertices_target, 0, "stores the size required + // for each team member"); + + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(6, 6) * 4; + total_shared_size += ScratchMatView::shmem_size(6, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(nsupports, 6); + total_shared_size += ScratchVecView::shmem_size(6); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + shmem_each_team(i) = total_shared_size; + }); + + size_t shared_size; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + printf("shared size = %d \n", shared_size); + + Write approx_target_values(nvertices_target, 0, + "approximated target values"); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + printf("scratch size is %d \n", scratch_size); + + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, 2); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + } + + ScratchMatView lower(team.team_scratch(0), 6, 6); + + ScratchMatView forward_matrix(team.team_scratch(0), 6, 6); + + ScratchMatView moment_matrix(team.team_scratch(0), 6, 6); + + ScratchMatView inv_mat(team.team_scratch(0), 6, 6); + + ScratchMatView V(team.team_scratch(0), nsupports, 6); + + ScratchMatView Ptphi(team.team_scratch(0), 6, nsupports); + + ScratchMatView resultant_matrix(team.team_scratch(0), 6, nsupports); + + ScratchVecView targetMonomialVec(team.team_scratch(0), 6); + + ScratchVecView SupportValues(team.team_scratch(0), nsupports); + + ScratchVecView result(team.team_scratch(0), nsupports); + + ScratchVecView Phi(team.team_scratch(0), nsupports); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { + for (int k = 0; k < 6; ++k) { + lower(j, k) = 0; + forward_matrix(j, k) = 0; + moment_matrix(j, k) = 0; + inv_mat(j, k) = 0; + } + + targetMonomialVec(j) = 0; + for (int k = 0; k < nsupports; ++k) { + resultant_matrix(j, k) = 0; + + Ptphi(j, k) = 0; + } + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < 6; ++k) { + V(j, k) = 0; + } + + SupportValues(j) = 0; + result(j) = 0; + Phi(j) = 0; + }); + + Coord target_point; + + target_point.x = target_coordinates[i * dim]; + + target_point.y = target_coordinates[i * dim + 1]; + + BasisPoly(targetMonomialVec, target_point); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { VandermondeMatrix(V, local_source_points, j); }); + + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + PhiVector(Phi, target_point, local_source_points, j, radii2[i]); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); + + team.team_barrier(); + + MatMatMul(team, moment_matrix, Ptphi, V); + team.team_barrier(); + + inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); + + team.team_barrier(); + + MatMatMul(team, resultant_matrix, inv_mat, Ptphi); + team.team_barrier(); + + MatVecMul(team, targetMonomialVec, resultant_matrix, result); + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + SupportValues(i) = + source_values[support.supports_idx[start_ptr + i]]; + }); + + double tgt_value = 0; + dot_product(team, result, SupportValues, tgt_value); + if (team.team_rank() == 0) { + approx_target_values[i] = tgt_value; + } + }); + + return approx_target_values; +} + +#endif diff --git a/src/interpolator/adj_search.hpp b/src/interpolator/adj_search.hpp new file mode 100644 index 00000000..fd5fe60c --- /dev/null +++ b/src/interpolator/adj_search.hpp @@ -0,0 +1,218 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) { + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + + return dx * dx + dy * dy + dz * dz; +} + +class FindSupports { + private: + Mesh& source_mesh; + Mesh& target_mesh; + + public: + FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) + : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; + + void adjBasedSearch(const Real& cutoffDistance, const Write& supports_ptr, + Write& nSupports, Write& support_idx); +}; + +void FindSupports::adjBasedSearch(const Real& cutoffDistance, + const Write& supports_ptr, + Write& nSupports, + Write& support_idx) { + // Source Mesh Info + + const auto& sourcePoints_coords = source_mesh.coords(); + const auto nvertices_source = source_mesh.nverts(); + const auto dim = source_mesh.dim(); + + // Target Mesh Info + const auto& targetPoints_coords = target_mesh.coords(); + const auto nvertices_target = target_mesh.nverts(); + + // CSR data structure of adjacent vertex information of each source vertex + const auto& vert2vert = source_mesh.ask_star(VERT); + const auto& v2v_ptr = vert2vert.a2ab; + const auto& v2v_data = vert2vert.ab2b; + + // CSR data structure of vertex information of each cell in source mesh + + // CSR data structure of adjacent vertex information of each source vertex + // dim == 2; ask vertices of tri (3 vertices for each tri) & if dim ==3; ask + // vertices of tetrahedron (4 vertices for each tet) + const auto& cells2verts = source_mesh.ask_verts_of(dim); + + Kokkos::View target_points("test_points", nvertices_target); + + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + target_points(i, 0) = targetPoints_coords[i * dim]; + target_points(i, 1) = targetPoints_coords[i * dim + 1]; + }); + Kokkos::fence(); + pcms::GridPointSearch search_cell(source_mesh, 10, 10); + + // get the cell id for each target point + auto results = search_cell(target_points); + + parallel_for( + nvertices_target, + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + + LO source_cell_id = results(id).tri_id; + + const LO num_verts_in_dim = dim + 1; + + LO start_ptr = source_cell_id * num_verts_in_dim; + + LO end_ptr = start_ptr + num_verts_in_dim; + + Real target_coords[max_dim]; + + Real support_coords[max_dim]; + + for (LO k = 0; k < dim; ++k) { + target_coords[k] = target_points(id, k); + } + + LO start_counter; + + if (support_idx.size() > 0) { + start_counter = supports_ptr[id]; + } + + int count = 0; + // Initialize queue by pushing the vertices in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; ++i) { + LO vert_id = cells2verts[i]; + visited.push_back(vert_id); + + for (LO k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[vert_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(vert_id); + if (support_idx.size() > 0) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = vert_id; + } + } + } + + while (!queue.isEmpty()) { + LO currentVertex = queue.front(); + queue.pop_front(); + LO start = v2v_ptr[currentVertex]; + LO end = v2v_ptr[currentVertex + 1]; + + for (LO i = start; i < end; ++i) { + auto neighborIndex = v2v_data[i]; + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighborIndex)) { + visited.push_back(neighborIndex); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + sourcePoints_coords[neighborIndex * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighborIndex); + if (support_idx.size() > 0) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighborIndex; + } + } + } + } + } // end of while loop + + nSupports[id] = count; + }, + "count the number of supports in each target point"); +} + +struct SupportResults { + Write supports_ptr; + Write supports_idx; +}; + +SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, + Real& cutoffDistance) { + SupportResults support; + + FindSupports search(source_mesh, target_mesh); + + LO nvertices_source = source_mesh.nverts(); + + LO nvertices_target = target_mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, + support.supports_idx); + + Kokkos::fence(); + + support.supports_ptr = + Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, + support.supports_idx); + + return support; +} + +#endif diff --git a/src/interpolator/adj_search_dega2.hpp b/src/interpolator/adj_search_dega2.hpp new file mode 100644 index 00000000..3a9c4075 --- /dev/null +++ b/src/interpolator/adj_search_dega2.hpp @@ -0,0 +1,257 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) { + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + + return dx * dx + dy * dy + dz * dz; +} + +class FindSupports { + private: + Mesh& mesh; + + public: + FindSupports(Mesh& mesh_) : mesh(mesh_) {}; + + void adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call); +}; + +void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call) { + // Mesh Info + LO min_num_supports = 20; // TODO: make this an input parameter + const auto& mesh_coords = mesh.coords(); + const auto& nvertices = mesh.nverts(); + const auto& dim = mesh.dim(); + const auto& nfaces = mesh.nfaces(); + + // CSR data structure of adjacent cell information of each vertex in a mesh + const auto& nodes2faces = mesh.ask_up(VERT, FACE); + const auto& n2f_ptr = nodes2faces.a2ab; + const auto& n2f_data = nodes2faces.ab2b; + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Write cell_centroids( + dim * nfaces, 0, + "stores coordinates of cell centroid of each tri element"); + + parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = dim * id; + cell_centroids[index] = centroid[0]; + cell_centroids[index + 1] = centroid[1]; + }); + // * Got the adj data and cell centroids + + parallel_for( + nvertices, // for each target vertex which is a node for this case + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + const LO num_verts_in_dim = dim + 1; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + Real cutoffDistance = radii2[id]; // squared radii of the supports + + //? copying the target vertex coordinates + for (LO k = 0; k < dim; ++k) { + target_coords[k] = mesh_coords[id * dim + k]; + } + + LO start_counter; // start of support idx for the current target vertex + + // not being done in the first call + // where the support_idx start for the current target vertex + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; // start support location for the + // current target vertex + } + LO start_ptr = + n2f_ptr[id]; // start loc of the adj cells of the target node + LO end_ptr = + n2f_ptr[id + 1]; // end loc of the adj cells of the target node + + int count = 0; // number of supports for the current target vertex + // Initialize queue by pushing the cells in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = n2f_data[i]; + visited.push_back(cell_id); // cell added to the visited list + + for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the + // centroid of the cell + support_coords[k] = cell_centroids[cell_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(cell_id); + if (!is_build_csr_call) { // not being done in the first call + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + cell_id; // add the support cell to the support_idx + } // end of support_idx check + } // end of distance check + } // end of loop over adj cells to the target vertex + + // loops over the queued cells from the neighborhood of the target + // vertex + while (!queue.isEmpty()) { // ? can queue be empty? + LO currentCell = queue.front(); + queue.pop_front(); + LO start = currentCell * + num_verts_in_dim; // start vert id of the current cell + LO end = start + num_verts_in_dim; // end vert id of the current cell + + for (LO i = start; i < end; + ++i) { // loop over the vertices of the current cell + LO current_vert_id = faces2nodes[i]; + LO start_ptr_current_vert = + n2f_ptr[current_vert_id]; // start loc of adj cells to current + // vertex + LO end_ptr_vert_current_vert = + n2f_ptr[current_vert_id + + 1]; // end loc of adj cells to current vertex + for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; + ++j) { // loop over adj cells to the current vertex + auto neighbor_cell_index = n2f_data[j]; // current cell + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighbor_cell_index)) { + visited.push_back(neighbor_cell_index); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + cell_centroids[neighbor_cell_index * dim + k]; + } + + Real dist = + calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighbor_cell_index); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + neighbor_cell_index; + } // end of support_idx check + } // end of distance check + } // end of not visited check + } // end of loop over adj cells to the current vertex + } // end of loop over nodes + + } // end of while loop + + nSupports[id] = count; + }, // end of lambda + "count the number of supports in each target point"); +} + +struct SupportResults { + Write supports_ptr; + Write supports_idx; + Write radii2; // squared radii of the supports +}; + +SupportResults searchNeighbors(Mesh& mesh, Real& cutoffDistance) { + LO min_support = 20; + SupportResults support; + + FindSupports search(mesh); + + LO nvertices_target = mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + printf("Inside searchNeighbors 1\n"); + support.radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + // this call gets the number of supports for each target vertex: nSupports and + // the squared radii of the supports (with might be increased in the search to + // have enough supports) + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, true); + + printf("Inside searchNeighbors 2\n"); + Kokkos::fence(); + + // * update radius if nSupport is less that min_support + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_support) { + support.radii2[i] *= float(min_support) / nSupports[i]; + nSupports[i] = 0; // ? might not be needed + } + }); + + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, true); + + // offset array for the supports of each target vertex + support.supports_ptr = + Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + // get the total number of supports and fill the offset array + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? + // OMEGA_H_CHECK(nSupports[j] >= 15); + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + printf("Inside searchNeighbors 3\n"); + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + // second call to get the actual support indices + // now sizes of support.supports_ptr and support.supports_idx are known and > + // 0 + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, false); + + return support; +} + +#endif diff --git a/src/interpolator/linear_interpolant.hpp b/src/interpolator/linear_interpolant.hpp new file mode 100644 index 00000000..7aeb02ef --- /dev/null +++ b/src/interpolator/linear_interpolant.hpp @@ -0,0 +1,184 @@ + +#ifndef INTERPOLANT_HPP +#define INTERPOLANT_HPP + + +#include +#include +#include "multidimarray.hpp" +#define MAX_DIM 10 + + +KOKKOS_INLINE_FUNCTION +void find_indices(const IntVecView& num_bins, const RealVecView& range, + const RealVecView& point , int* indices){ + int dim = point.extent(0); + // IntVecView indices("parametric coordinate", dim); + for (int i = 0; i < dim; ++i){ + int id = i * 2; + double length = range(id + 1) - range(id); + double dlen = length/num_bins(i); + indices[i] = ((point(i) - range(id)) / dlen); + + } + + +} + + +KOKKOS_INLINE_FUNCTION +void find_limits(const IntVecView& num_bins, const RealVecView& range, const int* indices, double* limits){ + int dim = num_bins.extent(0); + // RealVecView limits("limits", 2*dim); + for (int i = 0; i < dim; ++i){ + int ptr = i * 2; + double dlen = (range(ptr+1) - range(ptr))/num_bins(i); + limits[ptr] = indices[i] * dlen; + limits[ptr+1] = limits[ptr] + dlen; + } +} + + + +KOKKOS_INLINE_FUNCTION +void evaluateParametricCoord(const RealVecView& point, const double* limits , double* parametric_coord){ + int dim = point.extent(0); + // RealVecView parametric_coord("parametric coordinate", dim); + for (int i = 0; i < dim; ++i){ + int index = i * 2; + parametric_coord[i] = (point(i) - limits[index])/(limits[index+1] - limits[index]); + + } +} + + + +KOKKOS_INLINE_FUNCTION +void basis_function(const RealVecView& parametric_coord, double* linear_basis_each_dir){ + int dim = parametric_coord.extent(0); + // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); + for (int i = 0; i < dim; ++i){ + int index = i * 2; + + linear_basis_each_dir[index] = 1 - parametric_coord(i); + + linear_basis_each_dir[index + 1] = parametric_coord(i); + + + } + +} + + +KOKKOS_INLINE_FUNCTION +double linear_interpolant(const IntVecView& dimensions, const double* linear_basis_each_dir, const IntVecView& indices, const RealVecView& values){ + int dim = dimensions.extent(0); + double sum = 0; + int ids[MAX_DIM]; + + int array_size = 1 << dim; + for (int i = 0; i < array_size; ++i){ + double temp = 1.0; + for (int j = 0; j < dim; ++j){ + int index = 2*j; + int id_left = indices(j); + int id_right = id_left + 1; + if (i & (1 << j)){ + temp *= linear_basis_each_dir[index+1]; + ids[j] = id_right; + } else { + temp *= linear_basis_each_dir[index]; + ids[j] = id_left; + } + } + + int idx = calculateIndex(dimensions, ids); + double corner_values = values(idx); + + sum += temp*corner_values; + } + return sum; +} + + + +class RegularGridInterpolator { + private: + const RealMatView parametric_coords; + const RealVecView values; + const IntMatView indices; + const IntVecView dimensions; + + public: + RegularGridInterpolator(const RealMatView& parametric_coords_, + const RealVecView& values_, const IntMatView& indices_, const IntVecView& dimensions_) + : parametric_coords(parametric_coords_), values(values_), indices(indices_), dimensions(dimensions_) {}; + +RealVecView linear_interpolation() { + int dim = dimensions.extent(0); + int N = parametric_coords.extent(0); + RealVecView interpolated_values("approximated values", N); + auto parametric_coords_ = parametric_coords; + auto dimensions_ = dimensions; + auto values_ = values; + auto indices_ = indices; + Kokkos::parallel_for("linear interpolation function",N, KOKKOS_LAMBDA(int j){ + double linear_basis_each_dir[MAX_DIM] = {0.0}; + auto parametric_coord_each_point = Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); + auto index_each_point = Kokkos::subview(indices_, j , Kokkos::ALL()); + basis_function(parametric_coord_each_point, linear_basis_each_dir); + auto approx_value = linear_interpolant(dimensions_, linear_basis_each_dir,index_each_point, values_); + interpolated_values(j) = approx_value; + }); + + return interpolated_values; + } +}; + + +struct Result{ + IntMatView indices_pts; + RealMatView parametric_coords; +}; + +Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, const RealVecView& range){ + + int dim = num_bins.extent(0); + Result result; + int N = points.extent(0); + result.indices_pts = IntMatView("indices",N,dim); + result.parametric_coords = RealMatView("parametric_coordinates",N, dim); + + Kokkos::parallel_for(N, KOKKOS_LAMBDA(int j){ + int indcs[MAX_DIM] = {0}; + double limits[MAX_DIM] = {0.0}; + double parametric_coord_each[MAX_DIM] = {0.0}; + auto point_coord = Kokkos::subview(points, j , Kokkos::ALL()); + find_indices(num_bins, range, point_coord, indcs); + find_limits(num_bins, range, indcs, limits); + evaluateParametricCoord( point_coord, limits, parametric_coord_each); + for (int i = 0; i < dim; ++i){ + result.indices_pts(j,i) = indcs[i]; + result.parametric_coords(j,i) = parametric_coord_each[i]; + } + + }); + + return result; + +} + +KOKKOS_INLINE_FUNCTION +double test_function(double* coord){ + double fun_value = 0; + int dim = 5; + for (int i = 0; i < dim; ++i){ + fun_value += coord[i]; + } + return fun_value; + } + +#endif + + + diff --git a/src/interpolator/multidimarray.hpp b/src/interpolator/multidimarray.hpp new file mode 100644 index 00000000..04b6fac1 --- /dev/null +++ b/src/interpolator/multidimarray.hpp @@ -0,0 +1,36 @@ +#ifndef MULTIDIMARRAY_HPP +#define MULTIDIMARRAY_HPP + +#include + + +using RealMatView = Kokkos::View; +using IntMatView = Kokkos::View; +using RealVecView = Kokkos::View; +using IntVecView = Kokkos::View; + +using HostIntVecView = Kokkos::View; + +KOKKOS_INLINE_FUNCTION +int calculateIndex(const IntVecView& dimensions, const int* indices) { + + int dim = dimensions.extent(0); + int index = 0; + int multiplier = 1; + for (int i = dim - 1; i >= 0; --i) { + index += indices[i] * multiplier; + multiplier *= dimensions(i); + } + return index; +} + + +int calculateTotalSize(const HostIntVecView& dimensions){ + int dim = dimensions.extent(0); + int size = 1; + for (int i = 0; i < dim; ++i){ + size *= dimensions(i); + } + return size; +} +#endif diff --git a/src/interpolator/points.hpp b/src/interpolator/points.hpp new file mode 100644 index 00000000..b54e27f0 --- /dev/null +++ b/src/interpolator/points.hpp @@ -0,0 +1,32 @@ +#ifndef POINTS_HPP +#define POINTS_HPP + +#include +#include + +struct Coord { + double x, y; +}; + +using range_policy = typename Kokkos::RangePolicy<>; +using team_policy = typename Kokkos::TeamPolicy<>; +using member_type = typename Kokkos::TeamPolicy<>::member_type; + +// alias for scratch view +using ScratchSpace = + typename Kokkos::DefaultExecutionSpace::scratch_memory_space; + +using ScratchMatView = + typename Kokkos::View>; +using ScratchVecView = + typename Kokkos::View>; + +using PointsViewType = Kokkos::View; + +struct Points { + PointsViewType coordinates; +}; + +#endif diff --git a/src/interpolator/queue_visited.hpp b/src/interpolator/queue_visited.hpp new file mode 100644 index 00000000..82989def --- /dev/null +++ b/src/interpolator/queue_visited.hpp @@ -0,0 +1,106 @@ +#ifndef QUEUE_VISITED_HPP +#define QUEUE_VISITED_HPP + +#include +#include +#include +#include +#include +#include +#define MAX_SIZE 500 +using namespace std; +using namespace Omega_h; + +class queue { + private: + Real queue_array[MAX_SIZE]; + int first = 0, last = -1, count = 0; + + public: + OMEGA_H_INLINE + queue() {} + + OMEGA_H_INLINE + ~queue() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + void pop_front(); + + OMEGA_H_INLINE + int front(); + + OMEGA_H_INLINE + bool isEmpty() const; + + OMEGA_H_INLINE + bool isFull() const; +}; + +class track { + private: + Real tracking_array[MAX_SIZE]; + int first = 0, last = -1, count = 0; + + public: + OMEGA_H_INLINE + track() {} + + OMEGA_H_INLINE + ~track() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + int size(); + + OMEGA_H_INLINE + bool notVisited(const int& item); +}; + +OMEGA_H_INLINE +void queue::push_back(const int& item) { + last = (last + 1) % MAX_SIZE; + queue_array[last] = item; + count++; +} + +OMEGA_H_INLINE +void queue::pop_front() { + first = (first + 1) % MAX_SIZE; + count--; +} + +OMEGA_H_INLINE +int queue::front() { return queue_array[first]; } + +OMEGA_H_INLINE +bool queue::isEmpty() const { return count == 0; } + +OMEGA_H_INLINE +bool queue::isFull() const { return count == MAX_SIZE; } + +OMEGA_H_INLINE +void track::push_back(const int& item) { + last = (last + 1) % MAX_SIZE; + tracking_array[last] = item; + count++; +} + +OMEGA_H_INLINE +bool track::notVisited(const int& item) { + for (int i = 0; i < count; ++i) { + if (tracking_array[i] == item) { + return false; + } + } + return true; +} + +OMEGA_H_INLINE +int track::size() { return count; } + +#endif From e3ac60bbc9e3f63db605e09cdb50c1ac426fe5e4 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 22 Sep 2024 21:40:35 -0400 Subject: [PATCH 05/66] removed hdf5 header --- src/interpolator/linear_interpolant.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/interpolator/linear_interpolant.hpp b/src/interpolator/linear_interpolant.hpp index 7aeb02ef..5abde69f 100644 --- a/src/interpolator/linear_interpolant.hpp +++ b/src/interpolator/linear_interpolant.hpp @@ -3,7 +3,6 @@ #define INTERPOLANT_HPP -#include #include #include "multidimarray.hpp" #define MAX_DIM 10 From ec486e74d82561b4109c5a2dcb3ffbb791dd7a44 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Sun, 22 Sep 2024 23:33:33 -0400 Subject: [PATCH 06/66] removed unnecessary files --- interpolator/MLSInterpolation.hpp | 187 ------------- .../linear_interpolator/interpolant.hpp | 184 ------------- .../linear_interpolator/multidimarray.hpp | 36 --- interpolator/mls_interpolator/CMakeLists.txt | 52 ---- .../mls_interpolator/MLSCoefficients.hpp | 212 --------------- .../mls_interpolator/MLSInterpolation.hpp | 187 ------------- interpolator/mls_interpolator/adj_search.hpp | 218 --------------- .../mls_interpolator/adj_search_dega2.hpp | 257 ------------------ .../cmake/interpolatorConfig.cmake.in | 6 - interpolator/mls_interpolator/points.hpp | 32 --- .../mls_interpolator/queue_visited.hpp | 106 -------- 11 files changed, 1477 deletions(-) delete mode 100644 interpolator/MLSInterpolation.hpp delete mode 100644 interpolator/linear_interpolator/interpolant.hpp delete mode 100644 interpolator/linear_interpolator/multidimarray.hpp delete mode 100644 interpolator/mls_interpolator/CMakeLists.txt delete mode 100644 interpolator/mls_interpolator/MLSCoefficients.hpp delete mode 100644 interpolator/mls_interpolator/MLSInterpolation.hpp delete mode 100644 interpolator/mls_interpolator/adj_search.hpp delete mode 100644 interpolator/mls_interpolator/adj_search_dega2.hpp delete mode 100644 interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in delete mode 100644 interpolator/mls_interpolator/points.hpp delete mode 100644 interpolator/mls_interpolator/queue_visited.hpp diff --git a/interpolator/MLSInterpolation.hpp b/interpolator/MLSInterpolation.hpp deleted file mode 100644 index cb6a0eb5..00000000 --- a/interpolator/MLSInterpolation.hpp +++ /dev/null @@ -1,187 +0,0 @@ -#ifndef MLS_INTERPOLATION_HPP -#define MLS_INTERPOLATION_HPP - -#include "MLSCoefficients.hpp" -#include "adj_search_dega2.hpp" -#include "adj_search.hpp" -#include "points.hpp" - -using namespace Omega_h; -using namespace pcms; - -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - Write radii2) { - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; - - // Kokkos::RangePolicy range_policy(1, - // nvertices_target); - - Kokkos::View shmem_each_team( - "stores the size required for each team", nvertices_target); - // Write shmem_each_team(nvertices_target, 0, "stores the size required - // for each team member"); - - Kokkos::parallel_for( - "calculate the size required for scratch for each team", nvertices_target, - KOKKOS_LAMBDA(const int i) { - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(6, 6) * 4; - total_shared_size += ScratchMatView::shmem_size(6, nsupports) * 2; - total_shared_size += ScratchMatView::shmem_size(nsupports, 6); - total_shared_size += ScratchVecView::shmem_size(6); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - shmem_each_team(i) = total_shared_size; - }); - - size_t shared_size; - Kokkos::parallel_reduce( - "find_max", nvertices_target, - KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { - if (shmem_each_team(i) > max_val_temp) { - max_val_temp = shmem_each_team(i); - } - }, - Kokkos::Max(shared_size)); - printf("shared size = %d \n", shared_size); - - Write approx_target_values(nvertices_target, 0, - "approximated target values"); - - team_policy tp(nvertices_target, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - printf("scratch size is %d \n", scratch_size); - - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - - int nsupports = end_ptr - start_ptr; - - ScratchMatView local_source_points(team.team_scratch(0), nsupports, 2); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - } - - ScratchMatView lower(team.team_scratch(0), 6, 6); - - ScratchMatView forward_matrix(team.team_scratch(0), 6, 6); - - ScratchMatView moment_matrix(team.team_scratch(0), 6, 6); - - ScratchMatView inv_mat(team.team_scratch(0), 6, 6); - - ScratchMatView V(team.team_scratch(0), nsupports, 6); - - ScratchMatView Ptphi(team.team_scratch(0), 6, nsupports); - - ScratchMatView resultant_matrix(team.team_scratch(0), 6, nsupports); - - ScratchVecView targetMonomialVec(team.team_scratch(0), 6); - - ScratchVecView SupportValues(team.team_scratch(0), nsupports); - - ScratchVecView result(team.team_scratch(0), nsupports); - - ScratchVecView Phi(team.team_scratch(0), nsupports); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { - for (int k = 0; k < 6; ++k) { - lower(j, k) = 0; - forward_matrix(j, k) = 0; - moment_matrix(j, k) = 0; - inv_mat(j, k) = 0; - } - - targetMonomialVec(j) = 0; - for (int k = 0; k < nsupports; ++k) { - resultant_matrix(j, k) = 0; - - Ptphi(j, k) = 0; - } - }); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < 6; ++k) { - V(j, k) = 0; - } - - SupportValues(j) = 0; - result(j) = 0; - Phi(j) = 0; - }); - - Coord target_point; - - target_point.x = target_coordinates[i * dim]; - - target_point.y = target_coordinates[i * dim + 1]; - - BasisPoly(targetMonomialVec, target_point); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { VandermondeMatrix(V, local_source_points, j); }); - - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - PhiVector(Phi, target_point, local_source_points, j, radii2[i]); - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); - - team.team_barrier(); - - MatMatMul(team, moment_matrix, Ptphi, V); - team.team_barrier(); - - inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); - - team.team_barrier(); - - MatMatMul(team, resultant_matrix, inv_mat, Ptphi); - team.team_barrier(); - - MatVecMul(team, targetMonomialVec, resultant_matrix, result); - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - SupportValues(i) = - source_values[support.supports_idx[start_ptr + i]]; - }); - - double tgt_value = 0; - dot_product(team, result, SupportValues, tgt_value); - if (team.team_rank() == 0) { - approx_target_values[i] = tgt_value; - } - }); - - return approx_target_values; -} - -#endif diff --git a/interpolator/linear_interpolator/interpolant.hpp b/interpolator/linear_interpolator/interpolant.hpp deleted file mode 100644 index 7aeb02ef..00000000 --- a/interpolator/linear_interpolator/interpolant.hpp +++ /dev/null @@ -1,184 +0,0 @@ - -#ifndef INTERPOLANT_HPP -#define INTERPOLANT_HPP - - -#include -#include -#include "multidimarray.hpp" -#define MAX_DIM 10 - - -KOKKOS_INLINE_FUNCTION -void find_indices(const IntVecView& num_bins, const RealVecView& range, - const RealVecView& point , int* indices){ - int dim = point.extent(0); - // IntVecView indices("parametric coordinate", dim); - for (int i = 0; i < dim; ++i){ - int id = i * 2; - double length = range(id + 1) - range(id); - double dlen = length/num_bins(i); - indices[i] = ((point(i) - range(id)) / dlen); - - } - - -} - - -KOKKOS_INLINE_FUNCTION -void find_limits(const IntVecView& num_bins, const RealVecView& range, const int* indices, double* limits){ - int dim = num_bins.extent(0); - // RealVecView limits("limits", 2*dim); - for (int i = 0; i < dim; ++i){ - int ptr = i * 2; - double dlen = (range(ptr+1) - range(ptr))/num_bins(i); - limits[ptr] = indices[i] * dlen; - limits[ptr+1] = limits[ptr] + dlen; - } -} - - - -KOKKOS_INLINE_FUNCTION -void evaluateParametricCoord(const RealVecView& point, const double* limits , double* parametric_coord){ - int dim = point.extent(0); - // RealVecView parametric_coord("parametric coordinate", dim); - for (int i = 0; i < dim; ++i){ - int index = i * 2; - parametric_coord[i] = (point(i) - limits[index])/(limits[index+1] - limits[index]); - - } -} - - - -KOKKOS_INLINE_FUNCTION -void basis_function(const RealVecView& parametric_coord, double* linear_basis_each_dir){ - int dim = parametric_coord.extent(0); - // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); - for (int i = 0; i < dim; ++i){ - int index = i * 2; - - linear_basis_each_dir[index] = 1 - parametric_coord(i); - - linear_basis_each_dir[index + 1] = parametric_coord(i); - - - } - -} - - -KOKKOS_INLINE_FUNCTION -double linear_interpolant(const IntVecView& dimensions, const double* linear_basis_each_dir, const IntVecView& indices, const RealVecView& values){ - int dim = dimensions.extent(0); - double sum = 0; - int ids[MAX_DIM]; - - int array_size = 1 << dim; - for (int i = 0; i < array_size; ++i){ - double temp = 1.0; - for (int j = 0; j < dim; ++j){ - int index = 2*j; - int id_left = indices(j); - int id_right = id_left + 1; - if (i & (1 << j)){ - temp *= linear_basis_each_dir[index+1]; - ids[j] = id_right; - } else { - temp *= linear_basis_each_dir[index]; - ids[j] = id_left; - } - } - - int idx = calculateIndex(dimensions, ids); - double corner_values = values(idx); - - sum += temp*corner_values; - } - return sum; -} - - - -class RegularGridInterpolator { - private: - const RealMatView parametric_coords; - const RealVecView values; - const IntMatView indices; - const IntVecView dimensions; - - public: - RegularGridInterpolator(const RealMatView& parametric_coords_, - const RealVecView& values_, const IntMatView& indices_, const IntVecView& dimensions_) - : parametric_coords(parametric_coords_), values(values_), indices(indices_), dimensions(dimensions_) {}; - -RealVecView linear_interpolation() { - int dim = dimensions.extent(0); - int N = parametric_coords.extent(0); - RealVecView interpolated_values("approximated values", N); - auto parametric_coords_ = parametric_coords; - auto dimensions_ = dimensions; - auto values_ = values; - auto indices_ = indices; - Kokkos::parallel_for("linear interpolation function",N, KOKKOS_LAMBDA(int j){ - double linear_basis_each_dir[MAX_DIM] = {0.0}; - auto parametric_coord_each_point = Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); - auto index_each_point = Kokkos::subview(indices_, j , Kokkos::ALL()); - basis_function(parametric_coord_each_point, linear_basis_each_dir); - auto approx_value = linear_interpolant(dimensions_, linear_basis_each_dir,index_each_point, values_); - interpolated_values(j) = approx_value; - }); - - return interpolated_values; - } -}; - - -struct Result{ - IntMatView indices_pts; - RealMatView parametric_coords; -}; - -Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, const RealVecView& range){ - - int dim = num_bins.extent(0); - Result result; - int N = points.extent(0); - result.indices_pts = IntMatView("indices",N,dim); - result.parametric_coords = RealMatView("parametric_coordinates",N, dim); - - Kokkos::parallel_for(N, KOKKOS_LAMBDA(int j){ - int indcs[MAX_DIM] = {0}; - double limits[MAX_DIM] = {0.0}; - double parametric_coord_each[MAX_DIM] = {0.0}; - auto point_coord = Kokkos::subview(points, j , Kokkos::ALL()); - find_indices(num_bins, range, point_coord, indcs); - find_limits(num_bins, range, indcs, limits); - evaluateParametricCoord( point_coord, limits, parametric_coord_each); - for (int i = 0; i < dim; ++i){ - result.indices_pts(j,i) = indcs[i]; - result.parametric_coords(j,i) = parametric_coord_each[i]; - } - - }); - - return result; - -} - -KOKKOS_INLINE_FUNCTION -double test_function(double* coord){ - double fun_value = 0; - int dim = 5; - for (int i = 0; i < dim; ++i){ - fun_value += coord[i]; - } - return fun_value; - } - -#endif - - - diff --git a/interpolator/linear_interpolator/multidimarray.hpp b/interpolator/linear_interpolator/multidimarray.hpp deleted file mode 100644 index 04b6fac1..00000000 --- a/interpolator/linear_interpolator/multidimarray.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef MULTIDIMARRAY_HPP -#define MULTIDIMARRAY_HPP - -#include - - -using RealMatView = Kokkos::View; -using IntMatView = Kokkos::View; -using RealVecView = Kokkos::View; -using IntVecView = Kokkos::View; - -using HostIntVecView = Kokkos::View; - -KOKKOS_INLINE_FUNCTION -int calculateIndex(const IntVecView& dimensions, const int* indices) { - - int dim = dimensions.extent(0); - int index = 0; - int multiplier = 1; - for (int i = dim - 1; i >= 0; --i) { - index += indices[i] * multiplier; - multiplier *= dimensions(i); - } - return index; -} - - -int calculateTotalSize(const HostIntVecView& dimensions){ - int dim = dimensions.extent(0); - int size = 1; - for (int i = 0; i < dim; ++i){ - size *= dimensions(i); - } - return size; -} -#endif diff --git a/interpolator/mls_interpolator/CMakeLists.txt b/interpolator/mls_interpolator/CMakeLists.txt deleted file mode 100644 index 69f1d979..00000000 --- a/interpolator/mls_interpolator/CMakeLists.txt +++ /dev/null @@ -1,52 +0,0 @@ -cmake_minimum_required(VERSION 3.20) -project(Interpolator) - -set(VERSION_MAJOR 0) -set(VERSION_MINOR 1) - -set(HEADER_FILES - adj_search_dega2.hpp - MLSInterpolation.hpp - points.hpp - adj_search.hpp - MLSCoefficients.hpp - queue_visited.hpp -) - -install(FILES ${HEADER_FILES} DESTINATION include/interpolator) - -include(CMakePackageConfigHelpers) - -write_basic_package_version_file( - "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfigVersion.cmake" - VERSION ${VERSION_MAJOR}.${VERSION_MINOR} - COMPATIBILITY SameMajorVersion -) - -install(FILES "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfigVersion.cmake" - DESTINATION lib/cmake/interpolator) - -add_library(interpolator INTERFACE) -target_include_directories(interpolator INTERFACE - $ - $) - -install(TARGETS interpolator - EXPORT interpolatorTargets - INCLUDES DESTINATION include/interpolator) - -configure_package_config_file( - "${CMAKE_CURRENT_SOURCE_DIR}/cmake/interpolatorConfig.cmake.in" - "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfig.cmake" - INSTALL_DESTINATION lib/cmake/interpolator -) - -install(FILES - "${CMAKE_CURRENT_BINARY_DIR}/interpolatorConfig.cmake" - DESTINATION lib/cmake/interpolator) - -install(EXPORT interpolatorTargets - FILE interpolatorTargets.cmake - NAMESPACE interpolator:: - DESTINATION lib/cmake/interpolator) - diff --git a/interpolator/mls_interpolator/MLSCoefficients.hpp b/interpolator/mls_interpolator/MLSCoefficients.hpp deleted file mode 100644 index 620c264e..00000000 --- a/interpolator/mls_interpolator/MLSCoefficients.hpp +++ /dev/null @@ -1,212 +0,0 @@ -#ifndef MLS_COEFFICIENTS_HPP -#define MLS_COEFFICIENTS_HPP - -#include - -#include "points.hpp" - -#define PI_M 3.14159265358979323846 - -KOKKOS_INLINE_FUNCTION -double func(Coord& p) { - auto x = (p.x - 0.5) * PI_M * 2; - auto y = (p.y - 0.5) * PI_M * 2; - double Z = sin(x) * sin(y); - return Z; -} - -// polynomial basis vector -KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { - basis_monomial(0) = 1.0; - basis_monomial(1) = p1.x; - basis_monomial(2) = p1.y; - basis_monomial(3) = p1.x * p1.x; - basis_monomial(4) = p1.x * p1.y; - basis_monomial(5) = p1.y * p1.y; -} - -// radial basis function -KOKKOS_INLINE_FUNCTION -double rbf(double r_sq, double rho_sq) { - double phi; - double r = sqrt(r_sq); - double rho = sqrt(rho_sq); - double ratio = r / rho; - double limit = 1 - ratio; - if (limit < 0) { - phi = 0; - - } else { - phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + - 82 * pow(ratio, 2) + 36 * ratio + 6; - phi = phi * pow(limit, 6); - } - - return phi; -} - -// create vandermondeMatrix -KOKKOS_INLINE_FUNCTION -void VandermondeMatrix(ScratchMatView V, ScratchMatView local_source_points, - int j) { - int N = local_source_points.extent(0); - Coord source_point; - source_point.x = local_source_points(j, 0); - source_point.y = local_source_points(j, 1); - ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); - BasisPoly(basis_monomial, source_point); -} - -// moment matrix -KOKKOS_INLINE_FUNCTION -void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, - int j) { - int M = V.extent(0); - int N = V.extent(1); - - ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); - for (int k = 0; k < N; k++) { - pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); - } -} - -// radial basis function vector -KOKKOS_INLINE_FUNCTION -void PhiVector(ScratchVecView Phi, Coord target_point, - ScratchMatView local_source_points, int j, - double cuttoff_dis_sq) { - int N = local_source_points.extent(0); - double dx = target_point.x - local_source_points(j, 0); - double dy = target_point.y - local_source_points(j, 1); - double ds_sq = dx * dx + dy * dy; - Phi(j) = rbf(ds_sq, cuttoff_dis_sq); -} - -// matrix matrix multiplication -KOKKOS_INLINE_FUNCTION -void MatMatMul(member_type team, ScratchMatView moment_matrix, - ScratchMatView pt_phi, ScratchMatView vandermonde) { - int M = pt_phi.extent(0); - int N = vandermonde.extent(1); - int K = pt_phi.extent(1); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { - double sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); - moment_matrix(i, j) = sum; - }); - }); -} - -// Matrix vector multiplication -KOKKOS_INLINE_FUNCTION -void MatVecMul(member_type team, ScratchVecView vector, ScratchMatView matrix, - ScratchVecView result) { - int M = matrix.extent(0); - int N = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - double sum = 0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, - sum); - result(i) = sum; - }); - // team.team_barrier(); -} - -// dot product -KOKKOS_INLINE_FUNCTION -void dot_product(member_type team, ScratchVecView result_sub, - ScratchVecView exactSupportValues_sub, double& target_value) { - int N = result_sub.extent(0); - for (int j = 0; j < N; ++j) { - target_value += result_sub(j) * exactSupportValues_sub[j]; - } -} - -// moment matrix -KOKKOS_INLINE_FUNCTION -void PtphiPMatrix(ScratchMatView moment_matrix, member_type team, - ScratchMatView pt_phi, ScratchMatView vandermonde) { - int M = pt_phi.extent(0); - int N = vandermonde.extent(1); - int K = pt_phi.extent(1); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { - double sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); - moment_matrix(i, j) = sum; - }); - }); -} - -// inverse matrix -KOKKOS_INLINE_FUNCTION -void inverse_matrix(member_type team, ScratchMatView matrix, - ScratchMatView lower, ScratchMatView forward_matrix, - ScratchMatView solution) { - int N = matrix.extent(0); - - for (int j = 0; j < N; ++j) { - Kokkos::single(Kokkos::PerTeam(team), [=]() { - double sum = 0; - for (int k = 0; k < j; ++k) { - sum += lower(j, k) * lower(j, k); - } - lower(j, j) = sqrt(matrix(j, j) - sum); - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) { - double inner_sum = 0; - for (int k = 0; k < j; ++k) { - inner_sum += lower(i, k) * lower(j, k); - } - lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); - lower(j, i) = lower(i, j); - }); - - team.team_barrier(); - } - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - forward_matrix(i, i) = 1.0 / lower(i, i); - for (int j = i + 1; j < N; ++j) { - forward_matrix(j, i) = 0.0; // Initialize to zero - for (int k = 0; k < j; ++k) { - forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); - } - forward_matrix(j, i) /= lower(j, j); - } - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); - for (int j = N - 2; j >= 0; --j) { - solution(j, i) = forward_matrix(j, i); - for (int k = j + 1; k < N; ++k) { - solution(j, i) -= lower(j, k) * solution(k, i); - } - solution(j, i) /= lower(j, j); - } - }); -} - -#endif diff --git a/interpolator/mls_interpolator/MLSInterpolation.hpp b/interpolator/mls_interpolator/MLSInterpolation.hpp deleted file mode 100644 index cb6a0eb5..00000000 --- a/interpolator/mls_interpolator/MLSInterpolation.hpp +++ /dev/null @@ -1,187 +0,0 @@ -#ifndef MLS_INTERPOLATION_HPP -#define MLS_INTERPOLATION_HPP - -#include "MLSCoefficients.hpp" -#include "adj_search_dega2.hpp" -#include "adj_search.hpp" -#include "points.hpp" - -using namespace Omega_h; -using namespace pcms; - -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - Write radii2) { - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; - - // Kokkos::RangePolicy range_policy(1, - // nvertices_target); - - Kokkos::View shmem_each_team( - "stores the size required for each team", nvertices_target); - // Write shmem_each_team(nvertices_target, 0, "stores the size required - // for each team member"); - - Kokkos::parallel_for( - "calculate the size required for scratch for each team", nvertices_target, - KOKKOS_LAMBDA(const int i) { - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(6, 6) * 4; - total_shared_size += ScratchMatView::shmem_size(6, nsupports) * 2; - total_shared_size += ScratchMatView::shmem_size(nsupports, 6); - total_shared_size += ScratchVecView::shmem_size(6); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - shmem_each_team(i) = total_shared_size; - }); - - size_t shared_size; - Kokkos::parallel_reduce( - "find_max", nvertices_target, - KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { - if (shmem_each_team(i) > max_val_temp) { - max_val_temp = shmem_each_team(i); - } - }, - Kokkos::Max(shared_size)); - printf("shared size = %d \n", shared_size); - - Write approx_target_values(nvertices_target, 0, - "approximated target values"); - - team_policy tp(nvertices_target, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - printf("scratch size is %d \n", scratch_size); - - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - - int nsupports = end_ptr - start_ptr; - - ScratchMatView local_source_points(team.team_scratch(0), nsupports, 2); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - } - - ScratchMatView lower(team.team_scratch(0), 6, 6); - - ScratchMatView forward_matrix(team.team_scratch(0), 6, 6); - - ScratchMatView moment_matrix(team.team_scratch(0), 6, 6); - - ScratchMatView inv_mat(team.team_scratch(0), 6, 6); - - ScratchMatView V(team.team_scratch(0), nsupports, 6); - - ScratchMatView Ptphi(team.team_scratch(0), 6, nsupports); - - ScratchMatView resultant_matrix(team.team_scratch(0), 6, nsupports); - - ScratchVecView targetMonomialVec(team.team_scratch(0), 6); - - ScratchVecView SupportValues(team.team_scratch(0), nsupports); - - ScratchVecView result(team.team_scratch(0), nsupports); - - ScratchVecView Phi(team.team_scratch(0), nsupports); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { - for (int k = 0; k < 6; ++k) { - lower(j, k) = 0; - forward_matrix(j, k) = 0; - moment_matrix(j, k) = 0; - inv_mat(j, k) = 0; - } - - targetMonomialVec(j) = 0; - for (int k = 0; k < nsupports; ++k) { - resultant_matrix(j, k) = 0; - - Ptphi(j, k) = 0; - } - }); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < 6; ++k) { - V(j, k) = 0; - } - - SupportValues(j) = 0; - result(j) = 0; - Phi(j) = 0; - }); - - Coord target_point; - - target_point.x = target_coordinates[i * dim]; - - target_point.y = target_coordinates[i * dim + 1]; - - BasisPoly(targetMonomialVec, target_point); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { VandermondeMatrix(V, local_source_points, j); }); - - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - PhiVector(Phi, target_point, local_source_points, j, radii2[i]); - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); - - team.team_barrier(); - - MatMatMul(team, moment_matrix, Ptphi, V); - team.team_barrier(); - - inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); - - team.team_barrier(); - - MatMatMul(team, resultant_matrix, inv_mat, Ptphi); - team.team_barrier(); - - MatVecMul(team, targetMonomialVec, resultant_matrix, result); - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - SupportValues(i) = - source_values[support.supports_idx[start_ptr + i]]; - }); - - double tgt_value = 0; - dot_product(team, result, SupportValues, tgt_value); - if (team.team_rank() == 0) { - approx_target_values[i] = tgt_value; - } - }); - - return approx_target_values; -} - -#endif diff --git a/interpolator/mls_interpolator/adj_search.hpp b/interpolator/mls_interpolator/adj_search.hpp deleted file mode 100644 index fd5fe60c..00000000 --- a/interpolator/mls_interpolator/adj_search.hpp +++ /dev/null @@ -1,218 +0,0 @@ -#ifndef ADJ_SEARCH_HPP -#define ADJ_SEARCH_HPP - -#include - -#include "queue_visited.hpp" - -using namespace Omega_h; - -static constexpr int max_dim = 3; - -// TODO change this into span/mdspan -OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) { - Real dx, dy, dz; - dx = p1[0] - p2[0]; - dy = p1[1] - p2[1]; - if (dim != 3) { - dz = 0.0; - } else { - dz = p1[2] - p2[2]; - } - - return dx * dx + dy * dy + dz * dz; -} - -class FindSupports { - private: - Mesh& source_mesh; - Mesh& target_mesh; - - public: - FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) - : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - - void adjBasedSearch(const Real& cutoffDistance, const Write& supports_ptr, - Write& nSupports, Write& support_idx); -}; - -void FindSupports::adjBasedSearch(const Real& cutoffDistance, - const Write& supports_ptr, - Write& nSupports, - Write& support_idx) { - // Source Mesh Info - - const auto& sourcePoints_coords = source_mesh.coords(); - const auto nvertices_source = source_mesh.nverts(); - const auto dim = source_mesh.dim(); - - // Target Mesh Info - const auto& targetPoints_coords = target_mesh.coords(); - const auto nvertices_target = target_mesh.nverts(); - - // CSR data structure of adjacent vertex information of each source vertex - const auto& vert2vert = source_mesh.ask_star(VERT); - const auto& v2v_ptr = vert2vert.a2ab; - const auto& v2v_data = vert2vert.ab2b; - - // CSR data structure of vertex information of each cell in source mesh - - // CSR data structure of adjacent vertex information of each source vertex - // dim == 2; ask vertices of tri (3 vertices for each tri) & if dim ==3; ask - // vertices of tetrahedron (4 vertices for each tet) - const auto& cells2verts = source_mesh.ask_verts_of(dim); - - Kokkos::View target_points("test_points", nvertices_target); - - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - target_points(i, 0) = targetPoints_coords[i * dim]; - target_points(i, 1) = targetPoints_coords[i * dim + 1]; - }); - Kokkos::fence(); - pcms::GridPointSearch search_cell(source_mesh, 10, 10); - - // get the cell id for each target point - auto results = search_cell(target_points); - - parallel_for( - nvertices_target, - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - - LO source_cell_id = results(id).tri_id; - - const LO num_verts_in_dim = dim + 1; - - LO start_ptr = source_cell_id * num_verts_in_dim; - - LO end_ptr = start_ptr + num_verts_in_dim; - - Real target_coords[max_dim]; - - Real support_coords[max_dim]; - - for (LO k = 0; k < dim; ++k) { - target_coords[k] = target_points(id, k); - } - - LO start_counter; - - if (support_idx.size() > 0) { - start_counter = supports_ptr[id]; - } - - int count = 0; - // Initialize queue by pushing the vertices in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; ++i) { - LO vert_id = cells2verts[i]; - visited.push_back(vert_id); - - for (LO k = 0; k < dim; ++k) { - support_coords[k] = sourcePoints_coords[vert_id * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(vert_id); - if (support_idx.size() > 0) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = vert_id; - } - } - } - - while (!queue.isEmpty()) { - LO currentVertex = queue.front(); - queue.pop_front(); - LO start = v2v_ptr[currentVertex]; - LO end = v2v_ptr[currentVertex + 1]; - - for (LO i = start; i < end; ++i) { - auto neighborIndex = v2v_data[i]; - - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function - - if (visited.notVisited(neighborIndex)) { - visited.push_back(neighborIndex); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - sourcePoints_coords[neighborIndex * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighborIndex); - if (support_idx.size() > 0) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = neighborIndex; - } - } - } - } - } // end of while loop - - nSupports[id] = count; - }, - "count the number of supports in each target point"); -} - -struct SupportResults { - Write supports_ptr; - Write supports_idx; -}; - -SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, - Real& cutoffDistance) { - SupportResults support; - - FindSupports search(source_mesh, target_mesh); - - LO nvertices_source = source_mesh.nverts(); - - LO nvertices_target = target_mesh.nverts(); - - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); - - search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, - support.supports_idx); - - Kokkos::fence(); - - support.supports_ptr = - Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); - - LO total_supports = 0; - - Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); - - Kokkos::fence(); - - support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); - - search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, - support.supports_idx); - - return support; -} - -#endif diff --git a/interpolator/mls_interpolator/adj_search_dega2.hpp b/interpolator/mls_interpolator/adj_search_dega2.hpp deleted file mode 100644 index 3a9c4075..00000000 --- a/interpolator/mls_interpolator/adj_search_dega2.hpp +++ /dev/null @@ -1,257 +0,0 @@ -#ifndef ADJ_SEARCH_HPP -#define ADJ_SEARCH_HPP - -#include -#include - -#include "queue_visited.hpp" - -using namespace Omega_h; - -static constexpr int max_dim = 3; - -// TODO change this into span/mdspan -OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) { - Real dx, dy, dz; - dx = p1[0] - p2[0]; - dy = p1[1] - p2[1]; - if (dim != 3) { - dz = 0.0; - } else { - dz = p1[2] - p2[2]; - } - - return dx * dx + dy * dy + dz * dz; -} - -class FindSupports { - private: - Mesh& mesh; - - public: - FindSupports(Mesh& mesh_) : mesh(mesh_) {}; - - void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, - bool is_build_csr_call); -}; - -void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, - bool is_build_csr_call) { - // Mesh Info - LO min_num_supports = 20; // TODO: make this an input parameter - const auto& mesh_coords = mesh.coords(); - const auto& nvertices = mesh.nverts(); - const auto& dim = mesh.dim(); - const auto& nfaces = mesh.nfaces(); - - // CSR data structure of adjacent cell information of each vertex in a mesh - const auto& nodes2faces = mesh.ask_up(VERT, FACE); - const auto& n2f_ptr = nodes2faces.a2ab; - const auto& n2f_data = nodes2faces.ab2b; - const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; - - Write cell_centroids( - dim * nfaces, 0, - "stores coordinates of cell centroid of each tri element"); - - parallel_for( - "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); - const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(mesh_coords, current_el_verts); - auto centroid = average(current_el_vert_coords); - int index = dim * id; - cell_centroids[index] = centroid[0]; - cell_centroids[index + 1] = centroid[1]; - }); - // * Got the adj data and cell centroids - - parallel_for( - nvertices, // for each target vertex which is a node for this case - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - const LO num_verts_in_dim = dim + 1; - Real target_coords[max_dim]; - Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; // squared radii of the supports - - //? copying the target vertex coordinates - for (LO k = 0; k < dim; ++k) { - target_coords[k] = mesh_coords[id * dim + k]; - } - - LO start_counter; // start of support idx for the current target vertex - - // not being done in the first call - // where the support_idx start for the current target vertex - if (!is_build_csr_call) { - start_counter = supports_ptr[id]; // start support location for the - // current target vertex - } - LO start_ptr = - n2f_ptr[id]; // start loc of the adj cells of the target node - LO end_ptr = - n2f_ptr[id + 1]; // end loc of the adj cells of the target node - - int count = 0; // number of supports for the current target vertex - // Initialize queue by pushing the cells in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex - LO cell_id = n2f_data[i]; - visited.push_back(cell_id); // cell added to the visited list - - for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the - // centroid of the cell - support_coords[k] = cell_centroids[cell_id * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(cell_id); - if (!is_build_csr_call) { // not being done in the first call - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - cell_id; // add the support cell to the support_idx - } // end of support_idx check - } // end of distance check - } // end of loop over adj cells to the target vertex - - // loops over the queued cells from the neighborhood of the target - // vertex - while (!queue.isEmpty()) { // ? can queue be empty? - LO currentCell = queue.front(); - queue.pop_front(); - LO start = currentCell * - num_verts_in_dim; // start vert id of the current cell - LO end = start + num_verts_in_dim; // end vert id of the current cell - - for (LO i = start; i < end; - ++i) { // loop over the vertices of the current cell - LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = - n2f_ptr[current_vert_id]; // start loc of adj cells to current - // vertex - LO end_ptr_vert_current_vert = - n2f_ptr[current_vert_id + - 1]; // end loc of adj cells to current vertex - for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { // loop over adj cells to the current vertex - auto neighbor_cell_index = n2f_data[j]; // current cell - - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function - - if (visited.notVisited(neighbor_cell_index)) { - visited.push_back(neighbor_cell_index); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - cell_centroids[neighbor_cell_index * dim + k]; - } - - Real dist = - calculateDistance(target_coords, support_coords, dim); - - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighbor_cell_index); - if (!is_build_csr_call) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - neighbor_cell_index; - } // end of support_idx check - } // end of distance check - } // end of not visited check - } // end of loop over adj cells to the current vertex - } // end of loop over nodes - - } // end of while loop - - nSupports[id] = count; - }, // end of lambda - "count the number of supports in each target point"); -} - -struct SupportResults { - Write supports_ptr; - Write supports_idx; - Write radii2; // squared radii of the supports -}; - -SupportResults searchNeighbors(Mesh& mesh, Real& cutoffDistance) { - LO min_support = 20; - SupportResults support; - - FindSupports search(mesh); - - LO nvertices_target = mesh.nverts(); - - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); - - printf("Inside searchNeighbors 1\n"); - support.radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); - // this call gets the number of supports for each target vertex: nSupports and - // the squared radii of the supports (with might be increased in the search to - // have enough supports) - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, true); - - printf("Inside searchNeighbors 2\n"); - Kokkos::fence(); - - // * update radius if nSupport is less that min_support - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - if (nSupports[i] < min_support) { - support.radii2[i] *= float(min_support) / nSupports[i]; - nSupports[i] = 0; // ? might not be needed - } - }); - - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, true); - - // offset array for the supports of each target vertex - support.supports_ptr = - Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); - - LO total_supports = 0; - - // get the total number of supports and fill the offset array - Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? - // OMEGA_H_CHECK(nSupports[j] >= 15); - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); - - printf("Inside searchNeighbors 3\n"); - Kokkos::fence(); - - support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); - - // second call to get the actual support indices - // now sizes of support.supports_ptr and support.supports_idx are known and > - // 0 - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, false); - - return support; -} - -#endif diff --git a/interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in b/interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in deleted file mode 100644 index c74108bf..00000000 --- a/interpolator/mls_interpolator/cmake/interpolatorConfig.cmake.in +++ /dev/null @@ -1,6 +0,0 @@ -@PACKAGE_INIT@ - -include("${CMAKE_CURRENT_LIST_DIR}/interpolatorTargets.cmake") - -# Optional: Make the package more discoverable by exposing the include directories -set(INTERPOLATOR_INCLUDE_DIRS "@CMAKE_INSTALL_PREFIX@/include/interpolator") diff --git a/interpolator/mls_interpolator/points.hpp b/interpolator/mls_interpolator/points.hpp deleted file mode 100644 index b54e27f0..00000000 --- a/interpolator/mls_interpolator/points.hpp +++ /dev/null @@ -1,32 +0,0 @@ -#ifndef POINTS_HPP -#define POINTS_HPP - -#include -#include - -struct Coord { - double x, y; -}; - -using range_policy = typename Kokkos::RangePolicy<>; -using team_policy = typename Kokkos::TeamPolicy<>; -using member_type = typename Kokkos::TeamPolicy<>::member_type; - -// alias for scratch view -using ScratchSpace = - typename Kokkos::DefaultExecutionSpace::scratch_memory_space; - -using ScratchMatView = - typename Kokkos::View>; -using ScratchVecView = - typename Kokkos::View>; - -using PointsViewType = Kokkos::View; - -struct Points { - PointsViewType coordinates; -}; - -#endif diff --git a/interpolator/mls_interpolator/queue_visited.hpp b/interpolator/mls_interpolator/queue_visited.hpp deleted file mode 100644 index 82989def..00000000 --- a/interpolator/mls_interpolator/queue_visited.hpp +++ /dev/null @@ -1,106 +0,0 @@ -#ifndef QUEUE_VISITED_HPP -#define QUEUE_VISITED_HPP - -#include -#include -#include -#include -#include -#include -#define MAX_SIZE 500 -using namespace std; -using namespace Omega_h; - -class queue { - private: - Real queue_array[MAX_SIZE]; - int first = 0, last = -1, count = 0; - - public: - OMEGA_H_INLINE - queue() {} - - OMEGA_H_INLINE - ~queue() {} - - OMEGA_H_INLINE - void push_back(const int& item); - - OMEGA_H_INLINE - void pop_front(); - - OMEGA_H_INLINE - int front(); - - OMEGA_H_INLINE - bool isEmpty() const; - - OMEGA_H_INLINE - bool isFull() const; -}; - -class track { - private: - Real tracking_array[MAX_SIZE]; - int first = 0, last = -1, count = 0; - - public: - OMEGA_H_INLINE - track() {} - - OMEGA_H_INLINE - ~track() {} - - OMEGA_H_INLINE - void push_back(const int& item); - - OMEGA_H_INLINE - int size(); - - OMEGA_H_INLINE - bool notVisited(const int& item); -}; - -OMEGA_H_INLINE -void queue::push_back(const int& item) { - last = (last + 1) % MAX_SIZE; - queue_array[last] = item; - count++; -} - -OMEGA_H_INLINE -void queue::pop_front() { - first = (first + 1) % MAX_SIZE; - count--; -} - -OMEGA_H_INLINE -int queue::front() { return queue_array[first]; } - -OMEGA_H_INLINE -bool queue::isEmpty() const { return count == 0; } - -OMEGA_H_INLINE -bool queue::isFull() const { return count == MAX_SIZE; } - -OMEGA_H_INLINE -void track::push_back(const int& item) { - last = (last + 1) % MAX_SIZE; - tracking_array[last] = item; - count++; -} - -OMEGA_H_INLINE -bool track::notVisited(const int& item) { - for (int i = 0; i < count; ++i) { - if (tracking_array[i] == item) { - return false; - } - } - return true; -} - -OMEGA_H_INLINE -int track::size() { return count; } - -#endif From 749be28ef615e6d454b92d573ccd01dffeaa7a15 Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Tue, 24 Sep 2024 12:35:36 -0400 Subject: [PATCH 07/66] enforce number of support searches and adapts radii until all targets get the minimum number of supports and debug queue (counter overflow) --- src/interpolator/adj_search_dega2.hpp | 47 ++++++++++++++++++++++++--- src/interpolator/queue_visited.hpp | 31 +++++++++++++----- 2 files changed, 65 insertions(+), 13 deletions(-) diff --git a/src/interpolator/adj_search_dega2.hpp b/src/interpolator/adj_search_dega2.hpp index 3a9c4075..26f2edda 100644 --- a/src/interpolator/adj_search_dega2.hpp +++ b/src/interpolator/adj_search_dega2.hpp @@ -185,8 +185,8 @@ struct SupportResults { Write radii2; // squared radii of the supports }; -SupportResults searchNeighbors(Mesh& mesh, Real& cutoffDistance) { - LO min_support = 20; +SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance) { + LO min_support = 12; SupportResults support; FindSupports search(mesh); @@ -199,24 +199,60 @@ SupportResults searchNeighbors(Mesh& mesh, Real& cutoffDistance) { printf("Inside searchNeighbors 1\n"); support.radii2 = Write(nvertices_target, cutoffDistance, "squared radii of the supports"); + printf("cut off distance: %f\n", cutoffDistance); // this call gets the number of supports for each target vertex: nSupports and // the squared radii of the supports (with might be increased in the search to // have enough supports) + int r_adjust_loop = 0; + while (true) { // until the number of minimum support is met + // find maximum radius + Real max_radius = 0.0; + Kokkos::parallel_reduce("find max radius", + nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (support.radii2[i] > local_max) ? support.radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, support.radii2, true); - printf("Inside searchNeighbors 2\n"); + //printf("Inside searchNeighbors 2\n"); + Kokkos::fence(); + // * find the minimum number of supports + LO min_nSupports = 0; + Kokkos::parallel_reduce( "find min number of supports", + nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; + }, + Kokkos::Min(min_nSupports)); + + printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, r_adjust_loop, max_radius); + r_adjust_loop++; + + if (min_nSupports >= min_support) { + break; + } + Kokkos::fence(); // * update radius if nSupport is less that min_support parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO i) { if (nSupports[i] < min_support) { - support.radii2[i] *= float(min_support) / nSupports[i]; - nSupports[i] = 0; // ? might not be needed + Real factor = Real(min_support) / nSupports[i]; + factor = (nSupports[i] == 0 || factor > 3.0) ? 3.0 : factor; + support.radii2[i] *= factor; + //nSupports[i] = 0; // ? might not be needed } }); + } + + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, support.radii2, true); @@ -245,6 +281,7 @@ SupportResults searchNeighbors(Mesh& mesh, Real& cutoffDistance) { support.supports_idx = Write( total_supports, 0, "index of source supports of each target node"); + printf("total_supports: %d\n", total_supports); // second call to get the actual support indices // now sizes of support.supports_ptr and support.supports_idx are known and > // 0 diff --git a/src/interpolator/queue_visited.hpp b/src/interpolator/queue_visited.hpp index 82989def..e236f6d6 100644 --- a/src/interpolator/queue_visited.hpp +++ b/src/interpolator/queue_visited.hpp @@ -7,13 +7,14 @@ #include #include #include -#define MAX_SIZE 500 +#define MAX_SIZE_QUEUE 500 +#define MAX_SIZE_TRACK 800 using namespace std; using namespace Omega_h; class queue { private: - Real queue_array[MAX_SIZE]; + LO queue_array[MAX_SIZE_QUEUE]; int first = 0, last = -1, count = 0; public: @@ -41,7 +42,7 @@ class queue { class track { private: - Real tracking_array[MAX_SIZE]; + LO tracking_array[MAX_SIZE_TRACK]; int first = 0, last = -1, count = 0; public: @@ -63,14 +64,22 @@ class track { OMEGA_H_INLINE void queue::push_back(const int& item) { - last = (last + 1) % MAX_SIZE; + if (count == MAX_SIZE_QUEUE) { + printf("queue is full %d\n", count); + return; + } + last = (last + 1) % MAX_SIZE_QUEUE; queue_array[last] = item; count++; } OMEGA_H_INLINE void queue::pop_front() { - first = (first + 1) % MAX_SIZE; + if (count == 0) { + printf("queue is empty\n"); + return; + } + first = (first + 1) % MAX_SIZE_QUEUE; count--; } @@ -81,19 +90,25 @@ OMEGA_H_INLINE bool queue::isEmpty() const { return count == 0; } OMEGA_H_INLINE -bool queue::isFull() const { return count == MAX_SIZE; } +bool queue::isFull() const { return count == MAX_SIZE_QUEUE; } OMEGA_H_INLINE void track::push_back(const int& item) { - last = (last + 1) % MAX_SIZE; + if (count == MAX_SIZE_TRACK) { + printf("track is full %d\n", count); + return; + } + last = (last + 1) % MAX_SIZE_TRACK; tracking_array[last] = item; count++; } OMEGA_H_INLINE bool track::notVisited(const int& item) { + int id; for (int i = 0; i < count; ++i) { - if (tracking_array[i] == item) { + id = (first + i) % MAX_SIZE_TRACK; + if (tracking_array[id] == item) { return false; } } From cca0eca89946b18535a878cf11fa240f6484ac3d Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 24 Sep 2024 13:32:26 -0400 Subject: [PATCH 08/66] can use any dimension and degree of polynomial --- src/interpolator/MLSCoefficients.hpp | 91 ++++++++++++++++++-- src/interpolator/MLSInterpolation.hpp | 116 ++++++++++++++++---------- src/interpolator/points.hpp | 3 +- 3 files changed, 156 insertions(+), 54 deletions(-) diff --git a/src/interpolator/MLSCoefficients.hpp b/src/interpolator/MLSCoefficients.hpp index 620c264e..5879846b 100644 --- a/src/interpolator/MLSCoefficients.hpp +++ b/src/interpolator/MLSCoefficients.hpp @@ -7,6 +7,9 @@ #define PI_M 3.14159265358979323846 +static constexpr MAX_DIM = 3; + + KOKKOS_INLINE_FUNCTION double func(Coord& p) { auto x = (p.x - 0.5) * PI_M * 2; @@ -15,17 +18,84 @@ double func(Coord& p) { return Z; } -// polynomial basis vector +// finds the slice lengths of the of the polynomial basis + +KOKKOS_INLINE_FUNCTION +void basisSliceLengths(MatViewType array){ + dim = array.extent(0); + degree = array.extent(1); + for (int i = 0; i < dim; ++i){ + array(0,i) = 1; + } + + for (int j = 0; j < degree; ++j){ + array(j, 0) = 1; + } + + for (int i = 1; i < degree; ++i){ + for (int j = 1; j < dim; ++j){ + array(i, j) = array(i-1,j) + array(i,j-1); + } + } + +} + +// finds the size of the polynomial basis vector KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { - basis_monomial(0) = 1.0; - basis_monomial(1) = p1.x; - basis_monomial(2) = p1.y; - basis_monomial(3) = p1.x * p1.x; - basis_monomial(4) = p1.x * p1.y; - basis_monomial(5) = p1.y * p1.y; +int basisSize(const MatViewType& array){ + int sum = 1; + for (int i = 1; i < degree; ++i){ + for (int j = 1; j < dim; ++j){ + sum += array(i, j); + } + } } +// evaluates the polynomial basis + +KOKKOS_INLINE_FUNCTION +void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, Coord &p){ + + basis_monomial(0) = 1; + dim = slice_length.extent(0); + degree = slice_length.extent(1); + + int prev_col = 0; + int curr_col = 1; + + double point[MAX_DIM]; + point[0] = p.x; + point[1] = p.y; + + if (dim == 3){ + point[2] = p.z; + } + + for (int i = 0; i < degree; ++i){ + int offset = curr_col; + for (int j = 0; j < dim; ++j){ + for (k = 0; k < slice_length(i,j); ++k){ + basis_monomial(offset + i) = basis_polynomial(prev_col +k) * point[j]; + loc_offset += slice_length(i,j); + + } + + prev_col = curr_col; + curr_col = offset; + } + } +} +// polynomial basis vector +//KOKKOS_INLINE_FUNCTION +//void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { +// basis_monomial(0) = 1.0; +// basis_monomial(1) = p1.x; +// basis_monomial(2) = p1.y; +// basis_monomial(3) = p1.x * p1.x; +// basis_monomial(4) = p1.x * p1.y; +// basis_monomial(5) = p1.y * p1.y; +//} +// // radial basis function KOKKOS_INLINE_FUNCTION double rbf(double r_sq, double rho_sq) { @@ -51,9 +121,14 @@ KOKKOS_INLINE_FUNCTION void VandermondeMatrix(ScratchMatView V, ScratchMatView local_source_points, int j) { int N = local_source_points.extent(0); + int dim = local_source_points.extent(1); + Coord source_point; source_point.x = local_source_points(j, 0); source_point.y = local_source_points(j, 1); + if (dim == 3){ + source_point.z = local_source_points(j, 2); + } ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); BasisPoly(basis_monomial, source_point); } diff --git a/src/interpolator/MLSInterpolation.hpp b/src/interpolator/MLSInterpolation.hpp index cb6a0eb5..bf096022 100644 --- a/src/interpolator/MLSInterpolation.hpp +++ b/src/interpolator/MLSInterpolation.hpp @@ -4,7 +4,6 @@ #include "MLSCoefficients.hpp" #include "adj_search_dega2.hpp" #include "adj_search.hpp" -#include "points.hpp" using namespace Omega_h; using namespace pcms; @@ -12,18 +11,26 @@ using namespace pcms; Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, - const SupportResults& support, const LO& dim, - Write radii2) { + const SupportResults& support, const LO& dim, + const LO& degree, Write radii2) { const auto nvertices_source = source_coordinates.size() / dim; const auto nvertices_target = target_coordinates.size() / dim; // Kokkos::RangePolicy range_policy(1, // nvertices_target); + static_assert(degree > 0," the degree of polynomial basis should be atleast 1"); + Kokkos::View shmem_each_team( "stores the size required for each team", nvertices_target); - // Write shmem_each_team(nvertices_target, 0, "stores the size required - // for each team member"); + + MatViewType slice_length("stores slice length of polynomial basis", degree, dim); + + Kokkos::deep_copy(slice_length, 0.0); + + basisSliceLengths(slice_length); + + auto basis_size = basisSize(slice_length); Kokkos::parallel_for( "calculate the size required for scratch for each team", nvertices_target, @@ -34,10 +41,10 @@ Write mls_interpolation(const Reals source_values, size_t total_shared_size = 0; - total_shared_size += ScratchMatView::shmem_size(6, 6) * 4; - total_shared_size += ScratchMatView::shmem_size(6, nsupports) * 2; - total_shared_size += ScratchMatView::shmem_size(nsupports, 6); - total_shared_size += ScratchVecView::shmem_size(6); + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size) * 4; + total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); + total_shared_size += ScratchVecView::shmem_size(basis_size); total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; total_shared_size += ScratchMatView::shmem_size(nsupports, 2); shmem_each_team(i) = total_shared_size; @@ -71,30 +78,33 @@ Write mls_interpolation(const Reals source_values, int nsupports = end_ptr - start_ptr; - ScratchMatView local_source_points(team.team_scratch(0), nsupports, 2); + ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); int count = -1; for (int j = start_ptr; j < end_ptr; ++j) { count++; auto index = support.supports_idx[j]; local_source_points(count, 0) = source_coordinates[index * dim]; local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3){ + local_source_points(count, 2) = + } } - ScratchMatView lower(team.team_scratch(0), 6, 6); + ScratchMatView lower(team.team_scratch(0), basis_size, basis_size); - ScratchMatView forward_matrix(team.team_scratch(0), 6, 6); + ScratchMatView forward_matrix(team.team_scratch(0), basis_size, basis_size); - ScratchMatView moment_matrix(team.team_scratch(0), 6, 6); + ScratchMatView moment_matrix(team.team_scratch(0), basis_size, basis_size); - ScratchMatView inv_mat(team.team_scratch(0), 6, 6); + ScratchMatView inv_mat(team.team_scratch(0), basis_size, basis_size); - ScratchMatView V(team.team_scratch(0), nsupports, 6); + ScratchMatView V(team.team_scratch(0), nsupports, basis_size); - ScratchMatView Ptphi(team.team_scratch(0), 6, nsupports); + ScratchMatView Ptphi(team.team_scratch(0), basis_size, nsupports); - ScratchMatView resultant_matrix(team.team_scratch(0), 6, nsupports); + ScratchMatView resultant_matrix(team.team_scratch(0), basis_size, nsupports); - ScratchVecView targetMonomialVec(team.team_scratch(0), 6); + ScratchVecView targetMonomialVec(team.team_scratch(0), basis_size); ScratchVecView SupportValues(team.team_scratch(0), nsupports); @@ -102,39 +112,55 @@ Write mls_interpolation(const Reals source_values, ScratchVecView Phi(team.team_scratch(0), nsupports); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { - for (int k = 0; k < 6; ++k) { - lower(j, k) = 0; - forward_matrix(j, k) = 0; - moment_matrix(j, k) = 0; - inv_mat(j, k) = 0; - } - - targetMonomialVec(j) = 0; - for (int k = 0; k < nsupports; ++k) { - resultant_matrix(j, k) = 0; - - Ptphi(j, k) = 0; - } - }); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < 6; ++k) { - V(j, k) = 0; - } - - SupportValues(j) = 0; - result(j) = 0; - Phi(j) = 0; - }); + Kokkos::deep_copy(lower, 0.0); + Kokkos::deep_copy(forward_matrix, 0.0); + Kokkos::deep_copy(moment_matrix, 0.0); + Kokkos::deep_copy(inv_matrix, 0.0); + Kokkos::deep_copy(V, 0.0); + Kokkos::deep_copy(Ptphi, 0.0); + Kokkos::deep_copy(resultant_matrix, 0.0); + Kokkos::deep_copy(targetMonomialVec, 0.0); + Kokkos::deep_copy(SupportValues, 0.0); + Kokkos::deep_copy(result, 0.0); + Kokkos::deep_copy(Phi, 0.0); + +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { +// for (int k = 0; k < 6; ++k) { +// lower(j, k) = 0; +// forward_matrix(j, k) = 0; +// moment_matrix(j, k) = 0; +// inv_mat(j, k) = 0; +// } +// +// targetMonomialVec(j) = 0; +// for (int k = 0; k < nsupports; ++k) { +// resultant_matrix(j, k) = 0; +// +// Ptphi(j, k) = 0; +// } +// }); +// +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), +// [=](int j) { +// for (int k = 0; k < 6; ++k) { +// V(j, k) = 0; +// } +// +// SupportValues(j) = 0; +// result(j) = 0; +// Phi(j) = 0; +// }); +// Coord target_point; target_point.x = target_coordinates[i * dim]; target_point.y = target_coordinates[i * dim + 1]; - + + if (dim == 3){ + target_point.z = target_coordinates[i * dim + 2]; + } BasisPoly(targetMonomialVec, target_point); Kokkos::parallel_for( diff --git a/src/interpolator/points.hpp b/src/interpolator/points.hpp index b54e27f0..c19f7cb7 100644 --- a/src/interpolator/points.hpp +++ b/src/interpolator/points.hpp @@ -5,7 +5,7 @@ #include struct Coord { - double x, y; + double x, y, z; }; using range_policy = typename Kokkos::RangePolicy<>; @@ -25,6 +25,7 @@ using ScratchVecView = using PointsViewType = Kokkos::View; +using MatViewType = Kokkos::View struct Points { PointsViewType coordinates; }; From 1ddc29ca7448c8035db0fedd85ba4dd0e764098e Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 24 Sep 2024 15:25:21 -0400 Subject: [PATCH 09/66] added test cases --- src/interpolator/MLSCoefficients.hpp | 2 +- src/interpolator/test/CMakeLists.txt | 18 ++++++++++++++++++ src/interpolator/test/test_polybasis.cpp | 21 +++++++++++++++++++++ 3 files changed, 40 insertions(+), 1 deletion(-) create mode 100644 src/interpolator/test/CMakeLists.txt create mode 100644 src/interpolator/test/test_polybasis.cpp diff --git a/src/interpolator/MLSCoefficients.hpp b/src/interpolator/MLSCoefficients.hpp index 5879846b..538a9083 100644 --- a/src/interpolator/MLSCoefficients.hpp +++ b/src/interpolator/MLSCoefficients.hpp @@ -21,7 +21,7 @@ double func(Coord& p) { // finds the slice lengths of the of the polynomial basis KOKKOS_INLINE_FUNCTION -void basisSliceLengths(MatViewType array){ +void basisSliceLengths(MatViewType& array){ dim = array.extent(0); degree = array.extent(1); for (int i = 0; i < dim; ++i){ diff --git a/src/interpolator/test/CMakeLists.txt b/src/interpolator/test/CMakeLists.txt new file mode 100644 index 00000000..27b30c72 --- /dev/null +++ b/src/interpolator/test/CMakeLists.txt @@ -0,0 +1,18 @@ +cmake_minimum_required(VERSION 3.20) + +project(test_1 LANGUAGES CXX) + +find_program(BASH_EXECUTABLE NAMES bash REQUIRED) + +add_executable(test_polybasis test_polybasis.cpp) + +target_link_libraries(test_polybasis PUBLIC pcms::core) + +target_include_directories(test_polybasis PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) + +enable_testing() + +add_test( + NAME test_polybasis + COMMAND $ + ) diff --git a/src/interpolator/test/test_polybasis.cpp b/src/interpolator/test/test_polybasis.cpp new file mode 100644 index 00000000..c016559c --- /dev/null +++ b/src/interpolator/test/test_polybasis.cpp @@ -0,0 +1,21 @@ +#include "MLSInterpolation.hpp" + +int dim = 3; +int degree = 2; +MatViewType d_slice_length("slice array",degree, dim); + +auto host_slice_length = Kokkos::create_mirror_view(d_slice_length); + +Kokkos::deep_copy(d_slice_length, host_slice_length); + +basisSLiceLengths(host_slice_length); + +for (i = 0; i < host_slice_length.extent(0); ++i){ + for (j = 0; j < host_slice_length.extent(1); ++j){ + std::cout << host_slice_length(i, j) << " "; + } + + std::cout << "\n"; +} + + From eda1048dc3535acdba29c8b814fa822c91db037b Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 10 Oct 2024 13:09:31 -0400 Subject: [PATCH 10/66] moved interpolator folder to pcms --- src/pcms/interpolator/CMakeLists.txt | 38 +++ src/pcms/interpolator/MLSCoefficients.hpp | 289 +++++++++++++++++ src/pcms/interpolator/MLSInterpolation.hpp | 213 +++++++++++++ src/pcms/interpolator/adj_search.hpp | 218 +++++++++++++ src/pcms/interpolator/adj_search_dega2.hpp | 294 ++++++++++++++++++ src/pcms/interpolator/linear_interpolant.hpp | 183 +++++++++++ src/pcms/interpolator/multidimarray.hpp | 36 +++ src/pcms/interpolator/points.hpp | 33 ++ src/pcms/interpolator/queue_visited.hpp | 121 +++++++ src/pcms/interpolator/test/CMakeLists.txt | 8 + src/pcms/interpolator/test/config.sh | 17 + src/pcms/interpolator/test/test_polybasis.cpp | 27 ++ 12 files changed, 1477 insertions(+) create mode 100644 src/pcms/interpolator/CMakeLists.txt create mode 100644 src/pcms/interpolator/MLSCoefficients.hpp create mode 100644 src/pcms/interpolator/MLSInterpolation.hpp create mode 100644 src/pcms/interpolator/adj_search.hpp create mode 100644 src/pcms/interpolator/adj_search_dega2.hpp create mode 100644 src/pcms/interpolator/linear_interpolant.hpp create mode 100644 src/pcms/interpolator/multidimarray.hpp create mode 100644 src/pcms/interpolator/points.hpp create mode 100644 src/pcms/interpolator/queue_visited.hpp create mode 100644 src/pcms/interpolator/test/CMakeLists.txt create mode 100644 src/pcms/interpolator/test/config.sh create mode 100644 src/pcms/interpolator/test/test_polybasis.cpp diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt new file mode 100644 index 00000000..a8827030 --- /dev/null +++ b/src/pcms/interpolator/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required(VERSION 3.20) + +project(Interpolator) + +set(HEADER_FILES + adj_search_dega2.hpp + MLSInterpolation.hpp + points.hpp + adj_search.hpp + MLSCoefficients.hpp + queue_visited.hpp + linear_interpolant.hpp + multidimarray.hpp +) + +install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) + + +add_library(interpolator INTERFACE) + +target_include_directories(interpolator INTERFACE + $ + $ + $) + +target_link_libraries(interpolator INTERFACE pcms::core) + +install(TARGETS interpolator + EXPORT interpolatorTargets + INCLUDES DESTINATION include/pcms/interpolator) + +add_subdirectory(test) + +install(EXPORT interpolatorTargets + FILE interpolatorTargets.cmake + NAMESPACE pcms:: + DESTINATION lib/cmake/pcms) + diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp new file mode 100644 index 00000000..47d2ee39 --- /dev/null +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -0,0 +1,289 @@ +#ifndef MLS_COEFFICIENTS_HPP +#define MLS_COEFFICIENTS_HPP + +#include + +#include "points.hpp" + +#define PI_M 3.14159265358979323846 + +static constexpr int MAX_DIM = 3; + + +KOKKOS_INLINE_FUNCTION +double func(Coord& p) { + auto x = (p.x - 0.5) * PI_M * 2; + auto y = (p.y - 0.5) * PI_M * 2; + double Z = sin(x) * sin(y); + return Z; +} + +// finds the slice lengths of the of the polynomial basis + +KOKKOS_INLINE_FUNCTION +void basisSliceLengths(MatViewType& array){ + int dim = array.extent(0); + int degree = array.extent(1); + for (int i = 0; i < dim; ++i){ + array(0,i) = 1; + } + + for (int j = 0; j < degree; ++j){ + array(j, 0) = 1; + } + + for (int i = 1; i < degree; ++i){ + for (int j = 1; j < dim; ++j){ + array(i, j) = array(i-1,j) + array(i,j-1); + } + } + +} + +// finds the size of the polynomial basis vector +KOKKOS_INLINE_FUNCTION +int basisSize(const MatViewType& array){ + int sum = 1; + int dim = array.extent(0); + int degree = array.extent(1); + for (int i = 0; i < degree; ++i){ + for (int j = 0; j < dim; ++j){ + sum += array(i, j); + } + } +} + +// evaluates the polynomial basis + +KOKKOS_INLINE_FUNCTION +void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, Coord &p){ + + basis_monomial(0) = 1; + int dim = slice_length.extent(0); + int degree = slice_length.extent(1); + + int prev_col = 0; + int curr_col = 1; + + double point[MAX_DIM]; + point[0] = p.x; + point[1] = p.y; + + if (dim == 3){ + point[2] = p.z; + } + + for (int i = 0; i < degree; ++i){ + int offset = curr_col; + for (int j = 0; j < dim; ++j){ + for (int k = 0; k < slice_length(i,j); ++k){ + basis_monomial(offset + i) = basis_monomial(prev_col +k) * point[j]; + offset += slice_length(i,j); + + } + + prev_col = curr_col; + curr_col = offset; + } + } +} +// polynomial basis vector +//KOKKOS_INLINE_FUNCTION +//void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { +// basis_monomial(0) = 1.0; +// basis_monomial(1) = p1.x; +// basis_monomial(2) = p1.y; +// basis_monomial(3) = p1.x * p1.x; +// basis_monomial(4) = p1.x * p1.y; +// basis_monomial(5) = p1.y * p1.y; +//} +// +// radial basis function +KOKKOS_INLINE_FUNCTION +double rbf(double r_sq, double rho_sq) { + double phi; + double r = sqrt(r_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + + 82 * pow(ratio, 2) + 36 * ratio + 6; + phi = phi * pow(limit, 6); + } + + return phi; +} + +// create vandermondeMatrix +KOKKOS_INLINE_FUNCTION +void VandermondeMatrix(ScratchMatView& V, const ScratchMatView& local_source_points, + int j, const MatViewType& slice_length) { + int N = local_source_points.extent(0); + int dim = local_source_points.extent(1); + + Coord source_point; + source_point.x = local_source_points(j, 0); + source_point.y = local_source_points(j, 1); + if (dim == 3){ + source_point.z = local_source_points(j, 2); + } + ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); + BasisPoly(basis_monomial, slice_length, source_point); +} + +// moment matrix +KOKKOS_INLINE_FUNCTION +void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, + int j) { + int M = V.extent(0); + int N = V.extent(1); + + ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); + for (int k = 0; k < N; k++) { + pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); + } +} + +// radial basis function vector +KOKKOS_INLINE_FUNCTION +void PhiVector(ScratchVecView& Phi, const Coord& target_point, + const ScratchMatView& local_source_points, int j, + double cuttoff_dis_sq) { + int N = local_source_points.extent(0); + double dx = target_point.x - local_source_points(j, 0); + double dy = target_point.y - local_source_points(j, 1); + double ds_sq = dx * dx + dy * dy; + Phi(j) = rbf(ds_sq, cuttoff_dis_sq); +} + +// matrix matrix multiplication +KOKKOS_INLINE_FUNCTION +void MatMatMul(member_type team, ScratchMatView& moment_matrix, + const ScratchMatView& pt_phi, const ScratchMatView& vandermonde) { + int M = pt_phi.extent(0); + int N = vandermonde.extent(1); + int K = pt_phi.extent(1); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { + double sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); + moment_matrix(i, j) = sum; + }); + }); +} + +// Matrix vector multiplication +KOKKOS_INLINE_FUNCTION +void MatVecMul(member_type team, const ScratchVecView& vector, const ScratchMatView& matrix, + ScratchVecView& result) { + int M = matrix.extent(0); + int N = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + double sum = 0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, M), + [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, + sum); + result(i) = sum; + }); + // team.team_barrier(); +} + +// dot product +KOKKOS_INLINE_FUNCTION +void dot_product(member_type team, const ScratchVecView& result_sub, + const ScratchVecView& SupportValues_sub, double& target_value) { + int N = result_sub.extent(0); + for (int j = 0; j < N; ++j) { + target_value += result_sub(j) * SupportValues_sub[j]; + } +} + +// moment matrix +KOKKOS_INLINE_FUNCTION +void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, + const ScratchMatView& pt_phi, const ScratchMatView& vandermonde) { + int M = pt_phi.extent(0); + int N = vandermonde.extent(1); + int K = pt_phi.extent(1); + + Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { + double sum = 0.0; + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); + moment_matrix(i, j) = sum; + }); + }); +} + +// inverse matrix +KOKKOS_INLINE_FUNCTION +void inverse_matrix(member_type team, const ScratchMatView& matrix, + ScratchMatView& lower, ScratchMatView& forward_matrix, + ScratchMatView& solution) { + int N = matrix.extent(0); + + for (int j = 0; j < N; ++j) { + Kokkos::single(Kokkos::PerTeam(team), [=]() { + double sum = 0; + for (int k = 0; k < j; ++k) { + sum += lower(j, k) * lower(j, k); + } + lower(j, j) = sqrt(matrix(j, j) - sum); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) { + double inner_sum = 0; + for (int k = 0; k < j; ++k) { + inner_sum += lower(i, k) * lower(j, k); + } + lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); + lower(j, i) = lower(i, j); + }); + + team.team_barrier(); + } + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + forward_matrix(i, i) = 1.0 / lower(i, i); + for (int j = i + 1; j < N; ++j) { + forward_matrix(j, i) = 0.0; // Initialize to zero + for (int k = 0; k < j; ++k) { + forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); + } + forward_matrix(j, i) /= lower(j, j); + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { + solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); + for (int j = N - 2; j >= 0; --j) { + solution(j, i) = forward_matrix(j, i); + for (int k = j + 1; k < N; ++k) { + solution(j, i) -= lower(j, k) * solution(k, i); + } + solution(j, i) /= lower(j, j); + } + }); +} + +#endif diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp new file mode 100644 index 00000000..f294b33a --- /dev/null +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -0,0 +1,213 @@ +#ifndef MLS_INTERPOLATION_HPP +#define MLS_INTERPOLATION_HPP + +#include "MLSCoefficients.hpp" +#include "adj_search_dega2.hpp" +#include "adj_search.hpp" + +using namespace Omega_h; +using namespace pcms; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Write radii2) { + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + // Kokkos::RangePolicy range_policy(1, + // nvertices_target); + + // static_assert(degree > 0," the degree of polynomial basis should be atleast 1"); + + Kokkos::View shmem_each_team( + "stores the size required for each team", nvertices_target); + + MatViewType slice_length("stores slice length of polynomial basis", degree, dim); + + Kokkos::deep_copy(slice_length, 0.0); + + basisSliceLengths(slice_length); + + auto basis_size = basisSize(slice_length); + + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size) * 4; + total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); + total_shared_size += ScratchVecView::shmem_size(basis_size); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + shmem_each_team(i) = total_shared_size; + }); + + size_t shared_size; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + printf("shared size = %d \n", shared_size); + + Write approx_target_values(nvertices_target, 0, + "approximated target values"); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + printf("scratch size is %d \n", scratch_size); + + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3){ + local_source_points(count, 2) = source_coordinates[index * dim + 2]; + } + } + + ScratchMatView lower(team.team_scratch(0), basis_size, basis_size); + + ScratchMatView forward_matrix(team.team_scratch(0), basis_size, basis_size); + + ScratchMatView moment_matrix(team.team_scratch(0), basis_size, basis_size); + + ScratchMatView inv_mat(team.team_scratch(0), basis_size, basis_size); + + ScratchMatView V(team.team_scratch(0), nsupports, basis_size); + + ScratchMatView Ptphi(team.team_scratch(0), basis_size, nsupports); + + ScratchMatView resultant_matrix(team.team_scratch(0), basis_size, nsupports); + + ScratchVecView targetMonomialVec(team.team_scratch(0), basis_size); + + ScratchVecView SupportValues(team.team_scratch(0), nsupports); + + ScratchVecView result(team.team_scratch(0), nsupports); + + ScratchVecView Phi(team.team_scratch(0), nsupports); + + +// Kokkos::deep_copy(lower, 0.0); +// Kokkos::deep_copy(forward_matrix, 0.0); +// Kokkos::deep_copy(moment_matrix, 0.0); +// Kokkos::deep_copy(inv_mat, 0.0); +// Kokkos::deep_copy(V, 0.0); +// Kokkos::deep_copy(Ptphi, 0.0); +// Kokkos::deep_copy(resultant_matrix, 0.0); +// Kokkos::deep_copy(targetMonomialVec, 0.0); +// Kokkos::deep_copy(SupportValues, 0.0); +// Kokkos::deep_copy(result, 0.0); +// Kokkos::deep_copy(Phi, 0.0); +// + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { + for (int k = 0; k < 6; ++k) { + lower(j, k) = 0; + forward_matrix(j, k) = 0; + moment_matrix(j, k) = 0; + inv_mat(j, k) = 0; + } + + targetMonomialVec(j) = 0; + for (int k = 0; k < nsupports; ++k) { + resultant_matrix(j, k) = 0; + + Ptphi(j, k) = 0; + } + }); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < 6; ++k) { + V(j, k) = 0; + } + + SupportValues(j) = 0; + result(j) = 0; + Phi(j) = 0; + }); + + Coord target_point; + + target_point.x = target_coordinates[i * dim]; + + target_point.y = target_coordinates[i * dim + 1]; + + if (dim == 3){ + target_point.z = target_coordinates[i * dim + 2]; + } + BasisPoly(targetMonomialVec, slice_length, target_point); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { VandermondeMatrix(V, local_source_points, j, slice_length); }); + + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + PhiVector(Phi, target_point, local_source_points, j, radii2[i]); + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); + + team.team_barrier(); + + MatMatMul(team, moment_matrix, Ptphi, V); + team.team_barrier(); + + inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); + + team.team_barrier(); + + MatMatMul(team, resultant_matrix, inv_mat, Ptphi); + team.team_barrier(); + + MatVecMul(team, targetMonomialVec, resultant_matrix, result); + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + SupportValues(i) = + source_values[support.supports_idx[start_ptr + i]]; + }); + + double tgt_value = 0; + dot_product(team, result, SupportValues, tgt_value); + if (team.team_rank() == 0) { + approx_target_values[i] = tgt_value; + } + }); + + return approx_target_values; +} + +#endif diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp new file mode 100644 index 00000000..fd5fe60c --- /dev/null +++ b/src/pcms/interpolator/adj_search.hpp @@ -0,0 +1,218 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) { + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + + return dx * dx + dy * dy + dz * dz; +} + +class FindSupports { + private: + Mesh& source_mesh; + Mesh& target_mesh; + + public: + FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) + : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; + + void adjBasedSearch(const Real& cutoffDistance, const Write& supports_ptr, + Write& nSupports, Write& support_idx); +}; + +void FindSupports::adjBasedSearch(const Real& cutoffDistance, + const Write& supports_ptr, + Write& nSupports, + Write& support_idx) { + // Source Mesh Info + + const auto& sourcePoints_coords = source_mesh.coords(); + const auto nvertices_source = source_mesh.nverts(); + const auto dim = source_mesh.dim(); + + // Target Mesh Info + const auto& targetPoints_coords = target_mesh.coords(); + const auto nvertices_target = target_mesh.nverts(); + + // CSR data structure of adjacent vertex information of each source vertex + const auto& vert2vert = source_mesh.ask_star(VERT); + const auto& v2v_ptr = vert2vert.a2ab; + const auto& v2v_data = vert2vert.ab2b; + + // CSR data structure of vertex information of each cell in source mesh + + // CSR data structure of adjacent vertex information of each source vertex + // dim == 2; ask vertices of tri (3 vertices for each tri) & if dim ==3; ask + // vertices of tetrahedron (4 vertices for each tet) + const auto& cells2verts = source_mesh.ask_verts_of(dim); + + Kokkos::View target_points("test_points", nvertices_target); + + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + target_points(i, 0) = targetPoints_coords[i * dim]; + target_points(i, 1) = targetPoints_coords[i * dim + 1]; + }); + Kokkos::fence(); + pcms::GridPointSearch search_cell(source_mesh, 10, 10); + + // get the cell id for each target point + auto results = search_cell(target_points); + + parallel_for( + nvertices_target, + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + + LO source_cell_id = results(id).tri_id; + + const LO num_verts_in_dim = dim + 1; + + LO start_ptr = source_cell_id * num_verts_in_dim; + + LO end_ptr = start_ptr + num_verts_in_dim; + + Real target_coords[max_dim]; + + Real support_coords[max_dim]; + + for (LO k = 0; k < dim; ++k) { + target_coords[k] = target_points(id, k); + } + + LO start_counter; + + if (support_idx.size() > 0) { + start_counter = supports_ptr[id]; + } + + int count = 0; + // Initialize queue by pushing the vertices in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; ++i) { + LO vert_id = cells2verts[i]; + visited.push_back(vert_id); + + for (LO k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[vert_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(vert_id); + if (support_idx.size() > 0) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = vert_id; + } + } + } + + while (!queue.isEmpty()) { + LO currentVertex = queue.front(); + queue.pop_front(); + LO start = v2v_ptr[currentVertex]; + LO end = v2v_ptr[currentVertex + 1]; + + for (LO i = start; i < end; ++i) { + auto neighborIndex = v2v_data[i]; + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighborIndex)) { + visited.push_back(neighborIndex); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + sourcePoints_coords[neighborIndex * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighborIndex); + if (support_idx.size() > 0) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighborIndex; + } + } + } + } + } // end of while loop + + nSupports[id] = count; + }, + "count the number of supports in each target point"); +} + +struct SupportResults { + Write supports_ptr; + Write supports_idx; +}; + +SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, + Real& cutoffDistance) { + SupportResults support; + + FindSupports search(source_mesh, target_mesh); + + LO nvertices_source = source_mesh.nverts(); + + LO nvertices_target = target_mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, + support.supports_idx); + + Kokkos::fence(); + + support.supports_ptr = + Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, + support.supports_idx); + + return support; +} + +#endif diff --git a/src/pcms/interpolator/adj_search_dega2.hpp b/src/pcms/interpolator/adj_search_dega2.hpp new file mode 100644 index 00000000..26f2edda --- /dev/null +++ b/src/pcms/interpolator/adj_search_dega2.hpp @@ -0,0 +1,294 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) { + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + + return dx * dx + dy * dy + dz * dz; +} + +class FindSupports { + private: + Mesh& mesh; + + public: + FindSupports(Mesh& mesh_) : mesh(mesh_) {}; + + void adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call); +}; + +void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call) { + // Mesh Info + LO min_num_supports = 20; // TODO: make this an input parameter + const auto& mesh_coords = mesh.coords(); + const auto& nvertices = mesh.nverts(); + const auto& dim = mesh.dim(); + const auto& nfaces = mesh.nfaces(); + + // CSR data structure of adjacent cell information of each vertex in a mesh + const auto& nodes2faces = mesh.ask_up(VERT, FACE); + const auto& n2f_ptr = nodes2faces.a2ab; + const auto& n2f_data = nodes2faces.ab2b; + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Write cell_centroids( + dim * nfaces, 0, + "stores coordinates of cell centroid of each tri element"); + + parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = dim * id; + cell_centroids[index] = centroid[0]; + cell_centroids[index + 1] = centroid[1]; + }); + // * Got the adj data and cell centroids + + parallel_for( + nvertices, // for each target vertex which is a node for this case + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + const LO num_verts_in_dim = dim + 1; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + Real cutoffDistance = radii2[id]; // squared radii of the supports + + //? copying the target vertex coordinates + for (LO k = 0; k < dim; ++k) { + target_coords[k] = mesh_coords[id * dim + k]; + } + + LO start_counter; // start of support idx for the current target vertex + + // not being done in the first call + // where the support_idx start for the current target vertex + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; // start support location for the + // current target vertex + } + LO start_ptr = + n2f_ptr[id]; // start loc of the adj cells of the target node + LO end_ptr = + n2f_ptr[id + 1]; // end loc of the adj cells of the target node + + int count = 0; // number of supports for the current target vertex + // Initialize queue by pushing the cells in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = n2f_data[i]; + visited.push_back(cell_id); // cell added to the visited list + + for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the + // centroid of the cell + support_coords[k] = cell_centroids[cell_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(cell_id); + if (!is_build_csr_call) { // not being done in the first call + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + cell_id; // add the support cell to the support_idx + } // end of support_idx check + } // end of distance check + } // end of loop over adj cells to the target vertex + + // loops over the queued cells from the neighborhood of the target + // vertex + while (!queue.isEmpty()) { // ? can queue be empty? + LO currentCell = queue.front(); + queue.pop_front(); + LO start = currentCell * + num_verts_in_dim; // start vert id of the current cell + LO end = start + num_verts_in_dim; // end vert id of the current cell + + for (LO i = start; i < end; + ++i) { // loop over the vertices of the current cell + LO current_vert_id = faces2nodes[i]; + LO start_ptr_current_vert = + n2f_ptr[current_vert_id]; // start loc of adj cells to current + // vertex + LO end_ptr_vert_current_vert = + n2f_ptr[current_vert_id + + 1]; // end loc of adj cells to current vertex + for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; + ++j) { // loop over adj cells to the current vertex + auto neighbor_cell_index = n2f_data[j]; // current cell + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighbor_cell_index)) { + visited.push_back(neighbor_cell_index); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + cell_centroids[neighbor_cell_index * dim + k]; + } + + Real dist = + calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighbor_cell_index); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + neighbor_cell_index; + } // end of support_idx check + } // end of distance check + } // end of not visited check + } // end of loop over adj cells to the current vertex + } // end of loop over nodes + + } // end of while loop + + nSupports[id] = count; + }, // end of lambda + "count the number of supports in each target point"); +} + +struct SupportResults { + Write supports_ptr; + Write supports_idx; + Write radii2; // squared radii of the supports +}; + +SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance) { + LO min_support = 12; + SupportResults support; + + FindSupports search(mesh); + + LO nvertices_target = mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + printf("Inside searchNeighbors 1\n"); + support.radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + printf("cut off distance: %f\n", cutoffDistance); + // this call gets the number of supports for each target vertex: nSupports and + // the squared radii of the supports (with might be increased in the search to + // have enough supports) + int r_adjust_loop = 0; + while (true) { // until the number of minimum support is met + // find maximum radius + Real max_radius = 0.0; + Kokkos::parallel_reduce("find max radius", + nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (support.radii2[i] > local_max) ? support.radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, true); + + //printf("Inside searchNeighbors 2\n"); + Kokkos::fence(); + // * find the minimum number of supports + LO min_nSupports = 0; + Kokkos::parallel_reduce( "find min number of supports", + nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; + }, + Kokkos::Min(min_nSupports)); + + printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, r_adjust_loop, max_radius); + r_adjust_loop++; + + if (min_nSupports >= min_support) { + break; + } + + Kokkos::fence(); + + // * update radius if nSupport is less that min_support + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_support) { + Real factor = Real(min_support) / nSupports[i]; + factor = (nSupports[i] == 0 || factor > 3.0) ? 3.0 : factor; + support.radii2[i] *= factor; + //nSupports[i] = 0; // ? might not be needed + } + }); + + } + + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, true); + + // offset array for the supports of each target vertex + support.supports_ptr = + Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + // get the total number of supports and fill the offset array + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? + // OMEGA_H_CHECK(nSupports[j] >= 15); + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + printf("Inside searchNeighbors 3\n"); + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + printf("total_supports: %d\n", total_supports); + // second call to get the actual support indices + // now sizes of support.supports_ptr and support.supports_idx are known and > + // 0 + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + support.radii2, false); + + return support; +} + +#endif diff --git a/src/pcms/interpolator/linear_interpolant.hpp b/src/pcms/interpolator/linear_interpolant.hpp new file mode 100644 index 00000000..5abde69f --- /dev/null +++ b/src/pcms/interpolator/linear_interpolant.hpp @@ -0,0 +1,183 @@ + +#ifndef INTERPOLANT_HPP +#define INTERPOLANT_HPP + + +#include +#include "multidimarray.hpp" +#define MAX_DIM 10 + + +KOKKOS_INLINE_FUNCTION +void find_indices(const IntVecView& num_bins, const RealVecView& range, + const RealVecView& point , int* indices){ + int dim = point.extent(0); + // IntVecView indices("parametric coordinate", dim); + for (int i = 0; i < dim; ++i){ + int id = i * 2; + double length = range(id + 1) - range(id); + double dlen = length/num_bins(i); + indices[i] = ((point(i) - range(id)) / dlen); + + } + + +} + + +KOKKOS_INLINE_FUNCTION +void find_limits(const IntVecView& num_bins, const RealVecView& range, const int* indices, double* limits){ + int dim = num_bins.extent(0); + // RealVecView limits("limits", 2*dim); + for (int i = 0; i < dim; ++i){ + int ptr = i * 2; + double dlen = (range(ptr+1) - range(ptr))/num_bins(i); + limits[ptr] = indices[i] * dlen; + limits[ptr+1] = limits[ptr] + dlen; + } +} + + + +KOKKOS_INLINE_FUNCTION +void evaluateParametricCoord(const RealVecView& point, const double* limits , double* parametric_coord){ + int dim = point.extent(0); + // RealVecView parametric_coord("parametric coordinate", dim); + for (int i = 0; i < dim; ++i){ + int index = i * 2; + parametric_coord[i] = (point(i) - limits[index])/(limits[index+1] - limits[index]); + + } +} + + + +KOKKOS_INLINE_FUNCTION +void basis_function(const RealVecView& parametric_coord, double* linear_basis_each_dir){ + int dim = parametric_coord.extent(0); + // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); + for (int i = 0; i < dim; ++i){ + int index = i * 2; + + linear_basis_each_dir[index] = 1 - parametric_coord(i); + + linear_basis_each_dir[index + 1] = parametric_coord(i); + + + } + +} + + +KOKKOS_INLINE_FUNCTION +double linear_interpolant(const IntVecView& dimensions, const double* linear_basis_each_dir, const IntVecView& indices, const RealVecView& values){ + int dim = dimensions.extent(0); + double sum = 0; + int ids[MAX_DIM]; + + int array_size = 1 << dim; + for (int i = 0; i < array_size; ++i){ + double temp = 1.0; + for (int j = 0; j < dim; ++j){ + int index = 2*j; + int id_left = indices(j); + int id_right = id_left + 1; + if (i & (1 << j)){ + temp *= linear_basis_each_dir[index+1]; + ids[j] = id_right; + } else { + temp *= linear_basis_each_dir[index]; + ids[j] = id_left; + } + } + + int idx = calculateIndex(dimensions, ids); + double corner_values = values(idx); + + sum += temp*corner_values; + } + return sum; +} + + + +class RegularGridInterpolator { + private: + const RealMatView parametric_coords; + const RealVecView values; + const IntMatView indices; + const IntVecView dimensions; + + public: + RegularGridInterpolator(const RealMatView& parametric_coords_, + const RealVecView& values_, const IntMatView& indices_, const IntVecView& dimensions_) + : parametric_coords(parametric_coords_), values(values_), indices(indices_), dimensions(dimensions_) {}; + +RealVecView linear_interpolation() { + int dim = dimensions.extent(0); + int N = parametric_coords.extent(0); + RealVecView interpolated_values("approximated values", N); + auto parametric_coords_ = parametric_coords; + auto dimensions_ = dimensions; + auto values_ = values; + auto indices_ = indices; + Kokkos::parallel_for("linear interpolation function",N, KOKKOS_LAMBDA(int j){ + double linear_basis_each_dir[MAX_DIM] = {0.0}; + auto parametric_coord_each_point = Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); + auto index_each_point = Kokkos::subview(indices_, j , Kokkos::ALL()); + basis_function(parametric_coord_each_point, linear_basis_each_dir); + auto approx_value = linear_interpolant(dimensions_, linear_basis_each_dir,index_each_point, values_); + interpolated_values(j) = approx_value; + }); + + return interpolated_values; + } +}; + + +struct Result{ + IntMatView indices_pts; + RealMatView parametric_coords; +}; + +Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, const RealVecView& range){ + + int dim = num_bins.extent(0); + Result result; + int N = points.extent(0); + result.indices_pts = IntMatView("indices",N,dim); + result.parametric_coords = RealMatView("parametric_coordinates",N, dim); + + Kokkos::parallel_for(N, KOKKOS_LAMBDA(int j){ + int indcs[MAX_DIM] = {0}; + double limits[MAX_DIM] = {0.0}; + double parametric_coord_each[MAX_DIM] = {0.0}; + auto point_coord = Kokkos::subview(points, j , Kokkos::ALL()); + find_indices(num_bins, range, point_coord, indcs); + find_limits(num_bins, range, indcs, limits); + evaluateParametricCoord( point_coord, limits, parametric_coord_each); + for (int i = 0; i < dim; ++i){ + result.indices_pts(j,i) = indcs[i]; + result.parametric_coords(j,i) = parametric_coord_each[i]; + } + + }); + + return result; + +} + +KOKKOS_INLINE_FUNCTION +double test_function(double* coord){ + double fun_value = 0; + int dim = 5; + for (int i = 0; i < dim; ++i){ + fun_value += coord[i]; + } + return fun_value; + } + +#endif + + + diff --git a/src/pcms/interpolator/multidimarray.hpp b/src/pcms/interpolator/multidimarray.hpp new file mode 100644 index 00000000..04b6fac1 --- /dev/null +++ b/src/pcms/interpolator/multidimarray.hpp @@ -0,0 +1,36 @@ +#ifndef MULTIDIMARRAY_HPP +#define MULTIDIMARRAY_HPP + +#include + + +using RealMatView = Kokkos::View; +using IntMatView = Kokkos::View; +using RealVecView = Kokkos::View; +using IntVecView = Kokkos::View; + +using HostIntVecView = Kokkos::View; + +KOKKOS_INLINE_FUNCTION +int calculateIndex(const IntVecView& dimensions, const int* indices) { + + int dim = dimensions.extent(0); + int index = 0; + int multiplier = 1; + for (int i = dim - 1; i >= 0; --i) { + index += indices[i] * multiplier; + multiplier *= dimensions(i); + } + return index; +} + + +int calculateTotalSize(const HostIntVecView& dimensions){ + int dim = dimensions.extent(0); + int size = 1; + for (int i = 0; i < dim; ++i){ + size *= dimensions(i); + } + return size; +} +#endif diff --git a/src/pcms/interpolator/points.hpp b/src/pcms/interpolator/points.hpp new file mode 100644 index 00000000..7bc9d019 --- /dev/null +++ b/src/pcms/interpolator/points.hpp @@ -0,0 +1,33 @@ +#ifndef POINTS_HPP +#define POINTS_HPP + +#include +#include + +struct Coord { + double x, y, z; +}; + +using range_policy = typename Kokkos::RangePolicy<>; +using team_policy = typename Kokkos::TeamPolicy<>; +using member_type = typename Kokkos::TeamPolicy<>::member_type; + +// alias for scratch view +using ScratchSpace = + typename Kokkos::DefaultExecutionSpace::scratch_memory_space; + +using ScratchMatView = + typename Kokkos::View>; +using ScratchVecView = + typename Kokkos::View>; + +using PointsViewType = Kokkos::View; + +using MatViewType = Kokkos::View; +struct Points { + PointsViewType coordinates; +}; + +#endif diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp new file mode 100644 index 00000000..e236f6d6 --- /dev/null +++ b/src/pcms/interpolator/queue_visited.hpp @@ -0,0 +1,121 @@ +#ifndef QUEUE_VISITED_HPP +#define QUEUE_VISITED_HPP + +#include +#include +#include +#include +#include +#include +#define MAX_SIZE_QUEUE 500 +#define MAX_SIZE_TRACK 800 +using namespace std; +using namespace Omega_h; + +class queue { + private: + LO queue_array[MAX_SIZE_QUEUE]; + int first = 0, last = -1, count = 0; + + public: + OMEGA_H_INLINE + queue() {} + + OMEGA_H_INLINE + ~queue() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + void pop_front(); + + OMEGA_H_INLINE + int front(); + + OMEGA_H_INLINE + bool isEmpty() const; + + OMEGA_H_INLINE + bool isFull() const; +}; + +class track { + private: + LO tracking_array[MAX_SIZE_TRACK]; + int first = 0, last = -1, count = 0; + + public: + OMEGA_H_INLINE + track() {} + + OMEGA_H_INLINE + ~track() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + int size(); + + OMEGA_H_INLINE + bool notVisited(const int& item); +}; + +OMEGA_H_INLINE +void queue::push_back(const int& item) { + if (count == MAX_SIZE_QUEUE) { + printf("queue is full %d\n", count); + return; + } + last = (last + 1) % MAX_SIZE_QUEUE; + queue_array[last] = item; + count++; +} + +OMEGA_H_INLINE +void queue::pop_front() { + if (count == 0) { + printf("queue is empty\n"); + return; + } + first = (first + 1) % MAX_SIZE_QUEUE; + count--; +} + +OMEGA_H_INLINE +int queue::front() { return queue_array[first]; } + +OMEGA_H_INLINE +bool queue::isEmpty() const { return count == 0; } + +OMEGA_H_INLINE +bool queue::isFull() const { return count == MAX_SIZE_QUEUE; } + +OMEGA_H_INLINE +void track::push_back(const int& item) { + if (count == MAX_SIZE_TRACK) { + printf("track is full %d\n", count); + return; + } + last = (last + 1) % MAX_SIZE_TRACK; + tracking_array[last] = item; + count++; +} + +OMEGA_H_INLINE +bool track::notVisited(const int& item) { + int id; + for (int i = 0; i < count; ++i) { + id = (first + i) % MAX_SIZE_TRACK; + if (tracking_array[id] == item) { + return false; + } + } + return true; +} + +OMEGA_H_INLINE +int track::size() { return count; } + +#endif diff --git a/src/pcms/interpolator/test/CMakeLists.txt b/src/pcms/interpolator/test/CMakeLists.txt new file mode 100644 index 00000000..3233bb6c --- /dev/null +++ b/src/pcms/interpolator/test/CMakeLists.txt @@ -0,0 +1,8 @@ +project(mfem-cmake-project C CXX) +cmake_minimum_required(VERSION 3.20) + +find_package(Omega_h 10 REQUIRED) + +find_package(pcms REQUIRED) +add_executable(test test_polybasis.cpp) +target_link_libraries(test PUBLIC Omega_h::omega_h pcms::pcms) diff --git a/src/pcms/interpolator/test/config.sh b/src/pcms/interpolator/test/config.sh new file mode 100644 index 00000000..dbd9c4c0 --- /dev/null +++ b/src/pcms/interpolator/test/config.sh @@ -0,0 +1,17 @@ +export NVCC_WRAPPER_DEFAULT_COMPILER=`which mpicxx` +export DEVICE_ARCH=ADA89 + +DEPENDENCY_DIR="${DEPENDENCY_DIR:-/lore/mersoj2/laces-software/build}" +DEVICE_ARCH="${DEVICE_ARCH:-ADA89}" + +cmake -S . -B build \ + -DCMAKE_CXX_COMPILER=`which mpicxx` \ + -DCMAKE_C_COMPILER=`which mpicc` \ + -DOmega_h_USE_Kokkos=ON \ + -DOmega_h_USE_CUDA=ON \ + -DOmega_h_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/omega_h/install/lib/cmake/ \ + -DKokkos_ROOT=$DEPENDENCY_DIR/${DEVICE_ARCH}/kokkos/install/ \ + -Dpcms_ROOT=$DEPENDENCY_DIR/${DEVICE_ARCH}/pcms/install/ \ + -Dperfstubs_DIR=$DEPENDENCY_DIR/perfstubs/install/lib64/cmake/ \ + -DADIOS2_ROOT=$DEPENDENCY_DIR/adios2/install/ \ + -DCMAKE_BUILD_TYPE=Debug diff --git a/src/pcms/interpolator/test/test_polybasis.cpp b/src/pcms/interpolator/test/test_polybasis.cpp new file mode 100644 index 00000000..131b4663 --- /dev/null +++ b/src/pcms/interpolator/test/test_polybasis.cpp @@ -0,0 +1,27 @@ +#include "pcms/interpolator/MLSInterpolation.hpp" + +int dim = 3; +int degree = 2; +MatViewType d_slice_length("slice array",degree, dim); + +auto host_slice_length = Kokkos::create_mirror_view(d_slice_length); + +Kokkos::deep_copy(host_slice_length, 0) + +Kokkos::deep_copy(d_slice_length, host_slice_length); + + +Kokkos::parallel_for(1, KOKKOS_LAMBDA (int k) { + int m = k; + basisSliceLengths(d_slice_length); +}); + +Kokkos::deep_copy(h_slice_length, d_slice_length); + +for (int i = 0; i < host_slice_length.extent(0); ++i){ + for (int j = 0; j < host_slice_length.extent(1); ++j){ + std::cout << host_slice_length(i, j) << " "; + } + + std::cout << "\n"; +} From b90cbaf4131408a01625128c9144c67f4d5a26a7 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 10 Oct 2024 17:04:18 -0400 Subject: [PATCH 11/66] modified to incorporate interpolator --- src/CMakeLists.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9880e908..a9d9bfd1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -117,7 +117,9 @@ if(PCMS_ENABLE_Fortran) target_link_libraries(pcms_pcms INTERFACE pcms::fortranapi) endif() -add_subdirectory(interpolator) + +add_subdirectory(pcms/interpolator) + install( TARGETS pcms_pcms EXPORT pcms-targets From 5cd58697deafbbf6be8d2e2775c7fcc4326deafb Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 10 Oct 2024 17:05:25 -0400 Subject: [PATCH 12/66] updated the file --- src/pcms/interpolator/CMakeLists.txt | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index a8827030..6c524e42 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -15,21 +15,20 @@ set(HEADER_FILES install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) +add_library(pcms_interpolator INTERFACE) -add_library(interpolator INTERFACE) +target_link_libraries(pcms_interpolator INTERFACE pcms::core) -target_include_directories(interpolator INTERFACE +target_include_directories(pcms_interpolator INTERFACE $ - $ - $) + $ + $) -target_link_libraries(interpolator INTERFACE pcms::core) -install(TARGETS interpolator +install(TARGETS pcms_interpolator EXPORT interpolatorTargets INCLUDES DESTINATION include/pcms/interpolator) -add_subdirectory(test) install(EXPORT interpolatorTargets FILE interpolatorTargets.cmake From 92d018321d58013b2455b541a3e44545cf891b66 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 10 Oct 2024 17:48:19 -0400 Subject: [PATCH 13/66] deleted unneceesary files --- src/interpolator/CMakeLists.txt | 38 --- src/interpolator/MLSCoefficients.hpp | 287 ----------------- src/interpolator/MLSInterpolation.hpp | 213 ------------- src/interpolator/adj_search.hpp | 218 ------------- src/interpolator/adj_search_dega2.hpp | 294 ------------------ src/interpolator/linear_interpolant.hpp | 183 ----------- src/interpolator/multidimarray.hpp | 36 --- src/interpolator/points.hpp | 33 -- src/interpolator/queue_visited.hpp | 121 ------- src/interpolator/test/CMakeLists.txt | 18 -- src/interpolator/test/test_polybasis.cpp | 21 -- src/pcms/interpolator/test/CMakeLists.txt | 8 - src/pcms/interpolator/test/config.sh | 17 - src/pcms/interpolator/test/test_polybasis.cpp | 27 -- 14 files changed, 1514 deletions(-) delete mode 100644 src/interpolator/CMakeLists.txt delete mode 100644 src/interpolator/MLSCoefficients.hpp delete mode 100644 src/interpolator/MLSInterpolation.hpp delete mode 100644 src/interpolator/adj_search.hpp delete mode 100644 src/interpolator/adj_search_dega2.hpp delete mode 100644 src/interpolator/linear_interpolant.hpp delete mode 100644 src/interpolator/multidimarray.hpp delete mode 100644 src/interpolator/points.hpp delete mode 100644 src/interpolator/queue_visited.hpp delete mode 100644 src/interpolator/test/CMakeLists.txt delete mode 100644 src/interpolator/test/test_polybasis.cpp delete mode 100644 src/pcms/interpolator/test/CMakeLists.txt delete mode 100644 src/pcms/interpolator/test/config.sh delete mode 100644 src/pcms/interpolator/test/test_polybasis.cpp diff --git a/src/interpolator/CMakeLists.txt b/src/interpolator/CMakeLists.txt deleted file mode 100644 index ee898661..00000000 --- a/src/interpolator/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -cmake_minimum_required(VERSION 3.20) - -project(Interpolator) - -set(HEADER_FILES - adj_search_dega2.hpp - MLSInterpolation.hpp - points.hpp - adj_search.hpp - MLSCoefficients.hpp - queue_visited.hpp - linear_interpolant.hpp - multidimarray.hpp -) - -install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) - - -add_library(interpolator INTERFACE) - -target_include_directories(interpolator INTERFACE - $ - $ - $) - -target_link_libraries(interpolator INTERFACE pcms::core) - -install(TARGETS interpolator - EXPORT interpolatorTargets - INCLUDES DESTINATION include/pcms/interpolator) - - - -install(EXPORT interpolatorTargets - FILE interpolatorTargets.cmake - NAMESPACE pcms:: - DESTINATION lib/cmake/pcms) - diff --git a/src/interpolator/MLSCoefficients.hpp b/src/interpolator/MLSCoefficients.hpp deleted file mode 100644 index 538a9083..00000000 --- a/src/interpolator/MLSCoefficients.hpp +++ /dev/null @@ -1,287 +0,0 @@ -#ifndef MLS_COEFFICIENTS_HPP -#define MLS_COEFFICIENTS_HPP - -#include - -#include "points.hpp" - -#define PI_M 3.14159265358979323846 - -static constexpr MAX_DIM = 3; - - -KOKKOS_INLINE_FUNCTION -double func(Coord& p) { - auto x = (p.x - 0.5) * PI_M * 2; - auto y = (p.y - 0.5) * PI_M * 2; - double Z = sin(x) * sin(y); - return Z; -} - -// finds the slice lengths of the of the polynomial basis - -KOKKOS_INLINE_FUNCTION -void basisSliceLengths(MatViewType& array){ - dim = array.extent(0); - degree = array.extent(1); - for (int i = 0; i < dim; ++i){ - array(0,i) = 1; - } - - for (int j = 0; j < degree; ++j){ - array(j, 0) = 1; - } - - for (int i = 1; i < degree; ++i){ - for (int j = 1; j < dim; ++j){ - array(i, j) = array(i-1,j) + array(i,j-1); - } - } - -} - -// finds the size of the polynomial basis vector -KOKKOS_INLINE_FUNCTION -int basisSize(const MatViewType& array){ - int sum = 1; - for (int i = 1; i < degree; ++i){ - for (int j = 1; j < dim; ++j){ - sum += array(i, j); - } - } -} - -// evaluates the polynomial basis - -KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, Coord &p){ - - basis_monomial(0) = 1; - dim = slice_length.extent(0); - degree = slice_length.extent(1); - - int prev_col = 0; - int curr_col = 1; - - double point[MAX_DIM]; - point[0] = p.x; - point[1] = p.y; - - if (dim == 3){ - point[2] = p.z; - } - - for (int i = 0; i < degree; ++i){ - int offset = curr_col; - for (int j = 0; j < dim; ++j){ - for (k = 0; k < slice_length(i,j); ++k){ - basis_monomial(offset + i) = basis_polynomial(prev_col +k) * point[j]; - loc_offset += slice_length(i,j); - - } - - prev_col = curr_col; - curr_col = offset; - } - } -} -// polynomial basis vector -//KOKKOS_INLINE_FUNCTION -//void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { -// basis_monomial(0) = 1.0; -// basis_monomial(1) = p1.x; -// basis_monomial(2) = p1.y; -// basis_monomial(3) = p1.x * p1.x; -// basis_monomial(4) = p1.x * p1.y; -// basis_monomial(5) = p1.y * p1.y; -//} -// -// radial basis function -KOKKOS_INLINE_FUNCTION -double rbf(double r_sq, double rho_sq) { - double phi; - double r = sqrt(r_sq); - double rho = sqrt(rho_sq); - double ratio = r / rho; - double limit = 1 - ratio; - if (limit < 0) { - phi = 0; - - } else { - phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + - 82 * pow(ratio, 2) + 36 * ratio + 6; - phi = phi * pow(limit, 6); - } - - return phi; -} - -// create vandermondeMatrix -KOKKOS_INLINE_FUNCTION -void VandermondeMatrix(ScratchMatView V, ScratchMatView local_source_points, - int j) { - int N = local_source_points.extent(0); - int dim = local_source_points.extent(1); - - Coord source_point; - source_point.x = local_source_points(j, 0); - source_point.y = local_source_points(j, 1); - if (dim == 3){ - source_point.z = local_source_points(j, 2); - } - ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); - BasisPoly(basis_monomial, source_point); -} - -// moment matrix -KOKKOS_INLINE_FUNCTION -void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, - int j) { - int M = V.extent(0); - int N = V.extent(1); - - ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); - for (int k = 0; k < N; k++) { - pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); - } -} - -// radial basis function vector -KOKKOS_INLINE_FUNCTION -void PhiVector(ScratchVecView Phi, Coord target_point, - ScratchMatView local_source_points, int j, - double cuttoff_dis_sq) { - int N = local_source_points.extent(0); - double dx = target_point.x - local_source_points(j, 0); - double dy = target_point.y - local_source_points(j, 1); - double ds_sq = dx * dx + dy * dy; - Phi(j) = rbf(ds_sq, cuttoff_dis_sq); -} - -// matrix matrix multiplication -KOKKOS_INLINE_FUNCTION -void MatMatMul(member_type team, ScratchMatView moment_matrix, - ScratchMatView pt_phi, ScratchMatView vandermonde) { - int M = pt_phi.extent(0); - int N = vandermonde.extent(1); - int K = pt_phi.extent(1); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { - double sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); - moment_matrix(i, j) = sum; - }); - }); -} - -// Matrix vector multiplication -KOKKOS_INLINE_FUNCTION -void MatVecMul(member_type team, ScratchVecView vector, ScratchMatView matrix, - ScratchVecView result) { - int M = matrix.extent(0); - int N = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - double sum = 0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, - sum); - result(i) = sum; - }); - // team.team_barrier(); -} - -// dot product -KOKKOS_INLINE_FUNCTION -void dot_product(member_type team, ScratchVecView result_sub, - ScratchVecView exactSupportValues_sub, double& target_value) { - int N = result_sub.extent(0); - for (int j = 0; j < N; ++j) { - target_value += result_sub(j) * exactSupportValues_sub[j]; - } -} - -// moment matrix -KOKKOS_INLINE_FUNCTION -void PtphiPMatrix(ScratchMatView moment_matrix, member_type team, - ScratchMatView pt_phi, ScratchMatView vandermonde) { - int M = pt_phi.extent(0); - int N = vandermonde.extent(1); - int K = pt_phi.extent(1); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { - double sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); - moment_matrix(i, j) = sum; - }); - }); -} - -// inverse matrix -KOKKOS_INLINE_FUNCTION -void inverse_matrix(member_type team, ScratchMatView matrix, - ScratchMatView lower, ScratchMatView forward_matrix, - ScratchMatView solution) { - int N = matrix.extent(0); - - for (int j = 0; j < N; ++j) { - Kokkos::single(Kokkos::PerTeam(team), [=]() { - double sum = 0; - for (int k = 0; k < j; ++k) { - sum += lower(j, k) * lower(j, k); - } - lower(j, j) = sqrt(matrix(j, j) - sum); - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) { - double inner_sum = 0; - for (int k = 0; k < j; ++k) { - inner_sum += lower(i, k) * lower(j, k); - } - lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); - lower(j, i) = lower(i, j); - }); - - team.team_barrier(); - } - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - forward_matrix(i, i) = 1.0 / lower(i, i); - for (int j = i + 1; j < N; ++j) { - forward_matrix(j, i) = 0.0; // Initialize to zero - for (int k = 0; k < j; ++k) { - forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); - } - forward_matrix(j, i) /= lower(j, j); - } - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); - for (int j = N - 2; j >= 0; --j) { - solution(j, i) = forward_matrix(j, i); - for (int k = j + 1; k < N; ++k) { - solution(j, i) -= lower(j, k) * solution(k, i); - } - solution(j, i) /= lower(j, j); - } - }); -} - -#endif diff --git a/src/interpolator/MLSInterpolation.hpp b/src/interpolator/MLSInterpolation.hpp deleted file mode 100644 index bf096022..00000000 --- a/src/interpolator/MLSInterpolation.hpp +++ /dev/null @@ -1,213 +0,0 @@ -#ifndef MLS_INTERPOLATION_HPP -#define MLS_INTERPOLATION_HPP - -#include "MLSCoefficients.hpp" -#include "adj_search_dega2.hpp" -#include "adj_search.hpp" - -using namespace Omega_h; -using namespace pcms; - -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Write radii2) { - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; - - // Kokkos::RangePolicy range_policy(1, - // nvertices_target); - - static_assert(degree > 0," the degree of polynomial basis should be atleast 1"); - - Kokkos::View shmem_each_team( - "stores the size required for each team", nvertices_target); - - MatViewType slice_length("stores slice length of polynomial basis", degree, dim); - - Kokkos::deep_copy(slice_length, 0.0); - - basisSliceLengths(slice_length); - - auto basis_size = basisSize(slice_length); - - Kokkos::parallel_for( - "calculate the size required for scratch for each team", nvertices_target, - KOKKOS_LAMBDA(const int i) { - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size) * 4; - total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports) * 2; - total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); - total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - shmem_each_team(i) = total_shared_size; - }); - - size_t shared_size; - Kokkos::parallel_reduce( - "find_max", nvertices_target, - KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { - if (shmem_each_team(i) > max_val_temp) { - max_val_temp = shmem_each_team(i); - } - }, - Kokkos::Max(shared_size)); - printf("shared size = %d \n", shared_size); - - Write approx_target_values(nvertices_target, 0, - "approximated target values"); - - team_policy tp(nvertices_target, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - printf("scratch size is %d \n", scratch_size); - - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - - int nsupports = end_ptr - start_ptr; - - ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3){ - local_source_points(count, 2) = - } - } - - ScratchMatView lower(team.team_scratch(0), basis_size, basis_size); - - ScratchMatView forward_matrix(team.team_scratch(0), basis_size, basis_size); - - ScratchMatView moment_matrix(team.team_scratch(0), basis_size, basis_size); - - ScratchMatView inv_mat(team.team_scratch(0), basis_size, basis_size); - - ScratchMatView V(team.team_scratch(0), nsupports, basis_size); - - ScratchMatView Ptphi(team.team_scratch(0), basis_size, nsupports); - - ScratchMatView resultant_matrix(team.team_scratch(0), basis_size, nsupports); - - ScratchVecView targetMonomialVec(team.team_scratch(0), basis_size); - - ScratchVecView SupportValues(team.team_scratch(0), nsupports); - - ScratchVecView result(team.team_scratch(0), nsupports); - - ScratchVecView Phi(team.team_scratch(0), nsupports); - - - Kokkos::deep_copy(lower, 0.0); - Kokkos::deep_copy(forward_matrix, 0.0); - Kokkos::deep_copy(moment_matrix, 0.0); - Kokkos::deep_copy(inv_matrix, 0.0); - Kokkos::deep_copy(V, 0.0); - Kokkos::deep_copy(Ptphi, 0.0); - Kokkos::deep_copy(resultant_matrix, 0.0); - Kokkos::deep_copy(targetMonomialVec, 0.0); - Kokkos::deep_copy(SupportValues, 0.0); - Kokkos::deep_copy(result, 0.0); - Kokkos::deep_copy(Phi, 0.0); - -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { -// for (int k = 0; k < 6; ++k) { -// lower(j, k) = 0; -// forward_matrix(j, k) = 0; -// moment_matrix(j, k) = 0; -// inv_mat(j, k) = 0; -// } -// -// targetMonomialVec(j) = 0; -// for (int k = 0; k < nsupports; ++k) { -// resultant_matrix(j, k) = 0; -// -// Ptphi(j, k) = 0; -// } -// }); -// -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), -// [=](int j) { -// for (int k = 0; k < 6; ++k) { -// V(j, k) = 0; -// } -// -// SupportValues(j) = 0; -// result(j) = 0; -// Phi(j) = 0; -// }); -// - Coord target_point; - - target_point.x = target_coordinates[i * dim]; - - target_point.y = target_coordinates[i * dim + 1]; - - if (dim == 3){ - target_point.z = target_coordinates[i * dim + 2]; - } - BasisPoly(targetMonomialVec, target_point); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { VandermondeMatrix(V, local_source_points, j); }); - - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - PhiVector(Phi, target_point, local_source_points, j, radii2[i]); - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); - - team.team_barrier(); - - MatMatMul(team, moment_matrix, Ptphi, V); - team.team_barrier(); - - inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); - - team.team_barrier(); - - MatMatMul(team, resultant_matrix, inv_mat, Ptphi); - team.team_barrier(); - - MatVecMul(team, targetMonomialVec, resultant_matrix, result); - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - SupportValues(i) = - source_values[support.supports_idx[start_ptr + i]]; - }); - - double tgt_value = 0; - dot_product(team, result, SupportValues, tgt_value); - if (team.team_rank() == 0) { - approx_target_values[i] = tgt_value; - } - }); - - return approx_target_values; -} - -#endif diff --git a/src/interpolator/adj_search.hpp b/src/interpolator/adj_search.hpp deleted file mode 100644 index fd5fe60c..00000000 --- a/src/interpolator/adj_search.hpp +++ /dev/null @@ -1,218 +0,0 @@ -#ifndef ADJ_SEARCH_HPP -#define ADJ_SEARCH_HPP - -#include - -#include "queue_visited.hpp" - -using namespace Omega_h; - -static constexpr int max_dim = 3; - -// TODO change this into span/mdspan -OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) { - Real dx, dy, dz; - dx = p1[0] - p2[0]; - dy = p1[1] - p2[1]; - if (dim != 3) { - dz = 0.0; - } else { - dz = p1[2] - p2[2]; - } - - return dx * dx + dy * dy + dz * dz; -} - -class FindSupports { - private: - Mesh& source_mesh; - Mesh& target_mesh; - - public: - FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) - : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - - void adjBasedSearch(const Real& cutoffDistance, const Write& supports_ptr, - Write& nSupports, Write& support_idx); -}; - -void FindSupports::adjBasedSearch(const Real& cutoffDistance, - const Write& supports_ptr, - Write& nSupports, - Write& support_idx) { - // Source Mesh Info - - const auto& sourcePoints_coords = source_mesh.coords(); - const auto nvertices_source = source_mesh.nverts(); - const auto dim = source_mesh.dim(); - - // Target Mesh Info - const auto& targetPoints_coords = target_mesh.coords(); - const auto nvertices_target = target_mesh.nverts(); - - // CSR data structure of adjacent vertex information of each source vertex - const auto& vert2vert = source_mesh.ask_star(VERT); - const auto& v2v_ptr = vert2vert.a2ab; - const auto& v2v_data = vert2vert.ab2b; - - // CSR data structure of vertex information of each cell in source mesh - - // CSR data structure of adjacent vertex information of each source vertex - // dim == 2; ask vertices of tri (3 vertices for each tri) & if dim ==3; ask - // vertices of tetrahedron (4 vertices for each tet) - const auto& cells2verts = source_mesh.ask_verts_of(dim); - - Kokkos::View target_points("test_points", nvertices_target); - - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - target_points(i, 0) = targetPoints_coords[i * dim]; - target_points(i, 1) = targetPoints_coords[i * dim + 1]; - }); - Kokkos::fence(); - pcms::GridPointSearch search_cell(source_mesh, 10, 10); - - // get the cell id for each target point - auto results = search_cell(target_points); - - parallel_for( - nvertices_target, - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - - LO source_cell_id = results(id).tri_id; - - const LO num_verts_in_dim = dim + 1; - - LO start_ptr = source_cell_id * num_verts_in_dim; - - LO end_ptr = start_ptr + num_verts_in_dim; - - Real target_coords[max_dim]; - - Real support_coords[max_dim]; - - for (LO k = 0; k < dim; ++k) { - target_coords[k] = target_points(id, k); - } - - LO start_counter; - - if (support_idx.size() > 0) { - start_counter = supports_ptr[id]; - } - - int count = 0; - // Initialize queue by pushing the vertices in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; ++i) { - LO vert_id = cells2verts[i]; - visited.push_back(vert_id); - - for (LO k = 0; k < dim; ++k) { - support_coords[k] = sourcePoints_coords[vert_id * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(vert_id); - if (support_idx.size() > 0) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = vert_id; - } - } - } - - while (!queue.isEmpty()) { - LO currentVertex = queue.front(); - queue.pop_front(); - LO start = v2v_ptr[currentVertex]; - LO end = v2v_ptr[currentVertex + 1]; - - for (LO i = start; i < end; ++i) { - auto neighborIndex = v2v_data[i]; - - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function - - if (visited.notVisited(neighborIndex)) { - visited.push_back(neighborIndex); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - sourcePoints_coords[neighborIndex * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighborIndex); - if (support_idx.size() > 0) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = neighborIndex; - } - } - } - } - } // end of while loop - - nSupports[id] = count; - }, - "count the number of supports in each target point"); -} - -struct SupportResults { - Write supports_ptr; - Write supports_idx; -}; - -SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, - Real& cutoffDistance) { - SupportResults support; - - FindSupports search(source_mesh, target_mesh); - - LO nvertices_source = source_mesh.nverts(); - - LO nvertices_target = target_mesh.nverts(); - - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); - - search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, - support.supports_idx); - - Kokkos::fence(); - - support.supports_ptr = - Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); - - LO total_supports = 0; - - Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); - - Kokkos::fence(); - - support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); - - search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, - support.supports_idx); - - return support; -} - -#endif diff --git a/src/interpolator/adj_search_dega2.hpp b/src/interpolator/adj_search_dega2.hpp deleted file mode 100644 index 26f2edda..00000000 --- a/src/interpolator/adj_search_dega2.hpp +++ /dev/null @@ -1,294 +0,0 @@ -#ifndef ADJ_SEARCH_HPP -#define ADJ_SEARCH_HPP - -#include -#include - -#include "queue_visited.hpp" - -using namespace Omega_h; - -static constexpr int max_dim = 3; - -// TODO change this into span/mdspan -OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) { - Real dx, dy, dz; - dx = p1[0] - p2[0]; - dy = p1[1] - p2[1]; - if (dim != 3) { - dz = 0.0; - } else { - dz = p1[2] - p2[2]; - } - - return dx * dx + dy * dy + dz * dz; -} - -class FindSupports { - private: - Mesh& mesh; - - public: - FindSupports(Mesh& mesh_) : mesh(mesh_) {}; - - void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, - bool is_build_csr_call); -}; - -void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, - bool is_build_csr_call) { - // Mesh Info - LO min_num_supports = 20; // TODO: make this an input parameter - const auto& mesh_coords = mesh.coords(); - const auto& nvertices = mesh.nverts(); - const auto& dim = mesh.dim(); - const auto& nfaces = mesh.nfaces(); - - // CSR data structure of adjacent cell information of each vertex in a mesh - const auto& nodes2faces = mesh.ask_up(VERT, FACE); - const auto& n2f_ptr = nodes2faces.a2ab; - const auto& n2f_data = nodes2faces.ab2b; - const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; - - Write cell_centroids( - dim * nfaces, 0, - "stores coordinates of cell centroid of each tri element"); - - parallel_for( - "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); - const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(mesh_coords, current_el_verts); - auto centroid = average(current_el_vert_coords); - int index = dim * id; - cell_centroids[index] = centroid[0]; - cell_centroids[index + 1] = centroid[1]; - }); - // * Got the adj data and cell centroids - - parallel_for( - nvertices, // for each target vertex which is a node for this case - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - const LO num_verts_in_dim = dim + 1; - Real target_coords[max_dim]; - Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; // squared radii of the supports - - //? copying the target vertex coordinates - for (LO k = 0; k < dim; ++k) { - target_coords[k] = mesh_coords[id * dim + k]; - } - - LO start_counter; // start of support idx for the current target vertex - - // not being done in the first call - // where the support_idx start for the current target vertex - if (!is_build_csr_call) { - start_counter = supports_ptr[id]; // start support location for the - // current target vertex - } - LO start_ptr = - n2f_ptr[id]; // start loc of the adj cells of the target node - LO end_ptr = - n2f_ptr[id + 1]; // end loc of the adj cells of the target node - - int count = 0; // number of supports for the current target vertex - // Initialize queue by pushing the cells in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex - LO cell_id = n2f_data[i]; - visited.push_back(cell_id); // cell added to the visited list - - for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the - // centroid of the cell - support_coords[k] = cell_centroids[cell_id * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(cell_id); - if (!is_build_csr_call) { // not being done in the first call - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - cell_id; // add the support cell to the support_idx - } // end of support_idx check - } // end of distance check - } // end of loop over adj cells to the target vertex - - // loops over the queued cells from the neighborhood of the target - // vertex - while (!queue.isEmpty()) { // ? can queue be empty? - LO currentCell = queue.front(); - queue.pop_front(); - LO start = currentCell * - num_verts_in_dim; // start vert id of the current cell - LO end = start + num_verts_in_dim; // end vert id of the current cell - - for (LO i = start; i < end; - ++i) { // loop over the vertices of the current cell - LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = - n2f_ptr[current_vert_id]; // start loc of adj cells to current - // vertex - LO end_ptr_vert_current_vert = - n2f_ptr[current_vert_id + - 1]; // end loc of adj cells to current vertex - for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { // loop over adj cells to the current vertex - auto neighbor_cell_index = n2f_data[j]; // current cell - - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function - - if (visited.notVisited(neighbor_cell_index)) { - visited.push_back(neighbor_cell_index); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - cell_centroids[neighbor_cell_index * dim + k]; - } - - Real dist = - calculateDistance(target_coords, support_coords, dim); - - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighbor_cell_index); - if (!is_build_csr_call) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - neighbor_cell_index; - } // end of support_idx check - } // end of distance check - } // end of not visited check - } // end of loop over adj cells to the current vertex - } // end of loop over nodes - - } // end of while loop - - nSupports[id] = count; - }, // end of lambda - "count the number of supports in each target point"); -} - -struct SupportResults { - Write supports_ptr; - Write supports_idx; - Write radii2; // squared radii of the supports -}; - -SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance) { - LO min_support = 12; - SupportResults support; - - FindSupports search(mesh); - - LO nvertices_target = mesh.nverts(); - - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); - - printf("Inside searchNeighbors 1\n"); - support.radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); - printf("cut off distance: %f\n", cutoffDistance); - // this call gets the number of supports for each target vertex: nSupports and - // the squared radii of the supports (with might be increased in the search to - // have enough supports) - int r_adjust_loop = 0; - while (true) { // until the number of minimum support is met - // find maximum radius - Real max_radius = 0.0; - Kokkos::parallel_reduce("find max radius", - nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { - local_max = (support.radii2[i] > local_max) ? support.radii2[i] : local_max; - }, - Kokkos::Max(max_radius)); - printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, true); - - //printf("Inside searchNeighbors 2\n"); - Kokkos::fence(); - // * find the minimum number of supports - LO min_nSupports = 0; - Kokkos::parallel_reduce( "find min number of supports", - nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { - local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; - }, - Kokkos::Min(min_nSupports)); - - printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, r_adjust_loop, max_radius); - r_adjust_loop++; - - if (min_nSupports >= min_support) { - break; - } - - Kokkos::fence(); - - // * update radius if nSupport is less that min_support - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - if (nSupports[i] < min_support) { - Real factor = Real(min_support) / nSupports[i]; - factor = (nSupports[i] == 0 || factor > 3.0) ? 3.0 : factor; - support.radii2[i] *= factor; - //nSupports[i] = 0; // ? might not be needed - } - }); - - } - - printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); - - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, true); - - // offset array for the supports of each target vertex - support.supports_ptr = - Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); - - LO total_supports = 0; - - // get the total number of supports and fill the offset array - Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? - // OMEGA_H_CHECK(nSupports[j] >= 15); - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); - - printf("Inside searchNeighbors 3\n"); - Kokkos::fence(); - - support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); - - printf("total_supports: %d\n", total_supports); - // second call to get the actual support indices - // now sizes of support.supports_ptr and support.supports_idx are known and > - // 0 - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, false); - - return support; -} - -#endif diff --git a/src/interpolator/linear_interpolant.hpp b/src/interpolator/linear_interpolant.hpp deleted file mode 100644 index 5abde69f..00000000 --- a/src/interpolator/linear_interpolant.hpp +++ /dev/null @@ -1,183 +0,0 @@ - -#ifndef INTERPOLANT_HPP -#define INTERPOLANT_HPP - - -#include -#include "multidimarray.hpp" -#define MAX_DIM 10 - - -KOKKOS_INLINE_FUNCTION -void find_indices(const IntVecView& num_bins, const RealVecView& range, - const RealVecView& point , int* indices){ - int dim = point.extent(0); - // IntVecView indices("parametric coordinate", dim); - for (int i = 0; i < dim; ++i){ - int id = i * 2; - double length = range(id + 1) - range(id); - double dlen = length/num_bins(i); - indices[i] = ((point(i) - range(id)) / dlen); - - } - - -} - - -KOKKOS_INLINE_FUNCTION -void find_limits(const IntVecView& num_bins, const RealVecView& range, const int* indices, double* limits){ - int dim = num_bins.extent(0); - // RealVecView limits("limits", 2*dim); - for (int i = 0; i < dim; ++i){ - int ptr = i * 2; - double dlen = (range(ptr+1) - range(ptr))/num_bins(i); - limits[ptr] = indices[i] * dlen; - limits[ptr+1] = limits[ptr] + dlen; - } -} - - - -KOKKOS_INLINE_FUNCTION -void evaluateParametricCoord(const RealVecView& point, const double* limits , double* parametric_coord){ - int dim = point.extent(0); - // RealVecView parametric_coord("parametric coordinate", dim); - for (int i = 0; i < dim; ++i){ - int index = i * 2; - parametric_coord[i] = (point(i) - limits[index])/(limits[index+1] - limits[index]); - - } -} - - - -KOKKOS_INLINE_FUNCTION -void basis_function(const RealVecView& parametric_coord, double* linear_basis_each_dir){ - int dim = parametric_coord.extent(0); - // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); - for (int i = 0; i < dim; ++i){ - int index = i * 2; - - linear_basis_each_dir[index] = 1 - parametric_coord(i); - - linear_basis_each_dir[index + 1] = parametric_coord(i); - - - } - -} - - -KOKKOS_INLINE_FUNCTION -double linear_interpolant(const IntVecView& dimensions, const double* linear_basis_each_dir, const IntVecView& indices, const RealVecView& values){ - int dim = dimensions.extent(0); - double sum = 0; - int ids[MAX_DIM]; - - int array_size = 1 << dim; - for (int i = 0; i < array_size; ++i){ - double temp = 1.0; - for (int j = 0; j < dim; ++j){ - int index = 2*j; - int id_left = indices(j); - int id_right = id_left + 1; - if (i & (1 << j)){ - temp *= linear_basis_each_dir[index+1]; - ids[j] = id_right; - } else { - temp *= linear_basis_each_dir[index]; - ids[j] = id_left; - } - } - - int idx = calculateIndex(dimensions, ids); - double corner_values = values(idx); - - sum += temp*corner_values; - } - return sum; -} - - - -class RegularGridInterpolator { - private: - const RealMatView parametric_coords; - const RealVecView values; - const IntMatView indices; - const IntVecView dimensions; - - public: - RegularGridInterpolator(const RealMatView& parametric_coords_, - const RealVecView& values_, const IntMatView& indices_, const IntVecView& dimensions_) - : parametric_coords(parametric_coords_), values(values_), indices(indices_), dimensions(dimensions_) {}; - -RealVecView linear_interpolation() { - int dim = dimensions.extent(0); - int N = parametric_coords.extent(0); - RealVecView interpolated_values("approximated values", N); - auto parametric_coords_ = parametric_coords; - auto dimensions_ = dimensions; - auto values_ = values; - auto indices_ = indices; - Kokkos::parallel_for("linear interpolation function",N, KOKKOS_LAMBDA(int j){ - double linear_basis_each_dir[MAX_DIM] = {0.0}; - auto parametric_coord_each_point = Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); - auto index_each_point = Kokkos::subview(indices_, j , Kokkos::ALL()); - basis_function(parametric_coord_each_point, linear_basis_each_dir); - auto approx_value = linear_interpolant(dimensions_, linear_basis_each_dir,index_each_point, values_); - interpolated_values(j) = approx_value; - }); - - return interpolated_values; - } -}; - - -struct Result{ - IntMatView indices_pts; - RealMatView parametric_coords; -}; - -Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, const RealVecView& range){ - - int dim = num_bins.extent(0); - Result result; - int N = points.extent(0); - result.indices_pts = IntMatView("indices",N,dim); - result.parametric_coords = RealMatView("parametric_coordinates",N, dim); - - Kokkos::parallel_for(N, KOKKOS_LAMBDA(int j){ - int indcs[MAX_DIM] = {0}; - double limits[MAX_DIM] = {0.0}; - double parametric_coord_each[MAX_DIM] = {0.0}; - auto point_coord = Kokkos::subview(points, j , Kokkos::ALL()); - find_indices(num_bins, range, point_coord, indcs); - find_limits(num_bins, range, indcs, limits); - evaluateParametricCoord( point_coord, limits, parametric_coord_each); - for (int i = 0; i < dim; ++i){ - result.indices_pts(j,i) = indcs[i]; - result.parametric_coords(j,i) = parametric_coord_each[i]; - } - - }); - - return result; - -} - -KOKKOS_INLINE_FUNCTION -double test_function(double* coord){ - double fun_value = 0; - int dim = 5; - for (int i = 0; i < dim; ++i){ - fun_value += coord[i]; - } - return fun_value; - } - -#endif - - - diff --git a/src/interpolator/multidimarray.hpp b/src/interpolator/multidimarray.hpp deleted file mode 100644 index 04b6fac1..00000000 --- a/src/interpolator/multidimarray.hpp +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef MULTIDIMARRAY_HPP -#define MULTIDIMARRAY_HPP - -#include - - -using RealMatView = Kokkos::View; -using IntMatView = Kokkos::View; -using RealVecView = Kokkos::View; -using IntVecView = Kokkos::View; - -using HostIntVecView = Kokkos::View; - -KOKKOS_INLINE_FUNCTION -int calculateIndex(const IntVecView& dimensions, const int* indices) { - - int dim = dimensions.extent(0); - int index = 0; - int multiplier = 1; - for (int i = dim - 1; i >= 0; --i) { - index += indices[i] * multiplier; - multiplier *= dimensions(i); - } - return index; -} - - -int calculateTotalSize(const HostIntVecView& dimensions){ - int dim = dimensions.extent(0); - int size = 1; - for (int i = 0; i < dim; ++i){ - size *= dimensions(i); - } - return size; -} -#endif diff --git a/src/interpolator/points.hpp b/src/interpolator/points.hpp deleted file mode 100644 index c19f7cb7..00000000 --- a/src/interpolator/points.hpp +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef POINTS_HPP -#define POINTS_HPP - -#include -#include - -struct Coord { - double x, y, z; -}; - -using range_policy = typename Kokkos::RangePolicy<>; -using team_policy = typename Kokkos::TeamPolicy<>; -using member_type = typename Kokkos::TeamPolicy<>::member_type; - -// alias for scratch view -using ScratchSpace = - typename Kokkos::DefaultExecutionSpace::scratch_memory_space; - -using ScratchMatView = - typename Kokkos::View>; -using ScratchVecView = - typename Kokkos::View>; - -using PointsViewType = Kokkos::View; - -using MatViewType = Kokkos::View -struct Points { - PointsViewType coordinates; -}; - -#endif diff --git a/src/interpolator/queue_visited.hpp b/src/interpolator/queue_visited.hpp deleted file mode 100644 index e236f6d6..00000000 --- a/src/interpolator/queue_visited.hpp +++ /dev/null @@ -1,121 +0,0 @@ -#ifndef QUEUE_VISITED_HPP -#define QUEUE_VISITED_HPP - -#include -#include -#include -#include -#include -#include -#define MAX_SIZE_QUEUE 500 -#define MAX_SIZE_TRACK 800 -using namespace std; -using namespace Omega_h; - -class queue { - private: - LO queue_array[MAX_SIZE_QUEUE]; - int first = 0, last = -1, count = 0; - - public: - OMEGA_H_INLINE - queue() {} - - OMEGA_H_INLINE - ~queue() {} - - OMEGA_H_INLINE - void push_back(const int& item); - - OMEGA_H_INLINE - void pop_front(); - - OMEGA_H_INLINE - int front(); - - OMEGA_H_INLINE - bool isEmpty() const; - - OMEGA_H_INLINE - bool isFull() const; -}; - -class track { - private: - LO tracking_array[MAX_SIZE_TRACK]; - int first = 0, last = -1, count = 0; - - public: - OMEGA_H_INLINE - track() {} - - OMEGA_H_INLINE - ~track() {} - - OMEGA_H_INLINE - void push_back(const int& item); - - OMEGA_H_INLINE - int size(); - - OMEGA_H_INLINE - bool notVisited(const int& item); -}; - -OMEGA_H_INLINE -void queue::push_back(const int& item) { - if (count == MAX_SIZE_QUEUE) { - printf("queue is full %d\n", count); - return; - } - last = (last + 1) % MAX_SIZE_QUEUE; - queue_array[last] = item; - count++; -} - -OMEGA_H_INLINE -void queue::pop_front() { - if (count == 0) { - printf("queue is empty\n"); - return; - } - first = (first + 1) % MAX_SIZE_QUEUE; - count--; -} - -OMEGA_H_INLINE -int queue::front() { return queue_array[first]; } - -OMEGA_H_INLINE -bool queue::isEmpty() const { return count == 0; } - -OMEGA_H_INLINE -bool queue::isFull() const { return count == MAX_SIZE_QUEUE; } - -OMEGA_H_INLINE -void track::push_back(const int& item) { - if (count == MAX_SIZE_TRACK) { - printf("track is full %d\n", count); - return; - } - last = (last + 1) % MAX_SIZE_TRACK; - tracking_array[last] = item; - count++; -} - -OMEGA_H_INLINE -bool track::notVisited(const int& item) { - int id; - for (int i = 0; i < count; ++i) { - id = (first + i) % MAX_SIZE_TRACK; - if (tracking_array[id] == item) { - return false; - } - } - return true; -} - -OMEGA_H_INLINE -int track::size() { return count; } - -#endif diff --git a/src/interpolator/test/CMakeLists.txt b/src/interpolator/test/CMakeLists.txt deleted file mode 100644 index 27b30c72..00000000 --- a/src/interpolator/test/CMakeLists.txt +++ /dev/null @@ -1,18 +0,0 @@ -cmake_minimum_required(VERSION 3.20) - -project(test_1 LANGUAGES CXX) - -find_program(BASH_EXECUTABLE NAMES bash REQUIRED) - -add_executable(test_polybasis test_polybasis.cpp) - -target_link_libraries(test_polybasis PUBLIC pcms::core) - -target_include_directories(test_polybasis PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) - -enable_testing() - -add_test( - NAME test_polybasis - COMMAND $ - ) diff --git a/src/interpolator/test/test_polybasis.cpp b/src/interpolator/test/test_polybasis.cpp deleted file mode 100644 index c016559c..00000000 --- a/src/interpolator/test/test_polybasis.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "MLSInterpolation.hpp" - -int dim = 3; -int degree = 2; -MatViewType d_slice_length("slice array",degree, dim); - -auto host_slice_length = Kokkos::create_mirror_view(d_slice_length); - -Kokkos::deep_copy(d_slice_length, host_slice_length); - -basisSLiceLengths(host_slice_length); - -for (i = 0; i < host_slice_length.extent(0); ++i){ - for (j = 0; j < host_slice_length.extent(1); ++j){ - std::cout << host_slice_length(i, j) << " "; - } - - std::cout << "\n"; -} - - diff --git a/src/pcms/interpolator/test/CMakeLists.txt b/src/pcms/interpolator/test/CMakeLists.txt deleted file mode 100644 index 3233bb6c..00000000 --- a/src/pcms/interpolator/test/CMakeLists.txt +++ /dev/null @@ -1,8 +0,0 @@ -project(mfem-cmake-project C CXX) -cmake_minimum_required(VERSION 3.20) - -find_package(Omega_h 10 REQUIRED) - -find_package(pcms REQUIRED) -add_executable(test test_polybasis.cpp) -target_link_libraries(test PUBLIC Omega_h::omega_h pcms::pcms) diff --git a/src/pcms/interpolator/test/config.sh b/src/pcms/interpolator/test/config.sh deleted file mode 100644 index dbd9c4c0..00000000 --- a/src/pcms/interpolator/test/config.sh +++ /dev/null @@ -1,17 +0,0 @@ -export NVCC_WRAPPER_DEFAULT_COMPILER=`which mpicxx` -export DEVICE_ARCH=ADA89 - -DEPENDENCY_DIR="${DEPENDENCY_DIR:-/lore/mersoj2/laces-software/build}" -DEVICE_ARCH="${DEVICE_ARCH:-ADA89}" - -cmake -S . -B build \ - -DCMAKE_CXX_COMPILER=`which mpicxx` \ - -DCMAKE_C_COMPILER=`which mpicc` \ - -DOmega_h_USE_Kokkos=ON \ - -DOmega_h_USE_CUDA=ON \ - -DOmega_h_DIR=$DEPENDENCY_DIR/${DEVICE_ARCH}/omega_h/install/lib/cmake/ \ - -DKokkos_ROOT=$DEPENDENCY_DIR/${DEVICE_ARCH}/kokkos/install/ \ - -Dpcms_ROOT=$DEPENDENCY_DIR/${DEVICE_ARCH}/pcms/install/ \ - -Dperfstubs_DIR=$DEPENDENCY_DIR/perfstubs/install/lib64/cmake/ \ - -DADIOS2_ROOT=$DEPENDENCY_DIR/adios2/install/ \ - -DCMAKE_BUILD_TYPE=Debug diff --git a/src/pcms/interpolator/test/test_polybasis.cpp b/src/pcms/interpolator/test/test_polybasis.cpp deleted file mode 100644 index 131b4663..00000000 --- a/src/pcms/interpolator/test/test_polybasis.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "pcms/interpolator/MLSInterpolation.hpp" - -int dim = 3; -int degree = 2; -MatViewType d_slice_length("slice array",degree, dim); - -auto host_slice_length = Kokkos::create_mirror_view(d_slice_length); - -Kokkos::deep_copy(host_slice_length, 0) - -Kokkos::deep_copy(d_slice_length, host_slice_length); - - -Kokkos::parallel_for(1, KOKKOS_LAMBDA (int k) { - int m = k; - basisSliceLengths(d_slice_length); -}); - -Kokkos::deep_copy(h_slice_length, d_slice_length); - -for (int i = 0; i < host_slice_length.extent(0); ++i){ - for (int j = 0; j < host_slice_length.extent(1); ++j){ - std::cout << host_slice_length(i, j) << " "; - } - - std::cout << "\n"; -} From 5b7272730c499f275e16842dbefea2a407366895 Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Mon, 21 Oct 2024 16:05:43 -0400 Subject: [PATCH 14/66] variable radius for degas2 --- src/interpolator/MLSInterpolation.hpp | 11 +++ src/interpolator/adj_search_dega2.hpp | 118 ++++++++++++++++---------- 2 files changed, 84 insertions(+), 45 deletions(-) diff --git a/src/interpolator/MLSInterpolation.hpp b/src/interpolator/MLSInterpolation.hpp index cb6a0eb5..7cf8589a 100644 --- a/src/interpolator/MLSInterpolation.hpp +++ b/src/interpolator/MLSInterpolation.hpp @@ -148,6 +148,17 @@ Write mls_interpolation(const Reals source_values, PhiVector(Phi, target_point, local_source_points, j, radii2[i]); }); + // sum phi + double sum_phi = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, nsupports), + [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); + + // normalize phi with sum_phi + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { Phi(j) = Phi(j) / sum_phi; }); + team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), diff --git a/src/interpolator/adj_search_dega2.hpp b/src/interpolator/adj_search_dega2.hpp index 26f2edda..df04f79c 100644 --- a/src/interpolator/adj_search_dega2.hpp +++ b/src/interpolator/adj_search_dega2.hpp @@ -41,7 +41,6 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, bool is_build_csr_call) { // Mesh Info - LO min_num_supports = 20; // TODO: make this an input parameter const auto& mesh_coords = mesh.coords(); const auto& nvertices = mesh.nverts(); const auto& dim = mesh.dim(); @@ -177,6 +176,30 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, nSupports[id] = count; }, // end of lambda "count the number of supports in each target point"); + + if (is_build_csr_call == false) { + // print supports for target 22 + parallel_for( + nvertices, // for each target vertex which is a node for this + // calculateDistance + OMEGA_H_LAMBDA(const LO id) { + if (id == 22) { + LO start_ptr = + supports_ptr[id]; // start support location for the current target vertex + LO end_ptr = + supports_ptr[id + 1]; // end support location for the current target vertex + printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, + end_ptr - start_ptr, nSupports[id]); + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = support_idx[i]; + printf(", %d", cell_id); + } + printf("\n"); + } + } // end of lambda + ); + } } struct SupportResults { @@ -197,7 +220,7 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance) { "number of supports in each target vertex"); printf("Inside searchNeighbors 1\n"); - support.radii2 = Write(nvertices_target, cutoffDistance, + Write radii2 = Write(nvertices_target, cutoffDistance, "squared radii of the supports"); printf("cut off distance: %f\n", cutoffDistance); // this call gets the number of supports for each target vertex: nSupports and @@ -206,88 +229,93 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance) { int r_adjust_loop = 0; while (true) { // until the number of minimum support is met // find maximum radius - Real max_radius = 0.0; - Kokkos::parallel_reduce("find max radius", - nvertices_target, + Real max_radius = 0.0; + Kokkos::parallel_reduce( + "find max radius", nvertices_target, OMEGA_H_LAMBDA(const LO i, Real& local_max) { - local_max = (support.radii2[i] > local_max) ? support.radii2[i] : local_max; + local_max = + (radii2[i] > local_max) ? radii2[i] : local_max; }, Kokkos::Max(max_radius)); - printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, true); + printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - //printf("Inside searchNeighbors 2\n"); - Kokkos::fence(); - // * find the minimum number of supports - LO min_nSupports = 0; - Kokkos::parallel_reduce( "find min number of supports", - nvertices_target, + nSupports = Write(nvertices_target, 0, "number of supports in each target vertex"); + SupportResults support; // create support every time to avoid complexity + + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + radii2, true); + + // printf("Inside searchNeighbors 2\n"); + Kokkos::fence(); + //* find the minimum number of supports + LO min_nSupports = 0; + Kokkos::parallel_reduce( + "find min number of supports", nvertices_target, OMEGA_H_LAMBDA(const LO i, LO& local_min) { local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; }, Kokkos::Min(min_nSupports)); - printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, r_adjust_loop, max_radius); - r_adjust_loop++; + printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, + r_adjust_loop, max_radius); + r_adjust_loop++; - if (min_nSupports >= min_support) { - break; - } + if (min_nSupports >= min_support) { + break; + } - Kokkos::fence(); + Kokkos::fence(); - // * update radius if nSupport is less that min_support - parallel_for( + // * update radius if nSupport is less that min_support + parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO i) { if (nSupports[i] < min_support) { - Real factor = Real(min_support) / nSupports[i]; - factor = (nSupports[i] == 0 || factor > 3.0) ? 3.0 : factor; - support.radii2[i] *= factor; - //nSupports[i] = 0; // ? might not be needed + Real factor = Real(min_support) / Real(nSupports[i]); + factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; + radii2[i] *= factor; } + nSupports[i] = 0; // ? might not be needed }); - } printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, true); + //search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + // radii2, true); // offset array for the supports of each target vertex - support.supports_ptr = - Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); + support.supports_ptr = Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); LO total_supports = 0; // get the total number of supports and fill the offset array Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? - // OMEGA_H_CHECK(nSupports[j] >= 15); - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? + // OMEGA_H_CHECK(nSupports[j] >= 15); + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); printf("Inside searchNeighbors 3\n"); Kokkos::fence(); support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); + total_supports, 0, "index of source supports of each target node"); printf("total_supports: %d\n", total_supports); // second call to get the actual support indices // now sizes of support.supports_ptr and support.supports_idx are known and > // 0 search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - support.radii2, false); + radii2, false); + support.radii2 = radii2; + mesh.add_tag(VERT, "support_radius", 1, radii2); return support; } From 1bc767b0cdc7038bd6230a41b90d2e379ca69ad1 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 21 Oct 2024 16:09:52 -0400 Subject: [PATCH 15/66] cleaned up the basis monomial calculation --- src/pcms/interpolator/MLSCoefficients.hpp | 51 ++++++++++++----------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index 47d2ee39..398611ec 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -18,39 +18,42 @@ double func(Coord& p) { return Z; } -// finds the slice lengths of the of the polynomial basis +// computes the slice lengths of the of the polynomial basis -KOKKOS_INLINE_FUNCTION -void basisSliceLengths(MatViewType& array){ - int dim = array.extent(0); - int degree = array.extent(1); - for (int i = 0; i < dim; ++i){ - array(0,i) = 1; +void basisSliceLengths(Kokkos::View& array){ + int degree = array.extent(0); + int dim = array.extent(1); + + for (int j = 0; j < dim; ++j){ + array(0, j) = 1; } - for (int j = 0; j < degree; ++j){ - array(j, 0) = 1; + for (int i = 0; i < degree; ++i){ + array(i, 0) = 1; } for (int i = 1; i < degree; ++i){ for (int j = 1; j < dim; ++j){ - array(i, j) = array(i-1,j) + array(i,j-1); - } + array(i, j) = array(i , j - 1) + array(i - 1, j); + } } + } // finds the size of the polynomial basis vector -KOKKOS_INLINE_FUNCTION -int basisSize(const MatViewType& array){ +int basisSize(const Kokkos::View& array){ int sum = 1; - int dim = array.extent(0); - int degree = array.extent(1); + int degree = array.extent(0); + int dim = array.extent(1); + for (int i = 0; i < degree; ++i){ - for (int j = 0; j < dim; ++j){ + for (int j = 0; j < dim; ++j){ sum += array(i, j); - } + } } + + return sum; } // evaluates the polynomial basis @@ -59,8 +62,8 @@ KOKKOS_INLINE_FUNCTION void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, Coord &p){ basis_monomial(0) = 1; - int dim = slice_length.extent(0); - int degree = slice_length.extent(1); + int dim = slice_length.extent(1); + int degree = slice_length.extent(0); int prev_col = 0; int curr_col = 1; @@ -77,14 +80,14 @@ void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, C int offset = curr_col; for (int j = 0; j < dim; ++j){ for (int k = 0; k < slice_length(i,j); ++k){ - basis_monomial(offset + i) = basis_monomial(prev_col +k) * point[j]; - offset += slice_length(i,j); - + basis_monomial(offset + k) = basis_monomial(prev_col +k) * point[j]; } - prev_col = curr_col; - curr_col = offset; + offset += slice_length(i,j); } + + prev_col = curr_col; + curr_col = offset; } } // polynomial basis vector From c0522409a26fb5066baee1cff2bb32a3448b987c Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 21 Oct 2024 16:11:10 -0400 Subject: [PATCH 16/66] made it general --- src/pcms/interpolator/MLSInterpolation.hpp | 27 +++++++++++++--------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index f294b33a..40220aa0 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -4,6 +4,7 @@ #include "MLSCoefficients.hpp" #include "adj_search_dega2.hpp" #include "adj_search.hpp" +#include using namespace Omega_h; using namespace pcms; @@ -23,14 +24,18 @@ Write mls_interpolation(const Reals source_values, Kokkos::View shmem_each_team( "stores the size required for each team", nvertices_target); + + + + Kokkos::View host_slice_length("stores slice length of polynomial basis in host", degree, dim); + MatViewType slice_length("stores slice length of polynomial basis in device", degree, dim); + Kokkos::deep_copy(host_slice_length, 0); - MatViewType slice_length("stores slice length of polynomial basis", degree, dim); - - Kokkos::deep_copy(slice_length, 0.0); - - basisSliceLengths(slice_length); + basisSliceLengths(host_slice_length); auto basis_size = basisSize(slice_length); + + Kokkos::deep_copy(slice_length, host_slice_length); Kokkos::parallel_for( "calculate the size required for scratch for each team", nvertices_target, @@ -41,7 +46,7 @@ Write mls_interpolation(const Reals source_values, size_t total_shared_size = 0; - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size) * 4; + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size) * 4; total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports) * 2; total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); total_shared_size += ScratchVecView::shmem_size(basis_size); @@ -59,7 +64,6 @@ Write mls_interpolation(const Reals source_values, } }, Kokkos::Max(shared_size)); - printf("shared size = %d \n", shared_size); Write approx_target_values(nvertices_target, 0, "approximated target values"); @@ -67,7 +71,8 @@ Write mls_interpolation(const Reals source_values, team_policy tp(nvertices_target, Kokkos::AUTO); int scratch_size = tp.scratch_size_max(0); - printf("scratch size is %d \n", scratch_size); + + assert(shared_size > scratch_size && "The required scratch size exceeds the max available scratch size"); Kokkos::parallel_for( "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), @@ -125,8 +130,8 @@ Write mls_interpolation(const Reals source_values, // Kokkos::deep_copy(result, 0.0); // Kokkos::deep_copy(Phi, 0.0); // - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, 6), [=](int j) { - for (int k = 0; k < 6; ++k) { + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), [=](int j) { + for (int k = 0; k < basis_size; ++k) { lower(j, k) = 0; forward_matrix(j, k) = 0; moment_matrix(j, k) = 0; @@ -143,7 +148,7 @@ Write mls_interpolation(const Reals source_values, Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - for (int k = 0; k < 6; ++k) { + for (int k = 0; k < basis_size; ++k) { V(j, k) = 0; } From 8319c3db77090f52ee572ddbe769622b16607c95 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 21 Oct 2024 16:12:18 -0400 Subject: [PATCH 17/66] updated the test cases that verifies the monomial basis calculations --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7da6949e..70b00b95 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -192,6 +192,7 @@ if(Catch2_FOUND) test_uniform_grid.cpp test_omega_h_copy.cpp test_point_search.cpp + test_mls_basis.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) From b9537dce71e93c1b986b7403bdc7d5c42c9fa775 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 21 Oct 2024 16:13:13 -0400 Subject: [PATCH 18/66] added unit test case that verifies the basis calculation --- test/test_mls_basis.cpp | 186 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 186 insertions(+) create mode 100644 test/test_mls_basis.cpp diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp new file mode 100644 index 00000000..2e009cbb --- /dev/null +++ b/test/test_mls_basis.cpp @@ -0,0 +1,186 @@ +#include +#include +#include +#include +#include + +TEST_CASE("basis test") +{ + SECTION("check basis slice lengths, size of the basis vector and basis vector for degree = 3 and dim = 3") + { + + const int dim = 3; + const int degree = 3; + Kokkos::View array("array", degree, dim); + Kokkos::deep_copy(array,0); + basisSliceLengths(array); + auto size = basisSize(array); + + int expected[degree][dim] = { + {1, 1, 1}, + {1, 2, 3}, + {1, 3, 6} + }; + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + REQUIRE(array(i, j) == expected[i][j]); + } + } + + REQUIRE(size == 20); + + + MatViewType d_array("array in device", degree, dim); + auto array_hd = Kokkos::create_mirror_view(d_array); + Kokkos::deep_copy(array_hd,array); + Kokkos::deep_copy(d_array, array_hd); + + int nvertices_target = 2; + Omega_h::Matrix<3, 2> coords{{2.0,3.0,4.0}, {0.4, 0.5, 0.2}}; + Kokkos::View results("stores results", nvertices_target, size); + + auto host_results = Kokkos::create_mirror_view(results); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for("inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team){ + int i = team.league_rank(); + ScratchVecView basis_vector(team.team_scratch(0), size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ + basis_vector(j) = 0; + }); + + Coord target_point; + target_point.x = coords[i][0]; + target_point.y = coords[i][1]; + target_point.z = coords[i][2]; + + BasisPoly(basis_vector, d_array, target_point); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ + results(i,j) = basis_vector(j); + }); + + + }); + + Kokkos::deep_copy(host_results, results); + Kokkos::View expected_basis("expected vector", size); + std::vector names = {"1", "x", "y", "z", "x^2", "xy", "y^2", "xz", "yz", "z^2", "x^3", "x^2y", "xy^2", "Y^3", "x^2z", "xyz", "y^2z", "xz^2", "yz^2", "z^3"}; + + for (int i = 0; i < nvertices_target; ++i){ + double x = coords[i][0]; + double y = coords[i][1]; + double z = coords[i][2]; + expected_basis(0) = 1; + expected_basis(1) = x; // (x) + expected_basis(2) = y; // (y) + expected_basis(3) = z; // (z) + expected_basis(4) = x * x; // (x^2) + expected_basis(5) = x * y; // (xy) + expected_basis(6) = y * y; // (y^2) + expected_basis(7) = x * z; // (xz) + expected_basis(8) = y * z; // (yz) + expected_basis(9) = z * z; // (z^2) + expected_basis(10) = x * x * x; // (x^3) + expected_basis(11) = x * x * y; // (x^2y) + expected_basis(12) = x * y * y; // (xy^2) + expected_basis(13) = y * y * y; // (y^3) + expected_basis(14) = x * x * z; // (x^2z) + expected_basis(15) = x * y * z; // (xyz) + expected_basis(16) = y * y * z; // (y^2z) + expected_basis(17) = x * z * z; // (xz^2) + expected_basis(18) = y * z * z; // (yz^2) + expected_basis(19) = z * z * z; // (z^3) + for (int j = 0; j < size; ++j){ + std::cout< array("array", degree, dim); + Kokkos::deep_copy(array,0); + basisSliceLengths(array); + + auto size = basisSize(array); + + int expected[degree][dim] = { + {1, 1, 1}, + }; + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + REQUIRE(array(i, j) == expected[i][j]); + } + } + + REQUIRE(size == 4); + + + MatViewType d_array("array in device", degree, dim); + auto array_hd = Kokkos::create_mirror_view(d_array); + Kokkos::deep_copy(array_hd,array); + Kokkos::deep_copy(d_array, array_hd); + + int nvertices_target = 2; + Omega_h::Matrix<3, 2> coords{{2.0,3.0,4.0}, {0.4, 0.5, 0.2}}; + Kokkos::View results("stores results", nvertices_target, size); + + auto host_results = Kokkos::create_mirror_view(results); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for("inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team){ + int i = team.league_rank(); + ScratchVecView basis_vector(team.team_scratch(0), size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ + basis_vector(j) = 0; + }); + + Coord target_point; + target_point.x = coords[i][0]; + target_point.y = coords[i][1]; + target_point.z = coords[i][2]; + + BasisPoly(basis_vector, d_array, target_point); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ + results(i,j) = basis_vector(j); + }); + + + }); + + Kokkos::deep_copy(host_results, results); + Kokkos::View expected_basis("expected vector", size); + std::vector names = {"1", "x", "y", "z"}; + + for (int i = 0; i < nvertices_target; ++i){ + double x = coords[i][0]; + double y = coords[i][1]; + double z = coords[i][2]; + expected_basis(0) = 1; + expected_basis(1) = x; // (x) + expected_basis(2) = y; // (y) + expected_basis(3) = z; // (z) + for (int j = 0; j < size; ++j){ + std::cout< Date: Mon, 21 Oct 2024 16:19:47 -0400 Subject: [PATCH 19/66] updated .gitignore file --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 28efa748..6f620e20 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ *.swp .spack-env *.bak +.nfs* From b65cee8d263e149e73db50f8e9674996e9838433 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 25 Oct 2024 13:18:45 -0400 Subject: [PATCH 20/66] made changes to accomodate general mls procedure --- src/pcms/interpolator/MLSCoefficients.hpp | 24 +++++++--------------- src/pcms/interpolator/MLSInterpolation.hpp | 17 ++++++++------- src/pcms/interpolator/adj_search_dega2.hpp | 6 ++---- 3 files changed, 19 insertions(+), 28 deletions(-) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index 398611ec..123f6ab7 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -14,7 +14,7 @@ KOKKOS_INLINE_FUNCTION double func(Coord& p) { auto x = (p.x - 0.5) * PI_M * 2; auto y = (p.y - 0.5) * PI_M * 2; - double Z = sin(x) * sin(y); + double Z = sin(x) * sin(y) + 2; return Z; } @@ -90,18 +90,8 @@ void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, C curr_col = offset; } } -// polynomial basis vector -//KOKKOS_INLINE_FUNCTION -//void BasisPoly(ScratchVecView basis_monomial, Coord& p1) { -// basis_monomial(0) = 1.0; -// basis_monomial(1) = p1.x; -// basis_monomial(2) = p1.y; -// basis_monomial(3) = p1.x * p1.x; -// basis_monomial(4) = p1.x * p1.y; -// basis_monomial(5) = p1.y * p1.y; -//} -// -// radial basis function + + KOKKOS_INLINE_FUNCTION double rbf(double r_sq, double rho_sq) { double phi; @@ -123,8 +113,8 @@ double rbf(double r_sq, double rho_sq) { // create vandermondeMatrix KOKKOS_INLINE_FUNCTION -void VandermondeMatrix(ScratchMatView& V, const ScratchMatView& local_source_points, - int j, const MatViewType& slice_length) { +void VandermondeMatrix(ScratchMatView V, const ScratchMatView local_source_points, + int j, const MatViewType slice_length) { int N = local_source_points.extent(0); int dim = local_source_points.extent(1); @@ -153,8 +143,8 @@ void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, // radial basis function vector KOKKOS_INLINE_FUNCTION -void PhiVector(ScratchVecView& Phi, const Coord& target_point, - const ScratchMatView& local_source_points, int j, +void PhiVector(ScratchVecView Phi, const Coord target_point, + const ScratchMatView local_source_points, int j, double cuttoff_dis_sq) { int N = local_source_points.extent(0); double dx = target_point.x - local_source_points(j, 0); diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index 40220aa0..b3f38d8e 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -28,14 +28,16 @@ Write mls_interpolation(const Reals source_values, Kokkos::View host_slice_length("stores slice length of polynomial basis in host", degree, dim); - MatViewType slice_length("stores slice length of polynomial basis in device", degree, dim); Kokkos::deep_copy(host_slice_length, 0); basisSliceLengths(host_slice_length); - auto basis_size = basisSize(slice_length); - - Kokkos::deep_copy(slice_length, host_slice_length); + auto basis_size = basisSize(host_slice_length); + printf("basis_size is %d\n", basis_size); + MatViewType slice_length("stores slice length of polynomial basis in device", degree, dim); + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); Kokkos::parallel_for( "calculate the size required for scratch for each team", nvertices_target, @@ -72,7 +74,7 @@ Write mls_interpolation(const Reals source_values, int scratch_size = tp.scratch_size_max(0); - assert(shared_size > scratch_size && "The required scratch size exceeds the max available scratch size"); + assert(scratch_size > shared_size && "The required scratch size exceeds the max available scratch size"); Kokkos::parallel_for( "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), @@ -169,8 +171,9 @@ Write mls_interpolation(const Reals source_values, BasisPoly(targetMonomialVec, slice_length, target_point); Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { VandermondeMatrix(V, local_source_points, j, slice_length); }); + Kokkos::TeamThreadRange(team, nsupports),[=](int j) { + VandermondeMatrix(V, local_source_points, j, slice_length); + }); team.team_barrier(); diff --git a/src/pcms/interpolator/adj_search_dega2.hpp b/src/pcms/interpolator/adj_search_dega2.hpp index 26f2edda..31469638 100644 --- a/src/pcms/interpolator/adj_search_dega2.hpp +++ b/src/pcms/interpolator/adj_search_dega2.hpp @@ -33,7 +33,7 @@ class FindSupports { FindSupports(Mesh& mesh_) : mesh(mesh_) {}; void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, + Write& support_idx, Write& radii2, bool is_build_csr_call); }; @@ -41,7 +41,6 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, bool is_build_csr_call) { // Mesh Info - LO min_num_supports = 20; // TODO: make this an input parameter const auto& mesh_coords = mesh.coords(); const auto& nvertices = mesh.nverts(); const auto& dim = mesh.dim(); @@ -185,8 +184,7 @@ struct SupportResults { Write radii2; // squared radii of the supports }; -SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance) { - LO min_support = 12; +SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) { SupportResults support; FindSupports search(mesh); From 7021f91622ce245f85f9ae0b11c461e061cb3c96 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 28 Oct 2024 14:08:37 -0400 Subject: [PATCH 21/66] compatible with the generalized mls --- src/pcms/interpolator/MLSCoefficients.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index 123f6ab7..8357d019 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -19,7 +19,9 @@ double func(Coord& p) { } // computes the slice lengths of the of the polynomial basis +// +KOKKOS_INLINE_FUNCTION void basisSliceLengths(Kokkos::View& array){ int degree = array.extent(0); int dim = array.extent(1); @@ -42,6 +44,8 @@ void basisSliceLengths(Kokkos::View& array){ } // finds the size of the polynomial basis vector + +KOKKOS_INLINE_FUNCTION int basisSize(const Kokkos::View& array){ int sum = 1; int degree = array.extent(0); From 723bcdf352b703725e83478d0e60b3c83c7838fc Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 28 Oct 2024 14:09:58 -0400 Subject: [PATCH 22/66] added test case file name --- test/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 70b00b95..acd0a0f4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -193,6 +193,7 @@ if(Catch2_FOUND) test_omega_h_copy.cpp test_point_search.cpp test_mls_basis.cpp + test_rbf_interp.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) From d5bd614cf579dd3a0b6c03b9708dc1f932fcd6cd Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 28 Oct 2024 14:10:37 -0400 Subject: [PATCH 23/66] created the test case for generalized mls --- test/test_rbf_interp.cpp | 415 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 415 insertions(+) create mode 100644 test/test_rbf_interp.cpp diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp new file mode 100644 index 00000000..bba7efb8 --- /dev/null +++ b/test/test_rbf_interp.cpp @@ -0,0 +1,415 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace Omega_h; + +KOKKOS_INLINE_FUNCTION +double func_const(Coord& p) { + auto x = p.x; + auto y = p.y; + double Z = 3; + return Z; +} + +KOKKOS_INLINE_FUNCTION +double func_linear(Coord& p) { + auto x = p.x; + auto y = p.y; + double Z = x + y; + return Z; +} + +KOKKOS_INLINE_FUNCTION +double func_quadratic(Coord& p) { + auto x = p.x; + auto y = p.y; + double Z = pow(x,2) + pow(y,2); + return Z; +} + + +KOKKOS_INLINE_FUNCTION +double func_cubic(Coord& p) { + auto x = p.x; + auto y = p.y; + double Z = pow(x,3) + pow(y,3); + return Z; +} + +// Test cases for centroid to node mapping using MLS +TEST_CASE("mls_interp_test") +{ + + auto lib = Library{}; + auto world = lib.world(); + auto rank = lib.world()->rank(); + auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); + + Real cutoffDistance = 0.3; + Real tolerance = 0.05; + cutoffDistance = cutoffDistance * cutoffDistance; + + const auto dim = mesh.dim(); + + const auto& target_coordinates = mesh.coords(); + + const auto& nfaces = mesh.nfaces(); + + const auto& ntargets = mesh.nverts(); + + + Write radii2(ntargets, cutoffDistance, "populate initial square of cutoffdistance to all target points"); + Write source_coordinates( + dim * nfaces, 0, + "stores coordinates of cell centroid of each tri element"); + + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Kokkos::parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = 2 * id; + source_coordinates[index] = centroid[0]; + source_coordinates[index + 1] = centroid[1]; + }); + + Points source_points; + + source_points.coordinates = + PointsViewType("Number of local source supports", nfaces); + Kokkos::parallel_for( + "target points", nfaces, KOKKOS_LAMBDA(int j) { + source_points.coordinates(j).x = source_coordinates[j * dim]; + source_points.coordinates(j).y = source_coordinates[j * dim + 1]; + }); + + + Points target_points; + + target_points.coordinates = + PointsViewType("Number of local source supports", mesh.nverts()); + Kokkos::parallel_for( + "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { + target_points.coordinates(j).x = target_coordinates[j * dim]; + target_points.coordinates(j).y = target_coordinates[j * dim + 1]; + }); + + + SECTION("test interpo degree 1 poly degree 0") + { + + int degree = 1; + LO min_num_supports = 10; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_const(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_const(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + + SECTION("test interpolation degree 1 poly degree 1") + { + + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_linear(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_linear(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + + + SECTION("test interpo degree 2 poly degree 0") + { + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_const(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_const(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + + SECTION("test interpolation degree 2 poly degree 1") + { + + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_linear(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_linear(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + + + SECTION("test interpolation degree 2 poly degree 2") + { + + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_quadratic(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_quadratic(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + + + + SECTION("test interpolation degree 3 poly degree 2") + { + + + int degree = 3; + LO min_num_supports = 20; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_quadratic(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_quadratic(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + + + SECTION("test interpolation degree 3 poly degree 3") + { + + + int degree = 3; + LO min_num_supports = 20; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_cubic(source_points.coordinates(i)); + }); + + SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + + auto host_approx_target_values = HostRead(approx_target_values); + + + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_cubic(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + + } + +} + From 810b760f48a1c36189d619b243ed392eb22e7de9 Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Mon, 28 Oct 2024 21:09:26 -0400 Subject: [PATCH 24/66] move include adj_search_dega2.hpp from MLSInterpolation.hpp to test_rbf_interp.cpp --- src/pcms/interpolator/MLSInterpolation.hpp | 1 - test/test_rbf_interp.cpp | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index 4e14745e..5b5cad5a 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -2,7 +2,6 @@ #define MLS_INTERPOLATION_HPP #include "MLSCoefficients.hpp" -#include "adj_search_dega2.hpp" #include "adj_search.hpp" #include diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index bba7efb8..01bd55ed 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include From 56098748ddbe7cdb328100c6b1406ae71d9dd88b Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Tue, 29 Oct 2024 13:12:14 -0400 Subject: [PATCH 25/66] radius adaptation for adj_search --- src/pcms/interpolator/adj_search.hpp | 122 +++++++++++++++++++++++---- 1 file changed, 107 insertions(+), 15 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index fd5fe60c..5816d5a1 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -33,14 +33,15 @@ class FindSupports { FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - void adjBasedSearch(const Real& cutoffDistance, const Write& supports_ptr, - Write& nSupports, Write& support_idx); + void adjBasedSearch(const Write& supports_ptr, + Write& nSupports, Write& support_idx, + Write& radii2, bool is_build_csr_call); }; -void FindSupports::adjBasedSearch(const Real& cutoffDistance, - const Write& supports_ptr, +void FindSupports::adjBasedSearch(const Write& supports_ptr, Write& nSupports, - Write& support_idx) { + Write& support_idx, + Write& radii2, bool is_build_csr_call) { // Source Mesh Info const auto& sourcePoints_coords = source_mesh.coords(); @@ -81,6 +82,7 @@ void FindSupports::adjBasedSearch(const Real& cutoffDistance, OMEGA_H_LAMBDA(const LO id) { queue queue; track visited; + Real cutoffDistance = radii2[id]; LO source_cell_id = results(id).tri_id; @@ -100,7 +102,7 @@ void FindSupports::adjBasedSearch(const Real& cutoffDistance, LO start_counter; - if (support_idx.size() > 0) { + if (!is_build_csr_call) { start_counter = supports_ptr[id]; } @@ -120,7 +122,7 @@ void FindSupports::adjBasedSearch(const Real& cutoffDistance, if (dist <= cutoffDistance) { count++; queue.push_back(vert_id); - if (support_idx.size() > 0) { + if (!is_build_csr_call) { LO idx_count = count - 1; support_idx[start_counter + idx_count] = vert_id; } @@ -151,7 +153,7 @@ void FindSupports::adjBasedSearch(const Real& cutoffDistance, if (dist <= cutoffDistance) { count++; queue.push_back(neighborIndex); - if (support_idx.size() > 0) { + if (!is_build_csr_call) { LO idx_count = count - 1; support_idx[start_counter + idx_count] = neighborIndex; } @@ -163,15 +165,41 @@ void FindSupports::adjBasedSearch(const Real& cutoffDistance, nSupports[id] = count; }, "count the number of supports in each target point"); + if (is_build_csr_call == false) { + // print supports for target 22 + parallel_for( + nvertices_target, // for each target vertex which is a node for this + // calculateDistance + OMEGA_H_LAMBDA(const LO id) { + if (id == 623) { + LO start_ptr = supports_ptr[id]; // start support location for the + // current target vertex + LO end_ptr = supports_ptr[id + 1]; // end support location for the + // current target vertex + printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", + id, end_ptr - start_ptr, nSupports[id]); + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = support_idx[i]; + printf(", %d", cell_id); + } + printf("\n"); + } + } // end of lambda + ); + } } struct SupportResults { Write supports_ptr; Write supports_idx; + Write radii2; // squared radii of the supports }; SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, - Real& cutoffDistance) { + Real& cutoffDistance, LO min_req_support = 12, + bool adapt_radius = true) +{ SupportResults support; FindSupports search(source_mesh, target_mesh); @@ -182,11 +210,71 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Write nSupports(nvertices_target, 0, "number of supports in each target vertex"); + printf("Cut off distance: %f\n", cutoffDistance); + Write radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + + if (!adapt_radius) { + printf("Fixed radius search... \n"); + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + radii2, true); + } else { + printf("Adaptive radius search... \n"); + int r_adjust_loop = 0; + while (true) { + nSupports = Write(nvertices_target, 0, + "number of supports in each target vertex"); + // find maximum radius + Real max_radius = 0.0; + Kokkos::parallel_reduce( + "find max radius", nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (radii2[i] > local_max) ? radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + + SupportResults support; // create support every time to avoid complexity + + search.adjBasedSearch(support.supports_ptr, nSupports, + support.supports_idx, radii2, true); + + Kokkos::fence(); + //* find the minimum number of supports + LO min_supports_found = 0; + Kokkos::Min min_reducer(min_supports_found); + Kokkos::parallel_reduce( + "find min number of supports", nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + min_reducer.join(local_min, nSupports[i]); + }, + min_reducer); + + printf("min_supports_found: %d at loop %d, max_radius %f\n", + min_supports_found, r_adjust_loop, max_radius); + r_adjust_loop++; + + if (min_supports_found >= min_req_support) { + break; + } + + Kokkos::fence(); + + // * update radius if nSupport is less that min_req_support + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_req_support) { + Real factor = Real(min_req_support) / Real(nSupports[i]); + factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor; + radii2[i] *= factor; + } + // nSupports[i] = 0; // reset the number of supports ? not sure if + // needed + }); + } - search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, - support.supports_idx); - - Kokkos::fence(); + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + } support.supports_ptr = Write(nvertices_target + 1, 0, @@ -209,8 +297,12 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, support.supports_idx = Write( total_supports, 0, "index of source supports of each target node"); - search.adjBasedSearch(cutoffDistance, support.supports_ptr, nSupports, - support.supports_idx); + search.adjBasedSearch(support.supports_ptr, nSupports, + support.supports_idx, radii2, false); + + support.radii2 = radii2; + + target_mesh.add_tag(VERT, "radii2", 1, support.radii2); return support; } From f337543655e92f6cee4834fe8e31be6bf5353ed9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 29 Oct 2024 13:47:46 -0400 Subject: [PATCH 26/66] added Nan Check --- src/pcms/interpolator/MLSInterpolation.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index 5b5cad5a..f039abdd 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -4,7 +4,7 @@ #include "MLSCoefficients.hpp" #include "adj_search.hpp" #include - +#include using namespace Omega_h; using namespace pcms; @@ -221,6 +221,7 @@ Write mls_interpolation(const Reals source_values, double tgt_value = 0; dot_product(team, result, SupportValues, tgt_value); if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(tgt_value),"Nan at %d\n",i); approx_target_values[i] = tgt_value; } }); From 726e6ff6dd77e7a944905c2b81dde284bbc7d486 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 29 Oct 2024 13:53:50 -0400 Subject: [PATCH 27/66] formatted the files --- src/pcms/interpolator/CMakeLists.txt | 47 ++- src/pcms/interpolator/MLSCoefficients.hpp | 180 ++++++----- src/pcms/interpolator/MLSInterpolation.hpp | 310 ++++++++++--------- src/pcms/interpolator/adj_search.hpp | 189 +++++------ src/pcms/interpolator/adj_search_dega2.hpp | 273 ++++++++-------- src/pcms/interpolator/linear_interpolant.hpp | 303 +++++++++--------- src/pcms/interpolator/multidimarray.hpp | 44 +-- src/pcms/interpolator/points.hpp | 16 +- src/pcms/interpolator/queue_visited.hpp | 46 ++- 9 files changed, 722 insertions(+), 686 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 6c524e42..dd589789 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -1,37 +1,28 @@ cmake_minimum_required(VERSION 3.20) -project(Interpolator) + project(Interpolator) -set(HEADER_FILES - adj_search_dega2.hpp - MLSInterpolation.hpp - points.hpp - adj_search.hpp - MLSCoefficients.hpp - queue_visited.hpp - linear_interpolant.hpp - multidimarray.hpp -) + set(HEADER_FILES adj_search_dega2.hpp MLSInterpolation.hpp points + .hpp adj_search.hpp MLSCoefficients.hpp queue_visited + .hpp linear_interpolant.hpp multidimarray.hpp) -install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) + install(FILES ${HEADER_FILES} DESTINATION include / pcms / interpolator) -add_library(pcms_interpolator INTERFACE) + add_library(pcms_interpolator INTERFACE) -target_link_libraries(pcms_interpolator INTERFACE pcms::core) + target_link_libraries(pcms_interpolator INTERFACE pcms::core) -target_include_directories(pcms_interpolator INTERFACE - $ - $ - $) + target_include_directories( + pcms_interpolator INTERFACE + $ + $ + $) + install(TARGETS pcms_interpolator EXPORT interpolatorTargets + INCLUDES DESTINATION include / + pcms / interpolator) -install(TARGETS pcms_interpolator - EXPORT interpolatorTargets - INCLUDES DESTINATION include/pcms/interpolator) - - -install(EXPORT interpolatorTargets - FILE interpolatorTargets.cmake - NAMESPACE pcms:: - DESTINATION lib/cmake/pcms) - + install(EXPORT interpolatorTargets FILE interpolatorTargets + .cmake NAMESPACE pcms::DESTINATION lib / + cmake / pcms) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index 8357d019..95fb0f3d 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -9,9 +9,9 @@ static constexpr int MAX_DIM = 3; - KOKKOS_INLINE_FUNCTION -double func(Coord& p) { +double func(Coord& p) +{ auto x = (p.x - 0.5) * PI_M * 2; auto y = (p.y - 0.5) * PI_M * 2; double Z = sin(x) * sin(y) + 2; @@ -22,82 +22,84 @@ double func(Coord& p) { // KOKKOS_INLINE_FUNCTION -void basisSliceLengths(Kokkos::View& array){ - int degree = array.extent(0); - int dim = array.extent(1); - - for (int j = 0; j < dim; ++j){ - array(0, j) = 1; - } - - for (int i = 0; i < degree; ++i){ - array(i, 0) = 1; - } +void basisSliceLengths(Kokkos::View& array) +{ + int degree = array.extent(0); + int dim = array.extent(1); - for (int i = 1; i < degree; ++i){ - for (int j = 1; j < dim; ++j){ - array(i, j) = array(i , j - 1) + array(i - 1, j); - } - } + for (int j = 0; j < dim; ++j) { + array(0, j) = 1; + } + for (int i = 0; i < degree; ++i) { + array(i, 0) = 1; + } + for (int i = 1; i < degree; ++i) { + for (int j = 1; j < dim; ++j) { + array(i, j) = array(i, j - 1) + array(i - 1, j); + } + } } -// finds the size of the polynomial basis vector +// finds the size of the polynomial basis vector KOKKOS_INLINE_FUNCTION -int basisSize(const Kokkos::View& array){ - int sum = 1; - int degree = array.extent(0); - int dim = array.extent(1); - - for (int i = 0; i < degree; ++i){ - for (int j = 0; j < dim; ++j){ - sum += array(i, j); - } +int basisSize(const Kokkos::View& array) +{ + int sum = 1; + int degree = array.extent(0); + int dim = array.extent(1); + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + sum += array(i, j); } + } - return sum; + return sum; } -// evaluates the polynomial basis +// evaluates the polynomial basis KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, Coord &p){ - - basis_monomial(0) = 1; - int dim = slice_length.extent(1); - int degree = slice_length.extent(0); - - int prev_col = 0; - int curr_col = 1; - - double point[MAX_DIM]; - point[0] = p.x; - point[1] = p.y; - - if (dim == 3){ - point[2] = p.z; - } +void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, + Coord& p) +{ - for (int i = 0; i < degree; ++i){ - int offset = curr_col; - for (int j = 0; j < dim; ++j){ - for (int k = 0; k < slice_length(i,j); ++k){ - basis_monomial(offset + k) = basis_monomial(prev_col +k) * point[j]; - } + basis_monomial(0) = 1; + int dim = slice_length.extent(1); + int degree = slice_length.extent(0); - offset += slice_length(i,j); - } + int prev_col = 0; + int curr_col = 1; - prev_col = curr_col; - curr_col = offset; + double point[MAX_DIM]; + point[0] = p.x; + point[1] = p.y; + + if (dim == 3) { + point[2] = p.z; + } + + for (int i = 0; i < degree; ++i) { + int offset = curr_col; + for (int j = 0; j < dim; ++j) { + for (int k = 0; k < slice_length(i, j); ++k) { + basis_monomial(offset + k) = basis_monomial(prev_col + k) * point[j]; + } + + offset += slice_length(i, j); } -} + prev_col = curr_col; + curr_col = offset; + } +} KOKKOS_INLINE_FUNCTION -double rbf(double r_sq, double rho_sq) { +double rbf(double r_sq, double rho_sq) +{ double phi; double r = sqrt(r_sq); double rho = sqrt(rho_sq); @@ -117,16 +119,18 @@ double rbf(double r_sq, double rho_sq) { // create vandermondeMatrix KOKKOS_INLINE_FUNCTION -void VandermondeMatrix(ScratchMatView V, const ScratchMatView local_source_points, - int j, const MatViewType slice_length) { +void VandermondeMatrix(ScratchMatView V, + const ScratchMatView local_source_points, int j, + const MatViewType slice_length) +{ int N = local_source_points.extent(0); int dim = local_source_points.extent(1); Coord source_point; source_point.x = local_source_points(j, 0); source_point.y = local_source_points(j, 1); - if (dim == 3){ - source_point.z = local_source_points(j, 2); + if (dim == 3) { + source_point.z = local_source_points(j, 2); } ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); BasisPoly(basis_monomial, slice_length, source_point); @@ -135,7 +139,8 @@ void VandermondeMatrix(ScratchMatView V, const ScratchMatView local_source_point // moment matrix KOKKOS_INLINE_FUNCTION void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, - int j) { + int j) +{ int M = V.extent(0); int N = V.extent(1); @@ -149,7 +154,8 @@ void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, KOKKOS_INLINE_FUNCTION void PhiVector(ScratchVecView Phi, const Coord target_point, const ScratchMatView local_source_points, int j, - double cuttoff_dis_sq) { + double cuttoff_dis_sq) +{ int N = local_source_points.extent(0); double dx = target_point.x - local_source_points(j, 0); double dy = target_point.y - local_source_points(j, 1); @@ -160,7 +166,8 @@ void PhiVector(ScratchVecView Phi, const Coord target_point, // matrix matrix multiplication KOKKOS_INLINE_FUNCTION void MatMatMul(member_type team, ScratchMatView& moment_matrix, - const ScratchMatView& pt_phi, const ScratchMatView& vandermonde) { + const ScratchMatView& pt_phi, const ScratchMatView& vandermonde) +{ int M = pt_phi.extent(0); int N = vandermonde.extent(1); int K = pt_phi.extent(1); @@ -169,11 +176,11 @@ void MatMatMul(member_type team, ScratchMatView& moment_matrix, Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { double sum = 0.0; Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); moment_matrix(i, j) = sum; }); }); @@ -181,16 +188,17 @@ void MatMatMul(member_type team, ScratchMatView& moment_matrix, // Matrix vector multiplication KOKKOS_INLINE_FUNCTION -void MatVecMul(member_type team, const ScratchVecView& vector, const ScratchMatView& matrix, - ScratchVecView& result) { +void MatVecMul(member_type team, const ScratchVecView& vector, + const ScratchMatView& matrix, ScratchVecView& result) +{ int M = matrix.extent(0); int N = matrix.extent(1); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { double sum = 0; Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, - sum); + Kokkos::ThreadVectorRange(team, M), + [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, + sum); result(i) = sum; }); // team.team_barrier(); @@ -199,7 +207,8 @@ void MatVecMul(member_type team, const ScratchVecView& vector, const ScratchMatV // dot product KOKKOS_INLINE_FUNCTION void dot_product(member_type team, const ScratchVecView& result_sub, - const ScratchVecView& SupportValues_sub, double& target_value) { + const ScratchVecView& SupportValues_sub, double& target_value) +{ int N = result_sub.extent(0); for (int j = 0; j < N; ++j) { target_value += result_sub(j) * SupportValues_sub[j]; @@ -209,7 +218,9 @@ void dot_product(member_type team, const ScratchVecView& result_sub, // moment matrix KOKKOS_INLINE_FUNCTION void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, - const ScratchMatView& pt_phi, const ScratchMatView& vandermonde) { + const ScratchMatView& pt_phi, + const ScratchMatView& vandermonde) +{ int M = pt_phi.extent(0); int N = vandermonde.extent(1); int K = pt_phi.extent(1); @@ -218,11 +229,11 @@ void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { double sum = 0.0; Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); + Kokkos::ThreadVectorRange(team, K), + [=](const int k, double& lsum) { + lsum += pt_phi(i, k) * vandermonde(k, j); + }, + sum); moment_matrix(i, j) = sum; }); }); @@ -232,7 +243,8 @@ void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, KOKKOS_INLINE_FUNCTION void inverse_matrix(member_type team, const ScratchMatView& matrix, ScratchMatView& lower, ScratchMatView& forward_matrix, - ScratchMatView& solution) { + ScratchMatView& solution) +{ int N = matrix.extent(0); for (int j = 0; j < N; ++j) { @@ -261,7 +273,7 @@ void inverse_matrix(member_type team, const ScratchMatView& matrix, Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { forward_matrix(i, i) = 1.0 / lower(i, i); for (int j = i + 1; j < N; ++j) { - forward_matrix(j, i) = 0.0; // Initialize to zero + forward_matrix(j, i) = 0.0; // Initialize to zero for (int k = 0; k < j; ++k) { forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); } diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index f039abdd..98e5bb82 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -11,60 +11,64 @@ using namespace pcms; Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Write radii2) { + const SupportResults& support, const LO& dim, + const LO& degree, Write radii2) +{ const auto nvertices_source = source_coordinates.size() / dim; const auto nvertices_target = target_coordinates.size() / dim; // Kokkos::RangePolicy range_policy(1, // nvertices_target); - // static_assert(degree > 0," the degree of polynomial basis should be atleast 1"); + // static_assert(degree > 0," the degree of polynomial basis should be atleast + // 1"); Kokkos::View shmem_each_team( - "stores the size required for each team", nvertices_target); - - + "stores the size required for each team", nvertices_target); - Kokkos::View host_slice_length("stores slice length of polynomial basis in host", degree, dim); + Kokkos::View host_slice_length( + "stores slice length of polynomial basis in host", degree, dim); Kokkos::deep_copy(host_slice_length, 0); - + basisSliceLengths(host_slice_length); auto basis_size = basisSize(host_slice_length); - printf("basis_size is %d\n", basis_size); - MatViewType slice_length("stores slice length of polynomial basis in device", degree, dim); + printf("basis_size is %d\n", basis_size); + MatViewType slice_length("stores slice length of polynomial basis in device", + degree, dim); auto slice_length_hd = Kokkos::create_mirror_view(slice_length); Kokkos::deep_copy(slice_length_hd, host_slice_length); Kokkos::deep_copy(slice_length, slice_length_hd); Kokkos::parallel_for( - "calculate the size required for scratch for each team", nvertices_target, - KOKKOS_LAMBDA(const int i) { - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size) * 4; - total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports) * 2; - total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); - total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - shmem_each_team(i) = total_shared_size; - }); + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += + ScratchMatView::shmem_size(basis_size, basis_size) * 4; + total_shared_size += + ScratchMatView::shmem_size(basis_size, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); + total_shared_size += ScratchVecView::shmem_size(basis_size); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + shmem_each_team(i) = total_shared_size; + }); size_t shared_size; Kokkos::parallel_reduce( - "find_max", nvertices_target, - KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { - if (shmem_each_team(i) > max_val_temp) { - max_val_temp = shmem_each_team(i); - } - }, - Kokkos::Max(shared_size)); + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); Write approx_target_values(nvertices_target, 0, "approximated target values"); @@ -72,159 +76,161 @@ Write mls_interpolation(const Reals source_values, team_policy tp(nvertices_target, Kokkos::AUTO); int scratch_size = tp.scratch_size_max(0); - - assert(scratch_size > shared_size && "The required scratch size exceeds the max available scratch size"); + + assert(scratch_size > shared_size && + "The required scratch size exceeds the max available scratch size"); Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - - int nsupports = end_ptr - start_ptr; - - ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3){ - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; } + } - ScratchMatView lower(team.team_scratch(0), basis_size, basis_size); + ScratchMatView lower(team.team_scratch(0), basis_size, basis_size); - ScratchMatView forward_matrix(team.team_scratch(0), basis_size, basis_size); + ScratchMatView forward_matrix(team.team_scratch(0), basis_size, + basis_size); - ScratchMatView moment_matrix(team.team_scratch(0), basis_size, basis_size); + ScratchMatView moment_matrix(team.team_scratch(0), basis_size, + basis_size); - ScratchMatView inv_mat(team.team_scratch(0), basis_size, basis_size); + ScratchMatView inv_mat(team.team_scratch(0), basis_size, basis_size); - ScratchMatView V(team.team_scratch(0), nsupports, basis_size); + ScratchMatView V(team.team_scratch(0), nsupports, basis_size); - ScratchMatView Ptphi(team.team_scratch(0), basis_size, nsupports); + ScratchMatView Ptphi(team.team_scratch(0), basis_size, nsupports); - ScratchMatView resultant_matrix(team.team_scratch(0), basis_size, nsupports); + ScratchMatView resultant_matrix(team.team_scratch(0), basis_size, + nsupports); - ScratchVecView targetMonomialVec(team.team_scratch(0), basis_size); + ScratchVecView targetMonomialVec(team.team_scratch(0), basis_size); - ScratchVecView SupportValues(team.team_scratch(0), nsupports); + ScratchVecView SupportValues(team.team_scratch(0), nsupports); - ScratchVecView result(team.team_scratch(0), nsupports); + ScratchVecView result(team.team_scratch(0), nsupports); - ScratchVecView Phi(team.team_scratch(0), nsupports); + ScratchVecView Phi(team.team_scratch(0), nsupports); + // Kokkos::deep_copy(lower, 0.0); + // Kokkos::deep_copy(forward_matrix, 0.0); + // Kokkos::deep_copy(moment_matrix, 0.0); + // Kokkos::deep_copy(inv_mat, 0.0); + // Kokkos::deep_copy(V, 0.0); + // Kokkos::deep_copy(Ptphi, 0.0); + // Kokkos::deep_copy(resultant_matrix, 0.0); + // Kokkos::deep_copy(targetMonomialVec, 0.0); + // Kokkos::deep_copy(SupportValues, 0.0); + // Kokkos::deep_copy(result, 0.0); + // Kokkos::deep_copy(Phi, 0.0); + // + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), + [=](int j) { + for (int k = 0; k < basis_size; ++k) { + lower(j, k) = 0; + forward_matrix(j, k) = 0; + moment_matrix(j, k) = 0; + inv_mat(j, k) = 0; + } -// Kokkos::deep_copy(lower, 0.0); -// Kokkos::deep_copy(forward_matrix, 0.0); -// Kokkos::deep_copy(moment_matrix, 0.0); -// Kokkos::deep_copy(inv_mat, 0.0); -// Kokkos::deep_copy(V, 0.0); -// Kokkos::deep_copy(Ptphi, 0.0); -// Kokkos::deep_copy(resultant_matrix, 0.0); -// Kokkos::deep_copy(targetMonomialVec, 0.0); -// Kokkos::deep_copy(SupportValues, 0.0); -// Kokkos::deep_copy(result, 0.0); -// Kokkos::deep_copy(Phi, 0.0); -// - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), [=](int j) { - for (int k = 0; k < basis_size; ++k) { - lower(j, k) = 0; - forward_matrix(j, k) = 0; - moment_matrix(j, k) = 0; - inv_mat(j, k) = 0; - } + targetMonomialVec(j) = 0; + for (int k = 0; k < nsupports; ++k) { + resultant_matrix(j, k) = 0; - targetMonomialVec(j) = 0; - for (int k = 0; k < nsupports; ++k) { - resultant_matrix(j, k) = 0; + Ptphi(j, k) = 0; + } + }); - Ptphi(j, k) = 0; - } - }); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < basis_size; ++k) { + V(j, k) = 0; + } - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < basis_size; ++k) { - V(j, k) = 0; - } + SupportValues(j) = 0; + result(j) = 0; + Phi(j) = 0; + }); - SupportValues(j) = 0; - result(j) = 0; - Phi(j) = 0; - }); + Coord target_point; - Coord target_point; + target_point.x = target_coordinates[i * dim]; - target_point.x = target_coordinates[i * dim]; + target_point.y = target_coordinates[i * dim + 1]; - target_point.y = target_coordinates[i * dim + 1]; - - if (dim == 3){ - target_point.z = target_coordinates[i * dim + 2]; - } - BasisPoly(targetMonomialVec, slice_length, target_point); + if (dim == 3) { + target_point.z = target_coordinates[i * dim + 2]; + } + BasisPoly(targetMonomialVec, slice_length, target_point); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports),[=](int j) { - VandermondeMatrix(V, local_source_points, j, slice_length); - }); + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + VandermondeMatrix(V, local_source_points, j, slice_length); + }); - team.team_barrier(); + team.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - PhiVector(Phi, target_point, local_source_points, j, radii2[i]); - }); + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + PhiVector(Phi, target_point, local_source_points, j, radii2[i]); + }); - // sum phi - double sum_phi = 0; - Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(team, nsupports), - [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); - - // normalize phi with sum_phi - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { Phi(j) = Phi(j) / sum_phi; }); + // sum phi + double sum_phi = 0; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, nsupports), + [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); - team.team_barrier(); + // normalize phi with sum_phi + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { Phi(j) = Phi(j) / sum_phi; }); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); + team.team_barrier(); - team.team_barrier(); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); - MatMatMul(team, moment_matrix, Ptphi, V); - team.team_barrier(); + team.team_barrier(); - inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); + MatMatMul(team, moment_matrix, Ptphi, V); + team.team_barrier(); - team.team_barrier(); + inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); - MatMatMul(team, resultant_matrix, inv_mat, Ptphi); - team.team_barrier(); - - MatVecMul(team, targetMonomialVec, resultant_matrix, result); - team.team_barrier(); - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - SupportValues(i) = - source_values[support.supports_idx[start_ptr + i]]; - }); - - double tgt_value = 0; - dot_product(team, result, SupportValues, tgt_value); - if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(tgt_value),"Nan at %d\n",i); - approx_target_values[i] = tgt_value; - } - }); + team.team_barrier(); + + MatMatMul(team, resultant_matrix, inv_mat, Ptphi); + team.team_barrier(); + + MatVecMul(team, targetMonomialVec, resultant_matrix, result); + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + SupportValues(i) = source_values[support.supports_idx[start_ptr + i]]; + }); + + double tgt_value = 0; + dot_product(team, result, SupportValues, tgt_value); + if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(tgt_value), "Nan at %d\n", i); + approx_target_values[i] = tgt_value; + } + }); return approx_target_values; } diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 5816d5a1..9643695b 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -11,7 +11,8 @@ static constexpr int max_dim = 3; // TODO change this into span/mdspan OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) { +Real calculateDistance(const Real* p1, const Real* p2, const int dim) +{ Real dx, dy, dz; dx = p1[0] - p2[0]; dy = p1[1] - p2[1]; @@ -24,24 +25,25 @@ Real calculateDistance(const Real* p1, const Real* p2, const int dim) { return dx * dx + dy * dy + dz * dz; } -class FindSupports { - private: +class FindSupports +{ +private: Mesh& source_mesh; Mesh& target_mesh; - public: +public: FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) - : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; + : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - void adjBasedSearch(const Write& supports_ptr, - Write& nSupports, Write& support_idx, - Write& radii2, bool is_build_csr_call); + void adjBasedSearch(const Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call); }; void FindSupports::adjBasedSearch(const Write& supports_ptr, - Write& nSupports, - Write& support_idx, - Write& radii2, bool is_build_csr_call) { + Write& nSupports, Write& support_idx, + Write& radii2, bool is_build_csr_call) +{ // Source Mesh Info const auto& sourcePoints_coords = source_mesh.coords(); @@ -67,10 +69,10 @@ void FindSupports::adjBasedSearch(const Write& supports_ptr, Kokkos::View target_points("test_points", nvertices_target); parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - target_points(i, 0) = targetPoints_coords[i * dim]; - target_points(i, 1) = targetPoints_coords[i * dim + 1]; - }); + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + target_points(i, 0) = targetPoints_coords[i * dim]; + target_points(i, 1) = targetPoints_coords[i * dim + 1]; + }); Kokkos::fence(); pcms::GridPointSearch search_cell(source_mesh, 10, 10); @@ -78,98 +80,97 @@ void FindSupports::adjBasedSearch(const Write& supports_ptr, auto results = search_cell(target_points); parallel_for( - nvertices_target, - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - Real cutoffDistance = radii2[id]; + nvertices_target, + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + Real cutoffDistance = radii2[id]; - LO source_cell_id = results(id).tri_id; + LO source_cell_id = results(id).tri_id; - const LO num_verts_in_dim = dim + 1; + const LO num_verts_in_dim = dim + 1; - LO start_ptr = source_cell_id * num_verts_in_dim; + LO start_ptr = source_cell_id * num_verts_in_dim; - LO end_ptr = start_ptr + num_verts_in_dim; + LO end_ptr = start_ptr + num_verts_in_dim; - Real target_coords[max_dim]; + Real target_coords[max_dim]; - Real support_coords[max_dim]; + Real support_coords[max_dim]; - for (LO k = 0; k < dim; ++k) { - target_coords[k] = target_points(id, k); - } + for (LO k = 0; k < dim; ++k) { + target_coords[k] = target_points(id, k); + } - LO start_counter; + LO start_counter; - if (!is_build_csr_call) { - start_counter = supports_ptr[id]; - } + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; + } - int count = 0; - // Initialize queue by pushing the vertices in the neighborhood of the - // given target point + int count = 0; + // Initialize queue by pushing the vertices in the neighborhood of the + // given target point - for (LO i = start_ptr; i < end_ptr; ++i) { - LO vert_id = cells2verts[i]; - visited.push_back(vert_id); + for (LO i = start_ptr; i < end_ptr; ++i) { + LO vert_id = cells2verts[i]; + visited.push_back(vert_id); - for (LO k = 0; k < dim; ++k) { - support_coords[k] = sourcePoints_coords[vert_id * dim + k]; - } + for (LO k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[vert_id * dim + k]; + } - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(vert_id); - if (!is_build_csr_call) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = vert_id; - } + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(vert_id); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = vert_id; } } + } - while (!queue.isEmpty()) { - LO currentVertex = queue.front(); - queue.pop_front(); - LO start = v2v_ptr[currentVertex]; - LO end = v2v_ptr[currentVertex + 1]; + while (!queue.isEmpty()) { + LO currentVertex = queue.front(); + queue.pop_front(); + LO start = v2v_ptr[currentVertex]; + LO end = v2v_ptr[currentVertex + 1]; - for (LO i = start; i < end; ++i) { - auto neighborIndex = v2v_data[i]; + for (LO i = start; i < end; ++i) { + auto neighborIndex = v2v_data[i]; - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function - if (visited.notVisited(neighborIndex)) { - visited.push_back(neighborIndex); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - sourcePoints_coords[neighborIndex * dim + k]; - } + if (visited.notVisited(neighborIndex)) { + visited.push_back(neighborIndex); + for (int k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[neighborIndex * dim + k]; + } - Real dist = calculateDistance(target_coords, support_coords, dim); + Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighborIndex); - if (!is_build_csr_call) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = neighborIndex; - } + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighborIndex); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighborIndex; } } } - } // end of while loop + } + } // end of while loop - nSupports[id] = count; - }, - "count the number of supports in each target point"); + nSupports[id] = count; + }, + "count the number of supports in each target point"); if (is_build_csr_call == false) { // print supports for target 22 parallel_for( nvertices_target, // for each target vertex which is a node for this - // calculateDistance + // calculateDistance OMEGA_H_LAMBDA(const LO id) { if (id == 623) { LO start_ptr = supports_ptr[id]; // start support location for the @@ -190,10 +191,11 @@ void FindSupports::adjBasedSearch(const Write& supports_ptr, } } -struct SupportResults { +struct SupportResults +{ Write supports_ptr; Write supports_idx; - Write radii2; // squared radii of the supports + Write radii2; // squared radii of the supports }; SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, @@ -276,29 +278,28 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); } - support.supports_ptr = - Write(nvertices_target + 1, 0, - "number of support source vertices in CSR format"); + support.supports_ptr = Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); LO total_supports = 0; Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); Kokkos::fence(); support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); + total_supports, 0, "index of source supports of each target node"); - search.adjBasedSearch(support.supports_ptr, nSupports, - support.supports_idx, radii2, false); + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + radii2, false); support.radii2 = radii2; diff --git a/src/pcms/interpolator/adj_search_dega2.hpp b/src/pcms/interpolator/adj_search_dega2.hpp index 1130910a..6b3bc9d8 100644 --- a/src/pcms/interpolator/adj_search_dega2.hpp +++ b/src/pcms/interpolator/adj_search_dega2.hpp @@ -12,7 +12,8 @@ static constexpr int max_dim = 3; // TODO change this into span/mdspan OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) { +Real calculateDistance(const Real* p1, const Real* p2, const int dim) +{ Real dx, dy, dz; dx = p1[0] - p2[0]; dy = p1[1] - p2[1]; @@ -25,21 +26,23 @@ Real calculateDistance(const Real* p1, const Real* p2, const int dim) { return dx * dx + dy * dy + dz * dz; } -class FindSupports { - private: +class FindSupports +{ +private: Mesh& mesh; - public: +public: FindSupports(Mesh& mesh_) : mesh(mesh_) {}; void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, + Write& support_idx, Write& radii2, bool is_build_csr_call); }; void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, - bool is_build_csr_call) { + bool is_build_csr_call) +{ // Mesh Info const auto& mesh_coords = mesh.coords(); const auto& nvertices = mesh.nverts(); @@ -53,129 +56,126 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; Write cell_centroids( - dim * nfaces, 0, - "stores coordinates of cell centroid of each tri element"); + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); parallel_for( - "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); - const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(mesh_coords, current_el_verts); - auto centroid = average(current_el_vert_coords); - int index = dim * id; - cell_centroids[index] = centroid[0]; - cell_centroids[index + 1] = centroid[1]; - }); + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = dim * id; + cell_centroids[index] = centroid[0]; + cell_centroids[index + 1] = centroid[1]; + }); // * Got the adj data and cell centroids parallel_for( - nvertices, // for each target vertex which is a node for this case - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - const LO num_verts_in_dim = dim + 1; - Real target_coords[max_dim]; - Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; // squared radii of the supports - - //? copying the target vertex coordinates - for (LO k = 0; k < dim; ++k) { - target_coords[k] = mesh_coords[id * dim + k]; - } + nvertices, // for each target vertex which is a node for this case + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + const LO num_verts_in_dim = dim + 1; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + Real cutoffDistance = radii2[id]; // squared radii of the supports + + //? copying the target vertex coordinates + for (LO k = 0; k < dim; ++k) { + target_coords[k] = mesh_coords[id * dim + k]; + } - LO start_counter; // start of support idx for the current target vertex + LO start_counter; // start of support idx for the current target vertex - // not being done in the first call - // where the support_idx start for the current target vertex - if (!is_build_csr_call) { - start_counter = supports_ptr[id]; // start support location for the - // current target vertex + // not being done in the first call + // where the support_idx start for the current target vertex + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; // start support location for the + // current target vertex + } + LO start_ptr = + n2f_ptr[id]; // start loc of the adj cells of the target node + LO end_ptr = + n2f_ptr[id + 1]; // end loc of the adj cells of the target node + + int count = 0; // number of supports for the current target vertex + // Initialize queue by pushing the cells in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = n2f_data[i]; + visited.push_back(cell_id); // cell added to the visited list + + for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the + // centroid of the cell + support_coords[k] = cell_centroids[cell_id * dim + k]; } - LO start_ptr = - n2f_ptr[id]; // start loc of the adj cells of the target node - LO end_ptr = - n2f_ptr[id + 1]; // end loc of the adj cells of the target node - - int count = 0; // number of supports for the current target vertex - // Initialize queue by pushing the cells in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex - LO cell_id = n2f_data[i]; - visited.push_back(cell_id); // cell added to the visited list - - for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the - // centroid of the cell - support_coords[k] = cell_centroids[cell_id * dim + k]; - } - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(cell_id); - if (!is_build_csr_call) { // not being done in the first call - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - cell_id; // add the support cell to the support_idx - } // end of support_idx check - } // end of distance check - } // end of loop over adj cells to the target vertex - - // loops over the queued cells from the neighborhood of the target - // vertex - while (!queue.isEmpty()) { // ? can queue be empty? - LO currentCell = queue.front(); - queue.pop_front(); - LO start = currentCell * - num_verts_in_dim; // start vert id of the current cell - LO end = start + num_verts_in_dim; // end vert id of the current cell - - for (LO i = start; i < end; - ++i) { // loop over the vertices of the current cell - LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = - n2f_ptr[current_vert_id]; // start loc of adj cells to current - // vertex - LO end_ptr_vert_current_vert = - n2f_ptr[current_vert_id + - 1]; // end loc of adj cells to current vertex - for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { // loop over adj cells to the current vertex - auto neighbor_cell_index = n2f_data[j]; // current cell - - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function - - if (visited.notVisited(neighbor_cell_index)) { - visited.push_back(neighbor_cell_index); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - cell_centroids[neighbor_cell_index * dim + k]; - } - - Real dist = - calculateDistance(target_coords, support_coords, dim); - - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighbor_cell_index); - if (!is_build_csr_call) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - neighbor_cell_index; - } // end of support_idx check - } // end of distance check - } // end of not visited check - } // end of loop over adj cells to the current vertex - } // end of loop over nodes - - } // end of while loop - - nSupports[id] = count; - }, // end of lambda - "count the number of supports in each target point"); + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(cell_id); + if (!is_build_csr_call) { // not being done in the first call + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + cell_id; // add the support cell to the support_idx + } // end of support_idx check + } // end of distance check + } // end of loop over adj cells to the target vertex + + // loops over the queued cells from the neighborhood of the target + // vertex + while (!queue.isEmpty()) { // ? can queue be empty? + LO currentCell = queue.front(); + queue.pop_front(); + LO start = + currentCell * num_verts_in_dim; // start vert id of the current cell + LO end = start + num_verts_in_dim; // end vert id of the current cell + + for (LO i = start; i < end; + ++i) { // loop over the vertices of the current cell + LO current_vert_id = faces2nodes[i]; + LO start_ptr_current_vert = + n2f_ptr[current_vert_id]; // start loc of adj cells to current + // vertex + LO end_ptr_vert_current_vert = + n2f_ptr[current_vert_id + + 1]; // end loc of adj cells to current vertex + for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; + ++j) { // loop over adj cells to the current vertex + auto neighbor_cell_index = n2f_data[j]; // current cell + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighbor_cell_index)) { + visited.push_back(neighbor_cell_index); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + cell_centroids[neighbor_cell_index * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighbor_cell_index); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighbor_cell_index; + } // end of support_idx check + } // end of distance check + } // end of not visited check + } // end of loop over adj cells to the current vertex + } // end of loop over nodes + + } // end of while loop + + nSupports[id] = count; + }, // end of lambda + "count the number of supports in each target point"); if (is_build_csr_call == false) { // print supports for target 22 @@ -184,12 +184,12 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, // calculateDistance OMEGA_H_LAMBDA(const LO id) { if (id == 22) { - LO start_ptr = - supports_ptr[id]; // start support location for the current target vertex - LO end_ptr = - supports_ptr[id + 1]; // end support location for the current target vertex - printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, - end_ptr - start_ptr, nSupports[id]); + LO start_ptr = supports_ptr[id]; // start support location for the + // current target vertex + LO end_ptr = supports_ptr[id + 1]; // end support location for the + // current target vertex + printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", + id, end_ptr - start_ptr, nSupports[id]); for (LO i = start_ptr; i < end_ptr; ++i) { // loop over adj cells to the target vertex LO cell_id = support_idx[i]; @@ -202,13 +202,15 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, } } -struct SupportResults { +struct SupportResults +{ Write supports_ptr; Write supports_idx; - Write radii2; // squared radii of the supports + Write radii2; // squared radii of the supports }; -SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) { +SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) +{ SupportResults support; FindSupports search(mesh); @@ -220,7 +222,7 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) printf("Inside searchNeighbors 1\n"); Write radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); + "squared radii of the supports"); printf("cut off distance: %f\n", cutoffDistance); // this call gets the number of supports for each target vertex: nSupports and // the squared radii of the supports (with might be increased in the search to @@ -232,13 +234,13 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) Kokkos::parallel_reduce( "find max radius", nvertices_target, OMEGA_H_LAMBDA(const LO i, Real& local_max) { - local_max = - (radii2[i] > local_max) ? radii2[i] : local_max; + local_max = (radii2[i] > local_max) ? radii2[i] : local_max; }, Kokkos::Max(max_radius)); printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - nSupports = Write(nvertices_target, 0, "number of supports in each target vertex"); + nSupports = Write(nvertices_target, 0, + "number of supports in each target vertex"); SupportResults support; // create support every time to avoid complexity search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, @@ -279,8 +281,9 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); - //search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - // radii2, true); + // search.adjBasedSearch(support.supports_ptr, nSupports, + // support.supports_idx, + // radii2, true); // offset array for the supports of each target vertex support.supports_ptr = Write( diff --git a/src/pcms/interpolator/linear_interpolant.hpp b/src/pcms/interpolator/linear_interpolant.hpp index 5abde69f..fb91336e 100644 --- a/src/pcms/interpolator/linear_interpolant.hpp +++ b/src/pcms/interpolator/linear_interpolant.hpp @@ -1,183 +1,186 @@ -#ifndef INTERPOLANT_HPP -#define INTERPOLANT_HPP - +#ifndef INTERPOLANT_HPP +#define INTERPOLANT_HPP #include #include "multidimarray.hpp" #define MAX_DIM 10 - KOKKOS_INLINE_FUNCTION -void find_indices(const IntVecView& num_bins, const RealVecView& range, - const RealVecView& point , int* indices){ - int dim = point.extent(0); - // IntVecView indices("parametric coordinate", dim); - for (int i = 0; i < dim; ++i){ - int id = i * 2; - double length = range(id + 1) - range(id); - double dlen = length/num_bins(i); - indices[i] = ((point(i) - range(id)) / dlen); - - } - - +void find_indices(const IntVecView& num_bins, const RealVecView& range, + const RealVecView& point, int* indices) +{ + int dim = point.extent(0); + // IntVecView indices("parametric coordinate", dim); + for (int i = 0; i < dim; ++i) { + int id = i * 2; + double length = range(id + 1) - range(id); + double dlen = length / num_bins(i); + indices[i] = ((point(i) - range(id)) / dlen); + } } - KOKKOS_INLINE_FUNCTION -void find_limits(const IntVecView& num_bins, const RealVecView& range, const int* indices, double* limits){ - int dim = num_bins.extent(0); - // RealVecView limits("limits", 2*dim); - for (int i = 0; i < dim; ++i){ - int ptr = i * 2; - double dlen = (range(ptr+1) - range(ptr))/num_bins(i); - limits[ptr] = indices[i] * dlen; - limits[ptr+1] = limits[ptr] + dlen; - } +void find_limits(const IntVecView& num_bins, const RealVecView& range, + const int* indices, double* limits) +{ + int dim = num_bins.extent(0); + // RealVecView limits("limits", 2*dim); + for (int i = 0; i < dim; ++i) { + int ptr = i * 2; + double dlen = (range(ptr + 1) - range(ptr)) / num_bins(i); + limits[ptr] = indices[i] * dlen; + limits[ptr + 1] = limits[ptr] + dlen; + } } - - KOKKOS_INLINE_FUNCTION -void evaluateParametricCoord(const RealVecView& point, const double* limits , double* parametric_coord){ - int dim = point.extent(0); - // RealVecView parametric_coord("parametric coordinate", dim); - for (int i = 0; i < dim; ++i){ - int index = i * 2; - parametric_coord[i] = (point(i) - limits[index])/(limits[index+1] - limits[index]); - - } +void evaluateParametricCoord(const RealVecView& point, const double* limits, + double* parametric_coord) +{ + int dim = point.extent(0); + // RealVecView parametric_coord("parametric coordinate", dim); + for (int i = 0; i < dim; ++i) { + int index = i * 2; + parametric_coord[i] = + (point(i) - limits[index]) / (limits[index + 1] - limits[index]); + } } - - KOKKOS_INLINE_FUNCTION -void basis_function(const RealVecView& parametric_coord, double* linear_basis_each_dir){ - int dim = parametric_coord.extent(0); - // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); - for (int i = 0; i < dim; ++i){ - int index = i * 2; - - linear_basis_each_dir[index] = 1 - parametric_coord(i); - - linear_basis_each_dir[index + 1] = parametric_coord(i); - - - } - +void basis_function(const RealVecView& parametric_coord, + double* linear_basis_each_dir) +{ + int dim = parametric_coord.extent(0); + // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); + for (int i = 0; i < dim; ++i) { + int index = i * 2; + + linear_basis_each_dir[index] = 1 - parametric_coord(i); + + linear_basis_each_dir[index + 1] = parametric_coord(i); + } } - KOKKOS_INLINE_FUNCTION -double linear_interpolant(const IntVecView& dimensions, const double* linear_basis_each_dir, const IntVecView& indices, const RealVecView& values){ - int dim = dimensions.extent(0); - double sum = 0; - int ids[MAX_DIM]; - - int array_size = 1 << dim; - for (int i = 0; i < array_size; ++i){ - double temp = 1.0; - for (int j = 0; j < dim; ++j){ - int index = 2*j; - int id_left = indices(j); - int id_right = id_left + 1; - if (i & (1 << j)){ - temp *= linear_basis_each_dir[index+1]; - ids[j] = id_right; - } else { - temp *= linear_basis_each_dir[index]; - ids[j] = id_left; - } - } - - int idx = calculateIndex(dimensions, ids); - double corner_values = values(idx); - - sum += temp*corner_values; +double linear_interpolant(const IntVecView& dimensions, + const double* linear_basis_each_dir, + const IntVecView& indices, const RealVecView& values) +{ + int dim = dimensions.extent(0); + double sum = 0; + int ids[MAX_DIM]; + + int array_size = 1 << dim; + for (int i = 0; i < array_size; ++i) { + double temp = 1.0; + for (int j = 0; j < dim; ++j) { + int index = 2 * j; + int id_left = indices(j); + int id_right = id_left + 1; + if (i & (1 << j)) { + temp *= linear_basis_each_dir[index + 1]; + ids[j] = id_right; + } else { + temp *= linear_basis_each_dir[index]; + ids[j] = id_left; + } } - return sum; -} + int idx = calculateIndex(dimensions, ids); + double corner_values = values(idx); + sum += temp * corner_values; + } + return sum; +} -class RegularGridInterpolator { - private: - const RealMatView parametric_coords; - const RealVecView values; - const IntMatView indices; - const IntVecView dimensions; - - public: - RegularGridInterpolator(const RealMatView& parametric_coords_, - const RealVecView& values_, const IntMatView& indices_, const IntVecView& dimensions_) - : parametric_coords(parametric_coords_), values(values_), indices(indices_), dimensions(dimensions_) {}; - -RealVecView linear_interpolation() { - int dim = dimensions.extent(0); - int N = parametric_coords.extent(0); - RealVecView interpolated_values("approximated values", N); - auto parametric_coords_ = parametric_coords; - auto dimensions_ = dimensions; - auto values_ = values; - auto indices_ = indices; - Kokkos::parallel_for("linear interpolation function",N, KOKKOS_LAMBDA(int j){ - double linear_basis_each_dir[MAX_DIM] = {0.0}; - auto parametric_coord_each_point = Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); - auto index_each_point = Kokkos::subview(indices_, j , Kokkos::ALL()); - basis_function(parametric_coord_each_point, linear_basis_each_dir); - auto approx_value = linear_interpolant(dimensions_, linear_basis_each_dir,index_each_point, values_); - interpolated_values(j) = approx_value; - }); - - return interpolated_values; - } +class RegularGridInterpolator +{ +private: + const RealMatView parametric_coords; + const RealVecView values; + const IntMatView indices; + const IntVecView dimensions; + +public: + RegularGridInterpolator(const RealMatView& parametric_coords_, + const RealVecView& values_, + const IntMatView& indices_, + const IntVecView& dimensions_) + : parametric_coords(parametric_coords_), + values(values_), + indices(indices_), + dimensions(dimensions_) {}; + + RealVecView linear_interpolation() + { + int dim = dimensions.extent(0); + int N = parametric_coords.extent(0); + RealVecView interpolated_values("approximated values", N); + auto parametric_coords_ = parametric_coords; + auto dimensions_ = dimensions; + auto values_ = values; + auto indices_ = indices; + Kokkos::parallel_for( + "linear interpolation function", N, KOKKOS_LAMBDA(int j) { + double linear_basis_each_dir[MAX_DIM] = {0.0}; + auto parametric_coord_each_point = + Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); + auto index_each_point = Kokkos::subview(indices_, j, Kokkos::ALL()); + basis_function(parametric_coord_each_point, linear_basis_each_dir); + auto approx_value = linear_interpolant( + dimensions_, linear_basis_each_dir, index_each_point, values_); + interpolated_values(j) = approx_value; + }); + + return interpolated_values; + } }; +struct Result +{ + IntMatView indices_pts; + RealMatView parametric_coords; +}; -struct Result{ - IntMatView indices_pts; - RealMatView parametric_coords; -}; - -Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, const RealVecView& range){ - - int dim = num_bins.extent(0); - Result result; - int N = points.extent(0); - result.indices_pts = IntMatView("indices",N,dim); - result.parametric_coords = RealMatView("parametric_coordinates",N, dim); - - Kokkos::parallel_for(N, KOKKOS_LAMBDA(int j){ - int indcs[MAX_DIM] = {0}; - double limits[MAX_DIM] = {0.0}; - double parametric_coord_each[MAX_DIM] = {0.0}; - auto point_coord = Kokkos::subview(points, j , Kokkos::ALL()); - find_indices(num_bins, range, point_coord, indcs); - find_limits(num_bins, range, indcs, limits); - evaluateParametricCoord( point_coord, limits, parametric_coord_each); - for (int i = 0; i < dim; ++i){ - result.indices_pts(j,i) = indcs[i]; - result.parametric_coords(j,i) = parametric_coord_each[i]; - } - +Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, + const RealVecView& range) +{ + + int dim = num_bins.extent(0); + Result result; + int N = points.extent(0); + result.indices_pts = IntMatView("indices", N, dim); + result.parametric_coords = RealMatView("parametric_coordinates", N, dim); + + Kokkos::parallel_for( + N, KOKKOS_LAMBDA(int j) { + int indcs[MAX_DIM] = {0}; + double limits[MAX_DIM] = {0.0}; + double parametric_coord_each[MAX_DIM] = {0.0}; + auto point_coord = Kokkos::subview(points, j, Kokkos::ALL()); + find_indices(num_bins, range, point_coord, indcs); + find_limits(num_bins, range, indcs, limits); + evaluateParametricCoord(point_coord, limits, parametric_coord_each); + for (int i = 0; i < dim; ++i) { + result.indices_pts(j, i) = indcs[i]; + result.parametric_coords(j, i) = parametric_coord_each[i]; + } }); - return result; - + return result; } KOKKOS_INLINE_FUNCTION -double test_function(double* coord){ - double fun_value = 0; - int dim = 5; - for (int i = 0; i < dim; ++i){ - fun_value += coord[i]; - } - return fun_value; - } +double test_function(double* coord) +{ + double fun_value = 0; + int dim = 5; + for (int i = 0; i < dim; ++i) { + fun_value += coord[i]; + } + return fun_value; +} #endif - - - diff --git a/src/pcms/interpolator/multidimarray.hpp b/src/pcms/interpolator/multidimarray.hpp index 04b6fac1..1423f45c 100644 --- a/src/pcms/interpolator/multidimarray.hpp +++ b/src/pcms/interpolator/multidimarray.hpp @@ -3,34 +3,34 @@ #include - -using RealMatView = Kokkos::View; -using IntMatView = Kokkos::View; +using RealMatView = Kokkos::View; +using IntMatView = Kokkos::View; using RealVecView = Kokkos::View; using IntVecView = Kokkos::View; using HostIntVecView = Kokkos::View; KOKKOS_INLINE_FUNCTION -int calculateIndex(const IntVecView& dimensions, const int* indices) { - - int dim = dimensions.extent(0); - int index = 0; - int multiplier = 1; - for (int i = dim - 1; i >= 0; --i) { - index += indices[i] * multiplier; - multiplier *= dimensions(i); - } - return index; -} +int calculateIndex(const IntVecView& dimensions, const int* indices) +{ + int dim = dimensions.extent(0); + int index = 0; + int multiplier = 1; + for (int i = dim - 1; i >= 0; --i) { + index += indices[i] * multiplier; + multiplier *= dimensions(i); + } + return index; +} -int calculateTotalSize(const HostIntVecView& dimensions){ - int dim = dimensions.extent(0); - int size = 1; - for (int i = 0; i < dim; ++i){ - size *= dimensions(i); - } - return size; +int calculateTotalSize(const HostIntVecView& dimensions) +{ + int dim = dimensions.extent(0); + int size = 1; + for (int i = 0; i < dim; ++i) { + size *= dimensions(i); + } + return size; } -#endif +#endif diff --git a/src/pcms/interpolator/points.hpp b/src/pcms/interpolator/points.hpp index 7bc9d019..25d074c1 100644 --- a/src/pcms/interpolator/points.hpp +++ b/src/pcms/interpolator/points.hpp @@ -4,7 +4,8 @@ #include #include -struct Coord { +struct Coord +{ double x, y, z; }; @@ -14,19 +15,20 @@ using member_type = typename Kokkos::TeamPolicy<>::member_type; // alias for scratch view using ScratchSpace = - typename Kokkos::DefaultExecutionSpace::scratch_memory_space; + typename Kokkos::DefaultExecutionSpace::scratch_memory_space; using ScratchMatView = - typename Kokkos::View>; + typename Kokkos::View>; using ScratchVecView = - typename Kokkos::View>; + typename Kokkos::View>; using PointsViewType = Kokkos::View; using MatViewType = Kokkos::View; -struct Points { +struct Points +{ PointsViewType coordinates; }; diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp index e236f6d6..daa2b716 100644 --- a/src/pcms/interpolator/queue_visited.hpp +++ b/src/pcms/interpolator/queue_visited.hpp @@ -12,12 +12,13 @@ using namespace std; using namespace Omega_h; -class queue { - private: +class queue +{ +private: LO queue_array[MAX_SIZE_QUEUE]; int first = 0, last = -1, count = 0; - public: +public: OMEGA_H_INLINE queue() {} @@ -40,12 +41,13 @@ class queue { bool isFull() const; }; -class track { - private: +class track +{ +private: LO tracking_array[MAX_SIZE_TRACK]; int first = 0, last = -1, count = 0; - public: +public: OMEGA_H_INLINE track() {} @@ -63,7 +65,8 @@ class track { }; OMEGA_H_INLINE -void queue::push_back(const int& item) { +void queue::push_back(const int& item) +{ if (count == MAX_SIZE_QUEUE) { printf("queue is full %d\n", count); return; @@ -74,7 +77,8 @@ void queue::push_back(const int& item) { } OMEGA_H_INLINE -void queue::pop_front() { +void queue::pop_front() +{ if (count == 0) { printf("queue is empty\n"); return; @@ -84,16 +88,26 @@ void queue::pop_front() { } OMEGA_H_INLINE -int queue::front() { return queue_array[first]; } +int queue::front() +{ + return queue_array[first]; +} OMEGA_H_INLINE -bool queue::isEmpty() const { return count == 0; } +bool queue::isEmpty() const +{ + return count == 0; +} OMEGA_H_INLINE -bool queue::isFull() const { return count == MAX_SIZE_QUEUE; } +bool queue::isFull() const +{ + return count == MAX_SIZE_QUEUE; +} OMEGA_H_INLINE -void track::push_back(const int& item) { +void track::push_back(const int& item) +{ if (count == MAX_SIZE_TRACK) { printf("track is full %d\n", count); return; @@ -104,7 +118,8 @@ void track::push_back(const int& item) { } OMEGA_H_INLINE -bool track::notVisited(const int& item) { +bool track::notVisited(const int& item) +{ int id; for (int i = 0; i < count; ++i) { id = (first + i) % MAX_SIZE_TRACK; @@ -116,6 +131,9 @@ bool track::notVisited(const int& item) { } OMEGA_H_INLINE -int track::size() { return count; } +int track::size() +{ + return count; +} #endif From a7b5e332084cfd7406f27a7e4201a279010a287a Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Tue, 29 Oct 2024 14:09:28 -0400 Subject: [PATCH 28/66] revert clangformatting in CMakeLists.txt and remove const from support.ptr --- src/pcms/interpolator/CMakeLists.txt | 47 +++++++++++++++++----------- src/pcms/interpolator/adj_search.hpp | 4 +-- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index dd589789..6c524e42 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -1,28 +1,37 @@ cmake_minimum_required(VERSION 3.20) - project(Interpolator) +project(Interpolator) - set(HEADER_FILES adj_search_dega2.hpp MLSInterpolation.hpp points - .hpp adj_search.hpp MLSCoefficients.hpp queue_visited - .hpp linear_interpolant.hpp multidimarray.hpp) +set(HEADER_FILES + adj_search_dega2.hpp + MLSInterpolation.hpp + points.hpp + adj_search.hpp + MLSCoefficients.hpp + queue_visited.hpp + linear_interpolant.hpp + multidimarray.hpp +) - install(FILES ${HEADER_FILES} DESTINATION include / pcms / interpolator) +install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) - add_library(pcms_interpolator INTERFACE) +add_library(pcms_interpolator INTERFACE) - target_link_libraries(pcms_interpolator INTERFACE pcms::core) +target_link_libraries(pcms_interpolator INTERFACE pcms::core) - target_include_directories( - pcms_interpolator INTERFACE - $ - $ - $) +target_include_directories(pcms_interpolator INTERFACE + $ + $ + $) - install(TARGETS pcms_interpolator EXPORT interpolatorTargets - INCLUDES DESTINATION include / - pcms / interpolator) - install(EXPORT interpolatorTargets FILE interpolatorTargets - .cmake NAMESPACE pcms::DESTINATION lib / - cmake / pcms) +install(TARGETS pcms_interpolator + EXPORT interpolatorTargets + INCLUDES DESTINATION include/pcms/interpolator) + + +install(EXPORT interpolatorTargets + FILE interpolatorTargets.cmake + NAMESPACE pcms:: + DESTINATION lib/cmake/pcms) + diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 9643695b..ce826117 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -35,12 +35,12 @@ class FindSupports FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; - void adjBasedSearch(const Write& supports_ptr, Write& nSupports, + void adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, bool is_build_csr_call); }; -void FindSupports::adjBasedSearch(const Write& supports_ptr, +void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, bool is_build_csr_call) { From 1f60fe1f41ebea9afcea4ce857e6d27ad9693965 Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Thu, 31 Oct 2024 17:25:32 -0400 Subject: [PATCH 29/66] merge centroid->node interp with general added some checks too that ends up breaking the code and some debugging helper functions --- src/pcms/interpolator/CMakeLists.txt | 1 - src/pcms/interpolator/MLSCoefficients.hpp | 49 ++ src/pcms/interpolator/MLSInterpolation.hpp | 20 +- src/pcms/interpolator/adj_search.hpp | 374 +++++++++++++-- src/pcms/interpolator/adj_search_dega2.hpp | 324 ------------- test/test_rbf_interp.cpp | 532 ++++++++++----------- 6 files changed, 673 insertions(+), 627 deletions(-) delete mode 100644 src/pcms/interpolator/adj_search_dega2.hpp diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 6c524e42..683ec387 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -3,7 +3,6 @@ cmake_minimum_required(VERSION 3.20) project(Interpolator) set(HEADER_FILES - adj_search_dega2.hpp MLSInterpolation.hpp points.hpp adj_search.hpp diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index 95fb0f3d..ccef208b 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -2,6 +2,7 @@ #define MLS_COEFFICIENTS_HPP #include +#include #include "points.hpp" @@ -102,6 +103,9 @@ double rbf(double r_sq, double rho_sq) { double phi; double r = sqrt(r_sq); + OMEGA_H_CHECK_PRINTF( + rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", + rho_sq); double rho = sqrt(rho_sq); double ratio = r / rho; double limit = 1 - ratio; @@ -114,6 +118,9 @@ double rbf(double r_sq, double rho_sq) phi = phi * pow(limit, 6); } + OMEGA_H_CHECK_PRINTF(!std::isnan(phi), + "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n", + r_sq, rho_sq); return phi; } @@ -146,7 +153,14 @@ void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); for (int k = 0; k < N; k++) { + OMEGA_H_CHECK_PRINTF(!std::isnan(vandermonde_mat_row(k)), + "ERROR: vandermonde_mat_row is NaN for k = %d\n", k); + OMEGA_H_CHECK_PRINTF(!std::isnan(Phi(j)), + "ERROR: Phi(j) in PTphiMatrix is NaN for j = %d\n", j); pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); + OMEGA_H_CHECK_PRINTF( + !std::isnan(pt_phi(k, j)), + "ERROR: pt_phi in PTphiMatrix is NaN for k = %d, j = %d\n", k, j); } } @@ -161,6 +175,10 @@ void PhiVector(ScratchVecView Phi, const Coord target_point, double dy = target_point.y - local_source_points(j, 1); double ds_sq = dx * dx + dy * dy; Phi(j) = rbf(ds_sq, cuttoff_dis_sq); + OMEGA_H_CHECK_PRINTF(!std::isnan(Phi(j)), + "ERROR: Phi(j) in PhiVector is NaN for j = %d " + "ds_sq=%.16f, cuttoff_dis_sq=%.16f", + j, ds_sq, cuttoff_dis_sq); } // matrix matrix multiplication @@ -178,9 +196,15 @@ void MatMatMul(member_type team, ScratchMatView& moment_matrix, Kokkos::parallel_reduce( Kokkos::ThreadVectorRange(team, K), [=](const int k, double& lsum) { + OMEGA_H_CHECK_PRINTF(!std::isnan(pt_phi(i, k)), + "ERROR: pt_phi is NaN for i = %d\n", i); + OMEGA_H_CHECK_PRINTF(!std::isnan(vandermonde(k, j)), + "ERROR: vandermonde is NaN for k = %d\n", k); lsum += pt_phi(i, k) * vandermonde(k, j); }, sum); + OMEGA_H_CHECK_PRINTF(!std::isnan(sum), + "ERROR: sum is NaN for i = %d, j = %d\n", i, j); moment_matrix(i, j) = sum; }); }); @@ -200,6 +224,8 @@ void MatVecMul(member_type team, const ScratchVecView& vector, [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, sum); result(i) = sum; + OMEGA_H_CHECK_PRINTF(!std::isnan(result(i)), + "ERROR: sum is NaN for i = %d\n", i); }); // team.team_barrier(); } @@ -213,6 +239,8 @@ void dot_product(member_type team, const ScratchVecView& result_sub, for (int j = 0; j < N; ++j) { target_value += result_sub(j) * SupportValues_sub[j]; } + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), + "ERROR: NaN found in dot_product: N: %d\n", N); } // moment matrix @@ -253,7 +281,16 @@ void inverse_matrix(member_type team, const ScratchMatView& matrix, for (int k = 0; k < j; ++k) { sum += lower(j, k) * lower(j, k); } + OMEGA_H_CHECK_PRINTF(!std::isnan(matrix(j, j)), + "ERROR: matrix(j,j) is NaN: j = %d\n", j); + OMEGA_H_CHECK_PRINTF( + matrix(j, j) - sum >= -1e-10, // TODO: how to check this reliably? + "ERROR: (matrix(j,j) - sum) is negative: mat(jj)=%.16f, sum = %.16f \n", + matrix(j, j), sum); lower(j, j) = sqrt(matrix(j, j) - sum); + OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, j)), + "Lower in inverse_matrix is NaN (j,j) = (%d,%d)\n", + j, j); }); team.team_barrier(); @@ -265,6 +302,12 @@ void inverse_matrix(member_type team, const ScratchMatView& matrix, } lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); lower(j, i) = lower(i, j); + OMEGA_H_CHECK_PRINTF(!std::isnan(lower(i, j)), + "Lower in inverse_matrix is NaN (i,j) = (%d,%d)\n", + i, j); + OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, i)), + "Lower in inverse_matrix is NaN (j,i) = (%d,%d)\n", + j, i); }); team.team_barrier(); @@ -278,6 +321,9 @@ void inverse_matrix(member_type team, const ScratchMatView& matrix, forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); } forward_matrix(j, i) /= lower(j, j); + OMEGA_H_CHECK_PRINTF(!std::isnan(forward_matrix(j, i)), + "Forward in inverse_matrix is NaN (j,i) = (%d,%d)\n", + j, i); } }); @@ -291,6 +337,9 @@ void inverse_matrix(member_type team, const ScratchMatView& matrix, solution(j, i) -= lower(j, k) * solution(k, i); } solution(j, i) /= lower(j, j); + OMEGA_H_CHECK_PRINTF( + !std::isnan(solution(j, i)), + "Solution in inverse_matrix is NaN (j,i) = (%d,%d)\n", j, i); } }); } diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index 98e5bb82..5ebab07a 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -6,7 +6,6 @@ #include #include using namespace Omega_h; -using namespace pcms; Write mls_interpolation(const Reals source_values, const Reals source_coordinates, @@ -186,6 +185,10 @@ Write mls_interpolation(const Reals source_values, Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + OMEGA_H_CHECK_PRINTF( + radii2[i] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + radii2[i]); PhiVector(Phi, target_point, local_source_points, j, radii2[i]); }); @@ -194,10 +197,19 @@ Write mls_interpolation(const Reals source_values, Kokkos::parallel_reduce( Kokkos::TeamThreadRange(team, nsupports), [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); + OMEGA_H_CHECK_PRINTF(!std::isnan(sum_phi), + "ERROR: sum_phi is NaN for i=%d\n", i); + OMEGA_H_CHECK_PRINTF(sum_phi != 0, "ERROR: sum_phi is zero for i=%d\n", + i); // normalize phi with sum_phi - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { Phi(j) = Phi(j) / sum_phi; }); + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + OMEGA_H_CHECK_PRINTF( + !std::isnan(Phi(j)), + "ERROR: Phi(j) is NaN before normalization for j = %d\n", j); + Phi(j) = Phi(j) / sum_phi; + }); team.team_barrier(); @@ -222,6 +234,8 @@ Write mls_interpolation(const Reals source_values, Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { SupportValues(i) = source_values[support.supports_idx[start_ptr + i]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(SupportValues(i)), + "ERROR: NaN found: at support %d\n", i); }); double tgt_value = 0; diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index ce826117..94bff454 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -25,25 +25,75 @@ Real calculateDistance(const Real* p1, const Real* p2, const int dim) return dx * dx + dy * dy + dz * dz; } +void checkTargetPoints( + const Kokkos::View& results) +{ + Kokkos::fence(); + printf("INFO: Checking target points...\n"); + auto check_target_points = OMEGA_H_LAMBDA(LO i) + { + if (results(i).tri_id < 0) { + OMEGA_H_CHECK_PRINTF(results(i).tri_id >= 0, + "ERROR: Source cell id not found for target %d\n", + i); + printf("%d, ", i); + } + }; + parallel_for(results.size(), check_target_points, "check_target_points"); + Kokkos::fence(); + printf("\n"); +} + +void printSupportsForTarget(const LO target_id, const Write& supports_ptr, + const Write& nSupports, + const Write& support_idx) +{ + parallel_for( + nSupports.size(), OMEGA_H_LAMBDA(const LO id) { + if (id == target_id) { + LO start_ptr = supports_ptr[id]; // start support location for the + // current target vertex + LO end_ptr = + supports_ptr[id + + 1]; // end support location for the current target vertex + printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, + end_ptr - start_ptr, nSupports[id]); + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = support_idx[i]; + printf(", %d", cell_id); + } + printf("\n"); + } + }); +} + class FindSupports { private: Mesh& source_mesh; - Mesh& target_mesh; + Mesh& target_mesh; // TODO it's null when one mesh is used public: FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; + FindSupports(Mesh& mesh_) : source_mesh(mesh_), target_mesh(mesh_) {}; + void adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, bool is_build_csr_call); + + void adjBasedSearchCentroidNodes(Write& supports_ptr, + Write& nSupports, Write& support_idx, + Write& radii2, bool is_build_csr_call); }; -void FindSupports::adjBasedSearch(Write& supports_ptr, - Write& nSupports, Write& support_idx, - Write& radii2, bool is_build_csr_call) +void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call) { + // Source Mesh Info const auto& sourcePoints_coords = source_mesh.coords(); @@ -53,6 +103,7 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, // Target Mesh Info const auto& targetPoints_coords = target_mesh.coords(); const auto nvertices_target = target_mesh.nverts(); + OMEGA_H_CHECK(radii2.size() == nvertices_target); // CSR data structure of adjacent vertex information of each source vertex const auto& vert2vert = source_mesh.ask_star(VERT); @@ -74,11 +125,13 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, target_points(i, 1) = targetPoints_coords[i * dim + 1]; }); Kokkos::fence(); - pcms::GridPointSearch search_cell(source_mesh, 10, 10); + pcms::GridPointSearch search_cell(source_mesh, 1000, 1000); // get the cell id for each target point auto results = search_cell(target_points); + checkTargetPoints(results); + parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO id) { @@ -87,6 +140,10 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Real cutoffDistance = radii2[id]; LO source_cell_id = results(id).tri_id; + OMEGA_H_CHECK_PRINTF( + source_cell_id >= 0, + "ERROR: Source cell id not found for target %d (%f,%f)\n", id, + target_points(id, 0), target_points(id, 1)); const LO num_verts_in_dim = dim + 1; @@ -123,6 +180,15 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Real dist = calculateDistance(target_coords, support_coords, dim); if (dist <= cutoffDistance) { count++; + if (count >= 500) { + printf( + "Warning: count exceeds 500 for target %d with %d supports\n", id, + end_ptr - start_ptr); + printf("Warning: Target %d: coors: (%f, %f) and support %d: " + "coords: (%f, %f)\n", + id, target_coords[0], target_coords[1], vert_id, + support_coords[0], support_coords[1]); + } queue.push_back(vert_id); if (!is_build_csr_call) { LO idx_count = count - 1; @@ -153,6 +219,11 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, if (dist <= cutoffDistance) { count++; + if (count >= 500) { + // printf("Warning qne: count exceeds 500 for target %d with + // start %d and end %d radius2 %f adding neighbor %d\n", id, + // start, end, cutoffDistance, neighborIndex); + } queue.push_back(neighborIndex); if (!is_build_csr_call) { LO idx_count = count - 1; @@ -167,27 +238,153 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, }, "count the number of supports in each target point"); if (is_build_csr_call == false) { - // print supports for target 22 - parallel_for( - nvertices_target, // for each target vertex which is a node for this - // calculateDistance - OMEGA_H_LAMBDA(const LO id) { - if (id == 623) { - LO start_ptr = supports_ptr[id]; // start support location for the - // current target vertex - LO end_ptr = supports_ptr[id + 1]; // end support location for the - // current target vertex - printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", - id, end_ptr - start_ptr, nSupports[id]); - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex - LO cell_id = support_idx[i]; - printf(", %d", cell_id); - } - printf("\n"); + // printSupportsForTarget(2057, supports_ptr, nSupports, support_idx); + } +} + +void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, + Write& nSupports, + Write& support_idx, + Write& radii2, + bool is_build_csr_call) +{ + // Mesh Info + const auto& mesh_coords = source_mesh.coords(); + const auto& nvertices = source_mesh.nverts(); + const auto& dim = source_mesh.dim(); + const auto& nfaces = source_mesh.nfaces(); + + // CSR data structure of adjacent cell information of each vertex in a + // source_mesh + const auto& nodes2faces = source_mesh.ask_up(VERT, FACE); + const auto& n2f_ptr = nodes2faces.a2ab; + const auto& n2f_data = nodes2faces.ab2b; + const auto& faces2nodes = source_mesh.ask_down(FACE, VERT).ab2b; + + Write cell_centroids( + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); + + parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = dim * id; + cell_centroids[index] = centroid[0]; + cell_centroids[index + 1] = centroid[1]; + }); + // * Got the adj data and cell centroids + + parallel_for( + nvertices, // for each target vertex which is a node for this case + OMEGA_H_LAMBDA(const LO id) { + queue queue; + track visited; + const LO num_verts_in_dim = dim + 1; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + Real cutoffDistance = radii2[id]; // squared radii of the supports + + //? copying the target vertex coordinates + for (LO k = 0; k < dim; ++k) { + target_coords[k] = mesh_coords[id * dim + k]; + } + + LO start_counter; // start of support idx for the current target vertex + + // not being done in the first call + // where the support_idx start for the current target vertex + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; // start support location for the + // current target vertex + } + LO start_ptr = + n2f_ptr[id]; // start loc of the adj cells of the target node + LO end_ptr = + n2f_ptr[id + 1]; // end loc of the adj cells of the target node + + int count = 0; // number of supports for the current target vertex + // Initialize queue by pushing the cells in the neighborhood of the + // given target point + + for (LO i = start_ptr; i < end_ptr; + ++i) { // loop over adj cells to the target vertex + LO cell_id = n2f_data[i]; + visited.push_back(cell_id); // cell added to the visited list + + for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the + // centroid of the cell + support_coords[k] = cell_centroids[cell_id * dim + k]; } - } // end of lambda - ); + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(cell_id); + if (!is_build_csr_call) { // not being done in the first call + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = + cell_id; // add the support cell to the support_idx + } // end of support_idx check + } // end of distance check + } // end of loop over adj cells to the target vertex + + // loops over the queued cells from the neighborhood of the target + // vertex + while (!queue.isEmpty()) { // ? can queue be empty? + LO currentCell = queue.front(); + queue.pop_front(); + LO start = + currentCell * num_verts_in_dim; // start vert id of the current cell + LO end = start + num_verts_in_dim; // end vert id of the current cell + + for (LO i = start; i < end; + ++i) { // loop over the vertices of the current cell + LO current_vert_id = faces2nodes[i]; + LO start_ptr_current_vert = + n2f_ptr[current_vert_id]; // start loc of adj cells to current + // vertex + LO end_ptr_vert_current_vert = + n2f_ptr[current_vert_id + + 1]; // end loc of adj cells to current vertex + for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; + ++j) { // loop over adj cells to the current vertex + auto neighbor_cell_index = n2f_data[j]; // current cell + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighbor_cell_index)) { + visited.push_back(neighbor_cell_index); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + cell_centroids[neighbor_cell_index * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighbor_cell_index); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighbor_cell_index; + } // end of support_idx check + } // end of distance check + } // end of not visited check + } // end of loop over adj cells to the current vertex + } // end of loop over nodes + + } // end of while loop + + nSupports[id] = count; + }, // end of lambda + "count the number of supports in each target point"); + + if (is_build_csr_call == false) { + // printSupportsForTarget(2057, supports_ptr, nSupports, support_idx); } } @@ -212,16 +409,16 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Write nSupports(nvertices_target, 0, "number of supports in each target vertex"); - printf("Cut off distance: %f\n", cutoffDistance); + printf("INFO: Cut off distance: %f\n", cutoffDistance); Write radii2 = Write(nvertices_target, cutoffDistance, "squared radii of the supports"); if (!adapt_radius) { - printf("Fixed radius search... \n"); + printf("INFO: Fixed radius search... \n"); search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, radii2, true); } else { - printf("Adaptive radius search... \n"); + printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; while (true) { nSupports = Write(nvertices_target, 0, @@ -234,10 +431,11 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, local_max = (radii2[i] > local_max) ? radii2[i] : local_max; }, Kokkos::Max(max_radius)); - printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); SupportResults support; // create support every time to avoid complexity + Kokkos::fence(); search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, radii2, true); @@ -252,10 +450,11 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, }, min_reducer); - printf("min_supports_found: %d at loop %d, max_radius %f\n", + printf("INFO: min_supports_found: %d at loop %d, max_radius %f\n", min_supports_found, r_adjust_loop, max_radius); r_adjust_loop++; + Kokkos::fence(); if (min_supports_found >= min_req_support) { break; } @@ -268,11 +467,12 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, if (nSupports[i] < min_req_support) { Real factor = Real(min_req_support) / Real(nSupports[i]); factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor; - radii2[i] *= factor; + radii2[i] = radii2[i] * factor; } // nSupports[i] = 0; // reset the number of supports ? not sure if // needed }); + Kokkos::fence(); } printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); @@ -308,4 +508,116 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, return support; } +SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, + LO min_support = 12, bool adapt_radius = true) +{ + SupportResults support; + + FindSupports search(mesh); + + LO nvertices_target = mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + printf("INFO: Inside searchNeighbors 1\n"); + Write radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + printf("INFO: Cutoff distance: %f\n", cutoffDistance); + // this call gets the number of supports for each target vertex: nSupports and + // the squared radii of the supports (with might be increased in the search to + // have enough supports) + int r_adjust_loop = 0; + while (true) { // until the number of minimum support is met + // find maximum radius + Real max_radius = 0.0; + Kokkos::parallel_reduce( + "find max radius", nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (radii2[i] > local_max) ? radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + + nSupports = Write(nvertices_target, 0, + "number of supports in each target vertex"); + SupportResults support; // create support every time to avoid complexity + + search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, + support.supports_idx, radii2, true); + + Kokkos::fence(); + //* find the minimum number of supports + LO min_nSupports = 0; + Kokkos::parallel_reduce( + "find min number of supports", nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; + }, + Kokkos::Min(min_nSupports)); + + printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, + r_adjust_loop, max_radius); + r_adjust_loop++; + + if (min_nSupports >= min_support) { + break; + } + + Kokkos::fence(); + + // * update radius if nSupport is less that min_support + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_support) { + Real factor = Real(min_support) / Real(nSupports[i]); + factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; + radii2[i] *= factor; + } + nSupports[i] = 0; // ? might not be needed + }); + } // while loop + + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + + // search.adjBasedSearch(support.supports_ptr, nSupports, + // support.supports_idx, + // radii2, true); + + // offset array for the supports of each target vertex + support.supports_ptr = Write( + nvertices_target + 1, 0, "number of support source vertices in CSR format"); + + LO total_supports = 0; + + // get the total number of supports and fill the offset array + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? + // OMEGA_H_CHECK(nSupports[j] >= 15); + update += nSupports[j]; + if (final) { + support.supports_ptr[j + 1] = update; + } + }, + total_supports); + + printf("INFO: Inside searchNeighbors 3\n"); + Kokkos::fence(); + + support.supports_idx = Write( + total_supports, 0, "index of source supports of each target node"); + + printf("INFO: Total_supports: %d\n", total_supports); + // second call to get the actual support indices + // now sizes of support.supports_ptr and support.supports_idx are known and > + // 0 + search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, + support.supports_idx, radii2, false); + support.radii2 = radii2; + + mesh.add_tag(VERT, "support_radius", 1, radii2); + return support; +} + #endif diff --git a/src/pcms/interpolator/adj_search_dega2.hpp b/src/pcms/interpolator/adj_search_dega2.hpp deleted file mode 100644 index 6b3bc9d8..00000000 --- a/src/pcms/interpolator/adj_search_dega2.hpp +++ /dev/null @@ -1,324 +0,0 @@ -#ifndef ADJ_SEARCH_HPP -#define ADJ_SEARCH_HPP - -#include -#include - -#include "queue_visited.hpp" - -using namespace Omega_h; - -static constexpr int max_dim = 3; - -// TODO change this into span/mdspan -OMEGA_H_INLINE -Real calculateDistance(const Real* p1, const Real* p2, const int dim) -{ - Real dx, dy, dz; - dx = p1[0] - p2[0]; - dy = p1[1] - p2[1]; - if (dim != 3) { - dz = 0.0; - } else { - dz = p1[2] - p2[2]; - } - - return dx * dx + dy * dy + dz * dz; -} - -class FindSupports -{ -private: - Mesh& mesh; - -public: - FindSupports(Mesh& mesh_) : mesh(mesh_) {}; - - void adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, - bool is_build_csr_call); -}; - -void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, - Write& support_idx, Write& radii2, - bool is_build_csr_call) -{ - // Mesh Info - const auto& mesh_coords = mesh.coords(); - const auto& nvertices = mesh.nverts(); - const auto& dim = mesh.dim(); - const auto& nfaces = mesh.nfaces(); - - // CSR data structure of adjacent cell information of each vertex in a mesh - const auto& nodes2faces = mesh.ask_up(VERT, FACE); - const auto& n2f_ptr = nodes2faces.a2ab; - const auto& n2f_data = nodes2faces.ab2b; - const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; - - Write cell_centroids( - dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); - - parallel_for( - "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); - const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(mesh_coords, current_el_verts); - auto centroid = average(current_el_vert_coords); - int index = dim * id; - cell_centroids[index] = centroid[0]; - cell_centroids[index + 1] = centroid[1]; - }); - // * Got the adj data and cell centroids - - parallel_for( - nvertices, // for each target vertex which is a node for this case - OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; - const LO num_verts_in_dim = dim + 1; - Real target_coords[max_dim]; - Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; // squared radii of the supports - - //? copying the target vertex coordinates - for (LO k = 0; k < dim; ++k) { - target_coords[k] = mesh_coords[id * dim + k]; - } - - LO start_counter; // start of support idx for the current target vertex - - // not being done in the first call - // where the support_idx start for the current target vertex - if (!is_build_csr_call) { - start_counter = supports_ptr[id]; // start support location for the - // current target vertex - } - LO start_ptr = - n2f_ptr[id]; // start loc of the adj cells of the target node - LO end_ptr = - n2f_ptr[id + 1]; // end loc of the adj cells of the target node - - int count = 0; // number of supports for the current target vertex - // Initialize queue by pushing the cells in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex - LO cell_id = n2f_data[i]; - visited.push_back(cell_id); // cell added to the visited list - - for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the - // centroid of the cell - support_coords[k] = cell_centroids[cell_id * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - if (dist <= cutoffDistance) { - count++; - queue.push_back(cell_id); - if (!is_build_csr_call) { // not being done in the first call - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - cell_id; // add the support cell to the support_idx - } // end of support_idx check - } // end of distance check - } // end of loop over adj cells to the target vertex - - // loops over the queued cells from the neighborhood of the target - // vertex - while (!queue.isEmpty()) { // ? can queue be empty? - LO currentCell = queue.front(); - queue.pop_front(); - LO start = - currentCell * num_verts_in_dim; // start vert id of the current cell - LO end = start + num_verts_in_dim; // end vert id of the current cell - - for (LO i = start; i < end; - ++i) { // loop over the vertices of the current cell - LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = - n2f_ptr[current_vert_id]; // start loc of adj cells to current - // vertex - LO end_ptr_vert_current_vert = - n2f_ptr[current_vert_id + - 1]; // end loc of adj cells to current vertex - for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { // loop over adj cells to the current vertex - auto neighbor_cell_index = n2f_data[j]; // current cell - - // check if neighbor index is already in the queue to be checked - // TODO refactor this into a function - - if (visited.notVisited(neighbor_cell_index)) { - visited.push_back(neighbor_cell_index); - for (int k = 0; k < dim; ++k) { - support_coords[k] = - cell_centroids[neighbor_cell_index * dim + k]; - } - - Real dist = calculateDistance(target_coords, support_coords, dim); - - if (dist <= cutoffDistance) { - count++; - queue.push_back(neighbor_cell_index); - if (!is_build_csr_call) { - LO idx_count = count - 1; - support_idx[start_counter + idx_count] = neighbor_cell_index; - } // end of support_idx check - } // end of distance check - } // end of not visited check - } // end of loop over adj cells to the current vertex - } // end of loop over nodes - - } // end of while loop - - nSupports[id] = count; - }, // end of lambda - "count the number of supports in each target point"); - - if (is_build_csr_call == false) { - // print supports for target 22 - parallel_for( - nvertices, // for each target vertex which is a node for this - // calculateDistance - OMEGA_H_LAMBDA(const LO id) { - if (id == 22) { - LO start_ptr = supports_ptr[id]; // start support location for the - // current target vertex - LO end_ptr = supports_ptr[id + 1]; // end support location for the - // current target vertex - printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", - id, end_ptr - start_ptr, nSupports[id]); - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex - LO cell_id = support_idx[i]; - printf(", %d", cell_id); - } - printf("\n"); - } - } // end of lambda - ); - } -} - -struct SupportResults -{ - Write supports_ptr; - Write supports_idx; - Write radii2; // squared radii of the supports -}; - -SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support) -{ - SupportResults support; - - FindSupports search(mesh); - - LO nvertices_target = mesh.nverts(); - - Write nSupports(nvertices_target, 0, - "number of supports in each target vertex"); - - printf("Inside searchNeighbors 1\n"); - Write radii2 = Write(nvertices_target, cutoffDistance, - "squared radii of the supports"); - printf("cut off distance: %f\n", cutoffDistance); - // this call gets the number of supports for each target vertex: nSupports and - // the squared radii of the supports (with might be increased in the search to - // have enough supports) - int r_adjust_loop = 0; - while (true) { // until the number of minimum support is met - // find maximum radius - Real max_radius = 0.0; - Kokkos::parallel_reduce( - "find max radius", nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { - local_max = (radii2[i] > local_max) ? radii2[i] : local_max; - }, - Kokkos::Max(max_radius)); - printf("Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - - nSupports = Write(nvertices_target, 0, - "number of supports in each target vertex"); - SupportResults support; // create support every time to avoid complexity - - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - radii2, true); - - // printf("Inside searchNeighbors 2\n"); - Kokkos::fence(); - //* find the minimum number of supports - LO min_nSupports = 0; - Kokkos::parallel_reduce( - "find min number of supports", nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { - local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; - }, - Kokkos::Min(min_nSupports)); - - printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, - r_adjust_loop, max_radius); - r_adjust_loop++; - - if (min_nSupports >= min_support) { - break; - } - - Kokkos::fence(); - - // * update radius if nSupport is less that min_support - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - if (nSupports[i] < min_support) { - Real factor = Real(min_support) / Real(nSupports[i]); - factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; - radii2[i] *= factor; - } - nSupports[i] = 0; // ? might not be needed - }); - } - - printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); - - // search.adjBasedSearch(support.supports_ptr, nSupports, - // support.supports_idx, - // radii2, true); - - // offset array for the supports of each target vertex - support.supports_ptr = Write( - nvertices_target + 1, 0, "number of support source vertices in CSR format"); - - LO total_supports = 0; - - // get the total number of supports and fill the offset array - Kokkos::parallel_scan( - nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? - // OMEGA_H_CHECK(nSupports[j] >= 15); - update += nSupports[j]; - if (final) { - support.supports_ptr[j + 1] = update; - } - }, - total_supports); - - printf("Inside searchNeighbors 3\n"); - Kokkos::fence(); - - support.supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); - - printf("total_supports: %d\n", total_supports); - // second call to get the actual support indices - // now sizes of support.supports_ptr and support.supports_idx are known and > - // 0 - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, - radii2, false); - support.radii2 = radii2; - - mesh.add_tag(VERT, "support_radius", 1, radii2); - return support; -} - -#endif diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 01bd55ed..13088569 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -1,6 +1,6 @@ #include #include -#include +#include #include #include #include @@ -9,39 +9,42 @@ #include #include -using namespace std; -using namespace Omega_h; +using namespace std; +using namespace Omega_h; KOKKOS_INLINE_FUNCTION -double func_const(Coord& p) { +double func_const(Coord& p) +{ auto x = p.x; - auto y = p.y; + auto y = p.y; double Z = 3; return Z; } KOKKOS_INLINE_FUNCTION -double func_linear(Coord& p) { +double func_linear(Coord& p) +{ auto x = p.x; - auto y = p.y; + auto y = p.y; double Z = x + y; return Z; } KOKKOS_INLINE_FUNCTION -double func_quadratic(Coord& p) { +double func_quadratic(Coord& p) +{ auto x = p.x; - auto y = p.y; - double Z = pow(x,2) + pow(y,2); + auto y = p.y; + double Z = pow(x, 2) + pow(y, 2); return Z; } - KOKKOS_INLINE_FUNCTION -double func_cubic(Coord& p) { +double func_cubic(Coord& p) +{ auto x = p.x; - auto y = p.y; - double Z = pow(x,3) + pow(y,3); + auto y = p.y; + double Z = pow(x, 3) + pow(y, 3); return Z; } @@ -49,368 +52,361 @@ double func_cubic(Coord& p) { TEST_CASE("mls_interp_test") { - auto lib = Library{}; - auto world = lib.world(); - auto rank = lib.world()->rank(); - auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); - - Real cutoffDistance = 0.3; - Real tolerance = 0.05; - cutoffDistance = cutoffDistance * cutoffDistance; - - const auto dim = mesh.dim(); - - const auto& target_coordinates = mesh.coords(); - - const auto& nfaces = mesh.nfaces(); - - const auto& ntargets = mesh.nverts(); + auto lib = Library{}; + auto world = lib.world(); + auto rank = lib.world()->rank(); + auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); + Real cutoffDistance = 0.3; + Real tolerance = 0.05; + cutoffDistance = cutoffDistance * cutoffDistance; - Write radii2(ntargets, cutoffDistance, "populate initial square of cutoffdistance to all target points"); - Write source_coordinates( - dim * nfaces, 0, - "stores coordinates of cell centroid of each tri element"); + const auto dim = mesh.dim(); - const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + const auto& target_coordinates = mesh.coords(); - Kokkos::parallel_for( - "calculate the centroid in each tri element", nfaces, - OMEGA_H_LAMBDA(const LO id) { - const auto current_el_verts = gather_verts<3>(faces2nodes, id); - const Omega_h::Few, 3> current_el_vert_coords = - gather_vectors<3, 2>(target_coordinates, current_el_verts); - auto centroid = average(current_el_vert_coords); - int index = 2 * id; - source_coordinates[index] = centroid[0]; - source_coordinates[index + 1] = centroid[1]; - }); - - Points source_points; + const auto& nfaces = mesh.nfaces(); - source_points.coordinates = - PointsViewType("Number of local source supports", nfaces); - Kokkos::parallel_for( - "target points", nfaces, KOKKOS_LAMBDA(int j) { - source_points.coordinates(j).x = source_coordinates[j * dim]; - source_points.coordinates(j).y = source_coordinates[j * dim + 1]; - }); + const auto& ntargets = mesh.nverts(); + Write radii2( + ntargets, cutoffDistance, + "populate initial square of cutoffdistance to all target points"); + Write source_coordinates( + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); - Points target_points; + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; - target_points.coordinates = - PointsViewType("Number of local source supports", mesh.nverts()); - Kokkos::parallel_for( - "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { - target_points.coordinates(j).x = target_coordinates[j * dim]; - target_points.coordinates(j).y = target_coordinates[j * dim + 1]; - }); + Kokkos::parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = 2 * id; + source_coordinates[index] = centroid[0]; + source_coordinates[index + 1] = centroid[1]; + }); + Points source_points; - SECTION("test interpo degree 1 poly degree 0") - { + source_points.coordinates = + PointsViewType("Number of local source supports", nfaces); + Kokkos::parallel_for( + "target points", nfaces, KOKKOS_LAMBDA(int j) { + source_points.coordinates(j).x = source_coordinates[j * dim]; + source_points.coordinates(j).y = source_coordinates[j * dim + 1]; + }); - int degree = 1; - LO min_num_supports = 10; + Points target_points; - Write source_values(nfaces, 0, "exact target values"); + target_points.coordinates = + PointsViewType("Number of local source supports", mesh.nverts()); + Kokkos::parallel_for( + "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { + target_points.coordinates(j).x = target_coordinates[j * dim]; + target_points.coordinates(j).y = target_coordinates[j * dim + 1]; + }); - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_const(source_points.coordinates(i)); - }); + SECTION("test interpo degree 1 poly degree 0") + { - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + int degree = 1; + LO min_num_supports = 10; - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + Write source_values(nfaces, 0, "exact target values"); - auto host_approx_target_values = HostRead(approx_target_values); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_const(source_points.coordinates(i)); + }); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + auto host_approx_target_values = HostRead(approx_target_values); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_const(target_points.coordinates(i)); - }); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); - auto host_exact_target_values = HostRead(exact_target_values); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_const(target_points.coordinates(i)); + }); - int m = exact_target_values.size(); - int n = approx_target_values.size(); + auto host_exact_target_values = HostRead(exact_target_values); - REQUIRE(m == n); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } + } - SECTION("test interpolation degree 1 poly degree 1") - { + SECTION("test interpolation degree 1 poly degree 1") + { + int degree = 2; + LO min_num_supports = 16; - int degree = 2; - LO min_num_supports = 16; - - Write source_values(nfaces, 0, "exact target values"); + Write source_values(nfaces, 0, "exact target values"); - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_linear(source_points.coordinates(i)); - }); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_linear(source_points.coordinates(i)); + }); - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); - auto host_approx_target_values = HostRead(approx_target_values); + auto host_approx_target_values = HostRead(approx_target_values); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_linear(target_points.coordinates(i)); + }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + auto host_exact_target_values = HostRead(exact_target_values); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_linear(target_points.coordinates(i)); - }); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } + } + SECTION("test interpo degree 2 poly degree 0") + { - SECTION("test interpo degree 2 poly degree 0") - { - - int degree = 2; - LO min_num_supports = 16; + int degree = 2; + LO min_num_supports = 16; - Write source_values(nfaces, 0, "exact target values"); + Write source_values(nfaces, 0, "exact target values"); - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_const(source_points.coordinates(i)); - }); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_const(source_points.coordinates(i)); + }); - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); - auto host_approx_target_values = HostRead(approx_target_values); + auto host_approx_target_values = HostRead(approx_target_values); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_const(target_points.coordinates(i)); + }); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + auto host_exact_target_values = HostRead(exact_target_values); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_const(target_points.coordinates(i)); - }); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } + } - SECTION("test interpolation degree 2 poly degree 1") - { - + SECTION("test interpolation degree 2 poly degree 1") + { - int degree = 2; - LO min_num_supports = 16; - - Write source_values(nfaces, 0, "exact target values"); + int degree = 2; + LO min_num_supports = 16; - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_linear(source_points.coordinates(i)); - }); + Write source_values(nfaces, 0, "exact target values"); - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_linear(source_points.coordinates(i)); + }); - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto host_approx_target_values = HostRead(approx_target_values); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); + auto host_approx_target_values = HostRead(approx_target_values); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_linear(target_points.coordinates(i)); + }); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_linear(target_points.coordinates(i)); - }); + auto host_exact_target_values = HostRead(exact_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } + } + SECTION("test interpolation degree 2 poly degree 2") + { - SECTION("test interpolation degree 2 poly degree 2") - { - - - int degree = 2; - LO min_num_supports = 16; - - Write source_values(nfaces, 0, "exact target values"); + int degree = 2; + LO min_num_supports = 16; - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_quadratic(source_points.coordinates(i)); - }); + Write source_values(nfaces, 0, "exact target values"); - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_quadratic(source_points.coordinates(i)); + }); - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto host_approx_target_values = HostRead(approx_target_values); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); + auto host_approx_target_values = HostRead(approx_target_values); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_quadratic(target_points.coordinates(i)); + }); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_quadratic(target_points.coordinates(i)); - }); + auto host_exact_target_values = HostRead(exact_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } + } + SECTION("test interpolation degree 3 poly degree 2") + { - - SECTION("test interpolation degree 3 poly degree 2") - { - - - int degree = 3; - LO min_num_supports = 20; - - Write source_values(nfaces, 0, "exact target values"); + int degree = 3; + LO min_num_supports = 20; - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_quadratic(source_points.coordinates(i)); - }); + Write source_values(nfaces, 0, "exact target values"); - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_quadratic(source_points.coordinates(i)); + }); - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto host_approx_target_values = HostRead(approx_target_values); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); + auto host_approx_target_values = HostRead(approx_target_values); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_quadratic(target_points.coordinates(i)); + }); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_quadratic(target_points.coordinates(i)); - }); + auto host_exact_target_values = HostRead(exact_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } + } + SECTION("test interpolation degree 3 poly degree 3") + { - SECTION("test interpolation degree 3 poly degree 3") - { - - - int degree = 3; - LO min_num_supports = 20; - - Write source_values(nfaces, 0, "exact target values"); + int degree = 3; + LO min_num_supports = 20; - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_cubic(source_points.coordinates(i)); - }); + Write source_values(nfaces, 0, "exact target values"); - SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func_cubic(source_points.coordinates(i)); + }); - auto approx_target_values = mls_interpolation( - source_values,source_coordinates, target_coordinates, support, dim, degree, support.radii2); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto host_approx_target_values = HostRead(approx_target_values); + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2); + auto host_approx_target_values = HostRead(approx_target_values); + Write exact_target_values(mesh.nverts(), 0, "exact target values"); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_cubic(target_points.coordinates(i)); + }); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_cubic(target_points.coordinates(i)); - }); + auto host_exact_target_values = HostRead(exact_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT(host_exact_target_values[i], Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + REQUIRE(m == n); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); } - + } } - From a2b1563a6a4d071c94c6c8ce4d2999fb2cf87bd7 Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Thu, 31 Oct 2024 20:35:06 -0400 Subject: [PATCH 30/66] refactor removed some unneccesary comments --- src/pcms/interpolator/MLSCoefficients.hpp | 5 - src/pcms/interpolator/MLSInterpolation.hpp | 4 - src/pcms/interpolator/adj_search.hpp | 166 ++++++--------------- 3 files changed, 48 insertions(+), 127 deletions(-) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index ccef208b..bcad3adf 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -20,8 +20,6 @@ double func(Coord& p) } // computes the slice lengths of the of the polynomial basis -// - KOKKOS_INLINE_FUNCTION void basisSliceLengths(Kokkos::View& array) { @@ -44,7 +42,6 @@ void basisSliceLengths(Kokkos::View& array) } // finds the size of the polynomial basis vector - KOKKOS_INLINE_FUNCTION int basisSize(const Kokkos::View& array) { @@ -62,12 +59,10 @@ int basisSize(const Kokkos::View& array) } // evaluates the polynomial basis - KOKKOS_INLINE_FUNCTION void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, Coord& p) { - basis_monomial(0) = 1; int dim = slice_length.extent(1); int degree = slice_length.extent(0); diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index 5ebab07a..dbac8f39 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -28,7 +28,6 @@ Write mls_interpolation(const Reals source_values, Kokkos::View host_slice_length( "stores slice length of polynomial basis in host", degree, dim); Kokkos::deep_copy(host_slice_length, 0); - basisSliceLengths(host_slice_length); auto basis_size = basisSize(host_slice_length); @@ -85,7 +84,6 @@ Write mls_interpolation(const Reals source_values, int i = team.league_rank(); int start_ptr = support.supports_ptr[i]; int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); @@ -166,9 +164,7 @@ Write mls_interpolation(const Reals source_values, }); Coord target_point; - target_point.x = target_coordinates[i * dim]; - target_point.y = target_coordinates[i * dim + 1]; if (dim == 3) { diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 94bff454..90dd1d59 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -21,7 +21,6 @@ Real calculateDistance(const Real* p1, const Real* p2, const int dim) } else { dz = p1[2] - p2[2]; } - return dx * dx + dy * dy + dz * dz; } @@ -51,15 +50,11 @@ void printSupportsForTarget(const LO target_id, const Write& supports_ptr, parallel_for( nSupports.size(), OMEGA_H_LAMBDA(const LO id) { if (id == target_id) { - LO start_ptr = supports_ptr[id]; // start support location for the - // current target vertex - LO end_ptr = - supports_ptr[id + - 1]; // end support location for the current target vertex + LO start_ptr = supports_ptr[id]; + LO end_ptr = supports_ptr[id + 1]; printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, end_ptr - start_ptr, nSupports[id]); - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex + for (LO i = start_ptr; i < end_ptr; ++i) { LO cell_id = support_idx[i]; printf(", %d", cell_id); } @@ -94,42 +89,29 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, bool is_build_csr_call) { - // Source Mesh Info - const auto& sourcePoints_coords = source_mesh.coords(); const auto nvertices_source = source_mesh.nverts(); const auto dim = source_mesh.dim(); - // Target Mesh Info const auto& targetPoints_coords = target_mesh.coords(); const auto nvertices_target = target_mesh.nverts(); OMEGA_H_CHECK(radii2.size() == nvertices_target); - // CSR data structure of adjacent vertex information of each source vertex const auto& vert2vert = source_mesh.ask_star(VERT); const auto& v2v_ptr = vert2vert.a2ab; const auto& v2v_data = vert2vert.ab2b; - - // CSR data structure of vertex information of each cell in source mesh - - // CSR data structure of adjacent vertex information of each source vertex - // dim == 2; ask vertices of tri (3 vertices for each tri) & if dim ==3; ask - // vertices of tetrahedron (4 vertices for each tet) const auto& cells2verts = source_mesh.ask_verts_of(dim); Kokkos::View target_points("test_points", nvertices_target); - parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO i) { target_points(i, 0) = targetPoints_coords[i * dim]; target_points(i, 1) = targetPoints_coords[i * dim + 1]; }); Kokkos::fence(); - pcms::GridPointSearch search_cell(source_mesh, 1000, 1000); - // get the cell id for each target point + pcms::GridPointSearch search_cell(source_mesh, 10, 10); auto results = search_cell(target_points); - checkTargetPoints(results); parallel_for( @@ -146,13 +128,9 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, target_points(id, 0), target_points(id, 1)); const LO num_verts_in_dim = dim + 1; - LO start_ptr = source_cell_id * num_verts_in_dim; - LO end_ptr = start_ptr + num_verts_in_dim; - Real target_coords[max_dim]; - Real support_coords[max_dim]; for (LO k = 0; k < dim; ++k) { @@ -160,15 +138,21 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, } LO start_counter; - if (!is_build_csr_call) { start_counter = supports_ptr[id]; } - int count = 0; - // Initialize queue by pushing the vertices in the neighborhood of the - // given target point + // * Method: + // 1. Get the vertices of the source cell (source cell is the cell in the + // source mesh in which the target point lies): done above + // 2. Using those 3 vertices, get the adjacent vertices of those 3 + // vertices and go on until the queue is empty + // 3. Already visited vertices are stored in visited and the vertices to + // be checked (dist < cutoff) are stored in the queue + // 4. If not CSR building call, store the support vertices in support_idx + // * Method + int count = 0; for (LO i = start_ptr; i < end_ptr; ++i) { LO vert_id = cells2verts[i]; visited.push_back(vert_id); @@ -220,9 +204,9 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, if (dist <= cutoffDistance) { count++; if (count >= 500) { - // printf("Warning qne: count exceeds 500 for target %d with - // start %d and end %d radius2 %f adding neighbor %d\n", id, - // start, end, cutoffDistance, neighborIndex); + printf("Warning: count exceeds 500 for target %d with start %d " + "and end %d radius2 %f adding neighbor %d\n", + id, start, end, cutoffDistance, neighborIndex); } queue.push_back(neighborIndex); if (!is_build_csr_call) { @@ -235,7 +219,7 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, } // end of while loop nSupports[id] = count; - }, + }, // lambda "count the number of supports in each target point"); if (is_build_csr_call == false) { // printSupportsForTarget(2057, supports_ptr, nSupports, support_idx); @@ -254,8 +238,6 @@ void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, const auto& dim = source_mesh.dim(); const auto& nfaces = source_mesh.nfaces(); - // CSR data structure of adjacent cell information of each vertex in a - // source_mesh const auto& nodes2faces = source_mesh.ask_up(VERT, FACE); const auto& n2f_ptr = nodes2faces.a2ab; const auto& n2f_data = nodes2faces.ab2b; @@ -278,44 +260,33 @@ void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, // * Got the adj data and cell centroids parallel_for( - nvertices, // for each target vertex which is a node for this case + nvertices, OMEGA_H_LAMBDA(const LO id) { queue queue; track visited; const LO num_verts_in_dim = dim + 1; Real target_coords[max_dim]; Real support_coords[max_dim]; - Real cutoffDistance = radii2[id]; // squared radii of the supports + Real cutoffDistance = radii2[id]; //? copying the target vertex coordinates for (LO k = 0; k < dim; ++k) { target_coords[k] = mesh_coords[id * dim + k]; } - LO start_counter; // start of support idx for the current target vertex - - // not being done in the first call - // where the support_idx start for the current target vertex + LO start_counter; if (!is_build_csr_call) { - start_counter = supports_ptr[id]; // start support location for the - // current target vertex + start_counter = supports_ptr[id]; } - LO start_ptr = - n2f_ptr[id]; // start loc of the adj cells of the target node - LO end_ptr = - n2f_ptr[id + 1]; // end loc of the adj cells of the target node + LO start_ptr = n2f_ptr[id]; + LO end_ptr = n2f_ptr[id + 1]; - int count = 0; // number of supports for the current target vertex - // Initialize queue by pushing the cells in the neighborhood of the - // given target point - - for (LO i = start_ptr; i < end_ptr; - ++i) { // loop over adj cells to the target vertex + int count = 0; + for (LO i = start_ptr; i < end_ptr; ++i) { LO cell_id = n2f_data[i]; - visited.push_back(cell_id); // cell added to the visited list + visited.push_back(cell_id); - for (LO k = 0; k < dim; ++k) { // support vertex coordinates are the - // centroid of the cell + for (LO k = 0; k < dim; ++k) { support_coords[k] = cell_centroids[cell_id * dim + k]; } @@ -323,35 +294,26 @@ void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, if (dist <= cutoffDistance) { count++; queue.push_back(cell_id); - if (!is_build_csr_call) { // not being done in the first call + if (!is_build_csr_call) { LO idx_count = count - 1; - support_idx[start_counter + idx_count] = - cell_id; // add the support cell to the support_idx - } // end of support_idx check - } // end of distance check - } // end of loop over adj cells to the target vertex - - // loops over the queued cells from the neighborhood of the target - // vertex + support_idx[start_counter + idx_count] = cell_id; + } + } + } + while (!queue.isEmpty()) { // ? can queue be empty? LO currentCell = queue.front(); queue.pop_front(); - LO start = - currentCell * num_verts_in_dim; // start vert id of the current cell - LO end = start + num_verts_in_dim; // end vert id of the current cell + LO start = currentCell * num_verts_in_dim; + LO end = start + num_verts_in_dim; - for (LO i = start; i < end; - ++i) { // loop over the vertices of the current cell + for (LO i = start; i < end; ++i) { LO current_vert_id = faces2nodes[i]; - LO start_ptr_current_vert = - n2f_ptr[current_vert_id]; // start loc of adj cells to current - // vertex - LO end_ptr_vert_current_vert = - n2f_ptr[current_vert_id + - 1]; // end loc of adj cells to current vertex + LO start_ptr_current_vert = n2f_ptr[current_vert_id]; + LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1]; for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; - ++j) { // loop over adj cells to the current vertex - auto neighbor_cell_index = n2f_data[j]; // current cell + ++j) { + auto neighbor_cell_index = n2f_data[j]; // check if neighbor index is already in the queue to be checked // TODO refactor this into a function @@ -400,11 +362,8 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, bool adapt_radius = true) { SupportResults support; - FindSupports search(source_mesh, target_mesh); - LO nvertices_source = source_mesh.nverts(); - LO nvertices_target = target_mesh.nverts(); Write nSupports(nvertices_target, 0, @@ -423,7 +382,7 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, while (true) { nSupports = Write(nvertices_target, 0, "number of supports in each target vertex"); - // find maximum radius + Real max_radius = 0.0; Kokkos::parallel_reduce( "find max radius", nvertices_target, @@ -434,13 +393,11 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); SupportResults support; // create support every time to avoid complexity - Kokkos::fence(); search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, radii2, true); - Kokkos::fence(); - //* find the minimum number of supports + LO min_supports_found = 0; Kokkos::Min min_reducer(min_supports_found); Kokkos::parallel_reduce( @@ -449,19 +406,16 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, min_reducer.join(local_min, nSupports[i]); }, min_reducer); - printf("INFO: min_supports_found: %d at loop %d, max_radius %f\n", min_supports_found, r_adjust_loop, max_radius); - r_adjust_loop++; + r_adjust_loop++; Kokkos::fence(); if (min_supports_found >= min_req_support) { break; } Kokkos::fence(); - - // * update radius if nSupport is less that min_req_support parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO i) { if (nSupports[i] < min_req_support) { @@ -469,8 +423,6 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor; radii2[i] = radii2[i] * factor; } - // nSupports[i] = 0; // reset the number of supports ? not sure if - // needed }); Kokkos::fence(); } @@ -502,9 +454,7 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, radii2, false); support.radii2 = radii2; - target_mesh.add_tag(VERT, "radii2", 1, support.radii2); - return support; } @@ -512,11 +462,8 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support = 12, bool adapt_radius = true) { SupportResults support; - FindSupports search(mesh); - LO nvertices_target = mesh.nverts(); - Write nSupports(nvertices_target, 0, "number of supports in each target vertex"); @@ -524,12 +471,9 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, Write radii2 = Write(nvertices_target, cutoffDistance, "squared radii of the supports"); printf("INFO: Cutoff distance: %f\n", cutoffDistance); - // this call gets the number of supports for each target vertex: nSupports and - // the squared radii of the supports (with might be increased in the search to - // have enough supports) + int r_adjust_loop = 0; while (true) { // until the number of minimum support is met - // find maximum radius Real max_radius = 0.0; Kokkos::parallel_reduce( "find max radius", nvertices_target, @@ -542,12 +486,10 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, nSupports = Write(nvertices_target, 0, "number of supports in each target vertex"); SupportResults support; // create support every time to avoid complexity - search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, support.supports_idx, radii2, true); Kokkos::fence(); - //* find the minimum number of supports LO min_nSupports = 0; Kokkos::parallel_reduce( "find min number of supports", nvertices_target, @@ -565,8 +507,6 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } Kokkos::fence(); - - // * update radius if nSupport is less that min_support parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO i) { if (nSupports[i] < min_support) { @@ -580,21 +520,14 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); - // search.adjBasedSearch(support.supports_ptr, nSupports, - // support.supports_idx, - // radii2, true); - // offset array for the supports of each target vertex support.supports_ptr = Write( nvertices_target + 1, 0, "number of support source vertices in CSR format"); LO total_supports = 0; - - // get the total number of supports and fill the offset array Kokkos::parallel_scan( nvertices_target, - OMEGA_H_LAMBDA(int j, int& update, bool final) { // what final does? - // OMEGA_H_CHECK(nSupports[j] >= 15); + OMEGA_H_LAMBDA(int j, int& update, bool final) { update += nSupports[j]; if (final) { support.supports_ptr[j + 1] = update; @@ -607,15 +540,12 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, support.supports_idx = Write( total_supports, 0, "index of source supports of each target node"); - printf("INFO: Total_supports: %d\n", total_supports); - // second call to get the actual support indices - // now sizes of support.supports_ptr and support.supports_idx are known and > - // 0 + search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, support.supports_idx, radii2, false); - support.radii2 = radii2; + support.radii2 = radii2; mesh.add_tag(VERT, "support_radius", 1, radii2); return support; } From 8e6873aa27acaa83203ef8b1c11a93f26d21b8a8 Mon Sep 17 00:00:00 2001 From: Fuad Hasibul Hasan Date: Thu, 31 Oct 2024 20:54:48 -0400 Subject: [PATCH 31/66] radius adaptation option added for cell2node search --- src/pcms/interpolator/adj_search.hpp | 100 +++++++++++++++------------ 1 file changed, 54 insertions(+), 46 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 90dd1d59..88abbd00 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -373,7 +373,8 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, "squared radii of the supports"); if (!adapt_radius) { - printf("INFO: Fixed radius search... \n"); + printf("INFO: Fixed radius search *(disregarding required minimum " + "support)*... \n"); search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, radii2, true); } else { @@ -472,53 +473,60 @@ SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, "squared radii of the supports"); printf("INFO: Cutoff distance: %f\n", cutoffDistance); - int r_adjust_loop = 0; - while (true) { // until the number of minimum support is met - Real max_radius = 0.0; - Kokkos::parallel_reduce( - "find max radius", nvertices_target, - OMEGA_H_LAMBDA(const LO i, Real& local_max) { - local_max = (radii2[i] > local_max) ? radii2[i] : local_max; - }, - Kokkos::Max(max_radius)); - printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - - nSupports = Write(nvertices_target, 0, - "number of supports in each target vertex"); - SupportResults support; // create support every time to avoid complexity - search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, - support.supports_idx, radii2, true); - - Kokkos::fence(); - LO min_nSupports = 0; - Kokkos::parallel_reduce( - "find min number of supports", nvertices_target, - OMEGA_H_LAMBDA(const LO i, LO& local_min) { - local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; - }, - Kokkos::Min(min_nSupports)); - - printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, - r_adjust_loop, max_radius); - r_adjust_loop++; - - if (min_nSupports >= min_support) { - break; - } + if (!adapt_radius) { + printf("INFO: Fixed radius search *(disregarding required minimum " + "support)* ... \n"); + search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + radii2, true); + } else { + printf("INFO: Adaptive radius search... \n"); + int r_adjust_loop = 0; + while (true) { // until the number of minimum support is met + Real max_radius = 0.0; + Kokkos::parallel_reduce( + "find max radius", nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (radii2[i] > local_max) ? radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - Kokkos::fence(); - parallel_for( - nvertices_target, OMEGA_H_LAMBDA(const LO i) { - if (nSupports[i] < min_support) { - Real factor = Real(min_support) / Real(nSupports[i]); - factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; - radii2[i] *= factor; - } - nSupports[i] = 0; // ? might not be needed - }); - } // while loop + nSupports = Write(nvertices_target, 0, + "number of supports in each target vertex"); + SupportResults support; // create support every time to avoid complexity + search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, + support.supports_idx, radii2, true); + + Kokkos::fence(); + LO min_nSupports = 0; + Kokkos::parallel_reduce( + "find min number of supports", nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; + }, + Kokkos::Min(min_nSupports)); + + printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, + r_adjust_loop, max_radius); + r_adjust_loop++; - printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + if (min_nSupports >= min_support) { + break; + } + + Kokkos::fence(); + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_support) { + Real factor = Real(min_support) / Real(nSupports[i]); + factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; + radii2[i] *= factor; + } + nSupports[i] = 0; // ? might not be needed + }); + } // while loop + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + } // adaptive radius search // offset array for the supports of each target vertex support.supports_ptr = Write( From 4c93f4a28fb36cabd7d37686e95baafabfab0a28 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 11 Nov 2024 11:26:56 -0500 Subject: [PATCH 32/66] renamed test sections --- test/test_rbf_interp.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 13088569..10020249 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -109,7 +109,7 @@ TEST_CASE("mls_interp_test") target_points.coordinates(j).y = target_coordinates[j * dim + 1]; }); - SECTION("test interpo degree 1 poly degree 0") + SECTION("test interpolation degree 1, function degree 0") { int degree = 1; @@ -152,11 +152,11 @@ TEST_CASE("mls_interp_test") } } - SECTION("test interpolation degree 1 poly degree 1") + SECTION("test interpolation degree 1, function degree 1") { - int degree = 2; - LO min_num_supports = 16; + int degree = 1; + LO min_num_supports = 10; Write source_values(nfaces, 0, "exact target values"); @@ -281,7 +281,7 @@ TEST_CASE("mls_interp_test") } } - SECTION("test interpolation degree 2 poly degree 2") + SECTION("test interpolation degree 2, function degree 2") { int degree = 2; @@ -324,7 +324,7 @@ TEST_CASE("mls_interp_test") } } - SECTION("test interpolation degree 3 poly degree 2") + SECTION("test interpolation degree 3, function degree 2") { int degree = 3; @@ -367,7 +367,7 @@ TEST_CASE("mls_interp_test") } } - SECTION("test interpolation degree 3 poly degree 3") + SECTION("test interpolation degree 3, function degree 3") { int degree = 3; From cd88d5311c61ba5071f846dd030090d3d49915fb Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 18 Nov 2024 14:20:57 -0500 Subject: [PATCH 33/66] interface for passing various rbf functions --- src/pcms/interpolator/MLS_rbf_options.cpp | 138 ++++++++++++++++++++++ src/pcms/interpolator/MLS_rbf_options.hpp | 23 ++++ 2 files changed, 161 insertions(+) create mode 100644 src/pcms/interpolator/MLS_rbf_options.cpp create mode 100644 src/pcms/interpolator/MLS_rbf_options.hpp diff --git a/src/pcms/interpolator/MLS_rbf_options.cpp b/src/pcms/interpolator/MLS_rbf_options.cpp new file mode 100644 index 00000000..527506af --- /dev/null +++ b/src/pcms/interpolator/MLS_rbf_options.cpp @@ -0,0 +1,138 @@ +#include +#include "MLS_rbf_options.hpp" +#include "MLSInterpolation.hpp" +#include + +// TODO add PCMS namespace to all interpolation code + +// RBF_GAUSSIAN Functor +struct RBF_GAUSSIAN +{ + OMEGA_H_INLINE + double operator()(double r_sq, double rho_sq) const + { + double phi; + OMEGA_H_CHECK_PRINTF(rho_sq > 0, + "ERROR: square of cutoff distance should always be " + "positive but the value is %.16f\n", + rho_sq); + + OMEGA_H_CHECK_PRINTF(r_sq > 0, + "ERROR: square of distance should always be positive " + "but the value is %.16f\n", + r_sq); + + // 'a' is a spreading factor/decay factor + // the value of 'a' is higher if the data is localized + // the value of 'a' is smaller if the data is farther + int a = 20; + + double r = sqrt(r_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + + if (limit < 0) { + phi = 0; + + } else { + phi = exp(-a * a * r * r); + } + + return phi; + } +}; + +// RBF_C4 Functor +struct RBF_C4 +{ + OMEGA_H_INLINE + double operator()(double r_sq, double rho_sq) const + { + double phi; + double r = sqrt(r_sq); + OMEGA_H_CHECK_PRINTF( + rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", + rho_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + + 82 * pow(ratio, 2) + 36 * ratio + 6; + phi = phi * pow(limit, 6); + } + + OMEGA_H_CHECK_PRINTF(!std::isnan(phi), + "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n", + r_sq, rho_sq); + return phi; + } +}; + +// RBF_const Functor +// +struct RBF_CONST +{ + OMEGA_H_INLINE + double operator()(double r_sq, double rho_sq) const + { + double phi; + double r = sqrt(r_sq); + OMEGA_H_CHECK_PRINTF( + rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", + rho_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 1.0; + } + + OMEGA_H_CHECK_PRINTF(!std::isnan(phi), + "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n", + r_sq, rho_sq); + return phi; + } +}; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Write radii2, + RadialBasisFunction bf) +{ + + const auto nvertices_target = target_coordinates.size() / dim; + + Write interpolated_values(nvertices_target, 0, + "approximated target values"); + switch (bf) { + case RadialBasisFunction::RBF_GAUSSIAN: + interpolated_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, radii2, RBF_GAUSSIAN{}); + break; + + case RadialBasisFunction::RBF_C4: + interpolated_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, radii2, RBF_C4{}); + break; + + case RadialBasisFunction::RBF_CONSTANT: + interpolated_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, radii2, RBF_CONST{}); + break; + } + + return interpolated_values; +} diff --git a/src/pcms/interpolator/MLS_rbf_options.hpp b/src/pcms/interpolator/MLS_rbf_options.hpp new file mode 100644 index 00000000..73661b8d --- /dev/null +++ b/src/pcms/interpolator/MLS_rbf_options.hpp @@ -0,0 +1,23 @@ +#ifndef MLS_RBF_OPTIONS_HPP +#define MLS_RBF_OPTIONS_HPP + +#include "adj_search.hpp" + +using namespace Omega_h; + +enum class RadialBasisFunction : LO +{ + RBF_GAUSSIAN = 0, + RBF_C4, + RBF_CONSTANT + +}; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Write radii2, + RadialBasisFunction bf); + +#endif From 5acab5eedcc70ff43c66d26b2e49c05cd7176157 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 18 Nov 2024 14:22:16 -0500 Subject: [PATCH 34/66] updated the CMake list to include the new files --- src/pcms/interpolator/CMakeLists.txt | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 683ec387..640662c3 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.20) project(Interpolator) -set(HEADER_FILES +set(PCMS_FIELD_TRANSFER_HEADERS MLSInterpolation.hpp points.hpp adj_search.hpp @@ -10,13 +10,18 @@ set(HEADER_FILES queue_visited.hpp linear_interpolant.hpp multidimarray.hpp + MLS_rbf_options.hpp ) -install(FILES ${HEADER_FILES} DESTINATION include/pcms/interpolator) +set(PCMS_FIELD_TRANSFER_SOURCES + MLS_rbf_options.cpp +) +install(FILES ${PCMS_FIELD_TRANSFER_HEADERS} DESTINATION include/pcms/interpolator) + -add_library(pcms_interpolator INTERFACE) +add_library(pcms_interpolator ${PCMS_FIELD_TRANSFER_SOURCES}) -target_link_libraries(pcms_interpolator INTERFACE pcms::core) +target_link_libraries(pcms_interpolator PUBLIC pcms::core) target_include_directories(pcms_interpolator INTERFACE $ @@ -26,7 +31,8 @@ target_include_directories(pcms_interpolator INTERFACE install(TARGETS pcms_interpolator EXPORT interpolatorTargets - INCLUDES DESTINATION include/pcms/interpolator) + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/pcms/interpolator + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) install(EXPORT interpolatorTargets From cc8e3d81a0e9c93f70569f675d76426422cc90ce Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 18 Nov 2024 14:23:27 -0500 Subject: [PATCH 35/66] changes to accomodate interface --- src/pcms/interpolator/MLSCoefficients.hpp | 41 ++++++----------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index bcad3adf..82826303 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -5,6 +5,7 @@ #include #include "points.hpp" +#include #define PI_M 3.14159265358979323846 @@ -93,32 +94,6 @@ void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, } } -KOKKOS_INLINE_FUNCTION -double rbf(double r_sq, double rho_sq) -{ - double phi; - double r = sqrt(r_sq); - OMEGA_H_CHECK_PRINTF( - rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", - rho_sq); - double rho = sqrt(rho_sq); - double ratio = r / rho; - double limit = 1 - ratio; - if (limit < 0) { - phi = 0; - - } else { - phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + - 82 * pow(ratio, 2) + 36 * ratio + 6; - phi = phi * pow(limit, 6); - } - - OMEGA_H_CHECK_PRINTF(!std::isnan(phi), - "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n", - r_sq, rho_sq); - return phi; -} - // create vandermondeMatrix KOKKOS_INLINE_FUNCTION void VandermondeMatrix(ScratchMatView V, @@ -160,16 +135,20 @@ void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, } // radial basis function vector -KOKKOS_INLINE_FUNCTION -void PhiVector(ScratchVecView Phi, const Coord target_point, - const ScratchMatView local_source_points, int j, - double cuttoff_dis_sq) +template , + bool> = true> +KOKKOS_INLINE_FUNCTION void PhiVector(ScratchVecView Phi, + const Coord target_point, + const ScratchMatView local_source_points, + int j, double cuttoff_dis_sq, + Func rbf_func) { int N = local_source_points.extent(0); double dx = target_point.x - local_source_points(j, 0); double dy = target_point.y - local_source_points(j, 1); double ds_sq = dx * dx + dy * dy; - Phi(j) = rbf(ds_sq, cuttoff_dis_sq); + Phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); OMEGA_H_CHECK_PRINTF(!std::isnan(Phi(j)), "ERROR: Phi(j) in PhiVector is NaN for j = %d " "ds_sq=%.16f, cuttoff_dis_sq=%.16f", From ec57a6d587ff4c49c6588d9cdbfa9d40b9983ad3 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 18 Nov 2024 14:23:45 -0500 Subject: [PATCH 36/66] changes to accomodate interface --- src/pcms/interpolator/MLSInterpolation.hpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index dbac8f39..993424f7 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -5,14 +5,23 @@ #include "adj_search.hpp" #include #include +#include +#include + using namespace Omega_h; +template Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Write radii2) + const LO& degree, Write radii2, + Func rbf_func) { + static_assert(std::is_invocable_r_v, + "rbf_func, takes radius and cutoff, returns weight"); + static_assert(!std::is_pointer_v, + "function pointer will fail in GPU execution context"); const auto nvertices_source = source_coordinates.size() / dim; const auto nvertices_target = target_coordinates.size() / dim; @@ -185,7 +194,8 @@ Write mls_interpolation(const Reals source_values, radii2[i] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", radii2[i]); - PhiVector(Phi, target_point, local_source_points, j, radii2[i]); + PhiVector(Phi, target_point, local_source_points, j, radii2[i], + rbf_func); }); // sum phi From c888f2814c2e9ce4d032341bd45ccb40fffc2af9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Mon, 18 Nov 2024 14:24:38 -0500 Subject: [PATCH 37/66] updated test CMake list --- test/CMakeLists.txt | 4 +- test/test_rbf_interp.cpp | 168 ++++++++++++++++++--------------------- 2 files changed, 80 insertions(+), 92 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index acd0a0f4..c2a539ce 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -186,7 +186,7 @@ if(Catch2_FOUND) if (PCMS_ENABLE_OMEGA_H) add_executable(field_transfer_example field_transfer_example.cpp) - target_link_libraries(field_transfer_example PUBLIC pcms::core) + target_link_libraries(field_transfer_example PUBLIC pcms::core pcms_interpolator) list(APPEND PCMS_UNIT_TEST_SOURCES test_field_transfer.cpp test_uniform_grid.cpp @@ -197,7 +197,7 @@ if(Catch2_FOUND) ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) - target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core) + target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core pcms_interpolator) target_include_directories(unit_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) include(Catch) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 10020249..c82879e5 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -1,53 +1,82 @@ #include #include #include -#include -#include +#include #include #include #include #include #include +#include using namespace std; using namespace Omega_h; KOKKOS_INLINE_FUNCTION -double func_const(Coord& p) +double func(Coord& p, int degree) { - auto x = p.x; - auto y = p.y; - double Z = 3; - return Z; -} + [[maybe_unused]] auto x = p.x; + [[maybe_unused]] auto y = p.y; + if (degree = 0) { + double Z = 3; + } + elseif(degree = 1) + { + double Z = x + y; + } + elseif(degree = 2) + { + double Z = pow(x, 2) + pow(y, 2); + } + elseif(degree = 3) + { -KOKKOS_INLINE_FUNCTION -double func_linear(Coord& p) -{ - auto x = p.x; - auto y = p.y; - double Z = x + y; - return Z; + double Z = pow(x, 3) + pow(y, 3); + } + else printf("No polynomials with degree = %f\n", degree) } - -KOKKOS_INLINE_FUNCTION -double func_quadratic(Coord& p) -{ - auto x = p.x; - auto y = p.y; - double Z = pow(x, 2) + pow(y, 2); - return Z; +return Z; } -KOKKOS_INLINE_FUNCTION -double func_cubic(Coord& p) +void test(Mesh& mesh, Real cutoffdistance, int degree, LO min_num_Supports, + RadialBasisFunction bf) { - auto x = p.x; - auto y = p.y; - double Z = pow(x, 3) + pow(y, 3); - return Z; -} + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree); + }); + + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); + + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, degree, + support.radii2, RadialBasisFunction::RBF_C4); + + auto host_approx_target_values = HostRead(approx_target_values); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func_linear(target_points.coordinates(i)); + }); + + auto host_exact_target_values = HostRead(exact_target_values); + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } +} // Test cases for centroid to node mapping using MLS TEST_CASE("mls_interp_test") { @@ -125,9 +154,9 @@ TEST_CASE("mls_interp_test") SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, support.radii2, RadialBasisFunction::RBF_C4); auto host_approx_target_values = HostRead(approx_target_values); @@ -152,48 +181,7 @@ TEST_CASE("mls_interp_test") } } - SECTION("test interpolation degree 1, function degree 1") - { - - int degree = 1; - LO min_num_supports = 10; - - Write source_values(nfaces, 0, "exact target values"); - - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_linear(source_points.coordinates(i)); - }); - - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); - - auto host_approx_target_values = HostRead(approx_target_values); - - Write exact_target_values(mesh.nverts(), 0, "exact target values"); - - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_linear(target_points.coordinates(i)); - }); - - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } - } + SECTION("test interpolation degree 1, function degree 1") {} SECTION("test interpo degree 2 poly degree 0") { @@ -211,9 +199,9 @@ TEST_CASE("mls_interp_test") SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, support.radii2, RadialBasisFunction::RBF_C4); auto host_approx_target_values = HostRead(approx_target_values); @@ -254,9 +242,9 @@ TEST_CASE("mls_interp_test") SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, support.radii2, RadialBasisFunction::RBF_C4); auto host_approx_target_values = HostRead(approx_target_values); @@ -297,9 +285,9 @@ TEST_CASE("mls_interp_test") SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, support.radii2, RadialBasisFunction::RBF_C4); auto host_approx_target_values = HostRead(approx_target_values); @@ -340,9 +328,9 @@ TEST_CASE("mls_interp_test") SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, support.radii2, RadialBasisFunction::RBF_C4); auto host_approx_target_values = HostRead(approx_target_values); @@ -383,9 +371,9 @@ TEST_CASE("mls_interp_test") SupportResults support = searchNeighbors(mesh, cutoffDistance, min_num_supports); - auto approx_target_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2); + auto approx_target_values = mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, support.radii2, RadialBasisFunction::RBF_C4); auto host_approx_target_values = HostRead(approx_target_values); From 27453c6dc79dc80c89325bc8739f30bfc32b1aaa Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 3 Dec 2024 17:31:22 -0500 Subject: [PATCH 38/66] added pcms_interpolator target --- config.cmake.in | 1 + 1 file changed, 1 insertion(+) diff --git a/config.cmake.in b/config.cmake.in index 1834271c..6300c498 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -18,5 +18,6 @@ if(@PCMS_ENABLE_Fortran@) endif() include("${CMAKE_CURRENT_LIST_DIR}/pcms-targets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/pcms_interpolator-targets.cmake") # must be called at the end of the config file check_required_components(pcms) From f9bd12048833e4644ac761c0958db5f780bdf52c Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 3 Dec 2024 17:33:16 -0500 Subject: [PATCH 39/66] set target properties for pcms_interpolator --- src/pcms/interpolator/CMakeLists.txt | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 640662c3..1e17e6c5 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -1,6 +1,7 @@ -cmake_minimum_required(VERSION 3.20) +#cmake_minimum_required(VERSION 3.20) +#project(Interpolator) -project(Interpolator) +find_package(KokkosKernels REQUIRED) set(PCMS_FIELD_TRANSFER_HEADERS MLSInterpolation.hpp @@ -20,13 +21,18 @@ install(FILES ${PCMS_FIELD_TRANSFER_HEADERS} DESTINATION include/pcms/interpolat add_library(pcms_interpolator ${PCMS_FIELD_TRANSFER_SOURCES}) +set_target_properties(pcms_interpolator PROPERTIES + OUTPUT_NAME pcmsinterpolator + EXPORT_NAME interpolator) +add_library(pcms::interpolator ALIAS pcms_interpolator) -target_link_libraries(pcms_interpolator PUBLIC pcms::core) + +target_link_libraries(pcms_interpolator PUBLIC pcms::core Kokkos::kokkoskernels) target_include_directories(pcms_interpolator INTERFACE $ $ - $) + $) install(TARGETS pcms_interpolator @@ -36,7 +42,7 @@ install(TARGETS pcms_interpolator install(EXPORT interpolatorTargets - FILE interpolatorTargets.cmake + FILE pcms_interpolator-targets.cmake NAMESPACE pcms:: - DESTINATION lib/cmake/pcms) + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/pcms) From 2ba03229b3622f37d9a66be00083e16119502f60 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 3 Dec 2024 17:37:37 -0500 Subject: [PATCH 40/66] updated with kokkos kernels LU solver --- src/pcms/interpolator/MLSCoefficients.hpp | 423 ++++++++++++++-------- 1 file changed, 264 insertions(+), 159 deletions(-) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index 82826303..c7e0394a 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -4,6 +4,12 @@ #include #include +#include +#include +#include +#include +#include +#include #include "points.hpp" #include @@ -61,10 +67,10 @@ int basisSize(const Kokkos::View& array) // evaluates the polynomial basis KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, +void BasisPoly(ScratchVecView basis_vector, const MatViewType& slice_length, Coord& p) { - basis_monomial(0) = 1; + basis_vector(0) = 1; int dim = slice_length.extent(1); int degree = slice_length.extent(0); @@ -83,7 +89,7 @@ void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, int offset = curr_col; for (int j = 0; j < dim; ++j) { for (int k = 0; k < slice_length(i, j); ++k) { - basis_monomial(offset + k) = basis_monomial(prev_col + k) * point[j]; + basis_vector(offset + k) = basis_vector(prev_col + k) * point[j]; } offset += slice_length(i, j); @@ -96,9 +102,9 @@ void BasisPoly(ScratchVecView basis_monomial, const MatViewType& slice_length, // create vandermondeMatrix KOKKOS_INLINE_FUNCTION -void VandermondeMatrix(ScratchMatView V, - const ScratchMatView local_source_points, int j, - const MatViewType slice_length) +void CreateVandermondeMatrix(ScratchMatView vandermonde_matrix, + const ScratchMatView local_source_points, int j, + const MatViewType slice_length) { int N = local_source_points.extent(0); int dim = local_source_points.extent(1); @@ -109,32 +115,12 @@ void VandermondeMatrix(ScratchMatView V, if (dim == 3) { source_point.z = local_source_points(j, 2); } - ScratchVecView basis_monomial = Kokkos::subview(V, j, Kokkos::ALL()); - BasisPoly(basis_monomial, slice_length, source_point); + ScratchVecView basis_vector_supports = + Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL()); + BasisPoly(basis_vector_supports, slice_length, source_point); } -// moment matrix -KOKKOS_INLINE_FUNCTION -void PTphiMatrix(ScratchMatView pt_phi, ScratchMatView V, ScratchVecView Phi, - int j) -{ - int M = V.extent(0); - int N = V.extent(1); - - ScratchVecView vandermonde_mat_row = Kokkos::subview(V, j, Kokkos::ALL()); - for (int k = 0; k < N; k++) { - OMEGA_H_CHECK_PRINTF(!std::isnan(vandermonde_mat_row(k)), - "ERROR: vandermonde_mat_row is NaN for k = %d\n", k); - OMEGA_H_CHECK_PRINTF(!std::isnan(Phi(j)), - "ERROR: Phi(j) in PTphiMatrix is NaN for j = %d\n", j); - pt_phi(k, j) = vandermonde_mat_row(k) * Phi(j); - OMEGA_H_CHECK_PRINTF( - !std::isnan(pt_phi(k, j)), - "ERROR: pt_phi in PTphiMatrix is NaN for k = %d, j = %d\n", k, j); - } -} - -// radial basis function vector +// radial basis function vector (phi(s_ti)) template , bool> = true> @@ -155,55 +141,55 @@ KOKKOS_INLINE_FUNCTION void PhiVector(ScratchVecView Phi, j, ds_sq, cuttoff_dis_sq); } -// matrix matrix multiplication +// takes a matrix A(m,n) and vector b(m) and calulates +// A^Tb --> A^T(n,m) b(m) KOKKOS_INLINE_FUNCTION -void MatMatMul(member_type team, ScratchMatView& moment_matrix, - const ScratchMatView& pt_phi, const ScratchMatView& vandermonde) +void ScaleColumnTransMatrix(ScratchMatView result_matrix, + const ScratchMatView matrix, + const ScratchVecView vector, member_type team, + int j) { - int M = pt_phi.extent(0); - int N = vandermonde.extent(1); - int K = pt_phi.extent(1); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { - double sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - OMEGA_H_CHECK_PRINTF(!std::isnan(pt_phi(i, k)), - "ERROR: pt_phi is NaN for i = %d\n", i); - OMEGA_H_CHECK_PRINTF(!std::isnan(vandermonde(k, j)), - "ERROR: vandermonde is NaN for k = %d\n", k); - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); - OMEGA_H_CHECK_PRINTF(!std::isnan(sum), - "ERROR: sum is NaN for i = %d, j = %d\n", i, j); - moment_matrix(i, j) = sum; - }); - }); -} -// Matrix vector multiplication -KOKKOS_INLINE_FUNCTION -void MatVecMul(member_type team, const ScratchVecView& vector, - const ScratchMatView& matrix, ScratchVecView& result) -{ int M = matrix.extent(0); int N = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - double sum = 0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, M), - [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, - sum); - result(i) = sum; - OMEGA_H_CHECK_PRINTF(!std::isnan(result(i)), - "ERROR: sum is NaN for i = %d\n", i); - }); - // team.team_barrier(); + + ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); + for (int k = 0; k < N; k++) { + OMEGA_H_CHECK_PRINTF(!std::isnan(matrix_row(k)), + "ERROR: vandermonde_mat_row is NaN for k = %d\n", k); + OMEGA_H_CHECK_PRINTF(!std::isnan(vector(j)), + "ERROR: Phi(j) in PTphiMatrix is NaN for j = %d\n", j); + result_matrix(k, j) = matrix_row(k) * vector(j); + OMEGA_H_CHECK_PRINTF( + !std::isnan(result_matrix(k, j)), + "ERROR: pt_phi in PTphiMatrix is NaN for k = %d, j = %d\n", k, j); + } } +// KOKKOS_INLINE_FUNCTION +// void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, +// const ScratchMatView& pt_phi, +// const ScratchMatView& vandermonde) +//{ +// int M = pt_phi.extent(0); +// int N = vandermonde.extent(1); +// int K = pt_phi.extent(1); +// +// Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { +// double sum = 0.0; +// Kokkos::parallel_reduce( +// Kokkos::ThreadVectorRange(team, K), +// [=](const int k, double& lsum) { +// lsum += pt_phi(i, k) * vandermonde(k, j); +// }, +// sum); +// moment_matrix(i, j) = sum; +// }); +// }); +// } +// + // dot product KOKKOS_INLINE_FUNCTION void dot_product(member_type team, const ScratchVecView& result_sub, @@ -217,105 +203,224 @@ void dot_product(member_type team, const ScratchVecView& result_sub, "ERROR: NaN found in dot_product: N: %d\n", N); } -// moment matrix -KOKKOS_INLINE_FUNCTION -void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, - const ScratchMatView& pt_phi, - const ScratchMatView& vandermonde) +// matrix matrix multiplication +// KOKKOS_INLINE_FUNCTION +// void MatMatMul(member_type team, ScratchMatView& moment_matrix, +// const ScratchMatView& pt_phi, const ScratchMatView& +// vandermonde) +//{ +// int M = pt_phi.extent(0); +// int N = vandermonde.extent(1); +// int K = pt_phi.extent(1); +// +// Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { +// double sum = 0.0; +// Kokkos::parallel_reduce( +// Kokkos::ThreadVectorRange(team, K), +// [=](const int k, double& lsum) { +// OMEGA_H_CHECK_PRINTF(!std::isnan(pt_phi(i, k)), +// "ERROR: pt_phi is NaN for i = %d\n", i); +// OMEGA_H_CHECK_PRINTF(!std::isnan(vandermonde(k, j)), +// "ERROR: vandermonde is NaN for k = %d\n", k); +// lsum += pt_phi(i, k) * vandermonde(k, j); +// }, +// sum); +// OMEGA_H_CHECK_PRINTF(!std::isnan(sum), +// "ERROR: sum is NaN for i = %d, j = %d\n", i, j); +// moment_matrix(i, j) = sum; +// }); +// }); +//} +//// +// Matrix vector multiplication + +// KOKKOS_INLINE_FUNCTION +// void MatVecMul(member_type team, const ScratchVecView& vector, +// const ScratchMatView& matrix, ScratchVecView& result) +//{ +// int M = matrix.extent(0); +// int N = matrix.extent(1); +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { +// double sum = 0; +// Kokkos::parallel_reduce( +// Kokkos::ThreadVectorRange(team, M), +// [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, +// sum); +// result(i) = sum; +// OMEGA_H_CHECK_PRINTF(!std::isnan(result(i)), +// "ERROR: sum is NaN for i = %d\n", i); +// }); +// // team.team_barrier(); +// } +// + +// TODO: Implement QR decomposition to solve the linear system +// convert normal equation P^T Q P x = P^T Q b to Ax = b'; +// A = P^T Q P & b' = P^T Q b + +struct ResultConvertNormal { - int M = pt_phi.extent(0); - int N = vandermonde.extent(1); - int K = pt_phi.extent(1); - - Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { - double sum = 0.0; - Kokkos::parallel_reduce( - Kokkos::ThreadVectorRange(team, K), - [=](const int k, double& lsum) { - lsum += pt_phi(i, k) * vandermonde(k, j); - }, - sum); - moment_matrix(i, j) = sum; - }); - }); -} + ScratchMatView scaled_matrix; + ScratchMatView square_matrix; + ScratchVecView transformed_rhs; +}; -// inverse matrix KOKKOS_INLINE_FUNCTION -void inverse_matrix(member_type team, const ScratchMatView& matrix, - ScratchMatView& lower, ScratchMatView& forward_matrix, - ScratchMatView& solution) +ResultConvertNormal ConvertNormalEq(const ScratchMatView matrix, + const ScratchVecView weight_vector, + const ScratchVecView rhs, member_type team) { - int N = matrix.extent(0); - for (int j = 0; j < N; ++j) { - Kokkos::single(Kokkos::PerTeam(team), [=]() { - double sum = 0; - for (int k = 0; k < j; ++k) { - sum += lower(j, k) * lower(j, k); - } - OMEGA_H_CHECK_PRINTF(!std::isnan(matrix(j, j)), - "ERROR: matrix(j,j) is NaN: j = %d\n", j); - OMEGA_H_CHECK_PRINTF( - matrix(j, j) - sum >= -1e-10, // TODO: how to check this reliably? - "ERROR: (matrix(j,j) - sum) is negative: mat(jj)=%.16f, sum = %.16f \n", - matrix(j, j), sum); - lower(j, j) = sqrt(matrix(j, j) - sum); - OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, j)), - "Lower in inverse_matrix is NaN (j,j) = (%d,%d)\n", - j, j); - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) { - double inner_sum = 0; - for (int k = 0; k < j; ++k) { - inner_sum += lower(i, k) * lower(j, k); - } - lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); - lower(j, i) = lower(i, j); - OMEGA_H_CHECK_PRINTF(!std::isnan(lower(i, j)), - "Lower in inverse_matrix is NaN (i,j) = (%d,%d)\n", - i, j); - OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, i)), - "Lower in inverse_matrix is NaN (j,i) = (%d,%d)\n", - j, i); - }); - - team.team_barrier(); - } + int m = matrix.extent(0); + int n = matrix.extent(1); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - forward_matrix(i, i) = 1.0 / lower(i, i); - for (int j = i + 1; j < N; ++j) { - forward_matrix(j, i) = 0.0; // Initialize to zero - for (int k = 0; k < j; ++k) { - forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); - } - forward_matrix(j, i) /= lower(j, j); - OMEGA_H_CHECK_PRINTF(!std::isnan(forward_matrix(j, i)), - "Forward in inverse_matrix is NaN (j,i) = (%d,%d)\n", - j, i); + ResultConvertNormal result; + + result.scaled_matrix = ScratchMatView(team.team_scratch(0), n, m); + + result.square_matrix = ScratchMatView(team.team_scratch(0), n, n); + + result.transformed_rhs = ScratchVecView(team.team_scratch(0), n); + + // performing P^T Q + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { + for (int k = 0; k < m; ++k) { + result.scaled_matrix(j, k) = 0; } }); team.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { - solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); - for (int j = N - 2; j >= 0; --j) { - solution(j, i) = forward_matrix(j, i); - for (int k = j + 1; k < N; ++k) { - solution(j, i) -= lower(j, k) * solution(k, i); - } - solution(j, i) /= lower(j, j); - OMEGA_H_CHECK_PRINTF( - !std::isnan(solution(j, i)), - "Solution in inverse_matrix is NaN (j,i) = (%d,%d)\n", j, i); - } + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { + ScaleColumnTransMatrix(result.scaled_matrix, matrix, weight_vector, team, + j); }); + + team.team_barrier(); + // performing (P^T Q P) + KokkosBatched::TeamGemm< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, + result.scaled_matrix, matrix, + 0.0, result.square_matrix); + + team.team_barrier(); + + // KokkosBlas::Experimental::TeamGemv(team, 1.0, result.scaled_matrix, rhs, + // 0.0, result.transformed_rhs); + KokkosBlas::Experimental:: + Gemv::invoke( + team, 'N', 1.0, result.scaled_matrix, rhs, 0.0, result.transformed_rhs); + + team.team_barrier(); + + return result; +} + +/// solve A x = b using LU decomposition +// inputs rhs, overwrites rhs and returns rhs + +KOKKOS_INLINE_FUNCTION +void SolveMatrix(const ScratchMatView square_matrix, const ScratchVecView rhs, + member_type team) +{ + + // Perform LU decomposition + KokkosBatched::TeamLU< + member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team, + square_matrix); + + team.team_barrier(); + + // Solve the equation with forward and backward solves + KokkosBatched::TeamSolveLU< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); } +//// moment matrix +// +//// inverse matrix +// KOKKOS_INLINE_FUNCTION +// void inverse_matrix(member_type team, const ScratchMatView& matrix, +// ScratchMatView& lower, ScratchMatView& forward_matrix, +// ScratchMatView& solution) +//{ +// int N = matrix.extent(0); +// +// for (int j = 0; j < N; ++j) { +// Kokkos::single(Kokkos::PerTeam(team), [=]() { +// double sum = 0; +// for (int k = 0; k < j; ++k) { +// sum += lower(j, k) * lower(j, k); +// } +// OMEGA_H_CHECK_PRINTF(!std::isnan(matrix(j, j)), +// "ERROR: matrix(j,j) is NaN: j = %d\n", j); +// OMEGA_H_CHECK_PRINTF( +// matrix(j, j) - sum >= -1e-10, // TODO: how to check this reliably? +// "ERROR: (matrix(j,j) - sum) is negative: mat(jj)=%.16f, sum = %.16f +// \n", matrix(j, j), sum); +// lower(j, j) = sqrt(matrix(j, j) - sum); +// OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, j)), +// "Lower in inverse_matrix is NaN (j,j) = +// (%d,%d)\n", j, j); +// }); +// +// team.team_barrier(); +// +// Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) +// { +// double inner_sum = 0; +// for (int k = 0; k < j; ++k) { +// inner_sum += lower(i, k) * lower(j, k); +// } +// lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); +// lower(j, i) = lower(i, j); +// OMEGA_H_CHECK_PRINTF(!std::isnan(lower(i, j)), +// "Lower in inverse_matrix is NaN (i,j) = +// (%d,%d)\n", i, j); +// OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, i)), +// "Lower in inverse_matrix is NaN (j,i) = +// (%d,%d)\n", j, i); +// }); +// +// team.team_barrier(); +// } +// +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { +// forward_matrix(i, i) = 1.0 / lower(i, i); +// for (int j = i + 1; j < N; ++j) { +// forward_matrix(j, i) = 0.0; // Initialize to zero +// for (int k = 0; k < j; ++k) { +// forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); +// } +// forward_matrix(j, i) /= lower(j, j); +// OMEGA_H_CHECK_PRINTF(!std::isnan(forward_matrix(j, i)), +// "Forward in inverse_matrix is NaN (j,i) = +// (%d,%d)\n", j, i); +// } +// }); +// +// team.team_barrier(); +// +// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { +// solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); +// for (int j = N - 2; j >= 0; --j) { +// solution(j, i) = forward_matrix(j, i); +// for (int k = j + 1; k < N; ++k) { +// solution(j, i) -= lower(j, k) * solution(k, i); +// } +// solution(j, i) /= lower(j, j); +// OMEGA_H_CHECK_PRINTF( +// !std::isnan(solution(j, i)), +// "Solution in inverse_matrix is NaN (j,i) = (%d,%d)\n", j, i); +// } +// }); +// } +// #endif From 52dc62aee710f123b4cffb6e9b8bcaf450c351ef Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 3 Dec 2024 17:46:55 -0500 Subject: [PATCH 41/66] upgraded to kokkos kernels --- src/pcms/interpolator/MLSInterpolation.hpp | 175 +++++++++++---------- 1 file changed, 92 insertions(+), 83 deletions(-) diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index 993424f7..cedec17d 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -46,7 +46,6 @@ Write mls_interpolation(const Reals source_values, auto slice_length_hd = Kokkos::create_mirror_view(slice_length); Kokkos::deep_copy(slice_length_hd, host_slice_length); Kokkos::deep_copy(slice_length, slice_length_hd); - Kokkos::parallel_for( "calculate the size required for scratch for each team", nvertices_target, KOKKOS_LAMBDA(const int i) { @@ -56,14 +55,13 @@ Write mls_interpolation(const Reals source_values, size_t total_shared_size = 0; - total_shared_size += - ScratchMatView::shmem_size(basis_size, basis_size) * 4; - total_shared_size += - ScratchMatView::shmem_size(basis_size, nsupports) * 2; + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); + total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); total_shared_size += ScratchVecView::shmem_size(basis_size); total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + total_shared_size += ScratchMatView::shmem_size(nsupports, 1); shmem_each_team(i) = total_shared_size; }); @@ -107,30 +105,26 @@ Write mls_interpolation(const Reals source_values, } } - ScratchMatView lower(team.team_scratch(0), basis_size, basis_size); - - ScratchMatView forward_matrix(team.team_scratch(0), basis_size, - basis_size); - - ScratchMatView moment_matrix(team.team_scratch(0), basis_size, - basis_size); - - ScratchMatView inv_mat(team.team_scratch(0), basis_size, basis_size); - - ScratchMatView V(team.team_scratch(0), nsupports, basis_size); + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + basis_size); - ScratchMatView Ptphi(team.team_scratch(0), basis_size, nsupports); + // rbf function values of source supports Phi(n,n) + ScratchVecView phi_vector(team.team_scratch(0), nsupports); - ScratchMatView resultant_matrix(team.team_scratch(0), basis_size, - nsupports); + // stores P^T Q + // ScratchMatView scaled_vandermonde_matrix(team.team_scratch(0), + // basis_size, nsupports); - ScratchVecView targetMonomialVec(team.team_scratch(0), basis_size); + // stores P^T Q P + // ScratchMatView moment_matrix(team.team_scratch(0), + // basis_size,basis_size); - ScratchVecView SupportValues(team.team_scratch(0), nsupports); + // stores known vector (b) + ScratchVecView support_values(team.team_scratch(0), nsupports); - ScratchVecView result(team.team_scratch(0), nsupports); - - ScratchVecView Phi(team.team_scratch(0), nsupports); + // basis of target + ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); // Kokkos::deep_copy(lower, 0.0); // Kokkos::deep_copy(forward_matrix, 0.0); @@ -140,38 +134,41 @@ Write mls_interpolation(const Reals source_values, // Kokkos::deep_copy(Ptphi, 0.0); // Kokkos::deep_copy(resultant_matrix, 0.0); // Kokkos::deep_copy(targetMonomialVec, 0.0); - // Kokkos::deep_copy(SupportValues, 0.0); + // Kokkos::deep_copy(support_values, 0.0); // Kokkos::deep_copy(result, 0.0); // Kokkos::deep_copy(Phi, 0.0); - // - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), - [=](int j) { - for (int k = 0; k < basis_size; ++k) { - lower(j, k) = 0; - forward_matrix(j, k) = 0; - moment_matrix(j, k) = 0; - inv_mat(j, k) = 0; - } - targetMonomialVec(j) = 0; - for (int k = 0; k < nsupports; ++k) { - resultant_matrix(j, k) = 0; + // Initialize the scratch matrices and vectors - Ptphi(j, k) = 0; - } + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), + [=](int j) { + // for (int k = 0; k < + //basis_size; ++k) { + // moment_matrix(j, + // k) = 0; + // } + + target_basis_vector(j) = 0; + // for (int k = 0; k < + // nsupports; ++k) { + // + // scaled_vandermonde_matrix(j, + // k) = 0; + // } }); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int j) { for (int k = 0; k < basis_size; ++k) { - V(j, k) = 0; + vandermonde_matrix(j, k) = 0; } - SupportValues(j) = 0; - result(j) = 0; - Phi(j) = 0; + support_values(j) = 0; + phi_vector(j) = 0; }); + // evaluates the basis vector of a given target point + Coord target_point; target_point.x = target_coordinates[i * dim]; target_point.y = target_coordinates[i * dim + 1]; @@ -179,76 +176,88 @@ Write mls_interpolation(const Reals source_values, if (dim == 3) { target_point.z = target_coordinates[i * dim + 2]; } - BasisPoly(targetMonomialVec, slice_length, target_point); + BasisPoly(target_basis_vector, slice_length, target_point); + // VandermondeMatrix vandermonde_matrix(nsupports, basis_size) + // vandermonde Matrix is created with the basis vector + // of source supports stacking on top of each other + // Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - VandermondeMatrix(V, local_source_points, j, slice_length); + CreateVandermondeMatrix(vandermonde_matrix, local_source_points, j, + slice_length); }); team.team_barrier(); + // PhiVector(nsupports) is the array of rbf functions evaluated at the + // source supports In the actual implementation, Phi(nsupports, nsupports) + // is the diagonal matrix & each diagonal element is the phi evalauted at + // each source points Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { OMEGA_H_CHECK_PRINTF( radii2[i] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", radii2[i]); - PhiVector(Phi, target_point, local_source_points, j, radii2[i], + PhiVector(phi_vector, target_point, local_source_points, j, radii2[i], rbf_func); }); - // sum phi - double sum_phi = 0; - Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(team, nsupports), - [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); - OMEGA_H_CHECK_PRINTF(!std::isnan(sum_phi), - "ERROR: sum_phi is NaN for i=%d\n", i); - OMEGA_H_CHECK_PRINTF(sum_phi != 0, "ERROR: sum_phi is zero for i=%d\n", - i); - - // normalize phi with sum_phi - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - OMEGA_H_CHECK_PRINTF( - !std::isnan(Phi(j)), - "ERROR: Phi(j) is NaN before normalization for j = %d\n", j); - Phi(j) = Phi(j) / sum_phi; - }); - + // // sum phi + // double sum_phi = 0; + // Kokkos::parallel_reduce( + // Kokkos::TeamThreadRange(team, nsupports), + // [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); + // OMEGA_H_CHECK_PRINTF(!std::isnan(sum_phi), + // "ERROR: sum_phi is NaN for i=%d\n", i); + // OMEGA_H_CHECK_PRINTF(sum_phi != 0, "ERROR: sum_phi is zero for + // i=%d\n", + // i); + // + // // normalize phi with sum_phi + // Kokkos::parallel_for( + // Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + // OMEGA_H_CHECK_PRINTF( + // !std::isnan(Phi(j)), + // "ERROR: Phi(j) is NaN before normalization for j = %d\n", + // j); + // Phi(j) = Phi(j) / sum_phi; + // }); + // team.team_barrier(); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { PTphiMatrix(Ptphi, V, Phi, j); }); + // support_values(nsupports) (or known rhs vector b) is the vector of the + // quantity that we want interpolate - team.team_barrier(); + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + support_values(i) = + source_values[support.supports_idx[start_ptr + i]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(i)), + "ERROR: NaN found: at support %d\n", i); + }); - MatMatMul(team, moment_matrix, Ptphi, V); team.team_barrier(); - inverse_matrix(team, moment_matrix, lower, forward_matrix, inv_mat); + auto result = + ConvertNormalEq(vandermonde_matrix, phi_vector, support_values, team); team.team_barrier(); - MatMatMul(team, resultant_matrix, inv_mat, Ptphi); - team.team_barrier(); + // It stores the solution in rhs vector itself + + SolveMatrix(result.square_matrix, result.transformed_rhs, team); - MatVecMul(team, targetMonomialVec, resultant_matrix, result); team.team_barrier(); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - SupportValues(i) = source_values[support.supports_idx[start_ptr + i]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(SupportValues(i)), - "ERROR: NaN found: at support %d\n", i); - }); + double target_value = 0; + dot_product(team, result.transformed_rhs, target_basis_vector, + target_value); - double tgt_value = 0; - dot_product(team, result, SupportValues, tgt_value); if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(tgt_value), "Nan at %d\n", i); - approx_target_values[i] = tgt_value; + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); + approx_target_values[i] = target_value; } }); From 10f0b5103bfee117d2acc0bc1d7bac5b36cae1ca Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 3 Dec 2024 17:48:20 -0500 Subject: [PATCH 42/66] add radial basis function choices for user --- src/pcms/interpolator/MLS_rbf_options.cpp | 2 +- src/pcms/interpolator/MLS_rbf_options.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pcms/interpolator/MLS_rbf_options.cpp b/src/pcms/interpolator/MLS_rbf_options.cpp index 527506af..deab7811 100644 --- a/src/pcms/interpolator/MLS_rbf_options.cpp +++ b/src/pcms/interpolator/MLS_rbf_options.cpp @@ -127,7 +127,7 @@ Write mls_interpolation(const Reals source_values, support, dim, degree, radii2, RBF_C4{}); break; - case RadialBasisFunction::RBF_CONSTANT: + case RadialBasisFunction::RBF_CONST: interpolated_values = mls_interpolation(source_values, source_coordinates, target_coordinates, support, dim, degree, radii2, RBF_CONST{}); diff --git a/src/pcms/interpolator/MLS_rbf_options.hpp b/src/pcms/interpolator/MLS_rbf_options.hpp index 73661b8d..ae04fe94 100644 --- a/src/pcms/interpolator/MLS_rbf_options.hpp +++ b/src/pcms/interpolator/MLS_rbf_options.hpp @@ -9,7 +9,7 @@ enum class RadialBasisFunction : LO { RBF_GAUSSIAN = 0, RBF_C4, - RBF_CONSTANT + RBF_CONST }; From 8e8e7fb26d12125497929009778e5e8f57363a34 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 3 Dec 2024 17:49:59 -0500 Subject: [PATCH 43/66] added and updated test cases --- test/CMakeLists.txt | 1 + test/test_linear_solver.cpp | 230 +++++++++++++++++++ test/test_mls_basis.cpp | 437 ++++++++++++++++++++++-------------- test/test_rbf_interp.cpp | 275 ++++++++--------------- 4 files changed, 588 insertions(+), 355 deletions(-) create mode 100644 test/test_linear_solver.cpp diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index c2a539ce..4c3e249b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -194,6 +194,7 @@ if(Catch2_FOUND) test_point_search.cpp test_mls_basis.cpp test_rbf_interp.cpp + test_linear_solver.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp new file mode 100644 index 00000000..601655f9 --- /dev/null +++ b/test/test_linear_solver.cpp @@ -0,0 +1,230 @@ +#include +#include +#include +#include +#include +#include +#include + +TEST_CASE("solver test") +{ + // This test computes A^TQA and A^TQ b for the normal equation A^TQA x = A^TQ + // b; where A(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal + // matrix with diagonal elements stored as vector and b(n,1) is a vector + SECTION("check convert normal equation ") + { + + int nvertices_target = 2; + + int nsupports = 3; + + int size = 2; + + Kokkos::View Q("phi vector", nvertices_target, nsupports); + + Kokkos::View b("rhs vector", nvertices_target, nsupports); + + Kokkos::View A("vandermonde matrix", nvertices_target, nsupports, + size); + + Kokkos::View TransAQA("moment matrix", nvertices_target, size, + size); + + Kokkos::View TransAQ("moment matrix", nvertices_target, size, + nsupports); + + Kokkos::View TransAQb("transformed rhs", nvertices_target, size); + + Kokkos::View solution("transformed rhs", nvertices_target, size); + + auto host_Q = Kokkos::create_mirror_view(Q); + auto host_A = Kokkos::create_mirror_view(A); + auto host_b = Kokkos::create_mirror_view(b); + + host_Q(0, 0) = 1.0; + host_Q(0, 1) = 1.0; + host_Q(0, 2) = 1.0; + + host_Q(1, 0) = 3.0; + host_Q(1, 1) = 2.0; + host_Q(1, 2) = 1.0; + + Kokkos::deep_copy(Q, host_Q); + + host_A(0, 0, 0) = 1.0; + host_A(0, 0, 1) = 2.0; + host_A(0, 1, 0) = 3.0; + host_A(0, 1, 1) = 4.0; + host_A(0, 2, 0) = 5.0; + host_A(0, 2, 1) = 6.0; + + host_A(1, 0, 0) = 2.0; + host_A(1, 0, 1) = 3.0; + host_A(1, 1, 0) = 1.0; + host_A(1, 1, 1) = -1.0; + host_A(1, 2, 0) = 4.0; + host_A(1, 2, 1) = 1.0; + + Kokkos::deep_copy(A, host_A); + + host_b(0, 0) = 7.0; + host_b(0, 1) = 8.0; + host_b(0, 2) = 9.0; + + host_b(1, 0) = 5.0; + host_b(1, 1) = 6.0; + host_b(1, 2) = 7.0; + + Kokkos::deep_copy(b, host_b); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); + ScratchVecView phi(team.team_scratch(0), nsupports); + ScratchVecView support_values(team.team_scratch(0), nsupports); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + phi(j) = Q(i, j); + support_values(j) = b(i, j); + for (int k = 0; k < size; ++k) { + vandermonde_matrix(j, k) = A(i, j, k); + } + }); + + team.team_barrier(); + + auto result = + ConvertNormalEq(vandermonde_matrix, phi, support_values, team); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { + TransAQb(i, j) = result.transformed_rhs(j); + for (int k = 0; k < size; ++k) { + TransAQA(i, j, k) = result.square_matrix(j, k); + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < size; ++k) { + TransAQ(i, k, j) = result.scaled_matrix(k, j); + } + }); + + team.team_barrier(); + + SolveMatrix(result.square_matrix, result.transformed_rhs, team); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { + solution(i, j) = result.transformed_rhs(j); + }); + + team.team_barrier(); + }); + + auto host_result_lhs = Kokkos::create_mirror_view(TransAQA); + auto host_result_rhs = Kokkos::create_mirror_view(TransAQb); + auto host_result_scaled = Kokkos::create_mirror_view(TransAQ); + auto host_result_solution = Kokkos::create_mirror_view(solution); + + Kokkos::deep_copy(host_result_lhs, TransAQA); + Kokkos::deep_copy(host_result_rhs, TransAQb); + Kokkos::deep_copy(host_result_scaled, TransAQ); + Kokkos::deep_copy(host_result_solution, solution); + + Kokkos::View expected_lhs_matrix( + "expected lhs matrix", nvertices_target, size, size); + + Kokkos::View expected_rhs_vector( + "expected rhs vector", nvertices_target, size); + + Kokkos::View expected_scaled_matrix( + "expected scaled matrix", nvertices_target, size, nsupports); + + Kokkos::View expected_solution( + "expected solution", nvertices_target, size); + + expected_lhs_matrix(0, 0, 0) = 35.0; + expected_lhs_matrix(0, 0, 1) = 44.0; + expected_lhs_matrix(0, 1, 0) = 44.0; + expected_lhs_matrix(0, 1, 1) = 56.0; + + expected_rhs_vector(0, 0) = 76.0; + expected_rhs_vector(0, 1) = 100.0; + + expected_scaled_matrix(0, 0, 0) = 1.0; + expected_scaled_matrix(0, 0, 1) = 3.0; + expected_scaled_matrix(0, 0, 2) = 5.0; + expected_scaled_matrix(0, 1, 0) = 2.0; + expected_scaled_matrix(0, 1, 1) = 4.0; + expected_scaled_matrix(0, 1, 2) = 6.0; + + expected_solution(0, 0) = -6.0; + expected_solution(0, 1) = 6.5; + + expected_lhs_matrix(1, 0, 0) = 30.0; + expected_lhs_matrix(1, 0, 1) = 20.0; + expected_lhs_matrix(1, 1, 0) = 20.0; + expected_lhs_matrix(1, 1, 1) = 30.0; + + expected_rhs_vector(1, 0) = 70.0; + expected_rhs_vector(1, 1) = 40.0; + + expected_scaled_matrix(1, 0, 0) = 6.0; + expected_scaled_matrix(1, 0, 1) = 2.0; + expected_scaled_matrix(1, 0, 2) = 4.0; + expected_scaled_matrix(1, 1, 0) = 9.0; + expected_scaled_matrix(1, 1, 1) = -2.0; + expected_scaled_matrix(1, 1, 2) = 1.0; + + expected_solution(1, 0) = 2.6; + expected_solution(1, 1) = -0.4; + + for (int i = 0; i < nvertices_target; ++i) { + for (int j = 0; j < size; ++j) { + for (int l = 0; l < nsupports; ++l) { + printf("scaled_matrix (A^T Q) (%f, %f) at (%d,%d,%d)\n", + expected_scaled_matrix(i, j, l), host_result_scaled(i, j, l), + i, j, l); + REQUIRE_THAT( + expected_scaled_matrix(i, j, l), + Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-10)); + } + } + for (int j = 0; j < size; ++j) { + for (int k = 0; k < size; ++k) { + printf("A^T Q A (%f, %f) at (%d,%d,%d)\n", + expected_lhs_matrix(i, j, k), host_result_lhs(i, j, k), i, j, + k); + REQUIRE_THAT( + expected_lhs_matrix(i, j, k), + Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-10)); + } + } + for (int j = 0; j < size; ++j) { + printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j), + host_result_rhs(i, j), i, j); + REQUIRE_THAT(expected_rhs_vector(i, j), + Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-10)); + } + + for (int j = 0; j < size; ++j) { + printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_solution(i, j), + host_result_solution(i, j), i, j); + REQUIRE_THAT( + expected_solution(i, j), + Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-10)); + } + } + } +} diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp index 2e009cbb..33bb9301 100644 --- a/test/test_mls_basis.cpp +++ b/test/test_mls_basis.cpp @@ -6,181 +6,274 @@ TEST_CASE("basis test") { - SECTION("check basis slice lengths, size of the basis vector and basis vector for degree = 3 and dim = 3") - { - - const int dim = 3; - const int degree = 3; - Kokkos::View array("array", degree, dim); - Kokkos::deep_copy(array,0); - basisSliceLengths(array); - auto size = basisSize(array); - - int expected[degree][dim] = { - {1, 1, 1}, - {1, 2, 3}, - {1, 3, 6} - }; - - for (int i = 0; i < degree; ++i) { - for (int j = 0; j < dim; ++j) { - REQUIRE(array(i, j) == expected[i][j]); - } - } - - REQUIRE(size == 20); - - - MatViewType d_array("array in device", degree, dim); - auto array_hd = Kokkos::create_mirror_view(d_array); - Kokkos::deep_copy(array_hd,array); - Kokkos::deep_copy(d_array, array_hd); - - int nvertices_target = 2; - Omega_h::Matrix<3, 2> coords{{2.0,3.0,4.0}, {0.4, 0.5, 0.2}}; - Kokkos::View results("stores results", nvertices_target, size); - - auto host_results = Kokkos::create_mirror_view(results); - - team_policy tp(nvertices_target, Kokkos::AUTO); - Kokkos::parallel_for("inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team){ - int i = team.league_rank(); - ScratchVecView basis_vector(team.team_scratch(0), size); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ - basis_vector(j) = 0; - }); - - Coord target_point; - target_point.x = coords[i][0]; - target_point.y = coords[i][1]; - target_point.z = coords[i][2]; - - BasisPoly(basis_vector, d_array, target_point); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ - results(i,j) = basis_vector(j); - }); - - - }); - - Kokkos::deep_copy(host_results, results); - Kokkos::View expected_basis("expected vector", size); - std::vector names = {"1", "x", "y", "z", "x^2", "xy", "y^2", "xz", "yz", "z^2", "x^3", "x^2y", "xy^2", "Y^3", "x^2z", "xyz", "y^2z", "xz^2", "yz^2", "z^3"}; - - for (int i = 0; i < nvertices_target; ++i){ - double x = coords[i][0]; - double y = coords[i][1]; - double z = coords[i][2]; - expected_basis(0) = 1; - expected_basis(1) = x; // (x) - expected_basis(2) = y; // (y) - expected_basis(3) = z; // (z) - expected_basis(4) = x * x; // (x^2) - expected_basis(5) = x * y; // (xy) - expected_basis(6) = y * y; // (y^2) - expected_basis(7) = x * z; // (xz) - expected_basis(8) = y * z; // (yz) - expected_basis(9) = z * z; // (z^2) - expected_basis(10) = x * x * x; // (x^3) - expected_basis(11) = x * x * y; // (x^2y) - expected_basis(12) = x * y * y; // (xy^2) - expected_basis(13) = y * y * y; // (y^3) - expected_basis(14) = x * x * z; // (x^2z) - expected_basis(15) = x * y * z; // (xyz) - expected_basis(16) = y * y * z; // (y^2z) - expected_basis(17) = x * z * z; // (xz^2) - expected_basis(18) = y * z * z; // (yz^2) - expected_basis(19) = z * z * z; // (z^3) - for (int j = 0; j < size; ++j){ - std::cout< array("array", degree, dim); + Kokkos::deep_copy(array, 0); + basisSliceLengths(array); + auto size = basisSize(array); + + int expected[degree][dim] = {{1, 1, 1}, {1, 2, 3}, {1, 3, 6}}; + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + REQUIRE(array(i, j) == expected[i][j]); + } + } + + REQUIRE(size == 20); + + MatViewType d_array("array in device", degree, dim); + auto array_hd = Kokkos::create_mirror_view(d_array); + Kokkos::deep_copy(array_hd, array); + Kokkos::deep_copy(d_array, array_hd); + + int nvertices_target = 2; + Omega_h::Matrix<3, 2> coords{{2.0, 3.0, 4.0}, {0.4, 0.5, 0.2}}; + Kokkos::View results("stores results", nvertices_target, size); + + auto host_results = Kokkos::create_mirror_view(results); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchVecView basis_vector(team.team_scratch(0), size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { basis_vector(j) = 0; }); + + Coord target_point; + target_point.x = coords[i][0]; + target_point.y = coords[i][1]; + target_point.z = coords[i][2]; + + BasisPoly(basis_vector, d_array, target_point); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { results(i, j) = basis_vector(j); }); + }); + + Kokkos::deep_copy(host_results, results); + Kokkos::View expected_basis("expected vector", + size); + std::vector names = {"1", "x", "y", "z", "x^2", + "xy", "y^2", "xz", "yz", "z^2", + "x^3", "x^2y", "xy^2", "Y^3", "x^2z", + "xyz", "y^2z", "xz^2", "yz^2", "z^3"}; + + for (int i = 0; i < nvertices_target; ++i) { + double x = coords[i][0]; + double y = coords[i][1]; + double z = coords[i][2]; + expected_basis(0) = 1; + expected_basis(1) = x; // (x) + expected_basis(2) = y; // (y) + expected_basis(3) = z; // (z) + expected_basis(4) = x * x; // (x^2) + expected_basis(5) = x * y; // (xy) + expected_basis(6) = y * y; // (y^2) + expected_basis(7) = x * z; // (xz) + expected_basis(8) = y * z; // (yz) + expected_basis(9) = z * z; // (z^2) + expected_basis(10) = x * x * x; // (x^3) + expected_basis(11) = x * x * y; // (x^2y) + expected_basis(12) = x * y * y; // (xy^2) + expected_basis(13) = y * y * y; // (y^3) + expected_basis(14) = x * x * z; // (x^2z) + expected_basis(15) = x * y * z; // (xyz) + expected_basis(16) = y * y * z; // (y^2z) + expected_basis(17) = x * z * z; // (xz^2) + expected_basis(18) = y * z * z; // (yz^2) + expected_basis(19) = z * z * z; // (z^3) + for (int j = 0; j < size; ++j) { + std::cout << names[j] << " " << expected_basis(j) << " " + << host_results(i, j) << "\n"; + REQUIRE(expected_basis(j) == host_results(i, j)); + } + } + } + + SECTION("check basis slice lengths, size of the basis vector and basis " + "vector for degree = 1 and dim = 3") + { + + const int dim = 3; + const int degree = 1; + Kokkos::View array("array", degree, dim); + Kokkos::deep_copy(array, 0); + basisSliceLengths(array); + auto size = basisSize(array); - SECTION("check basis slice lengths, size of the basis vector and basis vector for degree = 1 and dim = 3") - { - - const int dim = 3; - const int degree = 1; - Kokkos::View array("array", degree, dim); - Kokkos::deep_copy(array,0); - basisSliceLengths(array); - - auto size = basisSize(array); - - int expected[degree][dim] = { - {1, 1, 1}, - }; - - for (int i = 0; i < degree; ++i) { - for (int j = 0; j < dim; ++j) { - REQUIRE(array(i, j) == expected[i][j]); - } - } - - REQUIRE(size == 4); - - - MatViewType d_array("array in device", degree, dim); - auto array_hd = Kokkos::create_mirror_view(d_array); - Kokkos::deep_copy(array_hd,array); - Kokkos::deep_copy(d_array, array_hd); - - int nvertices_target = 2; - Omega_h::Matrix<3, 2> coords{{2.0,3.0,4.0}, {0.4, 0.5, 0.2}}; - Kokkos::View results("stores results", nvertices_target, size); - - auto host_results = Kokkos::create_mirror_view(results); - - team_policy tp(nvertices_target, Kokkos::AUTO); - Kokkos::parallel_for("inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), - KOKKOS_LAMBDA(const member_type& team){ - int i = team.league_rank(); - ScratchVecView basis_vector(team.team_scratch(0), size); - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ - basis_vector(j) = 0; - }); - - Coord target_point; - target_point.x = coords[i][0]; - target_point.y = coords[i][1]; - target_point.z = coords[i][2]; - - BasisPoly(basis_vector, d_array, target_point); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team,size),[=](int j){ - results(i,j) = basis_vector(j); - }); - - - }); - - Kokkos::deep_copy(host_results, results); - Kokkos::View expected_basis("expected vector", size); - std::vector names = {"1", "x", "y", "z"}; - - for (int i = 0; i < nvertices_target; ++i){ - double x = coords[i][0]; - double y = coords[i][1]; - double z = coords[i][2]; - expected_basis(0) = 1; - expected_basis(1) = x; // (x) - expected_basis(2) = y; // (y) - expected_basis(3) = z; // (z) - for (int j = 0; j < size; ++j){ - std::cout< coords{{2.0, 3.0, 4.0}, {0.4, 0.5, 0.2}}; + Kokkos::View results("stores results", nvertices_target, size); + + auto host_results = Kokkos::create_mirror_view(results); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchVecView basis_vector(team.team_scratch(0), size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { basis_vector(j) = 0; }); + + Coord target_point; + target_point.x = coords[i][0]; + target_point.y = coords[i][1]; + target_point.z = coords[i][2]; + + BasisPoly(basis_vector, d_array, target_point); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { results(i, j) = basis_vector(j); }); + }); + + Kokkos::deep_copy(host_results, results); + Kokkos::View expected_basis("expected vector", + size); + std::vector names = {"1", "x", "y", "z"}; + + for (int i = 0; i < nvertices_target; ++i) { + double x = coords[i][0]; + double y = coords[i][1]; + double z = coords[i][2]; + expected_basis(0) = 1; + expected_basis(1) = x; // (x) + expected_basis(2) = y; // (y) + expected_basis(3) = z; // (z) + for (int j = 0; j < size; ++j) { + std::cout << names[j] << " " << expected_basis(j) << " " + << host_results(i, j) << "\n"; + REQUIRE(expected_basis(j) == host_results(i, j)); + } + } + } + + SECTION("check vandermonde matrix") + { + + const int dim = 1; + const int degree = 3; + + Kokkos::View host_slice_length("array", degree, + dim); + Kokkos::deep_copy(host_slice_length, 0); + basisSliceLengths(host_slice_length); + + MatViewType slice_length("slice array in device", degree, dim); + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + auto size = basisSize(host_slice_length); + int nvertices_target = 2; + int nsupports = 5; + Kokkos::View points("vandermonde matrix", nvertices_target, + nsupports); + + Kokkos::View vandermonde_matrix_combined( + "stores vandermonde matrix", nvertices_target, nsupports, size); + + auto host_points = Kokkos::create_mirror_view(points); + auto host_vandermonde_matrix_combined = + Kokkos::create_mirror_view(vandermonde_matrix_combined); + + host_points(0, 1) = 0.5; + host_points(0, 2) = 0.1; + host_points(0, 3) = 0.3; + host_points(0, 4) = 0.2; + host_points(0, 5) = 0.6; + + host_points(1, 1) = 1.1; + host_points(1, 2) = 2.6; + host_points(1, 3) = 0.8; + host_points(1, 4) = 0.4; + host_points(1, 5) = 1.7; + + Kokkos::deep_copy(points, host_points); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, 1); + for (int j = 0; j < nsupports; ++j) { + local_source_points(j, 0) = points(i, j); + } + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < size; ++k) { + vandermonde_matrix(j, k) = 0.0; + } + }); + + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + CreateVandermondeMatrix(vandermonde_matrix, local_source_points, j, + slice_length); + }); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + for (int k = 0; k < size; ++k) { + vandermonde_matrix_combined(i, j, k) = vandermonde_matrix(j, k); + } + }); + }); + + Kokkos::deep_copy(host_vandermonde_matrix_combined, + vandermonde_matrix_combined); + Kokkos::View expected_vandermonde_matrix( + "expected vector", nvertices_target, nsupports, size); + + for (int i = 0; i < nvertices_target; ++i) { + for (int j = 0; j < nsupports; ++j) { + double x = host_points(i, j); + expected_vandermonde_matrix(i, j, 0) = 1; + expected_vandermonde_matrix(i, j, 1) = x; // (x) + expected_vandermonde_matrix(i, j, 2) = x * x; // (x^2) + expected_vandermonde_matrix(i, j, 3) = x * x * x; // (x^3) + } + } + for (int i = 0; i < nvertices_target; ++i) { + for (int j = 0; j < nsupports; ++j) { + for (int k = 0; k < size; ++k) { + REQUIRE(expected_vandermonde_matrix(i, j, k) == + host_vandermonde_matrix_combined(i, j, k)); + } + } + } + } +} diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index c82879e5..51661285 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -8,6 +8,8 @@ #include #include #include +#include +#include using namespace std; using namespace Omega_h; @@ -17,64 +19,57 @@ double func(Coord& p, int degree) { [[maybe_unused]] auto x = p.x; [[maybe_unused]] auto y = p.y; - if (degree = 0) { - double Z = 3; + if (degree == 0) { + return 3; + } else if (degree == 1) { + return x + y; + } else if (degree == 2) { + return pow(x, 2) + pow(y, 2); + } else if (degree == 3) { + + return pow(x, 3) + pow(y, 3); + } else { + printf("No polynomials with degree = %d\n", degree); } - elseif(degree = 1) - { - double Z = x + y; - } - elseif(degree = 2) - { - double Z = pow(x, 2) + pow(y, 2); - } - elseif(degree = 3) - { - - double Z = pow(x, 3) + pow(y, 3); - } - else printf("No polynomials with degree = %f\n", degree) -} -return Z; + return -1; } -void test(Mesh& mesh, Real cutoffdistance, int degree, LO min_num_Supports, - RadialBasisFunction bf) +void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, + Reals source_values, Reals exact_target_values, + Reals source_coordinates, Reals target_coordinates) { - Write source_values(nfaces, 0, "exact target values"); - Kokkos::parallel_for( - nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func(source_points.coordinates(i), degree); - }); + int dim = mesh.dim(); + Real tolerance = 0.0005; - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); + std::vector rbf_types = { + RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4, + RadialBasisFunction::RBF_CONST - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, degree, - support.radii2, RadialBasisFunction::RBF_C4); + }; - auto host_approx_target_values = HostRead(approx_target_values); + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); + for (const auto& rbf : rbf_types) { + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, support.radii2, rbf); - Kokkos::parallel_for( - mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_linear(target_points.coordinates(i)); - }); + auto host_approx_target_values = HostRead(approx_target_values); - auto host_exact_target_values = HostRead(exact_target_values); + auto host_exact_target_values = HostRead(exact_target_values); - int m = exact_target_values.size(); - int n = approx_target_values.size(); + int m = exact_target_values.size(); + int n = approx_target_values.size(); - REQUIRE(m == n); + REQUIRE(m == n); - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } } } // Test cases for centroid to node mapping using MLS @@ -87,7 +82,6 @@ TEST_CASE("mls_interp_test") auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); Real cutoffDistance = 0.3; - Real tolerance = 0.05; cutoffDistance = cutoffDistance * cutoffDistance; const auto dim = mesh.dim(); @@ -100,7 +94,7 @@ TEST_CASE("mls_interp_test") Write radii2( ntargets, cutoffDistance, - "populate initial square of cutoffdistance to all target points"); + "populate initial square of cutoffDistance to all target points"); Write source_coordinates( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); @@ -148,40 +142,45 @@ TEST_CASE("mls_interp_test") Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_const(source_points.coordinates(i)); + source_values[i] = func(source_points.coordinates(i), degree - 1); }); - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, - degree, support.radii2, RadialBasisFunction::RBF_C4); - - auto host_approx_target_values = HostRead(approx_target_values); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_const(target_points.coordinates(i)); + exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - auto host_exact_target_values = HostRead(exact_target_values); + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } - int m = exact_target_values.size(); - int n = approx_target_values.size(); + SECTION("test interpolation degree 1, function degree 1") + { - REQUIRE(m == n); + int degree = 1; + LO min_num_supports = 10; - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } - } + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); - SECTION("test interpolation degree 1, function degree 1") {} + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } SECTION("test interpo degree 2 poly degree 0") { @@ -193,37 +192,19 @@ TEST_CASE("mls_interp_test") Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_const(source_points.coordinates(i)); + source_values[i] = func(source_points.coordinates(i), degree - 2); }); - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, - degree, support.radii2, RadialBasisFunction::RBF_C4); - - auto host_approx_target_values = HostRead(approx_target_values); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_const(target_points.coordinates(i)); + exact_target_values[i] = func(target_points.coordinates(i), degree - 2); }); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); } SECTION("test interpolation degree 2 poly degree 1") @@ -236,37 +217,19 @@ TEST_CASE("mls_interp_test") Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_linear(source_points.coordinates(i)); + source_values[i] = func(source_points.coordinates(i), degree - 1); }); - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, - degree, support.radii2, RadialBasisFunction::RBF_C4); - - auto host_approx_target_values = HostRead(approx_target_values); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_linear(target_points.coordinates(i)); + exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); } SECTION("test interpolation degree 2, function degree 2") @@ -279,37 +242,19 @@ TEST_CASE("mls_interp_test") Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_quadratic(source_points.coordinates(i)); + source_values[i] = func(source_points.coordinates(i), degree); }); - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, - degree, support.radii2, RadialBasisFunction::RBF_C4); - - auto host_approx_target_values = HostRead(approx_target_values); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_quadratic(target_points.coordinates(i)); + exact_target_values[i] = func(target_points.coordinates(i), degree); }); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); } SECTION("test interpolation degree 3, function degree 2") @@ -322,37 +267,19 @@ TEST_CASE("mls_interp_test") Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_quadratic(source_points.coordinates(i)); + source_values[i] = func(source_points.coordinates(i), degree - 1); }); - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, - degree, support.radii2, RadialBasisFunction::RBF_C4); - - auto host_approx_target_values = HostRead(approx_target_values); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_quadratic(target_points.coordinates(i)); + exact_target_values[i] = func(target_points.coordinates(i), degree - 1); }); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); } SECTION("test interpolation degree 3, function degree 3") @@ -365,36 +292,18 @@ TEST_CASE("mls_interp_test") Kokkos::parallel_for( nfaces, KOKKOS_LAMBDA(int i) { - source_values[i] = func_cubic(source_points.coordinates(i)); + source_values[i] = func(source_points.coordinates(i), degree); }); - SupportResults support = - searchNeighbors(mesh, cutoffDistance, min_num_supports); - - auto approx_target_values = mls_interpolation( - source_values, source_coordinates, target_coordinates, support, dim, - degree, support.radii2, RadialBasisFunction::RBF_C4); - - auto host_approx_target_values = HostRead(approx_target_values); - Write exact_target_values(mesh.nverts(), 0, "exact target values"); Kokkos::parallel_for( mesh.nverts(), KOKKOS_LAMBDA(int i) { - exact_target_values[i] = func_cubic(target_points.coordinates(i)); + exact_target_values[i] = func(target_points.coordinates(i), degree); }); - auto host_exact_target_values = HostRead(exact_target_values); - - int m = exact_target_values.size(); - int n = approx_target_values.size(); - - REQUIRE(m == n); - - for (size_t i = 0; i < m; ++i) { - CHECK_THAT( - host_exact_target_values[i], - Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); - } + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); } } From 90b52183094a78bd79d29f1bfce3f831eef800f3 Mon Sep 17 00:00:00 2001 From: Jacob Merson Date: Wed, 18 Dec 2024 03:26:44 -0500 Subject: [PATCH 44/66] make functions inline to compile This is a workaround for multiple definition errors. Functions that are not templated should be moved to the cpp files. --- src/pcms/interpolator/adj_search.hpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 88abbd00..93363ce4 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -24,7 +24,7 @@ Real calculateDistance(const Real* p1, const Real* p2, const int dim) return dx * dx + dy * dy + dz * dz; } -void checkTargetPoints( +inline void checkTargetPoints( const Kokkos::View& results) { Kokkos::fence(); @@ -43,7 +43,7 @@ void checkTargetPoints( printf("\n"); } -void printSupportsForTarget(const LO target_id, const Write& supports_ptr, +inline void printSupportsForTarget(const LO target_id, const Write& supports_ptr, const Write& nSupports, const Write& support_idx) { @@ -84,7 +84,7 @@ class FindSupports Write& radii2, bool is_build_csr_call); }; -void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, +inline void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, bool is_build_csr_call) { @@ -226,7 +226,7 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, } } -void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, +inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, Write& nSupports, Write& support_idx, Write& radii2, @@ -357,7 +357,7 @@ struct SupportResults Write radii2; // squared radii of the supports }; -SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, +inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Real& cutoffDistance, LO min_req_support = 12, bool adapt_radius = true) { @@ -459,7 +459,7 @@ SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, return support; } -SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, +inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support = 12, bool adapt_radius = true) { SupportResults support; From 4b3cbe1860fa89b2f991efccd9c7e01ae0c3467b Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:23:01 -0500 Subject: [PATCH 45/66] removed target_include_directories --- src/pcms/interpolator/CMakeLists.txt | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 1e17e6c5..8c762ad0 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -1,6 +1,3 @@ -#cmake_minimum_required(VERSION 3.20) -#project(Interpolator) - find_package(KokkosKernels REQUIRED) set(PCMS_FIELD_TRANSFER_HEADERS @@ -29,11 +26,6 @@ add_library(pcms::interpolator ALIAS pcms_interpolator) target_link_libraries(pcms_interpolator PUBLIC pcms::core Kokkos::kokkoskernels) -target_include_directories(pcms_interpolator INTERFACE - $ - $ - $) - install(TARGETS pcms_interpolator EXPORT interpolatorTargets From ab714800e071416b705feb3af9fedc42db4a6142 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:24:10 -0500 Subject: [PATCH 46/66] added comments and kokkoskernel dot product function --- src/pcms/interpolator/MLSCoefficients.hpp | 281 +++++++--------------- 1 file changed, 91 insertions(+), 190 deletions(-) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp index c7e0394a..be41fc1b 100644 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ b/src/pcms/interpolator/MLSCoefficients.hpp @@ -1,5 +1,5 @@ -#ifndef MLS_COEFFICIENTS_HPP -#define MLS_COEFFICIENTS_HPP +#ifndef PCMS_INTERPOLATOR_MLS_COEFFICIENTS_HPP +#define PCMS_INTERPOLATOR_MLS_COEFFICIENTS_HPP #include #include @@ -13,20 +13,38 @@ #include "points.hpp" #include -#define PI_M 3.14159265358979323846 - static constexpr int MAX_DIM = 3; -KOKKOS_INLINE_FUNCTION -double func(Coord& p) -{ - auto x = (p.x - 0.5) * PI_M * 2; - auto y = (p.y - 0.5) * PI_M * 2; - double Z = sin(x) * sin(y) + 2; - return Z; -} - -// computes the slice lengths of the of the polynomial basis +/** + * basisSliceLengths, basissSize and BasisPoly are needed to evaluate the + * polynomial basis for any degree and dimension For instance, polynomial basis + * vector for dim = 2 and degree = 3 at the point A(x,y) looks like {1, x, y, + * xx, xy, yy, xxx, xxy, xyy,yyy}. The slices can be written as [1] degree 0 [x] + * & [y] degree 1 [xx] & [xy, yy] degree 2 [xxx] & [xxy, + * xyy, yyy] degree 3 + * + * the recursive pattern becomes: + * [1] degree 0 + * x*[1] & y*[1] degree 1 + * x*[x] & y*[x, y] degree 2 + * xx*[x] & y*[xx,xy,yy] degree 3 + * + * lengths of the slices: + * Degree \ Dim | x | y | + * ========================= + * 1 | 1 | 1 | + * 2 | 1 | 2 | + * 3 | 1 | 3 | + */ + +/** + * @brief computes the slice lengths of the polynomial basis + * @param array: takes kokkos view array as an input and computes the lengths of + * slices and fills the array + * + * @notes it takes a host array as an input, could have done for device + * but there was a race conditions for a device + */ KOKKOS_INLINE_FUNCTION void basisSliceLengths(Kokkos::View& array) { @@ -48,7 +66,16 @@ void basisSliceLengths(Kokkos::View& array) } } -// finds the size of the polynomial basis vector +/** + * @brief computes the size of the polynomial basis vector + * + * @param array: it is the array of the slices length + * @return sum: sum of each element of slice lengths array gives the basis + * vector size + * + * @note it takes the host array + */ + KOKKOS_INLINE_FUNCTION int basisSize(const Kokkos::View& array) { @@ -65,9 +92,22 @@ int basisSize(const Kokkos::View& array) return sum; } -// evaluates the polynomial basis +/** + * @brief evaluates the polynomial basis + * for example, if it dim = 2 and degree = 2 + * the basis_vector looks like + * basis_vector = {1,x,y,xx,xy,yy} + * + * @param basis_vector: basis_vector 1D array is initialised and passed as + * input + * @param slice_length: slice length is the 2D array that consists of the + * length of slices + * @param p: instance of an class type Coord + * + * @return basis_vector filled with polynomial basis + */ KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView basis_vector, const MatViewType& slice_length, +void BasisPoly(ScratchVecView& basis_vector, const MatViewType& slice_length, Coord& p) { basis_vector(0) = 1; @@ -103,8 +143,8 @@ void BasisPoly(ScratchVecView basis_vector, const MatViewType& slice_length, // create vandermondeMatrix KOKKOS_INLINE_FUNCTION void CreateVandermondeMatrix(ScratchMatView vandermonde_matrix, - const ScratchMatView local_source_points, int j, - const MatViewType slice_length) + const ScratchMatView& local_source_points, int j, + const MatViewType& slice_length) { int N = local_source_points.extent(0); int dim = local_source_points.extent(1); @@ -145,8 +185,8 @@ KOKKOS_INLINE_FUNCTION void PhiVector(ScratchVecView Phi, // A^Tb --> A^T(n,m) b(m) KOKKOS_INLINE_FUNCTION void ScaleColumnTransMatrix(ScratchMatView result_matrix, - const ScratchMatView matrix, - const ScratchVecView vector, member_type team, + const ScratchMatView& matrix, + const ScratchVecView& vector, member_type team, int j) { @@ -156,40 +196,16 @@ void ScaleColumnTransMatrix(ScratchMatView result_matrix, ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); for (int k = 0; k < N; k++) { OMEGA_H_CHECK_PRINTF(!std::isnan(matrix_row(k)), - "ERROR: vandermonde_mat_row is NaN for k = %d\n", k); + "ERROR: given matrix is NaN for k = %d\n", k); OMEGA_H_CHECK_PRINTF(!std::isnan(vector(j)), - "ERROR: Phi(j) in PTphiMatrix is NaN for j = %d\n", j); + "ERROR: given vector is NaN for j = %d\n", j); result_matrix(k, j) = matrix_row(k) * vector(j); - OMEGA_H_CHECK_PRINTF( - !std::isnan(result_matrix(k, j)), - "ERROR: pt_phi in PTphiMatrix is NaN for k = %d, j = %d\n", k, j); + OMEGA_H_CHECK_PRINTF(!std::isnan(result_matrix(k, j)), + "ERROR: result_matrix is NaN for k = %d, j = %d\n", k, + j); } } -// KOKKOS_INLINE_FUNCTION -// void PtphiPMatrix(ScratchMatView& moment_matrix, member_type team, -// const ScratchMatView& pt_phi, -// const ScratchMatView& vandermonde) -//{ -// int M = pt_phi.extent(0); -// int N = vandermonde.extent(1); -// int K = pt_phi.extent(1); -// -// Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { -// double sum = 0.0; -// Kokkos::parallel_reduce( -// Kokkos::ThreadVectorRange(team, K), -// [=](const int k, double& lsum) { -// lsum += pt_phi(i, k) * vandermonde(k, j); -// }, -// sum); -// moment_matrix(i, j) = sum; -// }); -// }); -// } -// - // dot product KOKKOS_INLINE_FUNCTION void dot_product(member_type team, const ScratchVecView& result_sub, @@ -203,62 +219,23 @@ void dot_product(member_type team, const ScratchVecView& result_sub, "ERROR: NaN found in dot_product: N: %d\n", N); } -// matrix matrix multiplication -// KOKKOS_INLINE_FUNCTION -// void MatMatMul(member_type team, ScratchMatView& moment_matrix, -// const ScratchMatView& pt_phi, const ScratchMatView& -// vandermonde) -//{ -// int M = pt_phi.extent(0); -// int N = vandermonde.extent(1); -// int K = pt_phi.extent(1); -// -// Kokkos::parallel_for(Kokkos::ThreadVectorRange(team, M), [=](const int i) { -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int j) { -// double sum = 0.0; -// Kokkos::parallel_reduce( -// Kokkos::ThreadVectorRange(team, K), -// [=](const int k, double& lsum) { -// OMEGA_H_CHECK_PRINTF(!std::isnan(pt_phi(i, k)), -// "ERROR: pt_phi is NaN for i = %d\n", i); -// OMEGA_H_CHECK_PRINTF(!std::isnan(vandermonde(k, j)), -// "ERROR: vandermonde is NaN for k = %d\n", k); -// lsum += pt_phi(i, k) * vandermonde(k, j); -// }, -// sum); -// OMEGA_H_CHECK_PRINTF(!std::isnan(sum), -// "ERROR: sum is NaN for i = %d, j = %d\n", i, j); -// moment_matrix(i, j) = sum; -// }); -// }); -//} -//// -// Matrix vector multiplication - -// KOKKOS_INLINE_FUNCTION -// void MatVecMul(member_type team, const ScratchVecView& vector, -// const ScratchMatView& matrix, ScratchVecView& result) -//{ -// int M = matrix.extent(0); -// int N = matrix.extent(1); -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { -// double sum = 0; -// Kokkos::parallel_reduce( -// Kokkos::ThreadVectorRange(team, M), -// [=](const int j, double& lsum) { lsum += vector(j) * matrix(j, i); }, -// sum); -// result(i) = sum; -// OMEGA_H_CHECK_PRINTF(!std::isnan(result(i)), -// "ERROR: sum is NaN for i = %d\n", i); -// }); -// // team.team_barrier(); -// } -// - // TODO: Implement QR decomposition to solve the linear system // convert normal equation P^T Q P x = P^T Q b to Ax = b'; // A = P^T Q P & b' = P^T Q b +/** + * @struct ResultConvertNormal + * @brief Represents the results from P^T Q(scaled matrix), P^T Q P (square + * matrix) and P^T Q b (transformed rhs) + * + * scaled_matrix is the matrix obtained after scaling the column of the given + * matrix square_matrix is the matrix obtained after P^T Q P operations + * transformed_rhs is obtained after P^T b operation + * + * This class is used to store the scaled matrix, square matrix and transformed + * rhs + */ + struct ResultConvertNormal { ScratchMatView scaled_matrix; @@ -284,7 +261,6 @@ ResultConvertNormal ConvertNormalEq(const ScratchMatView matrix, result.transformed_rhs = ScratchVecView(team.team_scratch(0), n); // performing P^T Q - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { for (int k = 0; k < m; ++k) { result.scaled_matrix(j, k) = 0; @@ -299,6 +275,7 @@ ResultConvertNormal ConvertNormalEq(const ScratchMatView matrix, }); team.team_barrier(); + // performing (P^T Q P) KokkosBatched::TeamGemm< member_type, KokkosBatched::Trans::NoTranspose, @@ -309,10 +286,6 @@ ResultConvertNormal ConvertNormalEq(const ScratchMatView matrix, team.team_barrier(); - // KokkosBlas::Experimental::TeamGemv(team, 1.0, result.scaled_matrix, rhs, - // 0.0, result.transformed_rhs); KokkosBlas::Experimental:: Gemv::invoke( team, 'N', 1.0, result.scaled_matrix, rhs, 0.0, result.transformed_rhs); @@ -322,9 +295,17 @@ ResultConvertNormal ConvertNormalEq(const ScratchMatView matrix, return result; } -/// solve A x = b using LU decomposition -// inputs rhs, overwrites rhs and returns rhs - +/** + * @brief Solves the matrix equation Ax = b + * + * This function uses LU decomposition to solve the matrix equation + * + * @param square_matrix The given matrix A (must be square) + * @param rhs The known vector b + * @return rhs The unknown vector x after solving matrix equation is overwritten + * in rhs + * + */ KOKKOS_INLINE_FUNCTION void SolveMatrix(const ScratchMatView square_matrix, const ScratchVecView rhs, member_type team) @@ -343,84 +324,4 @@ void SolveMatrix(const ScratchMatView square_matrix, const ScratchVecView rhs, KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); } -//// moment matrix -// -//// inverse matrix -// KOKKOS_INLINE_FUNCTION -// void inverse_matrix(member_type team, const ScratchMatView& matrix, -// ScratchMatView& lower, ScratchMatView& forward_matrix, -// ScratchMatView& solution) -//{ -// int N = matrix.extent(0); -// -// for (int j = 0; j < N; ++j) { -// Kokkos::single(Kokkos::PerTeam(team), [=]() { -// double sum = 0; -// for (int k = 0; k < j; ++k) { -// sum += lower(j, k) * lower(j, k); -// } -// OMEGA_H_CHECK_PRINTF(!std::isnan(matrix(j, j)), -// "ERROR: matrix(j,j) is NaN: j = %d\n", j); -// OMEGA_H_CHECK_PRINTF( -// matrix(j, j) - sum >= -1e-10, // TODO: how to check this reliably? -// "ERROR: (matrix(j,j) - sum) is negative: mat(jj)=%.16f, sum = %.16f -// \n", matrix(j, j), sum); -// lower(j, j) = sqrt(matrix(j, j) - sum); -// OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, j)), -// "Lower in inverse_matrix is NaN (j,j) = -// (%d,%d)\n", j, j); -// }); -// -// team.team_barrier(); -// -// Kokkos::parallel_for(Kokkos::TeamVectorRange(team, j + 1, N), [=](int i) -// { -// double inner_sum = 0; -// for (int k = 0; k < j; ++k) { -// inner_sum += lower(i, k) * lower(j, k); -// } -// lower(i, j) = (matrix(i, j) - inner_sum) / lower(j, j); -// lower(j, i) = lower(i, j); -// OMEGA_H_CHECK_PRINTF(!std::isnan(lower(i, j)), -// "Lower in inverse_matrix is NaN (i,j) = -// (%d,%d)\n", i, j); -// OMEGA_H_CHECK_PRINTF(!std::isnan(lower(j, i)), -// "Lower in inverse_matrix is NaN (j,i) = -// (%d,%d)\n", j, i); -// }); -// -// team.team_barrier(); -// } -// -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { -// forward_matrix(i, i) = 1.0 / lower(i, i); -// for (int j = i + 1; j < N; ++j) { -// forward_matrix(j, i) = 0.0; // Initialize to zero -// for (int k = 0; k < j; ++k) { -// forward_matrix(j, i) -= lower(j, k) * forward_matrix(k, i); -// } -// forward_matrix(j, i) /= lower(j, j); -// OMEGA_H_CHECK_PRINTF(!std::isnan(forward_matrix(j, i)), -// "Forward in inverse_matrix is NaN (j,i) = -// (%d,%d)\n", j, i); -// } -// }); -// -// team.team_barrier(); -// -// Kokkos::parallel_for(Kokkos::TeamThreadRange(team, N), [=](const int i) { -// solution(N - 1, i) = forward_matrix(N - 1, i) / lower(N - 1, N - 1); -// for (int j = N - 2; j >= 0; --j) { -// solution(j, i) = forward_matrix(j, i); -// for (int k = j + 1; k < N; ++k) { -// solution(j, i) -= lower(j, k) * solution(k, i); -// } -// solution(j, i) /= lower(j, j); -// OMEGA_H_CHECK_PRINTF( -// !std::isnan(solution(j, i)), -// "Solution in inverse_matrix is NaN (j,i) = (%d,%d)\n", j, i); -// } -// }); -// } -// #endif From f5e55c31385ddfd8f79138a2721760dbef3b56d0 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:24:57 -0500 Subject: [PATCH 47/66] added comments --- src/pcms/interpolator/MLSInterpolation.hpp | 88 ++++++++++------------ 1 file changed, 38 insertions(+), 50 deletions(-) diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp index cedec17d..1c4ca64c 100644 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ b/src/pcms/interpolator/MLSInterpolation.hpp @@ -7,9 +7,23 @@ #include #include #include - +#include using namespace Omega_h; +/** + * @brief maps the data from source mesh to target mesh + * @param source_values Source field values from source mesh + * @param source_coordinates The coordinates of control points of source field + * @param target_coordinates The coordinates of control points of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param radii2 The array of the square of cutoff distance + * @param rbf_func The radial basis function choice + * @return interpolated field in target mesh + * + * + */ template Write mls_interpolation(const Reals source_values, const Reals source_coordinates, @@ -25,12 +39,6 @@ Write mls_interpolation(const Reals source_values, const auto nvertices_source = source_coordinates.size() / dim; const auto nvertices_target = target_coordinates.size() / dim; - // Kokkos::RangePolicy range_policy(1, - // nvertices_target); - - // static_assert(degree > 0," the degree of polynomial basis should be atleast - // 1"); - Kokkos::View shmem_each_team( "stores the size required for each team", nvertices_target); @@ -112,14 +120,6 @@ Write mls_interpolation(const Reals source_values, // rbf function values of source supports Phi(n,n) ScratchVecView phi_vector(team.team_scratch(0), nsupports); - // stores P^T Q - // ScratchMatView scaled_vandermonde_matrix(team.team_scratch(0), - // basis_size, nsupports); - - // stores P^T Q P - // ScratchMatView moment_matrix(team.team_scratch(0), - // basis_size,basis_size); - // stores known vector (b) ScratchVecView support_values(team.team_scratch(0), nsupports); @@ -143,7 +143,7 @@ Write mls_interpolation(const Reals source_values, Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), [=](int j) { // for (int k = 0; k < - //basis_size; ++k) { + // basis_size; ++k) { // moment_matrix(j, // k) = 0; // } @@ -178,10 +178,11 @@ Write mls_interpolation(const Reals source_values, } BasisPoly(target_basis_vector, slice_length, target_point); - // VandermondeMatrix vandermonde_matrix(nsupports, basis_size) - // vandermonde Matrix is created with the basis vector - // of source supports stacking on top of each other - // + /** VandermondeMatrix vandermonde_matrix(nsupports, basis_size) + * vandermonde Matrix is created with the basis vector + * of source supports stacking on top of each other + */ + Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { CreateVandermondeMatrix(vandermonde_matrix, local_source_points, j, @@ -190,10 +191,12 @@ Write mls_interpolation(const Reals source_values, team.team_barrier(); - // PhiVector(nsupports) is the array of rbf functions evaluated at the - // source supports In the actual implementation, Phi(nsupports, nsupports) - // is the diagonal matrix & each diagonal element is the phi evalauted at - // each source points + /** PhiVector(nsupports) is the array of rbf functions evaluated at the + * source supports In the actual implementation, Phi(nsupports, nsupports) + * is the diagonal matrix & each diagonal element is the phi evaluated at + * each source points + */ + Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { OMEGA_H_CHECK_PRINTF( @@ -204,31 +207,11 @@ Write mls_interpolation(const Reals source_values, rbf_func); }); - // // sum phi - // double sum_phi = 0; - // Kokkos::parallel_reduce( - // Kokkos::TeamThreadRange(team, nsupports), - // [=](const int j, double& lsum) { lsum += Phi(j); }, sum_phi); - // OMEGA_H_CHECK_PRINTF(!std::isnan(sum_phi), - // "ERROR: sum_phi is NaN for i=%d\n", i); - // OMEGA_H_CHECK_PRINTF(sum_phi != 0, "ERROR: sum_phi is zero for - // i=%d\n", - // i); - // - // // normalize phi with sum_phi - // Kokkos::parallel_for( - // Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - // OMEGA_H_CHECK_PRINTF( - // !std::isnan(Phi(j)), - // "ERROR: Phi(j) is NaN before normalization for j = %d\n", - // j); - // Phi(j) = Phi(j) / sum_phi; - // }); - // team.team_barrier(); - // support_values(nsupports) (or known rhs vector b) is the vector of the - // quantity that we want interpolate + /** support_values(nsupports) (or known rhs vector b) is the vector of the + * quantity that we want interpolate + */ Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { @@ -251,9 +234,14 @@ Write mls_interpolation(const Reals source_values, team.team_barrier(); - double target_value = 0; - dot_product(team, result.transformed_rhs, target_basis_vector, - target_value); + // implemented the kokkoskernels dot product function + // need to verify + double target_value = KokkosBlas::Experimental::dot( + team, result.transformed_rhs, target_basis_vector); + + // double target_value = 0; + // dot_product(team, result.transformed_rhs, target_basis_vector, + // target_value); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); From 90bff10138f703d5c38427ecf7332edc88a25fc9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:27:21 -0500 Subject: [PATCH 48/66] consistent naming of the class --- src/pcms/interpolator/adj_search.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 88abbd00..e3e47ff0 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -117,8 +117,8 @@ void FindSupports::adjBasedSearch(Write& supports_ptr, Write& nSupports, parallel_for( nvertices_target, OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; + Queue queue; + Track visited; Real cutoffDistance = radii2[id]; LO source_cell_id = results(id).tri_id; @@ -262,8 +262,8 @@ void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, parallel_for( nvertices, OMEGA_H_LAMBDA(const LO id) { - queue queue; - track visited; + Queue queue; + Track visited; const LO num_verts_in_dim = dim + 1; Real target_coords[max_dim]; Real support_coords[max_dim]; From 799ff6d5835ef0220e3831dc79baf0ada87dd8f4 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:29:54 -0500 Subject: [PATCH 49/66] updated to consistent naming of the class --- src/pcms/interpolator/queue_visited.hpp | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp index daa2b716..f09d38af 100644 --- a/src/pcms/interpolator/queue_visited.hpp +++ b/src/pcms/interpolator/queue_visited.hpp @@ -12,7 +12,7 @@ using namespace std; using namespace Omega_h; -class queue +class Queue { private: LO queue_array[MAX_SIZE_QUEUE]; @@ -20,10 +20,10 @@ class queue public: OMEGA_H_INLINE - queue() {} + Queue() {} OMEGA_H_INLINE - ~queue() {} + ~Queue() {} OMEGA_H_INLINE void push_back(const int& item); @@ -41,7 +41,7 @@ class queue bool isFull() const; }; -class track +class Track { private: LO tracking_array[MAX_SIZE_TRACK]; @@ -49,10 +49,10 @@ class track public: OMEGA_H_INLINE - track() {} + Track() {} OMEGA_H_INLINE - ~track() {} + ~Track() {} OMEGA_H_INLINE void push_back(const int& item); @@ -65,7 +65,7 @@ class track }; OMEGA_H_INLINE -void queue::push_back(const int& item) +void Queue::push_back(const int& item) { if (count == MAX_SIZE_QUEUE) { printf("queue is full %d\n", count); @@ -77,7 +77,7 @@ void queue::push_back(const int& item) } OMEGA_H_INLINE -void queue::pop_front() +void Queue::pop_front() { if (count == 0) { printf("queue is empty\n"); @@ -88,25 +88,25 @@ void queue::pop_front() } OMEGA_H_INLINE -int queue::front() +int Queue::front() { return queue_array[first]; } OMEGA_H_INLINE -bool queue::isEmpty() const +bool Queue::isEmpty() const { return count == 0; } OMEGA_H_INLINE -bool queue::isFull() const +bool Queue::isFull() const { return count == MAX_SIZE_QUEUE; } OMEGA_H_INLINE -void track::push_back(const int& item) +void Track::push_back(const int& item) { if (count == MAX_SIZE_TRACK) { printf("track is full %d\n", count); @@ -118,7 +118,7 @@ void track::push_back(const int& item) } OMEGA_H_INLINE -bool track::notVisited(const int& item) +bool Track::notVisited(const int& item) { int id; for (int i = 0; i < count; ++i) { @@ -131,7 +131,7 @@ bool track::notVisited(const int& item) } OMEGA_H_INLINE -int track::size() +int Track::size() { return count; } From e63e3afb5027bf86bacf0b47549798b9b3c52ff3 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:30:43 -0500 Subject: [PATCH 50/66] changed the matrix name from A to P for consistency --- test/test_linear_solver.cpp | 63 +++++++++++++++++++------------------ 1 file changed, 32 insertions(+), 31 deletions(-) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp index 601655f9..e1ee5158 100644 --- a/test/test_linear_solver.cpp +++ b/test/test_linear_solver.cpp @@ -8,8 +8,8 @@ TEST_CASE("solver test") { - // This test computes A^TQA and A^TQ b for the normal equation A^TQA x = A^TQ - // b; where A(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal + // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ + // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal // matrix with diagonal elements stored as vector and b(n,1) is a vector SECTION("check convert normal equation ") { @@ -24,21 +24,22 @@ TEST_CASE("solver test") Kokkos::View b("rhs vector", nvertices_target, nsupports); - Kokkos::View A("vandermonde matrix", nvertices_target, nsupports, + Kokkos::View P("vandermonde matrix", nvertices_target, nsupports, size); - Kokkos::View TransAQA("moment matrix", nvertices_target, size, + Kokkos::View TransPQP("moment matrix", nvertices_target, size, size); - Kokkos::View TransAQ("moment matrix", nvertices_target, size, + Kokkos::View TransPQ("scaled matrix", nvertices_target, size, nsupports); - Kokkos::View TransAQb("transformed rhs", nvertices_target, size); + Kokkos::View TransPQb("transformed rhs", nvertices_target, size); - Kokkos::View solution("transformed rhs", nvertices_target, size); + Kokkos::View solution("solution unknown vector", nvertices_target, + size); auto host_Q = Kokkos::create_mirror_view(Q); - auto host_A = Kokkos::create_mirror_view(A); + auto host_P = Kokkos::create_mirror_view(P); auto host_b = Kokkos::create_mirror_view(b); host_Q(0, 0) = 1.0; @@ -51,21 +52,21 @@ TEST_CASE("solver test") Kokkos::deep_copy(Q, host_Q); - host_A(0, 0, 0) = 1.0; - host_A(0, 0, 1) = 2.0; - host_A(0, 1, 0) = 3.0; - host_A(0, 1, 1) = 4.0; - host_A(0, 2, 0) = 5.0; - host_A(0, 2, 1) = 6.0; + host_P(0, 0, 0) = 1.0; + host_P(0, 0, 1) = 2.0; + host_P(0, 1, 0) = 3.0; + host_P(0, 1, 1) = 4.0; + host_P(0, 2, 0) = 5.0; + host_P(0, 2, 1) = 6.0; - host_A(1, 0, 0) = 2.0; - host_A(1, 0, 1) = 3.0; - host_A(1, 1, 0) = 1.0; - host_A(1, 1, 1) = -1.0; - host_A(1, 2, 0) = 4.0; - host_A(1, 2, 1) = 1.0; + host_P(1, 0, 0) = 2.0; + host_P(1, 0, 1) = 3.0; + host_P(1, 1, 0) = 1.0; + host_P(1, 1, 1) = -1.0; + host_P(1, 2, 0) = 4.0; + host_P(1, 2, 1) = 1.0; - Kokkos::deep_copy(A, host_A); + Kokkos::deep_copy(P, host_P); host_b(0, 0) = 7.0; host_b(0, 1) = 8.0; @@ -92,7 +93,7 @@ TEST_CASE("solver test") phi(j) = Q(i, j); support_values(j) = b(i, j); for (int k = 0; k < size; ++k) { - vandermonde_matrix(j, k) = A(i, j, k); + vandermonde_matrix(j, k) = P(i, j, k); } }); @@ -104,9 +105,9 @@ TEST_CASE("solver test") team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { - TransAQb(i, j) = result.transformed_rhs(j); + TransPQb(i, j) = result.transformed_rhs(j); for (int k = 0; k < size; ++k) { - TransAQA(i, j, k) = result.square_matrix(j, k); + TransPQP(i, j, k) = result.square_matrix(j, k); } }); @@ -115,7 +116,7 @@ TEST_CASE("solver test") Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), [=](int j) { for (int k = 0; k < size; ++k) { - TransAQ(i, k, j) = result.scaled_matrix(k, j); + TransPQ(i, k, j) = result.scaled_matrix(k, j); } }); @@ -132,14 +133,14 @@ TEST_CASE("solver test") team.team_barrier(); }); - auto host_result_lhs = Kokkos::create_mirror_view(TransAQA); - auto host_result_rhs = Kokkos::create_mirror_view(TransAQb); - auto host_result_scaled = Kokkos::create_mirror_view(TransAQ); + auto host_result_lhs = Kokkos::create_mirror_view(TransPQP); + auto host_result_rhs = Kokkos::create_mirror_view(TransPQb); + auto host_result_scaled = Kokkos::create_mirror_view(TransPQ); auto host_result_solution = Kokkos::create_mirror_view(solution); - Kokkos::deep_copy(host_result_lhs, TransAQA); - Kokkos::deep_copy(host_result_rhs, TransAQb); - Kokkos::deep_copy(host_result_scaled, TransAQ); + Kokkos::deep_copy(host_result_lhs, TransPQP); + Kokkos::deep_copy(host_result_rhs, TransPQb); + Kokkos::deep_copy(host_result_scaled, TransPQ); Kokkos::deep_copy(host_result_solution, solution); Kokkos::View expected_lhs_matrix( From d5ae0993f5ca1a858e519ecf433430d63e011f42 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Tue, 21 Jan 2025 14:31:36 -0500 Subject: [PATCH 51/66] updates --- ] | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 ] diff --git a/] b/] new file mode 100644 index 00000000..e69497b2 --- /dev/null +++ b/] @@ -0,0 +1,14 @@ +added comments and kokkoskernel dot product function +# Please enter the commit message for your changes. Lines starting +# with '#' will be ignored, and an empty message aborts the commit. +# +# On branch general_mls +# Changes to be committed: +# modified: MLSCoefficients.hpp +# +# Changes not staged for commit: +# modified: MLSInterpolation.hpp +# modified: adj_search.hpp +# modified: queue_visited.hpp +# modified: ../../../test/test_linear_solver.cpp +# From 9a73bd3d30094ad0ef9c0990a897e4bd50ab29d8 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 30 Jan 2025 22:12:45 -0500 Subject: [PATCH 52/66] deleted files --- src/pcms/interpolator/MLSCoefficients.hpp | 327 ---------- src/pcms/interpolator/MLSInterpolation.hpp | 255 -------- .../interpolator/mls_interpolation_impl.cpp | 84 +++ .../interpolator/mls_interpolation_impl.hpp | 575 ++++++++++++++++++ ...ints.hpp => pcms_interpolator_aliases.hpp} | 15 +- 5 files changed, 664 insertions(+), 592 deletions(-) delete mode 100644 src/pcms/interpolator/MLSCoefficients.hpp delete mode 100644 src/pcms/interpolator/MLSInterpolation.hpp create mode 100644 src/pcms/interpolator/mls_interpolation_impl.cpp create mode 100644 src/pcms/interpolator/mls_interpolation_impl.hpp rename src/pcms/interpolator/{points.hpp => pcms_interpolator_aliases.hpp} (73%) diff --git a/src/pcms/interpolator/MLSCoefficients.hpp b/src/pcms/interpolator/MLSCoefficients.hpp deleted file mode 100644 index be41fc1b..00000000 --- a/src/pcms/interpolator/MLSCoefficients.hpp +++ /dev/null @@ -1,327 +0,0 @@ -#ifndef PCMS_INTERPOLATOR_MLS_COEFFICIENTS_HPP -#define PCMS_INTERPOLATOR_MLS_COEFFICIENTS_HPP - -#include -#include - -#include -#include -#include -#include -#include -#include -#include "points.hpp" -#include - -static constexpr int MAX_DIM = 3; - -/** - * basisSliceLengths, basissSize and BasisPoly are needed to evaluate the - * polynomial basis for any degree and dimension For instance, polynomial basis - * vector for dim = 2 and degree = 3 at the point A(x,y) looks like {1, x, y, - * xx, xy, yy, xxx, xxy, xyy,yyy}. The slices can be written as [1] degree 0 [x] - * & [y] degree 1 [xx] & [xy, yy] degree 2 [xxx] & [xxy, - * xyy, yyy] degree 3 - * - * the recursive pattern becomes: - * [1] degree 0 - * x*[1] & y*[1] degree 1 - * x*[x] & y*[x, y] degree 2 - * xx*[x] & y*[xx,xy,yy] degree 3 - * - * lengths of the slices: - * Degree \ Dim | x | y | - * ========================= - * 1 | 1 | 1 | - * 2 | 1 | 2 | - * 3 | 1 | 3 | - */ - -/** - * @brief computes the slice lengths of the polynomial basis - * @param array: takes kokkos view array as an input and computes the lengths of - * slices and fills the array - * - * @notes it takes a host array as an input, could have done for device - * but there was a race conditions for a device - */ -KOKKOS_INLINE_FUNCTION -void basisSliceLengths(Kokkos::View& array) -{ - int degree = array.extent(0); - int dim = array.extent(1); - - for (int j = 0; j < dim; ++j) { - array(0, j) = 1; - } - - for (int i = 0; i < degree; ++i) { - array(i, 0) = 1; - } - - for (int i = 1; i < degree; ++i) { - for (int j = 1; j < dim; ++j) { - array(i, j) = array(i, j - 1) + array(i - 1, j); - } - } -} - -/** - * @brief computes the size of the polynomial basis vector - * - * @param array: it is the array of the slices length - * @return sum: sum of each element of slice lengths array gives the basis - * vector size - * - * @note it takes the host array - */ - -KOKKOS_INLINE_FUNCTION -int basisSize(const Kokkos::View& array) -{ - int sum = 1; - int degree = array.extent(0); - int dim = array.extent(1); - - for (int i = 0; i < degree; ++i) { - for (int j = 0; j < dim; ++j) { - sum += array(i, j); - } - } - - return sum; -} - -/** - * @brief evaluates the polynomial basis - * for example, if it dim = 2 and degree = 2 - * the basis_vector looks like - * basis_vector = {1,x,y,xx,xy,yy} - * - * @param basis_vector: basis_vector 1D array is initialised and passed as - * input - * @param slice_length: slice length is the 2D array that consists of the - * length of slices - * @param p: instance of an class type Coord - * - * @return basis_vector filled with polynomial basis - */ -KOKKOS_INLINE_FUNCTION -void BasisPoly(ScratchVecView& basis_vector, const MatViewType& slice_length, - Coord& p) -{ - basis_vector(0) = 1; - int dim = slice_length.extent(1); - int degree = slice_length.extent(0); - - int prev_col = 0; - int curr_col = 1; - - double point[MAX_DIM]; - point[0] = p.x; - point[1] = p.y; - - if (dim == 3) { - point[2] = p.z; - } - - for (int i = 0; i < degree; ++i) { - int offset = curr_col; - for (int j = 0; j < dim; ++j) { - for (int k = 0; k < slice_length(i, j); ++k) { - basis_vector(offset + k) = basis_vector(prev_col + k) * point[j]; - } - - offset += slice_length(i, j); - } - - prev_col = curr_col; - curr_col = offset; - } -} - -// create vandermondeMatrix -KOKKOS_INLINE_FUNCTION -void CreateVandermondeMatrix(ScratchMatView vandermonde_matrix, - const ScratchMatView& local_source_points, int j, - const MatViewType& slice_length) -{ - int N = local_source_points.extent(0); - int dim = local_source_points.extent(1); - - Coord source_point; - source_point.x = local_source_points(j, 0); - source_point.y = local_source_points(j, 1); - if (dim == 3) { - source_point.z = local_source_points(j, 2); - } - ScratchVecView basis_vector_supports = - Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL()); - BasisPoly(basis_vector_supports, slice_length, source_point); -} - -// radial basis function vector (phi(s_ti)) -template , - bool> = true> -KOKKOS_INLINE_FUNCTION void PhiVector(ScratchVecView Phi, - const Coord target_point, - const ScratchMatView local_source_points, - int j, double cuttoff_dis_sq, - Func rbf_func) -{ - int N = local_source_points.extent(0); - double dx = target_point.x - local_source_points(j, 0); - double dy = target_point.y - local_source_points(j, 1); - double ds_sq = dx * dx + dy * dy; - Phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); - OMEGA_H_CHECK_PRINTF(!std::isnan(Phi(j)), - "ERROR: Phi(j) in PhiVector is NaN for j = %d " - "ds_sq=%.16f, cuttoff_dis_sq=%.16f", - j, ds_sq, cuttoff_dis_sq); -} - -// takes a matrix A(m,n) and vector b(m) and calulates -// A^Tb --> A^T(n,m) b(m) -KOKKOS_INLINE_FUNCTION -void ScaleColumnTransMatrix(ScratchMatView result_matrix, - const ScratchMatView& matrix, - const ScratchVecView& vector, member_type team, - int j) -{ - - int M = matrix.extent(0); - int N = matrix.extent(1); - - ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); - for (int k = 0; k < N; k++) { - OMEGA_H_CHECK_PRINTF(!std::isnan(matrix_row(k)), - "ERROR: given matrix is NaN for k = %d\n", k); - OMEGA_H_CHECK_PRINTF(!std::isnan(vector(j)), - "ERROR: given vector is NaN for j = %d\n", j); - result_matrix(k, j) = matrix_row(k) * vector(j); - OMEGA_H_CHECK_PRINTF(!std::isnan(result_matrix(k, j)), - "ERROR: result_matrix is NaN for k = %d, j = %d\n", k, - j); - } -} - -// dot product -KOKKOS_INLINE_FUNCTION -void dot_product(member_type team, const ScratchVecView& result_sub, - const ScratchVecView& SupportValues_sub, double& target_value) -{ - int N = result_sub.extent(0); - for (int j = 0; j < N; ++j) { - target_value += result_sub(j) * SupportValues_sub[j]; - } - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), - "ERROR: NaN found in dot_product: N: %d\n", N); -} - -// TODO: Implement QR decomposition to solve the linear system -// convert normal equation P^T Q P x = P^T Q b to Ax = b'; -// A = P^T Q P & b' = P^T Q b - -/** - * @struct ResultConvertNormal - * @brief Represents the results from P^T Q(scaled matrix), P^T Q P (square - * matrix) and P^T Q b (transformed rhs) - * - * scaled_matrix is the matrix obtained after scaling the column of the given - * matrix square_matrix is the matrix obtained after P^T Q P operations - * transformed_rhs is obtained after P^T b operation - * - * This class is used to store the scaled matrix, square matrix and transformed - * rhs - */ - -struct ResultConvertNormal -{ - ScratchMatView scaled_matrix; - ScratchMatView square_matrix; - ScratchVecView transformed_rhs; -}; - -KOKKOS_INLINE_FUNCTION -ResultConvertNormal ConvertNormalEq(const ScratchMatView matrix, - const ScratchVecView weight_vector, - const ScratchVecView rhs, member_type team) -{ - - int m = matrix.extent(0); - int n = matrix.extent(1); - - ResultConvertNormal result; - - result.scaled_matrix = ScratchMatView(team.team_scratch(0), n, m); - - result.square_matrix = ScratchMatView(team.team_scratch(0), n, n); - - result.transformed_rhs = ScratchVecView(team.team_scratch(0), n); - - // performing P^T Q - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { - for (int k = 0; k < m; ++k) { - result.scaled_matrix(j, k) = 0; - } - }); - - team.team_barrier(); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { - ScaleColumnTransMatrix(result.scaled_matrix, matrix, weight_vector, team, - j); - }); - - team.team_barrier(); - - // performing (P^T Q P) - KokkosBatched::TeamGemm< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, - result.scaled_matrix, matrix, - 0.0, result.square_matrix); - - team.team_barrier(); - - KokkosBlas::Experimental:: - Gemv::invoke( - team, 'N', 1.0, result.scaled_matrix, rhs, 0.0, result.transformed_rhs); - - team.team_barrier(); - - return result; -} - -/** - * @brief Solves the matrix equation Ax = b - * - * This function uses LU decomposition to solve the matrix equation - * - * @param square_matrix The given matrix A (must be square) - * @param rhs The known vector b - * @return rhs The unknown vector x after solving matrix equation is overwritten - * in rhs - * - */ -KOKKOS_INLINE_FUNCTION -void SolveMatrix(const ScratchMatView square_matrix, const ScratchVecView rhs, - member_type team) -{ - - // Perform LU decomposition - KokkosBatched::TeamLU< - member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team, - square_matrix); - - team.team_barrier(); - - // Solve the equation with forward and backward solves - KokkosBatched::TeamSolveLU< - member_type, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); -} - -#endif diff --git a/src/pcms/interpolator/MLSInterpolation.hpp b/src/pcms/interpolator/MLSInterpolation.hpp deleted file mode 100644 index 1c4ca64c..00000000 --- a/src/pcms/interpolator/MLSInterpolation.hpp +++ /dev/null @@ -1,255 +0,0 @@ -#ifndef MLS_INTERPOLATION_HPP -#define MLS_INTERPOLATION_HPP - -#include "MLSCoefficients.hpp" -#include "adj_search.hpp" -#include -#include -#include -#include -#include -using namespace Omega_h; - -/** - * @brief maps the data from source mesh to target mesh - * @param source_values Source field values from source mesh - * @param source_coordinates The coordinates of control points of source field - * @param target_coordinates The coordinates of control points of target field - * @param support The object that enpasulates support info - * @param dim The dimension of the simulations - * @param degree The degree of the interpolation order - * @param radii2 The array of the square of cutoff distance - * @param rbf_func The radial basis function choice - * @return interpolated field in target mesh - * - * - */ -template -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Write radii2, - Func rbf_func) -{ - static_assert(std::is_invocable_r_v, - "rbf_func, takes radius and cutoff, returns weight"); - static_assert(!std::is_pointer_v, - "function pointer will fail in GPU execution context"); - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; - - Kokkos::View shmem_each_team( - "stores the size required for each team", nvertices_target); - - Kokkos::View host_slice_length( - "stores slice length of polynomial basis in host", degree, dim); - Kokkos::deep_copy(host_slice_length, 0); - basisSliceLengths(host_slice_length); - - auto basis_size = basisSize(host_slice_length); - printf("basis_size is %d\n", basis_size); - MatViewType slice_length("stores slice length of polynomial basis in device", - degree, dim); - auto slice_length_hd = Kokkos::create_mirror_view(slice_length); - Kokkos::deep_copy(slice_length_hd, host_slice_length); - Kokkos::deep_copy(slice_length, slice_length_hd); - Kokkos::parallel_for( - "calculate the size required for scratch for each team", nvertices_target, - KOKKOS_LAMBDA(const int i) { - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); - total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); - total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); - total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - total_shared_size += ScratchMatView::shmem_size(nsupports, 1); - shmem_each_team(i) = total_shared_size; - }); - - size_t shared_size; - Kokkos::parallel_reduce( - "find_max", nvertices_target, - KOKKOS_LAMBDA(const int i, size_t& max_val_temp) { - if (shmem_each_team(i) > max_val_temp) { - max_val_temp = shmem_each_team(i); - } - }, - Kokkos::Max(shared_size)); - - Write approx_target_values(nvertices_target, 0, - "approximated target values"); - - team_policy tp(nvertices_target, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - - assert(scratch_size > shared_size && - "The required scratch size exceeds the max available scratch size"); - - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } - } - - // vondermonde matrix P from the vectors of basis vector of supports - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - basis_size); - - // rbf function values of source supports Phi(n,n) - ScratchVecView phi_vector(team.team_scratch(0), nsupports); - - // stores known vector (b) - ScratchVecView support_values(team.team_scratch(0), nsupports); - - // basis of target - ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); - - // Kokkos::deep_copy(lower, 0.0); - // Kokkos::deep_copy(forward_matrix, 0.0); - // Kokkos::deep_copy(moment_matrix, 0.0); - // Kokkos::deep_copy(inv_mat, 0.0); - // Kokkos::deep_copy(V, 0.0); - // Kokkos::deep_copy(Ptphi, 0.0); - // Kokkos::deep_copy(resultant_matrix, 0.0); - // Kokkos::deep_copy(targetMonomialVec, 0.0); - // Kokkos::deep_copy(support_values, 0.0); - // Kokkos::deep_copy(result, 0.0); - // Kokkos::deep_copy(Phi, 0.0); - - // Initialize the scratch matrices and vectors - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, basis_size), - [=](int j) { - // for (int k = 0; k < - // basis_size; ++k) { - // moment_matrix(j, - // k) = 0; - // } - - target_basis_vector(j) = 0; - // for (int k = 0; k < - // nsupports; ++k) { - // - // scaled_vandermonde_matrix(j, - // k) = 0; - // } - }); - - Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), - [=](int j) { - for (int k = 0; k < basis_size; ++k) { - vandermonde_matrix(j, k) = 0; - } - - support_values(j) = 0; - phi_vector(j) = 0; - }); - - // evaluates the basis vector of a given target point - - Coord target_point; - target_point.x = target_coordinates[i * dim]; - target_point.y = target_coordinates[i * dim + 1]; - - if (dim == 3) { - target_point.z = target_coordinates[i * dim + 2]; - } - BasisPoly(target_basis_vector, slice_length, target_point); - - /** VandermondeMatrix vandermonde_matrix(nsupports, basis_size) - * vandermonde Matrix is created with the basis vector - * of source supports stacking on top of each other - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - CreateVandermondeMatrix(vandermonde_matrix, local_source_points, j, - slice_length); - }); - - team.team_barrier(); - - /** PhiVector(nsupports) is the array of rbf functions evaluated at the - * source supports In the actual implementation, Phi(nsupports, nsupports) - * is the diagonal matrix & each diagonal element is the phi evaluated at - * each source points - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - OMEGA_H_CHECK_PRINTF( - radii2[i] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - radii2[i]); - PhiVector(phi_vector, target_point, local_source_points, j, radii2[i], - rbf_func); - }); - - team.team_barrier(); - - /** support_values(nsupports) (or known rhs vector b) is the vector of the - * quantity that we want interpolate - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - support_values(i) = - source_values[support.supports_idx[start_ptr + i]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(i)), - "ERROR: NaN found: at support %d\n", i); - }); - - team.team_barrier(); - - auto result = - ConvertNormalEq(vandermonde_matrix, phi_vector, support_values, team); - - team.team_barrier(); - - // It stores the solution in rhs vector itself - - SolveMatrix(result.square_matrix, result.transformed_rhs, team); - - team.team_barrier(); - - // implemented the kokkoskernels dot product function - // need to verify - double target_value = KokkosBlas::Experimental::dot( - team, result.transformed_rhs, target_basis_vector); - - // double target_value = 0; - // dot_product(team, result.transformed_rhs, target_basis_vector, - // target_value); - - if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); - approx_target_values[i] = target_value; - } - }); - - return approx_target_values; -} - -#endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.cpp b/src/pcms/interpolator/mls_interpolation_impl.cpp new file mode 100644 index 00000000..37083b01 --- /dev/null +++ b/src/pcms/interpolator/mls_interpolation_impl.cpp @@ -0,0 +1,84 @@ +#include "mls_interpolation_impl.hpp" + +namespace pcms +{ + +namespace detail +{ +void calculate_basis_slice_lengths(IntHostMatView& array) +{ + int degree = array.extent(0); + int dim = array.extent(1); + + for (int j = 0; j < dim; ++j) { + array(0, j) = 1; + } + + for (int i = 0; i < degree; ++i) { + array(i, 0) = 1; + } + + for (int i = 1; i < degree; ++i) { + for (int j = 1; j < dim; ++j) { + array(i, j) = array(i, j - 1) + array(i - 1, j); + } + } +} + +int calculate_basis_vector_size(const IntHostMatView& array) +{ + int sum = 1; + int degree = array.extent(0); + int dim = array.extent(1); + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + sum += array(i, j); + } + } + + return sum; +} + +int calculate_scratch_shared_size() +{ + + IntVecView shmem_each_team("stores the size required for each team", + nvertices_target); + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); + total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); + total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); + total_shared_size += ScratchVecView::shmem_size(basis_size); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + total_shared_size += ScratchMatView::shmem_size(nsupports, 1); + shmem_each_team(i) = total_shared_size; + }); + + // namespace KE = Kokkos::Experimental; + // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(), + // shmem_each_team); printf("shared size = %d", shared_size); + int shared_size = 0; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, int& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + + return shared_size; +} + +} // namespace detail +} // namespace pcms diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp new file mode 100644 index 00000000..f13a9b33 --- /dev/null +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -0,0 +1,575 @@ +#ifndef PCMS_INTERPOLATOR_MLS_INTERPOLATION_IMP_HPP +#define PCMS_INTERPOLATOR_MLS_INTERPOLATION_IMP_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pcms_interpolator_aliases.hpp" +#include "adj_search.hpp" +#include "../assert.h" +#include "../profile.h" + +static constexpr int MAX_DIM = 3; + +/** + * calculate_basis_slice_lengths, calculate_basis_vector_size and + * eval_basis_vector are needed to evaluate the polynomial basis for any degree + * and dimension For instance, polynomial basis vector for dim = 2 and degree = + * 3 at the point (x,y) looks like {1, x, y, xx, xy, yy, xxx, xxy, xyy,yyy}. The + * slices can be written as [1] degree 0 [x] & [y] degree 1 + * [xx] & [xy, yy] degree 2 + * [xxx] & [xxy,xyy, yyy] degree 3 + * + * the recursive pattern becomes: + * [1] degree 0 + * x*[1] & y*[1] degree 1 + * x*[x] & y*[x, y] degree 2 + * xx*[x] & y*[xx,xy,yy] degree 3 + * + * lengths of the slices: + * Degree \ Dim | x | y | + * ========================= + * 1 | 1 | 1 | + * 2 | 1 | 2 | + * 3 | 1 | 3 | + */ + +namespace pcms +{ + +namespace detail +{ +/** + * @brief Computes the slice lengths of the polynomial basis + * + * This function takes kokkos view array as an input and computes the lengths of + * slices and fills the array + * + * @param[in,out] array The 2D array + * + * @note It takes a host array as an input, could have done for device + * but there was a race conditions for a device + */ +void calculate_basis_slice_lengths(IntHostMatView& array); + +/** + * @brief Computes the size of the polynomial basis vector + * + * @param array The 2D array of the slices length + * @return The basis vector size + * + * @note It takes the host array + */ +int calculate_basis_vector_size(const IntHostMatView& array); + +/** + * @brief Calculates the scratch size required + * + * This function uses the size of the vectors and matrices and + * calculates the scratch size required in each team and extracts the + * maximum shared size + * + * @param None + * + * @return The scratch size + */ +int calculate_scratch_shared_size(); + +/** + * @brief Evaluates the polynomial basis + * + * for example, if it dim = 2 and degree = 2, the basis_vector looks like + * basis_vector = {1,x,y,xx,xy,yy} + * + * @param[in] slice_length A 2D array of slices length + * @param[in] p A reference to the coordinate struct + * @param[in,out] basis_vector The polynomial basis vector + * + */ +KOKKOS_INLINE_FUNCTION +void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, + ScratchVecView& basis_vector) +{ + basis_vector(0) = 1; + int dim = slice_length.extent(1); + int degree = slice_length.extent(0); + + int prev_col = 0; + int curr_col = 1; + + double point[MAX_DIM]; + point[0] = p.x; + point[1] = p.y; + + if (dim == 3) { + point[2] = p.z; + } + + for (int i = 0; i < degree; ++i) { + int offset = curr_col; + for (int j = 0; j < dim; ++j) { + for (int k = 0; k < slice_length(i, j); ++k) { + basis_vector(offset + k) = basis_vector(prev_col + k) * point[j]; + } + + offset += slice_length(i, j); + } + + prev_col = curr_col; + curr_col = offset; + } +} + +/** + * @brief Creates a vandermonde matrix + * + * @param[in] local_source_points The coordinates of the local supports + * @param[in] j The jth row + * @param[in] slice_length The slice lengths of the polynomial basis + * @param[in,out] vandermonde_matrix The Vandermonde Matrix + * + */ +KOKKOS_INLINE_FUNCTION +void create_vandermonde_matrix(const ScratchMatView& local_source_points, int j, + const IntDeviceMatView& slice_length, + ScratchMatView vandermonde_matrix) +{ + int N = local_source_points.extent(0); + int dim = local_source_points.extent(1); + + Coord source_point; + source_point.x = local_source_points(j, 0); + source_point.y = local_source_points(j, 1); + if (dim == 3) { + source_point.z = local_source_points(j, 2); + } + ScratchVecView basis_vector_supports = + Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL()); + eval_basis_vector(slice_length, source_point, basis_vector_supports); +} + +/** + *@brief Computes basis function vector + * + *This function takes a basis function the user wants and calculates the value + *for each local source supports + * + * @tparam Func The type of the radial basis function + * @param[in] target_point The coordinate of the target point where the + *interpolation is carried out + * @param[in] source_points The coordinates of the source support points + * @param[in] j The jth row + * @param[in] cutoff_dis_sq The cutoff radius squared + * @param[in] rbf_func The radial basis function of choice + * @param[in, out] phi A radial basis function value + * + */ +template , + bool> = true> +KOKKOS_INLINE_FUNCTION void PhiVector(const Coord& target_point, + const ScratchMatView& local_source_points, + int j, double cuttoff_dis_sq, + Func rbf_func, ScratchVecView phi) +{ + int N = local_source_points.extent(0); + double dx = target_point.x - local_source_points(j, 0); + double dy = target_point.y - local_source_points(j, 1); + double ds_sq = dx * dx + dy * dy; + phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); + OMEGA_H_CHECK_PRINTF(!std::isnan(phi(j)), + "ERROR: Phi(j) in PhiVector is NaN for j = %d " + "ds_sq=%.16f, cuttoff_dis_sq=%.16f", + j, ds_sq, cuttoff_dis_sq); +} + +/** + * @brief Scales the column of the transpose of the given matrix + * + * This function takes a matrix P(m,n) and vector phi(m) and scales the + * each column of the transpose of P by each corresponding element in vector + * phi + * + * @param[in] matrix The matrix + * @param[in] vector The vector + * @param[in] team The team member + * @param[in, out] result_matrix + * + */ +KOKKOS_INLINE_FUNCTION +void scale_column_trans_matrix(const ScratchMatView& matrix, + const ScratchVecView& vector, member_type team, + int j, ScratchMatView result_matrix) +{ + + int M = matrix.extent(0); + int N = matrix.extent(1); + + ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); + for (int k = 0; k < N; k++) { + OMEGA_H_CHECK_PRINTF(!std::isnan(matrix_row(k)), + "ERROR: given matrix is NaN for k = %d\n", k); + + OMEGA_H_CHECK_PRINTF(!std::isnan(vector(j)), + "ERROR: given vector is NaN for j = %d\n", j); + + result_matrix(k, j) = matrix_row(k) * vector(j); + + OMEGA_H_CHECK_PRINTF(!std::isnan(result_matrix(k, j)), + "ERROR: result_matrix is NaN for k = %d, j = %d\n", k, + j); + } +} + +/** + * @struct ResultConvertNormal + * @brief Stores the results of matrix and vector transformations. + * + * This struct represents the results of the following operations: + * - `P^T Q` (scaled matrix): The matrix obtained after scaling the columns of + * the given matrix. + * - `P^T Q P` (square matrix): The matrix obtained after performing the `P^T Q + * P` operation. + * - `P^T b` (transformed RHS): The vector obtained after applying the `P^T b` + * operation. + * + * The struct is used to store: + * - The scaled matrix (`scaled_matrix`). + * - The square matrix (`square_matrix`). + * - The transformed right-hand side vector (`transformed_rhs`). + */ +struct ResultConvertNormal +{ + ScratchMatView scaled_matrix; + ScratchMatView square_matrix; + ScratchVecView transformed_rhs; +}; + +/** + * @brief Converts a normal equation into a simplified form. + * + * This function takes a matrix, a basis/weight vector, and a right-hand side + * (RHS) vector and computes the scaled matrix, square matrix, and transformed + * RHS. + * + * For example, the normal equation `P^T Q P x = P^T Q b` is converted into the + * form `Ax = b'`, where: + * - `A` (square matrix) = `P^T Q P` + * - `b'` (transformed RHS) = `P^T Q b` + * + * @param matrix The input matrix. + * @param weight_vector The weight or basis vector. + * @param rhs The right-hand side vector of the equation. + * @param team The team member executing the task (optional, if applicable). + * + * @return The result of the normal equation conversion as a struct containing: + * - The scaled matrix. + * - The square matrix. + * - The transformed RHS. + * + * @todo Implement QR decomposition to solve the linear system. + */ +KOKKOS_INLINE_FUNCTION +ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, + const ScratchVecView& weight_vector, + const ScratchVecView& rhs, + member_type team) +{ + + int m = matrix.extent(0); + int n = matrix.extent(1); + + ResultConvertNormal result; + + result.scaled_matrix = ScratchMatView(team.team_scratch(0), n, m); + + result.square_matrix = ScratchMatView(team.team_scratch(0), n, n); + + result.transformed_rhs = ScratchVecView(team.team_scratch(0), n); + + // performing P^T Q + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { + for (int k = 0; k < m; ++k) { + result.scaled_matrix(j, k) = 0; + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { + scale_column_trans_matrix(matrix, weight_vector, team, j, + result.scaled_matrix); + }); + + team.team_barrier(); + + // performing (P^T Q P) + KokkosBatched::TeamGemm< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, + result.scaled_matrix, matrix, + 0.0, result.square_matrix); + + team.team_barrier(); + + KokkosBlas::Experimental:: + Gemv::invoke( + team, 'N', 1.0, result.scaled_matrix, rhs, 0.0, result.transformed_rhs); + + team.team_barrier(); + + return result; +} + +/** + * @brief Solves the matrix equation Ax = b' using LU decomposition. + * + * This function solves the given matrix equation using LU decomposition. + * The solution vector `x'` overwrites the input `rhs` vector. + * + * @param square_matrix The input matrix A (must be square). + * @param rhs The right-hand side vector b. After execution, it is overwritten + * with the solution vector x. + * @param team The team member executing the task. + * + * @return None. The solution is directly written to the `rhs` vector. + */ +KOKKOS_INLINE_FUNCTION +void solve_matrix(const ScratchMatView& square_matrix, + const ScratchVecView& rhs, member_type team) +{ + + // Perform LU decomposition + KokkosBatched::TeamLU< + member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team, + square_matrix); + + team.team_barrier(); + + // Solve the equation with forward and backward solves + KokkosBatched::TeamSolveLU< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param matrix The scratch matrix + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchMatView matrix) +{ + + int row = matrix.extent(0); + int col = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { + for (int k = 0; k < col; ++k) { + matrix(j, k) = value; + } + }); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param vector The scratch vector + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchVecView vector) +{ + + int size = vector.extent(0); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { vector(j) = value; }); +} + +/** + * @brief maps the data from source mesh to target mesh + * @param source_values Source field values from source mesh + * @param source_coordinates The coordinates of control points of source field + * @param target_coordinates The coordinates of control points of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param radii2 The array of the square of cutoff distance + * @param rbf_func The radial basis function choice + * @return interpolated field in target mesh + * + * + */ +template +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Write radii2, + Func rbf_func) +{ + PCMS_FUNCTION_TIMER; + static_assert(std::is_invocable_r_v, + "rbf_func, takes radius and cutoff, returns weight"); + static_assert(!std::is_pointer_v, + "function pointer will fail in GPU execution context"); + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + IntHostMatView host_slice_length( + "stores slice length of polynomial basis in host", degree, dim); + Kokkos::deep_copy(host_slice_length, 0); + calculate_basis_slice_lengths(host_slice_length); + + auto basis_size = calculate_basis_vector_size(host_slice_length); + + IntDeviceMatView slice_length( + "stores slice length of polynomial basis in device", degree, dim); + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + Write approx_target_values(nvertices_target, 0, + "approximated target values"); + + int shared_size = calculate_scratch_shared_size(); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + + PCMS_ALWAYS_ASSERT(scratch_size > shared_size); + + // calculates the interpolated values + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; + } + } + + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + basis_size); + + // rbf function values of source supports Phi(n,n) + ScratchVecView phi_vector(team.team_scratch(0), nsupports); + + // stores known vector (b) + ScratchVecView support_values(team.team_scratch(0), nsupports); + + // basis of target + ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); + + // Initialize the scratch matrices and vectors + fill(0.0, team, vandermonde_matrix); + fill(0.0, team, phi_vector); + fill(0.0, team, support_values); + fill(0.0, team, target_basis_vector); + + // evaluates the basis vector of a given target point + Coord target_point; + target_point.x = target_coordinates[i * dim]; + target_point.y = target_coordinates[i * dim + 1]; + + if (dim == 3) { + target_point.z = target_coordinates[i * dim + 2]; + } + eval_basis_vector(slice_length, target_point, target_basis_vector); + + /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is + * created with the basis vector of source supports stacking on top of + * each other + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + create_vandermonde_matrix(local_source_points, j, slice_length, + vandermonde_matrix); + }); + + team.team_barrier(); + + /** PhiVector(nsupports) is the array of rbf functions evaluated at the + * source supports In the actual implementation, Phi(nsupports, nsupports) + * is the diagonal matrix & each diagonal element is the phi evaluated at + * each source points + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + OMEGA_H_CHECK_PRINTF( + radii2[i] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + radii2[i]); + PhiVector(target_point, local_source_points, j, radii2[i], rbf_func, + phi_vector); + }); + + team.team_barrier(); + + /** support_values(nsupports) (or known rhs vector b) is the vector of the + * quantity that we want interpolate + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + support_values(i) = + source_values[support.supports_idx[start_ptr + i]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(i)), + "ERROR: NaN found: at support %d\n", i); + }); + + team.team_barrier(); + + auto result = convert_normal_equation(vandermonde_matrix, phi_vector, + support_values, team); + + team.team_barrier(); + + // It stores the solution in rhs vector itself + solve_matrix(result.square_matrix, result.transformed_rhs, team); + + team.team_barrier(); + + double target_value = KokkosBlas::Experimental::dot( + team, result.transformed_rhs, target_basis_vector); + + if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); + approx_target_values[i] = target_value; + } + }); + + return approx_target_values; +} + +} // namespace detail +} // namespace pcms +#endif diff --git a/src/pcms/interpolator/points.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp similarity index 73% rename from src/pcms/interpolator/points.hpp rename to src/pcms/interpolator/pcms_interpolator_aliases.hpp index 25d074c1..383393a7 100644 --- a/src/pcms/interpolator/points.hpp +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -1,8 +1,7 @@ -#ifndef POINTS_HPP -#define POINTS_HPP +#ifndef PCMS_INTERPOLATOR_ALIASES_HPP +#define PCMS_INTERPOLATOR_ALIASES_HPP #include -#include struct Coord { @@ -24,12 +23,8 @@ using ScratchVecView = typename Kokkos::View>; -using PointsViewType = Kokkos::View; - -using MatViewType = Kokkos::View; -struct Points -{ - PointsViewType coordinates; -}; +using IntDeviceMatView = Kokkos::View; +using IntDeviceVecView = Kokkos::View; +using IntHostMatView = Kokkos::View; #endif From c226a24cf2f53faa8db3328a3609b8d897a05c71 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 31 Jan 2025 15:41:35 -0500 Subject: [PATCH 53/66] remove deleted files --- src/pcms/interpolator/CMakeLists.txt | 9 +- ..._rbf_options.cpp => mls_interpolation.cpp} | 106 +++++++++++++++--- ..._rbf_options.hpp => mls_interpolation.hpp} | 6 +- .../interpolator/mls_interpolation_impl.cpp | 84 -------------- 4 files changed, 101 insertions(+), 104 deletions(-) rename src/pcms/interpolator/{MLS_rbf_options.cpp => mls_interpolation.cpp} (52%) rename src/pcms/interpolator/{MLS_rbf_options.hpp => mls_interpolation.hpp} (88%) delete mode 100644 src/pcms/interpolator/mls_interpolation_impl.cpp diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt index 8c762ad0..08728df7 100644 --- a/src/pcms/interpolator/CMakeLists.txt +++ b/src/pcms/interpolator/CMakeLists.txt @@ -1,18 +1,17 @@ find_package(KokkosKernels REQUIRED) set(PCMS_FIELD_TRANSFER_HEADERS - MLSInterpolation.hpp - points.hpp + pcms_interpolator_aliases.hpp adj_search.hpp - MLSCoefficients.hpp + mls_interpolation_impl.hpp queue_visited.hpp linear_interpolant.hpp multidimarray.hpp - MLS_rbf_options.hpp + mls_interpolation.hpp ) set(PCMS_FIELD_TRANSFER_SOURCES - MLS_rbf_options.cpp + mls_interpolation.cpp ) install(FILES ${PCMS_FIELD_TRANSFER_HEADERS} DESTINATION include/pcms/interpolator) diff --git a/src/pcms/interpolator/MLS_rbf_options.cpp b/src/pcms/interpolator/mls_interpolation.cpp similarity index 52% rename from src/pcms/interpolator/MLS_rbf_options.cpp rename to src/pcms/interpolator/mls_interpolation.cpp index deab7811..3f34cee8 100644 --- a/src/pcms/interpolator/MLS_rbf_options.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -1,9 +1,8 @@ #include -#include "MLS_rbf_options.hpp" -#include "MLSInterpolation.hpp" -#include +#include "mls_interpolation.hpp" -// TODO add PCMS namespace to all interpolation code +namespace pcms +{ // RBF_GAUSSIAN Functor struct RBF_GAUSSIAN @@ -116,23 +115,104 @@ Write mls_interpolation(const Reals source_values, "approximated target values"); switch (bf) { case RadialBasisFunction::RBF_GAUSSIAN: - interpolated_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, radii2, RBF_GAUSSIAN{}); + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, radii2, + dim, degree, RBF_GAUSSIAN{}); break; case RadialBasisFunction::RBF_C4: - interpolated_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, radii2, RBF_C4{}); + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, radii2, + dim, degree, RBF_C4{}); break; case RadialBasisFunction::RBF_CONST: - interpolated_values = - mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, radii2, RBF_CONST{}); + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, radii2, + dim, degree, RBF_CONST{}); break; } return interpolated_values; } + +namespace detail +{ + +void calculate_basis_slice_lengths(IntHostMatView& array) +{ + int degree = array.extent(0); + int dim = array.extent(1); + + for (int j = 0; j < dim; ++j) { + array(0, j) = 1; + } + + for (int i = 0; i < degree; ++i) { + array(i, 0) = 1; + } + + for (int i = 1; i < degree; ++i) { + for (int j = 1; j < dim; ++j) { + array(i, j) = array(i, j - 1) + array(i - 1, j); + } + } +} + +int calculate_basis_vector_size(const IntHostMatView& array) +{ + int sum = 1; + int degree = array.extent(0); + int dim = array.extent(1); + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + sum += array(i, j); + } + } + + return sum; +} + +int calculate_scratch_shared_size(const SupportResults& support, + const int nvertices_target, int basis_size) +{ + + IntDeviceVecView shmem_each_team("stores the size required for each team", + nvertices_target); + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); + total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); + total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); + total_shared_size += ScratchVecView::shmem_size(basis_size); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + total_shared_size += ScratchMatView::shmem_size(nsupports, 1); + shmem_each_team(i) = total_shared_size; + }); + + // namespace KE = Kokkos::Experimental; + // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(), + // shmem_each_team); printf("shared size = %d", shared_size); + int shared_size = 0; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, int& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + + return shared_size; +} +} // namespace detail +} // namespace pcms diff --git a/src/pcms/interpolator/MLS_rbf_options.hpp b/src/pcms/interpolator/mls_interpolation.hpp similarity index 88% rename from src/pcms/interpolator/MLS_rbf_options.hpp rename to src/pcms/interpolator/mls_interpolation.hpp index ae04fe94..56abaffa 100644 --- a/src/pcms/interpolator/MLS_rbf_options.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -1,10 +1,12 @@ #ifndef MLS_RBF_OPTIONS_HPP #define MLS_RBF_OPTIONS_HPP -#include "adj_search.hpp" +#include "mls_interpolation_impl.hpp" using namespace Omega_h; +namespace pcms +{ enum class RadialBasisFunction : LO { RBF_GAUSSIAN = 0, @@ -19,5 +21,5 @@ Write mls_interpolation(const Reals source_values, const SupportResults& support, const LO& dim, const LO& degree, Write radii2, RadialBasisFunction bf); - +} // namespace pcms #endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.cpp b/src/pcms/interpolator/mls_interpolation_impl.cpp deleted file mode 100644 index 37083b01..00000000 --- a/src/pcms/interpolator/mls_interpolation_impl.cpp +++ /dev/null @@ -1,84 +0,0 @@ -#include "mls_interpolation_impl.hpp" - -namespace pcms -{ - -namespace detail -{ -void calculate_basis_slice_lengths(IntHostMatView& array) -{ - int degree = array.extent(0); - int dim = array.extent(1); - - for (int j = 0; j < dim; ++j) { - array(0, j) = 1; - } - - for (int i = 0; i < degree; ++i) { - array(i, 0) = 1; - } - - for (int i = 1; i < degree; ++i) { - for (int j = 1; j < dim; ++j) { - array(i, j) = array(i, j - 1) + array(i - 1, j); - } - } -} - -int calculate_basis_vector_size(const IntHostMatView& array) -{ - int sum = 1; - int degree = array.extent(0); - int dim = array.extent(1); - - for (int i = 0; i < degree; ++i) { - for (int j = 0; j < dim; ++j) { - sum += array(i, j); - } - } - - return sum; -} - -int calculate_scratch_shared_size() -{ - - IntVecView shmem_each_team("stores the size required for each team", - nvertices_target); - Kokkos::parallel_for( - "calculate the size required for scratch for each team", nvertices_target, - KOKKOS_LAMBDA(const int i) { - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; - int nsupports = end_ptr - start_ptr; - - size_t total_shared_size = 0; - - total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); - total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); - total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); - total_shared_size += ScratchVecView::shmem_size(basis_size); - total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; - total_shared_size += ScratchMatView::shmem_size(nsupports, 2); - total_shared_size += ScratchMatView::shmem_size(nsupports, 1); - shmem_each_team(i) = total_shared_size; - }); - - // namespace KE = Kokkos::Experimental; - // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(), - // shmem_each_team); printf("shared size = %d", shared_size); - int shared_size = 0; - Kokkos::parallel_reduce( - "find_max", nvertices_target, - KOKKOS_LAMBDA(const int i, int& max_val_temp) { - if (shmem_each_team(i) > max_val_temp) { - max_val_temp = shmem_each_team(i); - } - }, - Kokkos::Max(shared_size)); - - return shared_size; -} - -} // namespace detail -} // namespace pcms From d00e04621c8e34897aeee8d562ef7cd4623a8e92 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 31 Jan 2025 15:42:43 -0500 Subject: [PATCH 54/66] move implementation details --- .../interpolator/mls_interpolation_impl.hpp | 254 +++++++++++++++++- 1 file changed, 242 insertions(+), 12 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index f13a9b33..32af89ef 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -83,7 +83,8 @@ int calculate_basis_vector_size(const IntHostMatView& array); * * @return The scratch size */ -int calculate_scratch_shared_size(); +int calculate_scratch_shared_size(const SupportResults& support, + const int nvertices_target, int basis_size); /** * @brief Evaluates the polynomial basis @@ -177,10 +178,9 @@ void create_vandermonde_matrix(const ScratchMatView& local_source_points, int j, template , bool> = true> -KOKKOS_INLINE_FUNCTION void PhiVector(const Coord& target_point, - const ScratchMatView& local_source_points, - int j, double cuttoff_dis_sq, - Func rbf_func, ScratchVecView phi) +KOKKOS_INLINE_FUNCTION void compute_phi_vector( + const Coord& target_point, const ScratchMatView& local_source_points, int j, + double cuttoff_dis_sq, Func rbf_func, ScratchVecView phi) { int N = local_source_points.extent(0); double dx = target_point.x - local_source_points(j, 0); @@ -188,7 +188,7 @@ KOKKOS_INLINE_FUNCTION void PhiVector(const Coord& target_point, double ds_sq = dx * dx + dy * dy; phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); OMEGA_H_CHECK_PRINTF(!std::isnan(phi(j)), - "ERROR: Phi(j) in PhiVector is NaN for j = %d " + "ERROR: Phi(j) in compute_phi_vector is NaN for j = %d " "ds_sq=%.16f, cuttoff_dis_sq=%.16f", j, ds_sq, cuttoff_dis_sq); } @@ -402,7 +402,7 @@ void fill(double value, member_type team, ScratchVecView vector) } /** - * @brief maps the data from source mesh to target mesh + * @brief Maps the data from source mesh to target mesh * @param source_values Source field values from source mesh * @param source_coordinates The coordinates of control points of source field * @param target_coordinates The coordinates of control points of target field @@ -446,7 +446,8 @@ Write mls_interpolation(const Reals source_values, Write approx_target_values(nvertices_target, 0, "approximated target values"); - int shared_size = calculate_scratch_shared_size(); + int shared_size = + calculate_scratch_shared_size(support, nvertices_target, basis_size); team_policy tp(nvertices_target, Kokkos::AUTO); @@ -517,7 +518,184 @@ Write mls_interpolation(const Reals source_values, team.team_barrier(); - /** PhiVector(nsupports) is the array of rbf functions evaluated at the + /** compute_phi_vector(nsupports) is the array of rbf functions evaluated + * at the source supports In the actual implementation, Phi(nsupports, + * nsupports) is the diagonal matrix & each diagonal element is the phi + * evaluated at each source points + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + OMEGA_H_CHECK_PRINTF( + radii2[i] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + radii2[i]); + compute_phi_vector(target_point, local_source_points, j, radii2[i], + rbf_func, phi_vector); + }); + + team.team_barrier(); + + /** support_values(nsupports) (or known rhs vector b) is the vector of the + * quantity that we want interpolate + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { + support_values(i) = + source_values[support.supports_idx[start_ptr + i]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(i)), + "ERROR: NaN found: at support %d\n", i); + }); + + team.team_barrier(); + + auto result = convert_normal_equation(vandermonde_matrix, phi_vector, + support_values, team); + + team.team_barrier(); + + // It stores the solution in rhs vector itself + solve_matrix(result.square_matrix, result.transformed_rhs, team); + + team.team_barrier(); + + double target_value = KokkosBlas::Experimental::dot( + team, result.transformed_rhs, target_basis_vector); + + if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); + approx_target_values[i] = target_value; + } + }); + + return approx_target_values; +} + +/** + * @brief \overload + * Maps the data from source mesh to target mesh + * + * @param source_values Scalar array view of source field values + * @param source_coordinates Scalar array view of the coordinates of control + * points of source field + * @param target_coordinates Scalar array view of the coordinates of control + * points of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param radii2 Scalar array view of the square of cutoff distance + * @param rbf_func The radial basis function choice + * @param[in/out] Scalar array view of the interpolated field in target mesh + * + * + */ +template +void mls_interpolation(RealConstDefaultScalarArrayView source_values, + RealConstDefaultScalarArrayView source_coordinates, + RealConstDefaultScalarArrayView target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, RealDefaultScalarArrayView radii2, + Func rbf_func, + RealDefaultScalarArrayView approx_target_values) +{ + PCMS_FUNCTION_TIMER; + static_assert(std::is_invocable_r_v, + "rbf_func, takes radius and cutoff, returns weight"); + static_assert(!std::is_pointer_v, + "function pointer will fail in GPU execution context"); + + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + IntHostMatView host_slice_length( + "stores slice length of polynomial basis in host", degree, dim); + + Kokkos::deep_copy(host_slice_length, 0); + + calculate_basis_slice_lengths(host_slice_length); + + auto basis_size = calculate_basis_vector_size(host_slice_length); + + IntDeviceMatView slice_length( + "stores slice length of polynomial basis in device", degree, dim); + + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + + int shared_size = + calculate_scratch_shared_size(support, nvertices_target, basis_size); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + + PCMS_ALWAYS_ASSERT(scratch_size > shared_size); + + // calculates the interpolated values + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; + } + } + + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + basis_size); + + // rbf function values of source supports Phi(n,n) + ScratchVecView phi_vector(team.team_scratch(0), nsupports); + + // stores known vector (b) + ScratchVecView support_values(team.team_scratch(0), nsupports); + + // basis of target + ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); + + // Initialize the scratch matrices and vectors + fill(0.0, team, vandermonde_matrix); + fill(0.0, team, phi_vector); + fill(0.0, team, support_values); + fill(0.0, team, target_basis_vector); + + // evaluates the basis vector of a given target point + Coord target_point; + target_point.x = target_coordinates[i * dim]; + target_point.y = target_coordinates[i * dim + 1]; + + if (dim == 3) { + target_point.z = target_coordinates[i * dim + 2]; + } + eval_basis_vector(slice_length, target_point, target_basis_vector); + + /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is + * created with the basis vector of source supports stacking on top of + * each other + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + create_vandermonde_matrix(local_source_points, j, slice_length, + vandermonde_matrix); + }); + + team.team_barrier(); + + /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, nsupports) * is the diagonal matrix & each diagonal element is the phi evaluated at * each source points @@ -529,8 +707,8 @@ Write mls_interpolation(const Reals source_values, radii2[i] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", radii2[i]); - PhiVector(target_point, local_source_points, j, radii2[i], rbf_func, - phi_vector); + compute_phi_vector(target_point, local_source_points, j, radii2[i], + rbf_func, phi_vector); }); team.team_barrier(); @@ -566,8 +744,60 @@ Write mls_interpolation(const Reals source_values, approx_target_values[i] = target_value; } }); +} - return approx_target_values; +/** + * @brief \overload + * Maps the data from source mesh to target mesh + * + * @param source_values Read array of Source field values + * @param source_coordinates Read array of the coordinates of control points of + * source field + * @param target_coordinates Read array of the coordinates of control points of + * target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param radii2 Write array of the square of cutoff distance + * @param rbf_func The radial basis function choice + * @return Write array of the interpolated field in target mesh + * + * + */ +template +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, Write radii2, + const LO& dim, const LO& degree, Func rbf_func) +{ + const auto nvertices_source = source_coordinates.size() / dim; + + const auto nvertices_target = target_coordinates.size() / dim; + + RealConstDefaultScalarArrayView source_values_array_view( + source_values.data(), source_values.size()); + + RealConstDefaultScalarArrayView source_coordinates_array_view( + source_coordinates.data(), source_coordinates.size()); + + RealConstDefaultScalarArrayView target_coordinates_array_view( + target_coordinates.data(), target_coordinates.size()); + + RealDefaultScalarArrayView radii2_array_view(radii2.data(), radii2.size()); + + Write interpolated_values(nvertices_target, 0, + "approximated target values"); + + RealDefaultScalarArrayView interpolated_values_array_view( + interpolated_values.data(), interpolated_values.size()); + + mls_interpolation(source_values_array_view, source_coordinates_array_view, + target_coordinates_array_view, support, dim, degree, + radii2_array_view, rbf_func, + interpolated_values_array_view); + + return interpolated_values; } } // namespace detail From 3828ec29212313ca640d4fd1dab717be6eb9ceb4 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 31 Jan 2025 15:43:32 -0500 Subject: [PATCH 55/66] introduce new aliases --- .../interpolator/pcms_interpolator_aliases.hpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp index 383393a7..06d83640 100644 --- a/src/pcms/interpolator/pcms_interpolator_aliases.hpp +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -2,6 +2,10 @@ #define PCMS_INTERPOLATOR_ALIASES_HPP #include +#include "../arrays.h" + +namespace pcms +{ struct Coord { @@ -23,8 +27,21 @@ using ScratchVecView = typename Kokkos::View>; +using PointsViewType = Kokkos::View; + +struct Points +{ + PointsViewType coordinates; +}; + using IntDeviceMatView = Kokkos::View; using IntDeviceVecView = Kokkos::View; using IntHostMatView = Kokkos::View; +using RealDefaultScalarArrayView = + ScalarArrayView; +using RealConstDefaultScalarArrayView = + ScalarArrayView; + +} // namespace pcms #endif From 435b1e10f62e023c1881bf0a12e0c394526b8e32 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Fri, 31 Jan 2025 15:45:50 -0500 Subject: [PATCH 56/66] update mls interpolation test cases --- test/test_linear_solver.cpp | 12 ++++++++---- test/test_mls_basis.cpp | 31 +++++++++++++++++-------------- test/test_rbf_interp.cpp | 5 +++-- 3 files changed, 28 insertions(+), 20 deletions(-) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp index e1ee5158..2f8664ec 100644 --- a/test/test_linear_solver.cpp +++ b/test/test_linear_solver.cpp @@ -1,11 +1,15 @@ #include #include -#include +#include +#include #include #include #include #include +using namespace pcms; +using namespace pcms::detail; + TEST_CASE("solver test") { // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ @@ -99,8 +103,8 @@ TEST_CASE("solver test") team.team_barrier(); - auto result = - ConvertNormalEq(vandermonde_matrix, phi, support_values, team); + auto result = convert_normal_equation(vandermonde_matrix, phi, + support_values, team); team.team_barrier(); @@ -122,7 +126,7 @@ TEST_CASE("solver test") team.team_barrier(); - SolveMatrix(result.square_matrix, result.transformed_rhs, team); + solve_matrix(result.square_matrix, result.transformed_rhs, team); team.team_barrier(); diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp index 33bb9301..5664508d 100644 --- a/test/test_mls_basis.cpp +++ b/test/test_mls_basis.cpp @@ -1,9 +1,12 @@ #include #include -#include +#include +#include #include #include +using namespace pcms; + TEST_CASE("basis test") { SECTION("check basis slice lengths, size of the basis vector and basis " @@ -14,8 +17,8 @@ TEST_CASE("basis test") const int degree = 3; Kokkos::View array("array", degree, dim); Kokkos::deep_copy(array, 0); - basisSliceLengths(array); - auto size = basisSize(array); + detail::calculate_basis_slice_lengths(array); + auto size = detail::calculate_basis_vector_size(array); int expected[degree][dim] = {{1, 1, 1}, {1, 2, 3}, {1, 3, 6}}; @@ -27,7 +30,7 @@ TEST_CASE("basis test") REQUIRE(size == 20); - MatViewType d_array("array in device", degree, dim); + IntDeviceMatView d_array("array in device", degree, dim); auto array_hd = Kokkos::create_mirror_view(d_array); Kokkos::deep_copy(array_hd, array); Kokkos::deep_copy(d_array, array_hd); @@ -52,7 +55,7 @@ TEST_CASE("basis test") target_point.y = coords[i][1]; target_point.z = coords[i][2]; - BasisPoly(basis_vector, d_array, target_point); + detail::eval_basis_vector(d_array, target_point, basis_vector); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { results(i, j) = basis_vector(j); }); @@ -106,9 +109,9 @@ TEST_CASE("basis test") const int degree = 1; Kokkos::View array("array", degree, dim); Kokkos::deep_copy(array, 0); - basisSliceLengths(array); + detail::calculate_basis_slice_lengths(array); - auto size = basisSize(array); + auto size = detail::calculate_basis_vector_size(array); int expected[degree][dim] = { {1, 1, 1}, @@ -122,7 +125,7 @@ TEST_CASE("basis test") REQUIRE(size == 4); - MatViewType d_array("array in device", degree, dim); + IntDeviceMatView d_array("array in device", degree, dim); auto array_hd = Kokkos::create_mirror_view(d_array); Kokkos::deep_copy(array_hd, array); Kokkos::deep_copy(d_array, array_hd); @@ -147,7 +150,7 @@ TEST_CASE("basis test") target_point.y = coords[i][1]; target_point.z = coords[i][2]; - BasisPoly(basis_vector, d_array, target_point); + detail::eval_basis_vector(d_array, target_point, basis_vector); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { results(i, j) = basis_vector(j); }); @@ -183,13 +186,13 @@ TEST_CASE("basis test") Kokkos::View host_slice_length("array", degree, dim); Kokkos::deep_copy(host_slice_length, 0); - basisSliceLengths(host_slice_length); + detail::calculate_basis_slice_lengths(host_slice_length); - MatViewType slice_length("slice array in device", degree, dim); + IntDeviceMatView slice_length("slice array in device", degree, dim); auto slice_length_hd = Kokkos::create_mirror_view(slice_length); Kokkos::deep_copy(slice_length_hd, host_slice_length); Kokkos::deep_copy(slice_length, slice_length_hd); - auto size = basisSize(host_slice_length); + auto size = detail::calculate_basis_vector_size(host_slice_length); int nvertices_target = 2; int nsupports = 5; Kokkos::View points("vandermonde matrix", nvertices_target, @@ -240,8 +243,8 @@ TEST_CASE("basis test") Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - CreateVandermondeMatrix(vandermonde_matrix, local_source_points, j, - slice_length); + detail::create_vandermonde_matrix(local_source_points, j, + slice_length, vandermonde_matrix); }); Kokkos::parallel_for( diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index 51661285..b23f050b 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -1,18 +1,19 @@ #include #include #include -#include +#include +#include #include #include #include #include #include -#include #include #include using namespace std; using namespace Omega_h; +using namespace pcms; KOKKOS_INLINE_FUNCTION double func(Coord& p, int degree) From 73bbd2ab6f74f621efa469d849ac1b3f91f8c238 Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Tue, 11 Feb 2025 09:19:22 -0500 Subject: [PATCH 57/66] make SupportResults struct readonly (#156) --- src/pcms/interpolator/adj_search.hpp | 53 +++++++++++++++------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 98bb5215..f0ad4fea 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -355,8 +355,8 @@ inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, struct SupportResults { - Write supports_ptr; - Write supports_idx; + LOs supports_ptr; + LOs supports_idx; Write radii2; // squared radii of the supports }; @@ -365,7 +365,6 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, LO min_req_support = 12, bool adapt_radius = true) { - SupportResults support; FindSupports search(source_mesh, target_mesh); LO nvertices_source = source_mesh.nverts(); LO nvertices_target = target_mesh.nverts(); @@ -376,10 +375,13 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Write radii2 = Write(nvertices_target, cutoffDistance, "squared radii of the supports"); + Write supports_ptr; + Write supports_idx; + if (!adapt_radius) { printf("INFO: Fixed radius search *(disregarding required minimum " "support)*... \n"); - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); } else { printf("INFO: Adaptive radius search... \n"); @@ -397,10 +399,12 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Kokkos::Max(max_radius)); printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); - SupportResults support; // create support every time to avoid complexity + // create storage every time to avoid complexity + Write supports_ptr; + Write supports_idx; Kokkos::fence(); - search.adjBasedSearch(support.supports_ptr, nSupports, - support.supports_idx, radii2, true); + search.adjBasedSearch(supports_ptr, nSupports, + supports_idx, radii2, true); Kokkos::fence(); LO min_supports_found = 0; @@ -435,7 +439,7 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); } - support.supports_ptr = Write( + supports_ptr = Write( nvertices_target + 1, 0, "number of support source vertices in CSR format"); LO total_supports = 0; @@ -445,29 +449,29 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, OMEGA_H_LAMBDA(int j, int& update, bool final) { update += nSupports[j]; if (final) { - support.supports_ptr[j + 1] = update; + supports_ptr[j + 1] = update; } }, total_supports); Kokkos::fence(); - support.supports_idx = Write( + supports_idx = Write( total_supports, 0, "index of source supports of each target node"); - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, false); - support.radii2 = radii2; - target_mesh.add_tag(VERT, "radii2", 1, support.radii2); - return support; + target_mesh.add_tag(VERT, "radii2", 1, radii2); + return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, LO min_support = 12, bool adapt_radius = true) { - SupportResults support; + Write supports_ptr; + Write supports_idx; FindSupports search(mesh); LO nvertices_target = mesh.nverts(); Write nSupports(nvertices_target, 0, @@ -481,7 +485,7 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, if (!adapt_radius) { printf("INFO: Fixed radius search *(disregarding required minimum " "support)* ... \n"); - search.adjBasedSearch(support.supports_ptr, nSupports, support.supports_idx, + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); } else { printf("INFO: Adaptive radius search... \n"); @@ -499,8 +503,8 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, nSupports = Write(nvertices_target, 0, "number of supports in each target vertex"); SupportResults support; // create support every time to avoid complexity - search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, - support.supports_idx, radii2, true); + search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, + supports_idx, radii2, true); Kokkos::fence(); LO min_nSupports = 0; @@ -534,7 +538,7 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } // adaptive radius search // offset array for the supports of each target vertex - support.supports_ptr = Write( + supports_ptr = Write( nvertices_target + 1, 0, "number of support source vertices in CSR format"); LO total_supports = 0; @@ -543,7 +547,7 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, OMEGA_H_LAMBDA(int j, int& update, bool final) { update += nSupports[j]; if (final) { - support.supports_ptr[j + 1] = update; + supports_ptr[j + 1] = update; } }, total_supports); @@ -551,16 +555,15 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Inside searchNeighbors 3\n"); Kokkos::fence(); - support.supports_idx = Write( + supports_idx = Write( total_supports, 0, "index of source supports of each target node"); printf("INFO: Total_supports: %d\n", total_supports); - search.adjBasedSearchCentroidNodes(support.supports_ptr, nSupports, - support.supports_idx, radii2, false); + search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, + supports_idx, radii2, false); - support.radii2 = radii2; mesh.add_tag(VERT, "support_radius", 1, radii2); - return support; + return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } #endif From b6e01f297a4c784de24d1205b75dadac851e18ac Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Tue, 11 Feb 2025 09:42:40 -0500 Subject: [PATCH 58/66] remove ] file --- ] | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 ] diff --git a/] b/] deleted file mode 100644 index e69497b2..00000000 --- a/] +++ /dev/null @@ -1,14 +0,0 @@ -added comments and kokkoskernel dot product function -# Please enter the commit message for your changes. Lines starting -# with '#' will be ignored, and an empty message aborts the commit. -# -# On branch general_mls -# Changes to be committed: -# modified: MLSCoefficients.hpp -# -# Changes not staged for commit: -# modified: MLSInterpolation.hpp -# modified: adj_search.hpp -# modified: queue_visited.hpp -# modified: ../../../test/test_linear_solver.cpp -# From 528167587168ea3fb4caa52a03f4906fb19b829d Mon Sep 17 00:00:00 2001 From: Cameron Smith Date: Tue, 11 Feb 2025 16:25:12 -0500 Subject: [PATCH 59/66] use league_rank as loop control var there were nested loops reusing i and given the length of the loop body, this helps readability --- .../interpolator/mls_interpolation_impl.hpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 32af89ef..a6198d3d 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -636,9 +636,9 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, Kokkos::parallel_for( "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; + int league_rank = team.league_rank(); + int start_ptr = support.supports_ptr[league_rank]; + int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); @@ -674,11 +674,11 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, // evaluates the basis vector of a given target point Coord target_point; - target_point.x = target_coordinates[i * dim]; - target_point.y = target_coordinates[i * dim + 1]; + target_point.x = target_coordinates[league_rank * dim]; + target_point.y = target_coordinates[league_rank * dim + 1]; if (dim == 3) { - target_point.z = target_coordinates[i * dim + 2]; + target_point.z = target_coordinates[league_rank * dim + 2]; } eval_basis_vector(slice_length, target_point, target_basis_vector); @@ -704,10 +704,10 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { OMEGA_H_CHECK_PRINTF( - radii2[i] > 0, + radii2[j] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", - radii2[i]); - compute_phi_vector(target_point, local_source_points, j, radii2[i], + radii2[j]); + compute_phi_vector(target_point, local_source_points, j, radii2[league_rank], rbf_func, phi_vector); }); @@ -717,11 +717,11 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * quantity that we want interpolate */ Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - support_values(i) = - source_values[support.supports_idx[start_ptr + i]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(i)), - "ERROR: NaN found: at support %d\n", i); + Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { + support_values(j) = + source_values[support.supports_idx[start_ptr + j]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), + "ERROR: NaN found: at support %d\n", j); }); team.team_barrier(); @@ -740,8 +740,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team, result.transformed_rhs, target_basis_vector); if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); - approx_target_values[i] = target_value; + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); + approx_target_values[league_rank] = target_value; } }); } From 89812a4fe5d0f520f71df78ab27c5791ab1359be Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 01:20:43 -0500 Subject: [PATCH 60/66] change i to league_rank --- .../interpolator/mls_interpolation_impl.hpp | 50 ++++++++++--------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index a6198d3d..8f854489 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -459,9 +459,9 @@ Write mls_interpolation(const Reals source_values, Kokkos::parallel_for( "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), KOKKOS_LAMBDA(const member_type& team) { - int i = team.league_rank(); - int start_ptr = support.supports_ptr[i]; - int end_ptr = support.supports_ptr[i + 1]; + int league_rank = team.league_rank(); + int start_ptr = support.supports_ptr[league_rank]; + int end_ptr = support.supports_ptr[league_rank + 1]; int nsupports = end_ptr - start_ptr; ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); @@ -497,11 +497,11 @@ Write mls_interpolation(const Reals source_values, // evaluates the basis vector of a given target point Coord target_point; - target_point.x = target_coordinates[i * dim]; - target_point.y = target_coordinates[i * dim + 1]; + target_point.x = target_coordinates[league_rank * dim]; + target_point.y = target_coordinates[league_rank * dim + 1]; if (dim == 3) { - target_point.z = target_coordinates[i * dim + 2]; + target_point.z = target_coordinates[league_rank * dim + 2]; } eval_basis_vector(slice_length, target_point, target_basis_vector); @@ -524,14 +524,14 @@ Write mls_interpolation(const Reals source_values, * evaluated at each source points */ + OMEGA_H_CHECK_PRINTF( + radii2[league_rank] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + radii2[league_rank]); Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - OMEGA_H_CHECK_PRINTF( - radii2[i] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - radii2[i]); - compute_phi_vector(target_point, local_source_points, j, radii2[i], - rbf_func, phi_vector); + compute_phi_vector(target_point, local_source_points, j, + radii2[league_rank], rbf_func, phi_vector); }); team.team_barrier(); @@ -540,11 +540,11 @@ Write mls_interpolation(const Reals source_values, * quantity that we want interpolate */ Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int i) { - support_values(i) = - source_values[support.supports_idx[start_ptr + i]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(i)), - "ERROR: NaN found: at support %d\n", i); + Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { + support_values(j) = + source_values[support.supports_idx[start_ptr + j]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), + "ERROR: NaN found: at support %d\n", j); }); team.team_barrier(); @@ -563,8 +563,9 @@ Write mls_interpolation(const Reals source_values, team, result.transformed_rhs, target_basis_vector); if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", i); - approx_target_values[i] = target_value; + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", + league_rank); + approx_target_values[league_rank] = target_value; } }); @@ -704,11 +705,11 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { OMEGA_H_CHECK_PRINTF( - radii2[j] > 0, + radii2[league_rank] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", - radii2[j]); - compute_phi_vector(target_point, local_source_points, j, radii2[league_rank], - rbf_func, phi_vector); + radii2[league_rank]); + compute_phi_vector(target_point, local_source_points, j, + radii2[league_rank], rbf_func, phi_vector); }); team.team_barrier(); @@ -740,7 +741,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team, result.transformed_rhs, target_basis_vector); if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", league_rank); + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", + league_rank); approx_target_values[league_rank] = target_value; } }); From 651682dc609514cb1bb2c7de343cee815eee4eaa Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 01:30:35 -0500 Subject: [PATCH 61/66] remove radii2 --- src/pcms/interpolator/adj_search.hpp | 38 +++++++++++++--------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index f0ad4fea..a9a6a290 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -357,7 +357,6 @@ struct SupportResults { LOs supports_ptr; LOs supports_idx; - Write radii2; // squared radii of the supports }; inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, @@ -381,8 +380,7 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, if (!adapt_radius) { printf("INFO: Fixed radius search *(disregarding required minimum " "support)*... \n"); - search.adjBasedSearch(supports_ptr, nSupports, supports_idx, - radii2, true); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); } else { printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; @@ -403,8 +401,8 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Write supports_ptr; Write supports_idx; Kokkos::fence(); - search.adjBasedSearch(supports_ptr, nSupports, - supports_idx, radii2, true); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, + true); Kokkos::fence(); LO min_supports_found = 0; @@ -439,8 +437,8 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); } - supports_ptr = Write( - nvertices_target + 1, 0, "number of support source vertices in CSR format"); + supports_ptr = Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); LO total_supports = 0; @@ -456,11 +454,10 @@ inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, Kokkos::fence(); - supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); + supports_idx = Write(total_supports, 0, + "index of source supports of each target node"); - search.adjBasedSearch(supports_ptr, nSupports, supports_idx, - radii2, false); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, false); target_mesh.add_tag(VERT, "radii2", 1, radii2); return SupportResults{read(supports_ptr), read(supports_idx), radii2}; @@ -485,8 +482,7 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, if (!adapt_radius) { printf("INFO: Fixed radius search *(disregarding required minimum " "support)* ... \n"); - search.adjBasedSearch(supports_ptr, nSupports, supports_idx, - radii2, true); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); } else { printf("INFO: Adaptive radius search... \n"); int r_adjust_loop = 0; @@ -503,8 +499,8 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, nSupports = Write(nvertices_target, 0, "number of supports in each target vertex"); SupportResults support; // create support every time to avoid complexity - search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, - supports_idx, radii2, true); + search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, + radii2, true); Kokkos::fence(); LO min_nSupports = 0; @@ -538,8 +534,8 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, } // adaptive radius search // offset array for the supports of each target vertex - supports_ptr = Write( - nvertices_target + 1, 0, "number of support source vertices in CSR format"); + supports_ptr = Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); LO total_supports = 0; Kokkos::parallel_scan( @@ -555,12 +551,12 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, printf("INFO: Inside searchNeighbors 3\n"); Kokkos::fence(); - supports_idx = Write( - total_supports, 0, "index of source supports of each target node"); + supports_idx = Write(total_supports, 0, + "index of source supports of each target node"); printf("INFO: Total_supports: %d\n", total_supports); - search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, - supports_idx, radii2, false); + search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, + radii2, false); mesh.add_tag(VERT, "support_radius", 1, radii2); return SupportResults{read(supports_ptr), read(supports_idx), radii2}; From 2693551e64fa091462c336c380d78b55bea8b2f9 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 01:41:54 -0500 Subject: [PATCH 62/66] remove radii2 from struct --- src/pcms/interpolator/adj_search.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index a9a6a290..37f8cb1b 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -559,7 +559,7 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, radii2, false); mesh.add_tag(VERT, "support_radius", 1, radii2); - return SupportResults{read(supports_ptr), read(supports_idx), radii2}; + return SupportResults{read(supports_ptr), read(supports_idx)}; } #endif From 48140e7bad6a2a9b486d5005907d1baef1caaaf8 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 03:02:12 -0500 Subject: [PATCH 63/66] add radii2 --- src/pcms/interpolator/adj_search.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp index 37f8cb1b..a5a20b78 100644 --- a/src/pcms/interpolator/adj_search.hpp +++ b/src/pcms/interpolator/adj_search.hpp @@ -357,6 +357,7 @@ struct SupportResults { LOs supports_ptr; LOs supports_idx; + Write radii2; }; inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, @@ -559,7 +560,7 @@ inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, radii2, false); mesh.add_tag(VERT, "support_radius", 1, radii2); - return SupportResults{read(supports_ptr), read(supports_idx)}; + return SupportResults{read(supports_ptr), read(supports_idx), radii2}; } #endif From 529832fad5fb68d3cfaac6f16cf4ee1e1a000e45 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 03:04:01 -0500 Subject: [PATCH 64/66] remove radii2 --- src/pcms/interpolator/mls_interpolation.cpp | 15 ++-- src/pcms/interpolator/mls_interpolation.hpp | 3 +- .../interpolator/mls_interpolation_impl.hpp | 70 ++++++++----------- 3 files changed, 39 insertions(+), 49 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp index 3f34cee8..113986a1 100644 --- a/src/pcms/interpolator/mls_interpolation.cpp +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -105,8 +105,7 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Write radii2, - RadialBasisFunction bf) + const LO& degree, RadialBasisFunction bf) { const auto nvertices_target = target_coordinates.size() / dim; @@ -116,20 +115,20 @@ Write mls_interpolation(const Reals source_values, switch (bf) { case RadialBasisFunction::RBF_GAUSSIAN: interpolated_values = detail::mls_interpolation( - source_values, source_coordinates, target_coordinates, support, radii2, - dim, degree, RBF_GAUSSIAN{}); + source_values, source_coordinates, target_coordinates, support, dim, + degree, RBF_GAUSSIAN{}); break; case RadialBasisFunction::RBF_C4: interpolated_values = detail::mls_interpolation( - source_values, source_coordinates, target_coordinates, support, radii2, - dim, degree, RBF_C4{}); + source_values, source_coordinates, target_coordinates, support, dim, + degree, RBF_C4{}); break; case RadialBasisFunction::RBF_CONST: interpolated_values = detail::mls_interpolation( - source_values, source_coordinates, target_coordinates, support, radii2, - dim, degree, RBF_CONST{}); + source_values, source_coordinates, target_coordinates, support, dim, + degree, RBF_CONST{}); break; } diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp index 56abaffa..17b085aa 100644 --- a/src/pcms/interpolator/mls_interpolation.hpp +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -19,7 +19,6 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Write radii2, - RadialBasisFunction bf); + const LO& degree, RadialBasisFunction bf); } // namespace pcms #endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 8f854489..655726df 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -289,26 +289,23 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, int m = matrix.extent(0); int n = matrix.extent(1); - ResultConvertNormal result; + scaled_matrix = ScratchMatView(team.team_scratch(0), n, m); - result.scaled_matrix = ScratchMatView(team.team_scratch(0), n, m); + square_matrix = ScratchMatView(team.team_scratch(0), n, n); - result.square_matrix = ScratchMatView(team.team_scratch(0), n, n); - - result.transformed_rhs = ScratchVecView(team.team_scratch(0), n); + transformed_rhs = ScratchVecView(team.team_scratch(0), n); // performing P^T Q Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { for (int k = 0; k < m; ++k) { - result.scaled_matrix(j, k) = 0; + scaled_matrix(j, k) = 0; } }); team.team_barrier(); Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { - scale_column_trans_matrix(matrix, weight_vector, team, j, - result.scaled_matrix); + scale_column_trans_matrix(matrix, weight_vector, team, j, scaled_matrix); }); team.team_barrier(); @@ -317,19 +314,18 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, KokkosBatched::TeamGemm< member_type, KokkosBatched::Trans::NoTranspose, KokkosBatched::Trans::NoTranspose, - KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, - result.scaled_matrix, matrix, - 0.0, result.square_matrix); + KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, scaled_matrix, + matrix, 0.0, square_matrix); team.team_barrier(); KokkosBlas::Experimental:: Gemv::invoke( - team, 'N', 1.0, result.scaled_matrix, rhs, 0.0, result.transformed_rhs); + team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs); team.team_barrier(); - return result; + return ResultConvertNormal{scaled_matrix, square_matrix, transformed_rhs}; } /** @@ -409,7 +405,6 @@ void fill(double value, member_type team, ScratchVecView vector) * @param support The object that enpasulates support info * @param dim The dimension of the simulations * @param degree The degree of the interpolation order - * @param radii2 The array of the square of cutoff distance * @param rbf_func The radial basis function choice * @return interpolated field in target mesh * @@ -420,8 +415,7 @@ Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, Write radii2, - Func rbf_func) + const LO& degree, Func rbf_func) { PCMS_FUNCTION_TIMER; static_assert(std::is_invocable_r_v, @@ -525,13 +519,13 @@ Write mls_interpolation(const Reals source_values, */ OMEGA_H_CHECK_PRINTF( - radii2[league_rank] > 0, + support.radii2[league_rank] > 0, "ERROR: radius2 has to be positive but found to be %.16f\n", - radii2[league_rank]); + support.radii2[league_rank]); Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { compute_phi_vector(target_point, local_source_points, j, - radii2[league_rank], rbf_func, phi_vector); + support.radii2[league_rank], rbf_func, phi_vector); }); team.team_barrier(); @@ -555,12 +549,12 @@ Write mls_interpolation(const Reals source_values, team.team_barrier(); // It stores the solution in rhs vector itself - solve_matrix(result.square_matrix, result.transformed_rhs, team); + solve_matrix(square_matrix, transformed_rhs, team); team.team_barrier(); - double target_value = KokkosBlas::Experimental::dot( - team, result.transformed_rhs, target_basis_vector); + double target_value = KokkosBlas::Experimental::dot(team, transformed_rhs, + target_basis_vector); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", @@ -584,7 +578,6 @@ Write mls_interpolation(const Reals source_values, * @param support The object that enpasulates support info * @param dim The dimension of the simulations * @param degree The degree of the interpolation order - * @param radii2 Scalar array view of the square of cutoff distance * @param rbf_func The radial basis function choice * @param[in/out] Scalar array view of the interpolated field in target mesh * @@ -595,8 +588,7 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, RealConstDefaultScalarArrayView source_coordinates, RealConstDefaultScalarArrayView target_coordinates, const SupportResults& support, const LO& dim, - const LO& degree, RealDefaultScalarArrayView radii2, - Func rbf_func, + const LO& degree, Func rbf_func, RealDefaultScalarArrayView approx_target_values) { PCMS_FUNCTION_TIMER; @@ -696,6 +688,11 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); + OMEGA_H_CHECK_PRINTF( + support.radii2[league_rank] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + support.radii2[league_rank]); + /** phi(nsupports) is the array of rbf functions evaluated at the * source supports In the actual implementation, Phi(nsupports, nsupports) * is the diagonal matrix & each diagonal element is the phi evaluated at @@ -704,12 +701,8 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, Kokkos::parallel_for( Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - OMEGA_H_CHECK_PRINTF( - radii2[league_rank] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - radii2[league_rank]); compute_phi_vector(target_point, local_source_points, j, - radii2[league_rank], rbf_func, phi_vector); + support.radii2[league_rank], rbf_func, phi_vector); }); team.team_barrier(); @@ -733,12 +726,12 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); // It stores the solution in rhs vector itself - solve_matrix(result.square_matrix, result.transformed_rhs, team); + solve_matrix(square_matrix, transformed_rhs, team); team.team_barrier(); - double target_value = KokkosBlas::Experimental::dot( - team, result.transformed_rhs, target_basis_vector); + double target_value = KokkosBlas::Experimental::dot(team, transformed_rhs, + target_basis_vector); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", @@ -760,7 +753,6 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, * @param support The object that enpasulates support info * @param dim The dimension of the simulations * @param degree The degree of the interpolation order - * @param radii2 Write array of the square of cutoff distance * @param rbf_func The radial basis function choice * @return Write array of the interpolated field in target mesh * @@ -770,8 +762,8 @@ template Write mls_interpolation(const Reals source_values, const Reals source_coordinates, const Reals target_coordinates, - const SupportResults& support, Write radii2, - const LO& dim, const LO& degree, Func rbf_func) + const SupportResults& support, const LO& dim, + const LO& degree, Func rbf_func) { const auto nvertices_source = source_coordinates.size() / dim; @@ -786,7 +778,8 @@ Write mls_interpolation(const Reals source_values, RealConstDefaultScalarArrayView target_coordinates_array_view( target_coordinates.data(), target_coordinates.size()); - RealDefaultScalarArrayView radii2_array_view(radii2.data(), radii2.size()); + RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), + support.radii2.size()); Write interpolated_values(nvertices_target, 0, "approximated target values"); @@ -796,8 +789,7 @@ Write mls_interpolation(const Reals source_values, mls_interpolation(source_values_array_view, source_coordinates_array_view, target_coordinates_array_view, support, dim, degree, - radii2_array_view, rbf_func, - interpolated_values_array_view); + rbf_func, interpolated_values_array_view); return interpolated_values; } From 0f8e60634ba4bb77bd60dfcacd982ddac5775a26 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 04:08:31 -0500 Subject: [PATCH 65/66] remove redundant mls_interpolation overload --- .../interpolator/mls_interpolation_impl.hpp | 182 +----------------- 1 file changed, 6 insertions(+), 176 deletions(-) diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp index 655726df..5a79f64a 100644 --- a/src/pcms/interpolator/mls_interpolation_impl.hpp +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -289,11 +289,11 @@ ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, int m = matrix.extent(0); int n = matrix.extent(1); - scaled_matrix = ScratchMatView(team.team_scratch(0), n, m); + ScratchMatView scaled_matrix(team.team_scratch(0), n, m); - square_matrix = ScratchMatView(team.team_scratch(0), n, n); + ScratchMatView square_matrix(team.team_scratch(0), n, n); - transformed_rhs = ScratchVecView(team.team_scratch(0), n); + ScratchVecView transformed_rhs(team.team_scratch(0), n); // performing P^T Q Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { @@ -399,176 +399,6 @@ void fill(double value, member_type team, ScratchVecView vector) /** * @brief Maps the data from source mesh to target mesh - * @param source_values Source field values from source mesh - * @param source_coordinates The coordinates of control points of source field - * @param target_coordinates The coordinates of control points of target field - * @param support The object that enpasulates support info - * @param dim The dimension of the simulations - * @param degree The degree of the interpolation order - * @param rbf_func The radial basis function choice - * @return interpolated field in target mesh - * - * - */ -template -Write mls_interpolation(const Reals source_values, - const Reals source_coordinates, - const Reals target_coordinates, - const SupportResults& support, const LO& dim, - const LO& degree, Func rbf_func) -{ - PCMS_FUNCTION_TIMER; - static_assert(std::is_invocable_r_v, - "rbf_func, takes radius and cutoff, returns weight"); - static_assert(!std::is_pointer_v, - "function pointer will fail in GPU execution context"); - const auto nvertices_source = source_coordinates.size() / dim; - const auto nvertices_target = target_coordinates.size() / dim; - - IntHostMatView host_slice_length( - "stores slice length of polynomial basis in host", degree, dim); - Kokkos::deep_copy(host_slice_length, 0); - calculate_basis_slice_lengths(host_slice_length); - - auto basis_size = calculate_basis_vector_size(host_slice_length); - - IntDeviceMatView slice_length( - "stores slice length of polynomial basis in device", degree, dim); - auto slice_length_hd = Kokkos::create_mirror_view(slice_length); - Kokkos::deep_copy(slice_length_hd, host_slice_length); - Kokkos::deep_copy(slice_length, slice_length_hd); - Write approx_target_values(nvertices_target, 0, - "approximated target values"); - - int shared_size = - calculate_scratch_shared_size(support, nvertices_target, basis_size); - - team_policy tp(nvertices_target, Kokkos::AUTO); - - int scratch_size = tp.scratch_size_max(0); - - PCMS_ALWAYS_ASSERT(scratch_size > shared_size); - - // calculates the interpolated values - Kokkos::parallel_for( - "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), - KOKKOS_LAMBDA(const member_type& team) { - int league_rank = team.league_rank(); - int start_ptr = support.supports_ptr[league_rank]; - int end_ptr = support.supports_ptr[league_rank + 1]; - int nsupports = end_ptr - start_ptr; - - ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); - int count = -1; - for (int j = start_ptr; j < end_ptr; ++j) { - count++; - auto index = support.supports_idx[j]; - local_source_points(count, 0) = source_coordinates[index * dim]; - local_source_points(count, 1) = source_coordinates[index * dim + 1]; - if (dim == 3) { - local_source_points(count, 2) = source_coordinates[index * dim + 2]; - } - } - - // vondermonde matrix P from the vectors of basis vector of supports - ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, - basis_size); - - // rbf function values of source supports Phi(n,n) - ScratchVecView phi_vector(team.team_scratch(0), nsupports); - - // stores known vector (b) - ScratchVecView support_values(team.team_scratch(0), nsupports); - - // basis of target - ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); - - // Initialize the scratch matrices and vectors - fill(0.0, team, vandermonde_matrix); - fill(0.0, team, phi_vector); - fill(0.0, team, support_values); - fill(0.0, team, target_basis_vector); - - // evaluates the basis vector of a given target point - Coord target_point; - target_point.x = target_coordinates[league_rank * dim]; - target_point.y = target_coordinates[league_rank * dim + 1]; - - if (dim == 3) { - target_point.z = target_coordinates[league_rank * dim + 2]; - } - eval_basis_vector(slice_length, target_point, target_basis_vector); - - /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is - * created with the basis vector of source supports stacking on top of - * each other - */ - - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - create_vandermonde_matrix(local_source_points, j, slice_length, - vandermonde_matrix); - }); - - team.team_barrier(); - - /** compute_phi_vector(nsupports) is the array of rbf functions evaluated - * at the source supports In the actual implementation, Phi(nsupports, - * nsupports) is the diagonal matrix & each diagonal element is the phi - * evaluated at each source points - */ - - OMEGA_H_CHECK_PRINTF( - support.radii2[league_rank] > 0, - "ERROR: radius2 has to be positive but found to be %.16f\n", - support.radii2[league_rank]); - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](int j) { - compute_phi_vector(target_point, local_source_points, j, - support.radii2[league_rank], rbf_func, phi_vector); - }); - - team.team_barrier(); - - /** support_values(nsupports) (or known rhs vector b) is the vector of the - * quantity that we want interpolate - */ - Kokkos::parallel_for( - Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { - support_values(j) = - source_values[support.supports_idx[start_ptr + j]]; - OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), - "ERROR: NaN found: at support %d\n", j); - }); - - team.team_barrier(); - - auto result = convert_normal_equation(vandermonde_matrix, phi_vector, - support_values, team); - - team.team_barrier(); - - // It stores the solution in rhs vector itself - solve_matrix(square_matrix, transformed_rhs, team); - - team.team_barrier(); - - double target_value = KokkosBlas::Experimental::dot(team, transformed_rhs, - target_basis_vector); - - if (team.team_rank() == 0) { - OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", - league_rank); - approx_target_values[league_rank] = target_value; - } - }); - - return approx_target_values; -} - -/** - * @brief \overload - * Maps the data from source mesh to target mesh * * @param source_values Scalar array view of source field values * @param source_coordinates Scalar array view of the coordinates of control @@ -726,12 +556,12 @@ void mls_interpolation(RealConstDefaultScalarArrayView source_values, team.team_barrier(); // It stores the solution in rhs vector itself - solve_matrix(square_matrix, transformed_rhs, team); + solve_matrix(result.square_matrix, result.transformed_rhs, team); team.team_barrier(); - double target_value = KokkosBlas::Experimental::dot(team, transformed_rhs, - target_basis_vector); + double target_value = KokkosBlas::Experimental::dot( + team, result.transformed_rhs, target_basis_vector); if (team.team_rank() == 0) { OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", From e5b30cd07c0905f82a17e0d3bee5f1ea9a3609a8 Mon Sep 17 00:00:00 2001 From: abhiyanpaudel Date: Thu, 13 Feb 2025 04:10:56 -0500 Subject: [PATCH 66/66] update mls_interpolation parameters --- test/test_rbf_interp.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/test/test_rbf_interp.cpp b/test/test_rbf_interp.cpp index b23f050b..31413597 100644 --- a/test/test_rbf_interp.cpp +++ b/test/test_rbf_interp.cpp @@ -55,7 +55,7 @@ void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, for (const auto& rbf : rbf_types) { auto approx_target_values = mls_interpolation(source_values, source_coordinates, target_coordinates, - support, dim, degree, support.radii2, rbf); + support, dim, degree, rbf); auto host_approx_target_values = HostRead(approx_target_values); @@ -93,9 +93,6 @@ TEST_CASE("mls_interp_test") const auto& ntargets = mesh.nverts(); - Write radii2( - ntargets, cutoffDistance, - "populate initial square of cutoffDistance to all target points"); Write source_coordinates( dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); @@ -114,7 +111,6 @@ TEST_CASE("mls_interp_test") }); Points source_points; - source_points.coordinates = PointsViewType("Number of local source supports", nfaces); Kokkos::parallel_for(