diff --git a/.gitignore b/.gitignore index 28efa748..6f620e20 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ *.swp .spack-env *.bak +.nfs* diff --git a/config.cmake.in b/config.cmake.in index 1834271c..6300c498 100644 --- a/config.cmake.in +++ b/config.cmake.in @@ -18,5 +18,6 @@ if(@PCMS_ENABLE_Fortran@) endif() include("${CMAKE_CURRENT_LIST_DIR}/pcms-targets.cmake") +include("${CMAKE_CURRENT_LIST_DIR}/pcms_interpolator-targets.cmake") # must be called at the end of the config file check_required_components(pcms) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8b249374..88defe6b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -136,6 +136,9 @@ if(PCMS_ENABLE_Fortran) target_link_libraries(pcms_pcms INTERFACE pcms::fortranapi) endif() + +add_subdirectory(pcms/interpolator) + install( TARGETS pcms_pcms EXPORT pcms-targets diff --git a/src/pcms/assert.cpp b/src/pcms/assert.cpp index e4564900..abd3e449 100644 --- a/src/pcms/assert.cpp +++ b/src/pcms/assert.cpp @@ -2,9 +2,11 @@ #include "pcms/print.h" #include #include -namespace pcms { -void Pcms_Assert_Fail(const char* msg) { +namespace pcms +{ +void Pcms_Assert_Fail(const char* msg) +{ printError("%s", msg); abort(); } -} +} // namespace pcms diff --git a/src/pcms/assert.h b/src/pcms/assert.h index f336a58c..c4fb9519 100644 --- a/src/pcms/assert.h +++ b/src/pcms/assert.h @@ -23,14 +23,13 @@ snprintf(omsg, 2048, "%s failed at %s + %d on rank %d\n", #cond, \ __FILE__, __LINE__, rank); \ pcms::Pcms_Assert_Fail(omsg); \ - } \ -} while (0) - - -namespace pcms { -//from scorec/core/pcu_fail.h -void Pcms_Assert_Fail(const char* msg) __attribute__ ((noreturn)); -} - + } \ + } while (0) + +namespace pcms +{ +// from scorec/core/pcu_fail.h +void Pcms_Assert_Fail(const char* msg) __attribute__((noreturn)); +} // namespace pcms #endif // PCMS_COUPLING_ASSERT_H diff --git a/src/pcms/capi/client.cpp b/src/pcms/capi/client.cpp index c9405022..03017b8d 100644 --- a/src/pcms/capi/client.cpp +++ b/src/pcms/capi/client.cpp @@ -16,19 +16,18 @@ namespace pcms using FieldAdapterVariant = std::variant, pcms::XGCFieldAdapter, pcms::XGCFieldAdapter, - pcms::XGCFieldAdapter, - pcms::DummyFieldAdapter -//#ifdef PCMS_HAS_OMEGA_H -// , -// pcms::OmegaHFieldAdapter, -// pcms::OmegaHFieldAdapter -//#endif + pcms::XGCFieldAdapter, pcms::DummyFieldAdapter + // #ifdef PCMS_HAS_OMEGA_H + // , + // pcms::OmegaHFieldAdapter, + // pcms::OmegaHFieldAdapter + // #endif >; } // namespace pcms [[nodiscard]] PcmsClientHandle pcms_create_client(const char* name, - MPI_Comm comm) + MPI_Comm comm) { auto* client = new pcms::CouplerClient(name, comm); return {reinterpret_cast(client)}; @@ -41,29 +40,35 @@ void pcms_destroy_client(PcmsClientHandle client) PcmsReverseClassificationHandle pcms_load_reverse_classification( const char* file, MPI_Comm comm) { - //std::filesystem::path filepath{file}; + // std::filesystem::path filepath{file}; auto* rc = new pcms::ReverseClassificationVertex{ pcms::ReadReverseClassificationVertex(file, comm)}; return {reinterpret_cast(rc)}; } -void pcms_destroy_reverse_classification( - PcmsReverseClassificationHandle rc) +void pcms_destroy_reverse_classification(PcmsReverseClassificationHandle rc) { if (rc.pointer != nullptr) delete reinterpret_cast(rc.pointer); } -struct AddFieldVariantOperators { - AddFieldVariantOperators(const char* name, pcms::CouplerClient* client, int participates) - : name_(name), client_(client), participates_(participates) +struct AddFieldVariantOperators +{ + AddFieldVariantOperators(const char* name, pcms::CouplerClient* client, + int participates) + : name_(name), client_(client), participates_(participates) { } [[nodiscard]] - pcms::CoupledField* operator()(const std::monostate&) const noexcept { return nullptr; } + pcms::CoupledField* operator()(const std::monostate&) const noexcept + { + return nullptr; + } template [[nodiscard]] - pcms::CoupledField* operator()(const FieldAdapter& field_adapter) const noexcept { - return client_->AddField(name_, field_adapter, participates_); + pcms::CoupledField* operator()( + const FieldAdapter& field_adapter) const noexcept + { + return client_->AddField(name_, field_adapter, participates_); } const char* name_; @@ -71,10 +76,9 @@ struct AddFieldVariantOperators { bool participates_; }; -PcmsFieldHandle pcms_add_field(PcmsClientHandle client_handle, - const char* name, - PcmsFieldAdapterHandle adapter_handle, - int participates) +PcmsFieldHandle pcms_add_field(PcmsClientHandle client_handle, const char* name, + PcmsFieldAdapterHandle adapter_handle, + int participates) { auto* adapter = @@ -82,14 +86,15 @@ PcmsFieldHandle pcms_add_field(PcmsClientHandle client_handle, auto* client = reinterpret_cast(client_handle.pointer); PCMS_ALWAYS_ASSERT(client != nullptr); PCMS_ALWAYS_ASSERT(adapter != nullptr); - //pcms::CoupledField* field = std::visit( - // redev::overloaded{ - // [](const std::monostate&) -> pcms::CoupledField* { return nullptr; }, - // [&name, &client, participates](const auto& field_adapter) { - // return client->AddField(name, field_adapter, participates); - // }}, - // *adapter); - pcms::CoupledField* field = std::visit(AddFieldVariantOperators{name, client, participates},*adapter); + // pcms::CoupledField* field = std::visit( + // redev::overloaded{ + // [](const std::monostate&) -> pcms::CoupledField* { return nullptr; }, + // [&name, &client, participates](const auto& field_adapter) { + // return client->AddField(name, field_adapter, participates); + // }}, + // *adapter); + pcms::CoupledField* field = + std::visit(AddFieldVariantOperators{name, client, participates}, *adapter); return {reinterpret_cast(field)}; } void pcms_send_field_name(PcmsClientHandle client_handle, const char* name) @@ -98,8 +103,7 @@ void pcms_send_field_name(PcmsClientHandle client_handle, const char* name) PCMS_ALWAYS_ASSERT(client != nullptr); client->SendField(name); } -void pcms_receive_field_name(PcmsClientHandle client_handle, - const char* name) +void pcms_receive_field_name(PcmsClientHandle client_handle, const char* name) { auto* client = reinterpret_cast(client_handle.pointer); PCMS_ALWAYS_ASSERT(client != nullptr); @@ -123,7 +127,7 @@ void pcms_create_xgc_field_adapter_t( const pcms::ReverseClassificationVertex& reverse_classification, in_overlap_function in_overlap, pcms::FieldAdapterVariant& field_adapter) { - PCMS_ALWAYS_ASSERT((size >0) ? (data!=nullptr) : true); + PCMS_ALWAYS_ASSERT((size > 0) ? (data != nullptr) : true); pcms::ScalarArrayView data_view( reinterpret_cast(data), size); field_adapter.emplace>( @@ -140,20 +144,24 @@ PcmsFieldAdapterHandle pcms_create_xgc_field_adapter( PCMS_ALWAYS_ASSERT(reverse_classification != nullptr); switch (data_type) { case PCMS_DOUBLE: - pcms_create_xgc_field_adapter_t( - name, comm, data, size, *reverse_classification, in_overlap, *field_adapter); + pcms_create_xgc_field_adapter_t(name, comm, data, size, + *reverse_classification, + in_overlap, *field_adapter); break; case PCMS_FLOAT: - pcms_create_xgc_field_adapter_t( - name, comm, data, size, *reverse_classification, in_overlap, *field_adapter); + pcms_create_xgc_field_adapter_t(name, comm, data, size, + *reverse_classification, + in_overlap, *field_adapter); break; case PCMS_INT: - pcms_create_xgc_field_adapter_t( - name, comm, data, size, *reverse_classification, in_overlap, *field_adapter); + pcms_create_xgc_field_adapter_t(name, comm, data, size, + *reverse_classification, in_overlap, + *field_adapter); break; case PCMS_LONG_INT: - pcms_create_xgc_field_adapter_t( - name, comm, data, size, *reverse_classification, in_overlap, *field_adapter); + pcms_create_xgc_field_adapter_t(name, comm, data, size, + *reverse_classification, + in_overlap, *field_adapter); break; default: pcms::printError("tyring to create XGC adapter with invalid type!\n"); @@ -161,8 +169,10 @@ PcmsFieldAdapterHandle pcms_create_xgc_field_adapter( } return {reinterpret_cast(field_adapter)}; } -PcmsFieldAdapterHandle pcms_create_dummy_field_adapter() { - auto* field_adapter = new pcms::FieldAdapterVariant{pcms::DummyFieldAdapter{}}; +PcmsFieldAdapterHandle pcms_create_dummy_field_adapter() +{ + auto* field_adapter = + new pcms::FieldAdapterVariant{pcms::DummyFieldAdapter{}}; return {reinterpret_cast(field_adapter)}; } @@ -175,8 +185,7 @@ void pcms_destroy_field_adapter(PcmsFieldAdapterHandle adapter_handle) adapter = nullptr; } } -int pcms_reverse_classification_count_verts( - PcmsReverseClassificationHandle rc) +int pcms_reverse_classification_count_verts(PcmsReverseClassificationHandle rc) { auto* reverse_classification = reinterpret_cast(rc.pointer); @@ -187,23 +196,27 @@ int pcms_reverse_classification_count_verts( return current + verts.second.size(); }); } -void pcms_begin_send_phase(PcmsClientHandle h) { +void pcms_begin_send_phase(PcmsClientHandle h) +{ auto* client = reinterpret_cast(h.pointer); PCMS_ALWAYS_ASSERT(client != nullptr); - client ->BeginSendPhase(); + client->BeginSendPhase(); } -void pcms_end_send_phase(PcmsClientHandle h) { +void pcms_end_send_phase(PcmsClientHandle h) +{ auto* client = reinterpret_cast(h.pointer); PCMS_ALWAYS_ASSERT(client != nullptr); - client ->EndSendPhase(); + client->EndSendPhase(); } -void pcms_begin_receive_phase(PcmsClientHandle h ) { +void pcms_begin_receive_phase(PcmsClientHandle h) +{ auto* client = reinterpret_cast(h.pointer); PCMS_ALWAYS_ASSERT(client != nullptr); - client ->BeginReceivePhase(); + client->BeginReceivePhase(); } -void pcms_end_receive_phase(PcmsClientHandle h) { +void pcms_end_receive_phase(PcmsClientHandle h) +{ auto* client = reinterpret_cast(h.pointer); PCMS_ALWAYS_ASSERT(client != nullptr); - client ->EndReceivePhase(); + client->EndReceivePhase(); } \ No newline at end of file diff --git a/src/pcms/capi/client.h b/src/pcms/capi/client.h index 78a49f42..e03a3332 100644 --- a/src/pcms/capi/client.h +++ b/src/pcms/capi/client.h @@ -7,15 +7,30 @@ extern "C" { #endif -struct PcmsClientHandle { void* pointer; }; +struct PcmsClientHandle +{ + void* pointer; +}; typedef struct PcmsClientHandle PcmsClientHandle; -struct PcmsOmegaHMeshHandle { void* pointer; }; +struct PcmsOmegaHMeshHandle +{ + void* pointer; +}; typedef struct PcmsOmegaHMeshHandle PcmsOmegaHMeshHandle; -struct PcmsReverseClassificationHandle { void* pointer; }; +struct PcmsReverseClassificationHandle +{ + void* pointer; +}; typedef struct PcmsReverseClassificationHandle PcmsReverseClassificationHandle; -struct PcmsFieldAdapterHandle { void* pointer; }; +struct PcmsFieldAdapterHandle +{ + void* pointer; +}; typedef struct PcmsFieldAdapterHandle PcmsFieldAdapterHandle; -struct PcmsFieldHandle { void* pointer; }; +struct PcmsFieldHandle +{ + void* pointer; +}; typedef struct PcmsFieldHandle PcmsFieldHandle; enum PcmsAdapterType @@ -35,7 +50,7 @@ enum PcmsType }; typedef enum PcmsType PcmsType; -//change to a struct holding a pointer +// change to a struct holding a pointer PcmsClientHandle pcms_create_client(const char* name, MPI_Comm comm); void pcms_destroy_client(PcmsClientHandle); @@ -47,24 +62,23 @@ void pcms_destroy_reverse_classification(PcmsReverseClassificationHandle); // this function is helpful for test cases so we can compute the total number of // vertexes in the mesh without reading additional files. This function is not // likely to be needed for production cases -int pcms_reverse_classification_count_verts( - PcmsReverseClassificationHandle); +int pcms_reverse_classification_count_verts(PcmsReverseClassificationHandle); // takes in overlap function takes a geometric dimension and a geometric id // C doesn't have a builtin bool type, so we use int for compatability with C++ typedef int8_t (*in_overlap_function)(int, int); PcmsFieldAdapterHandle pcms_create_xgc_field_adapter( - const char* name, MPI_Comm plane_comm, void* data, int size, PcmsType data_type, - const PcmsReverseClassificationHandle rc, in_overlap_function in_overlap); + const char* name, MPI_Comm plane_comm, void* data, int size, + PcmsType data_type, const PcmsReverseClassificationHandle rc, + in_overlap_function in_overlap); PcmsFieldAdapterHandle pcms_create_dummy_field_adapter(); void pcms_destroy_field_adapter(PcmsFieldAdapterHandle); -PcmsFieldHandle pcms_add_field(PcmsClientHandle client_handle, - const char* name, - PcmsFieldAdapterHandle adapter_handle, - int participates); +PcmsFieldHandle pcms_add_field(PcmsClientHandle client_handle, const char* name, + PcmsFieldAdapterHandle adapter_handle, + int participates); void pcms_send_field_name(PcmsClientHandle, const char* name); void pcms_receive_field_name(PcmsClientHandle, const char* name); diff --git a/src/pcms/client.h b/src/pcms/client.h index 37021df1..5b640936 100644 --- a/src/pcms/client.h +++ b/src/pcms/client.h @@ -4,7 +4,6 @@ #include "pcms/field_communicator.h" #include "pcms/profile.h" - namespace pcms { @@ -19,7 +18,7 @@ class CoupledField PCMS_FUNCTION_TIMER; MPI_Comm mpi_comm_subset = MPI_COMM_NULL; PCMS_ALWAYS_ASSERT((mpi_comm == MPI_COMM_NULL) ? (participates == false) - : true); + : true); if (mpi_comm != MPI_COMM_NULL) { int rank = -1; MPI_Comm_rank(mpi_comm, &rank); @@ -89,8 +88,6 @@ class CoupledField std::unique_ptr coupled_field_; }; - - class CouplerClient { public: diff --git a/src/pcms/interpolator/CMakeLists.txt b/src/pcms/interpolator/CMakeLists.txt new file mode 100644 index 00000000..08728df7 --- /dev/null +++ b/src/pcms/interpolator/CMakeLists.txt @@ -0,0 +1,39 @@ +find_package(KokkosKernels REQUIRED) + +set(PCMS_FIELD_TRANSFER_HEADERS + pcms_interpolator_aliases.hpp + adj_search.hpp + mls_interpolation_impl.hpp + queue_visited.hpp + linear_interpolant.hpp + multidimarray.hpp + mls_interpolation.hpp +) + +set(PCMS_FIELD_TRANSFER_SOURCES + mls_interpolation.cpp +) +install(FILES ${PCMS_FIELD_TRANSFER_HEADERS} DESTINATION include/pcms/interpolator) + + +add_library(pcms_interpolator ${PCMS_FIELD_TRANSFER_SOURCES}) +set_target_properties(pcms_interpolator PROPERTIES + OUTPUT_NAME pcmsinterpolator + EXPORT_NAME interpolator) +add_library(pcms::interpolator ALIAS pcms_interpolator) + + +target_link_libraries(pcms_interpolator PUBLIC pcms::core Kokkos::kokkoskernels) + + +install(TARGETS pcms_interpolator + EXPORT interpolatorTargets + INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/pcms/interpolator + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + +install(EXPORT interpolatorTargets + FILE pcms_interpolator-targets.cmake + NAMESPACE pcms:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/pcms) + diff --git a/src/pcms/interpolator/adj_search.hpp b/src/pcms/interpolator/adj_search.hpp new file mode 100644 index 00000000..a5a20b78 --- /dev/null +++ b/src/pcms/interpolator/adj_search.hpp @@ -0,0 +1,566 @@ +#ifndef ADJ_SEARCH_HPP +#define ADJ_SEARCH_HPP + +#include + +#include "queue_visited.hpp" + +using namespace Omega_h; + +static constexpr int max_dim = 3; + +// TODO change this into span/mdspan +OMEGA_H_INLINE +Real calculateDistance(const Real* p1, const Real* p2, const int dim) +{ + Real dx, dy, dz; + dx = p1[0] - p2[0]; + dy = p1[1] - p2[1]; + if (dim != 3) { + dz = 0.0; + } else { + dz = p1[2] - p2[2]; + } + return dx * dx + dy * dy + dz * dz; +} + +inline void checkTargetPoints( + const Kokkos::View& results) +{ + Kokkos::fence(); + printf("INFO: Checking target points...\n"); + auto check_target_points = OMEGA_H_LAMBDA(LO i) + { + if (results(i).tri_id < 0) { + OMEGA_H_CHECK_PRINTF(results(i).tri_id >= 0, + "ERROR: Source cell id not found for target %d\n", + i); + printf("%d, ", i); + } + }; + parallel_for(results.size(), check_target_points, "check_target_points"); + Kokkos::fence(); + printf("\n"); +} + +inline void printSupportsForTarget(const LO target_id, + const Write& supports_ptr, + const Write& nSupports, + const Write& support_idx) +{ + parallel_for( + nSupports.size(), OMEGA_H_LAMBDA(const LO id) { + if (id == target_id) { + LO start_ptr = supports_ptr[id]; + LO end_ptr = supports_ptr[id + 1]; + printf("Target vertex: %d\n with %d num supports: nSupports[id]=%d", id, + end_ptr - start_ptr, nSupports[id]); + for (LO i = start_ptr; i < end_ptr; ++i) { + LO cell_id = support_idx[i]; + printf(", %d", cell_id); + } + printf("\n"); + } + }); +} + +class FindSupports +{ +private: + Mesh& source_mesh; + Mesh& target_mesh; // TODO it's null when one mesh is used + +public: + FindSupports(Mesh& source_mesh_, Mesh& target_mesh_) + : source_mesh(source_mesh_), target_mesh(target_mesh_) {}; + + FindSupports(Mesh& mesh_) : source_mesh(mesh_), target_mesh(mesh_) {}; + + void adjBasedSearch(Write& supports_ptr, Write& nSupports, + Write& support_idx, Write& radii2, + bool is_build_csr_call); + + void adjBasedSearchCentroidNodes(Write& supports_ptr, + Write& nSupports, Write& support_idx, + Write& radii2, bool is_build_csr_call); +}; + +inline void FindSupports::adjBasedSearch(Write& supports_ptr, + Write& nSupports, + Write& support_idx, + Write& radii2, + bool is_build_csr_call) +{ + + const auto& sourcePoints_coords = source_mesh.coords(); + const auto nvertices_source = source_mesh.nverts(); + const auto dim = source_mesh.dim(); + + const auto& targetPoints_coords = target_mesh.coords(); + const auto nvertices_target = target_mesh.nverts(); + OMEGA_H_CHECK(radii2.size() == nvertices_target); + + const auto& vert2vert = source_mesh.ask_star(VERT); + const auto& v2v_ptr = vert2vert.a2ab; + const auto& v2v_data = vert2vert.ab2b; + const auto& cells2verts = source_mesh.ask_verts_of(dim); + + Kokkos::View target_points("test_points", nvertices_target); + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + target_points(i, 0) = targetPoints_coords[i * dim]; + target_points(i, 1) = targetPoints_coords[i * dim + 1]; + }); + Kokkos::fence(); + + pcms::GridPointSearch search_cell(source_mesh, 10, 10); + auto results = search_cell(target_points); + checkTargetPoints(results); + + parallel_for( + nvertices_target, + OMEGA_H_LAMBDA(const LO id) { + Queue queue; + Track visited; + Real cutoffDistance = radii2[id]; + + LO source_cell_id = results(id).tri_id; + OMEGA_H_CHECK_PRINTF( + source_cell_id >= 0, + "ERROR: Source cell id not found for target %d (%f,%f)\n", id, + target_points(id, 0), target_points(id, 1)); + + const LO num_verts_in_dim = dim + 1; + LO start_ptr = source_cell_id * num_verts_in_dim; + LO end_ptr = start_ptr + num_verts_in_dim; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + + for (LO k = 0; k < dim; ++k) { + target_coords[k] = target_points(id, k); + } + + LO start_counter; + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; + } + + // * Method: + // 1. Get the vertices of the source cell (source cell is the cell in the + // source mesh in which the target point lies): done above + // 2. Using those 3 vertices, get the adjacent vertices of those 3 + // vertices and go on until the queue is empty + // 3. Already visited vertices are stored in visited and the vertices to + // be checked (dist < cutoff) are stored in the queue + // 4. If not CSR building call, store the support vertices in support_idx + // * Method + + int count = 0; + for (LO i = start_ptr; i < end_ptr; ++i) { + LO vert_id = cells2verts[i]; + visited.push_back(vert_id); + + for (LO k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[vert_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + if (count >= 500) { + printf( + "Warning: count exceeds 500 for target %d with %d supports\n", id, + end_ptr - start_ptr); + printf("Warning: Target %d: coors: (%f, %f) and support %d: " + "coords: (%f, %f)\n", + id, target_coords[0], target_coords[1], vert_id, + support_coords[0], support_coords[1]); + } + queue.push_back(vert_id); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = vert_id; + } + } + } + + while (!queue.isEmpty()) { + LO currentVertex = queue.front(); + queue.pop_front(); + LO start = v2v_ptr[currentVertex]; + LO end = v2v_ptr[currentVertex + 1]; + + for (LO i = start; i < end; ++i) { + auto neighborIndex = v2v_data[i]; + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighborIndex)) { + visited.push_back(neighborIndex); + for (int k = 0; k < dim; ++k) { + support_coords[k] = sourcePoints_coords[neighborIndex * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + if (count >= 500) { + printf("Warning: count exceeds 500 for target %d with start %d " + "and end %d radius2 %f adding neighbor %d\n", + id, start, end, cutoffDistance, neighborIndex); + } + queue.push_back(neighborIndex); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighborIndex; + } + } + } + } + } // end of while loop + + nSupports[id] = count; + }, // lambda + "count the number of supports in each target point"); + if (is_build_csr_call == false) { + // printSupportsForTarget(2057, supports_ptr, nSupports, support_idx); + } +} + +inline void FindSupports::adjBasedSearchCentroidNodes(Write& supports_ptr, + Write& nSupports, + Write& support_idx, + Write& radii2, + bool is_build_csr_call) +{ + // Mesh Info + const auto& mesh_coords = source_mesh.coords(); + const auto& nvertices = source_mesh.nverts(); + const auto& dim = source_mesh.dim(); + const auto& nfaces = source_mesh.nfaces(); + + const auto& nodes2faces = source_mesh.ask_up(VERT, FACE); + const auto& n2f_ptr = nodes2faces.a2ab; + const auto& n2f_data = nodes2faces.ab2b; + const auto& faces2nodes = source_mesh.ask_down(FACE, VERT).ab2b; + + Write cell_centroids( + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); + + parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(mesh_coords, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = dim * id; + cell_centroids[index] = centroid[0]; + cell_centroids[index + 1] = centroid[1]; + }); + // * Got the adj data and cell centroids + + parallel_for( + nvertices, + OMEGA_H_LAMBDA(const LO id) { + Queue queue; + Track visited; + const LO num_verts_in_dim = dim + 1; + Real target_coords[max_dim]; + Real support_coords[max_dim]; + Real cutoffDistance = radii2[id]; + + //? copying the target vertex coordinates + for (LO k = 0; k < dim; ++k) { + target_coords[k] = mesh_coords[id * dim + k]; + } + + LO start_counter; + if (!is_build_csr_call) { + start_counter = supports_ptr[id]; + } + LO start_ptr = n2f_ptr[id]; + LO end_ptr = n2f_ptr[id + 1]; + + int count = 0; + for (LO i = start_ptr; i < end_ptr; ++i) { + LO cell_id = n2f_data[i]; + visited.push_back(cell_id); + + for (LO k = 0; k < dim; ++k) { + support_coords[k] = cell_centroids[cell_id * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + if (dist <= cutoffDistance) { + count++; + queue.push_back(cell_id); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = cell_id; + } + } + } + + while (!queue.isEmpty()) { // ? can queue be empty? + LO currentCell = queue.front(); + queue.pop_front(); + LO start = currentCell * num_verts_in_dim; + LO end = start + num_verts_in_dim; + + for (LO i = start; i < end; ++i) { + LO current_vert_id = faces2nodes[i]; + LO start_ptr_current_vert = n2f_ptr[current_vert_id]; + LO end_ptr_vert_current_vert = n2f_ptr[current_vert_id + 1]; + for (LO j = start_ptr_current_vert; j < end_ptr_vert_current_vert; + ++j) { + auto neighbor_cell_index = n2f_data[j]; + + // check if neighbor index is already in the queue to be checked + // TODO refactor this into a function + + if (visited.notVisited(neighbor_cell_index)) { + visited.push_back(neighbor_cell_index); + for (int k = 0; k < dim; ++k) { + support_coords[k] = + cell_centroids[neighbor_cell_index * dim + k]; + } + + Real dist = calculateDistance(target_coords, support_coords, dim); + + if (dist <= cutoffDistance) { + count++; + queue.push_back(neighbor_cell_index); + if (!is_build_csr_call) { + LO idx_count = count - 1; + support_idx[start_counter + idx_count] = neighbor_cell_index; + } // end of support_idx check + } // end of distance check + } // end of not visited check + } // end of loop over adj cells to the current vertex + } // end of loop over nodes + + } // end of while loop + + nSupports[id] = count; + }, // end of lambda + "count the number of supports in each target point"); + + if (is_build_csr_call == false) { + // printSupportsForTarget(2057, supports_ptr, nSupports, support_idx); + } +} + +struct SupportResults +{ + LOs supports_ptr; + LOs supports_idx; + Write radii2; +}; + +inline SupportResults searchNeighbors(Mesh& source_mesh, Mesh& target_mesh, + Real& cutoffDistance, + LO min_req_support = 12, + bool adapt_radius = true) +{ + FindSupports search(source_mesh, target_mesh); + LO nvertices_source = source_mesh.nverts(); + LO nvertices_target = target_mesh.nverts(); + + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + printf("INFO: Cut off distance: %f\n", cutoffDistance); + Write radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + + Write supports_ptr; + Write supports_idx; + + if (!adapt_radius) { + printf("INFO: Fixed radius search *(disregarding required minimum " + "support)*... \n"); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); + } else { + printf("INFO: Adaptive radius search... \n"); + int r_adjust_loop = 0; + while (true) { + nSupports = Write(nvertices_target, 0, + "number of supports in each target vertex"); + + Real max_radius = 0.0; + Kokkos::parallel_reduce( + "find max radius", nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (radii2[i] > local_max) ? radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + + // create storage every time to avoid complexity + Write supports_ptr; + Write supports_idx; + Kokkos::fence(); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, + true); + Kokkos::fence(); + + LO min_supports_found = 0; + Kokkos::Min min_reducer(min_supports_found); + Kokkos::parallel_reduce( + "find min number of supports", nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + min_reducer.join(local_min, nSupports[i]); + }, + min_reducer); + printf("INFO: min_supports_found: %d at loop %d, max_radius %f\n", + min_supports_found, r_adjust_loop, max_radius); + + r_adjust_loop++; + Kokkos::fence(); + if (min_supports_found >= min_req_support) { + break; + } + + Kokkos::fence(); + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_req_support) { + Real factor = Real(min_req_support) / Real(nSupports[i]); + factor = (factor > 1.1 || nSupports[i] == 0) ? 1.1 : factor; + radii2[i] = radii2[i] * factor; + } + }); + Kokkos::fence(); + } + + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + } + + supports_ptr = Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { + update += nSupports[j]; + if (final) { + supports_ptr[j + 1] = update; + } + }, + total_supports); + + Kokkos::fence(); + + supports_idx = Write(total_supports, 0, + "index of source supports of each target node"); + + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, false); + + target_mesh.add_tag(VERT, "radii2", 1, radii2); + return SupportResults{read(supports_ptr), read(supports_idx), radii2}; +} + +inline SupportResults searchNeighbors(Mesh& mesh, Real cutoffDistance, + LO min_support = 12, + bool adapt_radius = true) +{ + Write supports_ptr; + Write supports_idx; + FindSupports search(mesh); + LO nvertices_target = mesh.nverts(); + Write nSupports(nvertices_target, 0, + "number of supports in each target vertex"); + + printf("INFO: Inside searchNeighbors 1\n"); + Write radii2 = Write(nvertices_target, cutoffDistance, + "squared radii of the supports"); + printf("INFO: Cutoff distance: %f\n", cutoffDistance); + + if (!adapt_radius) { + printf("INFO: Fixed radius search *(disregarding required minimum " + "support)* ... \n"); + search.adjBasedSearch(supports_ptr, nSupports, supports_idx, radii2, true); + } else { + printf("INFO: Adaptive radius search... \n"); + int r_adjust_loop = 0; + while (true) { // until the number of minimum support is met + Real max_radius = 0.0; + Kokkos::parallel_reduce( + "find max radius", nvertices_target, + OMEGA_H_LAMBDA(const LO i, Real& local_max) { + local_max = (radii2[i] > local_max) ? radii2[i] : local_max; + }, + Kokkos::Max(max_radius)); + printf("INFO: Loop %d: max_radius: %f\n", r_adjust_loop, max_radius); + + nSupports = Write(nvertices_target, 0, + "number of supports in each target vertex"); + SupportResults support; // create support every time to avoid complexity + search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, + radii2, true); + + Kokkos::fence(); + LO min_nSupports = 0; + Kokkos::parallel_reduce( + "find min number of supports", nvertices_target, + OMEGA_H_LAMBDA(const LO i, LO& local_min) { + local_min = (nSupports[i] < local_min) ? nSupports[i] : local_min; + }, + Kokkos::Min(min_nSupports)); + + printf("min_nSupports: %d at loop %d, max_radius %f\n", min_nSupports, + r_adjust_loop, max_radius); + r_adjust_loop++; + + if (min_nSupports >= min_support) { + break; + } + + Kokkos::fence(); + parallel_for( + nvertices_target, OMEGA_H_LAMBDA(const LO i) { + if (nSupports[i] < min_support) { + Real factor = Real(min_support) / Real(nSupports[i]); + factor = (nSupports[i] == 0 || factor > 1.5) ? 1.5 : factor; + radii2[i] *= factor; + } + nSupports[i] = 0; // ? might not be needed + }); + } // while loop + printf("INFO: Took %d loops to adjust the radius\n", r_adjust_loop); + } // adaptive radius search + + // offset array for the supports of each target vertex + supports_ptr = Write(nvertices_target + 1, 0, + "number of support source vertices in CSR format"); + + LO total_supports = 0; + Kokkos::parallel_scan( + nvertices_target, + OMEGA_H_LAMBDA(int j, int& update, bool final) { + update += nSupports[j]; + if (final) { + supports_ptr[j + 1] = update; + } + }, + total_supports); + + printf("INFO: Inside searchNeighbors 3\n"); + Kokkos::fence(); + + supports_idx = Write(total_supports, 0, + "index of source supports of each target node"); + printf("INFO: Total_supports: %d\n", total_supports); + + search.adjBasedSearchCentroidNodes(supports_ptr, nSupports, supports_idx, + radii2, false); + + mesh.add_tag(VERT, "support_radius", 1, radii2); + return SupportResults{read(supports_ptr), read(supports_idx), radii2}; +} + +#endif diff --git a/src/pcms/interpolator/linear_interpolant.hpp b/src/pcms/interpolator/linear_interpolant.hpp new file mode 100644 index 00000000..fb91336e --- /dev/null +++ b/src/pcms/interpolator/linear_interpolant.hpp @@ -0,0 +1,186 @@ + +#ifndef INTERPOLANT_HPP +#define INTERPOLANT_HPP + +#include +#include "multidimarray.hpp" +#define MAX_DIM 10 + +KOKKOS_INLINE_FUNCTION +void find_indices(const IntVecView& num_bins, const RealVecView& range, + const RealVecView& point, int* indices) +{ + int dim = point.extent(0); + // IntVecView indices("parametric coordinate", dim); + for (int i = 0; i < dim; ++i) { + int id = i * 2; + double length = range(id + 1) - range(id); + double dlen = length / num_bins(i); + indices[i] = ((point(i) - range(id)) / dlen); + } +} + +KOKKOS_INLINE_FUNCTION +void find_limits(const IntVecView& num_bins, const RealVecView& range, + const int* indices, double* limits) +{ + int dim = num_bins.extent(0); + // RealVecView limits("limits", 2*dim); + for (int i = 0; i < dim; ++i) { + int ptr = i * 2; + double dlen = (range(ptr + 1) - range(ptr)) / num_bins(i); + limits[ptr] = indices[i] * dlen; + limits[ptr + 1] = limits[ptr] + dlen; + } +} + +KOKKOS_INLINE_FUNCTION +void evaluateParametricCoord(const RealVecView& point, const double* limits, + double* parametric_coord) +{ + int dim = point.extent(0); + // RealVecView parametric_coord("parametric coordinate", dim); + for (int i = 0; i < dim; ++i) { + int index = i * 2; + parametric_coord[i] = + (point(i) - limits[index]) / (limits[index + 1] - limits[index]); + } +} + +KOKKOS_INLINE_FUNCTION +void basis_function(const RealVecView& parametric_coord, + double* linear_basis_each_dir) +{ + int dim = parametric_coord.extent(0); + // RealVecView linear_basis_each_dir("basis function each direction", 2*dim); + for (int i = 0; i < dim; ++i) { + int index = i * 2; + + linear_basis_each_dir[index] = 1 - parametric_coord(i); + + linear_basis_each_dir[index + 1] = parametric_coord(i); + } +} + +KOKKOS_INLINE_FUNCTION +double linear_interpolant(const IntVecView& dimensions, + const double* linear_basis_each_dir, + const IntVecView& indices, const RealVecView& values) +{ + int dim = dimensions.extent(0); + double sum = 0; + int ids[MAX_DIM]; + + int array_size = 1 << dim; + for (int i = 0; i < array_size; ++i) { + double temp = 1.0; + for (int j = 0; j < dim; ++j) { + int index = 2 * j; + int id_left = indices(j); + int id_right = id_left + 1; + if (i & (1 << j)) { + temp *= linear_basis_each_dir[index + 1]; + ids[j] = id_right; + } else { + temp *= linear_basis_each_dir[index]; + ids[j] = id_left; + } + } + + int idx = calculateIndex(dimensions, ids); + double corner_values = values(idx); + + sum += temp * corner_values; + } + return sum; +} + +class RegularGridInterpolator +{ +private: + const RealMatView parametric_coords; + const RealVecView values; + const IntMatView indices; + const IntVecView dimensions; + +public: + RegularGridInterpolator(const RealMatView& parametric_coords_, + const RealVecView& values_, + const IntMatView& indices_, + const IntVecView& dimensions_) + : parametric_coords(parametric_coords_), + values(values_), + indices(indices_), + dimensions(dimensions_) {}; + + RealVecView linear_interpolation() + { + int dim = dimensions.extent(0); + int N = parametric_coords.extent(0); + RealVecView interpolated_values("approximated values", N); + auto parametric_coords_ = parametric_coords; + auto dimensions_ = dimensions; + auto values_ = values; + auto indices_ = indices; + Kokkos::parallel_for( + "linear interpolation function", N, KOKKOS_LAMBDA(int j) { + double linear_basis_each_dir[MAX_DIM] = {0.0}; + auto parametric_coord_each_point = + Kokkos::subview(parametric_coords_, j, Kokkos::ALL()); + auto index_each_point = Kokkos::subview(indices_, j, Kokkos::ALL()); + basis_function(parametric_coord_each_point, linear_basis_each_dir); + auto approx_value = linear_interpolant( + dimensions_, linear_basis_each_dir, index_each_point, values_); + interpolated_values(j) = approx_value; + }); + + return interpolated_values; + } +}; + +struct Result +{ + IntMatView indices_pts; + RealMatView parametric_coords; +}; + +Result parametric_indices(const RealMatView& points, const IntVecView& num_bins, + const RealVecView& range) +{ + + int dim = num_bins.extent(0); + Result result; + int N = points.extent(0); + result.indices_pts = IntMatView("indices", N, dim); + result.parametric_coords = RealMatView("parametric_coordinates", N, dim); + + Kokkos::parallel_for( + N, KOKKOS_LAMBDA(int j) { + int indcs[MAX_DIM] = {0}; + double limits[MAX_DIM] = {0.0}; + double parametric_coord_each[MAX_DIM] = {0.0}; + auto point_coord = Kokkos::subview(points, j, Kokkos::ALL()); + find_indices(num_bins, range, point_coord, indcs); + find_limits(num_bins, range, indcs, limits); + evaluateParametricCoord(point_coord, limits, parametric_coord_each); + for (int i = 0; i < dim; ++i) { + result.indices_pts(j, i) = indcs[i]; + result.parametric_coords(j, i) = parametric_coord_each[i]; + } + }); + + return result; +} + +KOKKOS_INLINE_FUNCTION +double test_function(double* coord) +{ + double fun_value = 0; + int dim = 5; + for (int i = 0; i < dim; ++i) { + fun_value += coord[i]; + } + return fun_value; +} + +#endif diff --git a/src/pcms/interpolator/mls_interpolation.cpp b/src/pcms/interpolator/mls_interpolation.cpp new file mode 100644 index 00000000..113986a1 --- /dev/null +++ b/src/pcms/interpolator/mls_interpolation.cpp @@ -0,0 +1,217 @@ +#include +#include "mls_interpolation.hpp" + +namespace pcms +{ + +// RBF_GAUSSIAN Functor +struct RBF_GAUSSIAN +{ + OMEGA_H_INLINE + double operator()(double r_sq, double rho_sq) const + { + double phi; + OMEGA_H_CHECK_PRINTF(rho_sq > 0, + "ERROR: square of cutoff distance should always be " + "positive but the value is %.16f\n", + rho_sq); + + OMEGA_H_CHECK_PRINTF(r_sq > 0, + "ERROR: square of distance should always be positive " + "but the value is %.16f\n", + r_sq); + + // 'a' is a spreading factor/decay factor + // the value of 'a' is higher if the data is localized + // the value of 'a' is smaller if the data is farther + int a = 20; + + double r = sqrt(r_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + + if (limit < 0) { + phi = 0; + + } else { + phi = exp(-a * a * r * r); + } + + return phi; + } +}; + +// RBF_C4 Functor +struct RBF_C4 +{ + OMEGA_H_INLINE + double operator()(double r_sq, double rho_sq) const + { + double phi; + double r = sqrt(r_sq); + OMEGA_H_CHECK_PRINTF( + rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", + rho_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 5 * pow(ratio, 5) + 30 * pow(ratio, 4) + 72 * pow(ratio, 3) + + 82 * pow(ratio, 2) + 36 * ratio + 6; + phi = phi * pow(limit, 6); + } + + OMEGA_H_CHECK_PRINTF(!std::isnan(phi), + "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n", + r_sq, rho_sq); + return phi; + } +}; + +// RBF_const Functor +// +struct RBF_CONST +{ + OMEGA_H_INLINE + double operator()(double r_sq, double rho_sq) const + { + double phi; + double r = sqrt(r_sq); + OMEGA_H_CHECK_PRINTF( + rho_sq > 0, "ERROR: rho_sq in rbf has to be positive, but got %.16f\n", + rho_sq); + double rho = sqrt(rho_sq); + double ratio = r / rho; + double limit = 1 - ratio; + if (limit < 0) { + phi = 0; + + } else { + phi = 1.0; + } + + OMEGA_H_CHECK_PRINTF(!std::isnan(phi), + "ERROR: phi in rbf is NaN. r_sq, rho_sq = (%f, %f)\n", + r_sq, rho_sq); + return phi; + } +}; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, RadialBasisFunction bf) +{ + + const auto nvertices_target = target_coordinates.size() / dim; + + Write interpolated_values(nvertices_target, 0, + "approximated target values"); + switch (bf) { + case RadialBasisFunction::RBF_GAUSSIAN: + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, RBF_GAUSSIAN{}); + break; + + case RadialBasisFunction::RBF_C4: + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, RBF_C4{}); + break; + + case RadialBasisFunction::RBF_CONST: + interpolated_values = detail::mls_interpolation( + source_values, source_coordinates, target_coordinates, support, dim, + degree, RBF_CONST{}); + break; + } + + return interpolated_values; +} + +namespace detail +{ + +void calculate_basis_slice_lengths(IntHostMatView& array) +{ + int degree = array.extent(0); + int dim = array.extent(1); + + for (int j = 0; j < dim; ++j) { + array(0, j) = 1; + } + + for (int i = 0; i < degree; ++i) { + array(i, 0) = 1; + } + + for (int i = 1; i < degree; ++i) { + for (int j = 1; j < dim; ++j) { + array(i, j) = array(i, j - 1) + array(i - 1, j); + } + } +} + +int calculate_basis_vector_size(const IntHostMatView& array) +{ + int sum = 1; + int degree = array.extent(0); + int dim = array.extent(1); + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + sum += array(i, j); + } + } + + return sum; +} + +int calculate_scratch_shared_size(const SupportResults& support, + const int nvertices_target, int basis_size) +{ + + IntDeviceVecView shmem_each_team("stores the size required for each team", + nvertices_target); + Kokkos::parallel_for( + "calculate the size required for scratch for each team", nvertices_target, + KOKKOS_LAMBDA(const int i) { + int start_ptr = support.supports_ptr[i]; + int end_ptr = support.supports_ptr[i + 1]; + int nsupports = end_ptr - start_ptr; + + size_t total_shared_size = 0; + + total_shared_size += ScratchMatView::shmem_size(basis_size, basis_size); + total_shared_size += ScratchMatView::shmem_size(basis_size, nsupports); + total_shared_size += ScratchMatView::shmem_size(nsupports, basis_size); + total_shared_size += ScratchVecView::shmem_size(basis_size); + total_shared_size += ScratchVecView::shmem_size(nsupports) * 3; + total_shared_size += ScratchMatView::shmem_size(nsupports, 2); + total_shared_size += ScratchMatView::shmem_size(nsupports, 1); + shmem_each_team(i) = total_shared_size; + }); + + // namespace KE = Kokkos::Experimental; + // auto shared_size = KE::max_element(Kokkos::DefaultExecutionSpace(), + // shmem_each_team); printf("shared size = %d", shared_size); + int shared_size = 0; + Kokkos::parallel_reduce( + "find_max", nvertices_target, + KOKKOS_LAMBDA(const int i, int& max_val_temp) { + if (shmem_each_team(i) > max_val_temp) { + max_val_temp = shmem_each_team(i); + } + }, + Kokkos::Max(shared_size)); + + return shared_size; +} +} // namespace detail +} // namespace pcms diff --git a/src/pcms/interpolator/mls_interpolation.hpp b/src/pcms/interpolator/mls_interpolation.hpp new file mode 100644 index 00000000..17b085aa --- /dev/null +++ b/src/pcms/interpolator/mls_interpolation.hpp @@ -0,0 +1,24 @@ +#ifndef MLS_RBF_OPTIONS_HPP +#define MLS_RBF_OPTIONS_HPP + +#include "mls_interpolation_impl.hpp" + +using namespace Omega_h; + +namespace pcms +{ +enum class RadialBasisFunction : LO +{ + RBF_GAUSSIAN = 0, + RBF_C4, + RBF_CONST + +}; + +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, RadialBasisFunction bf); +} // namespace pcms +#endif diff --git a/src/pcms/interpolator/mls_interpolation_impl.hpp b/src/pcms/interpolator/mls_interpolation_impl.hpp new file mode 100644 index 00000000..5a79f64a --- /dev/null +++ b/src/pcms/interpolator/mls_interpolation_impl.hpp @@ -0,0 +1,629 @@ +#ifndef PCMS_INTERPOLATOR_MLS_INTERPOLATION_IMP_HPP +#define PCMS_INTERPOLATOR_MLS_INTERPOLATION_IMP_HPP + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "pcms_interpolator_aliases.hpp" +#include "adj_search.hpp" +#include "../assert.h" +#include "../profile.h" + +static constexpr int MAX_DIM = 3; + +/** + * calculate_basis_slice_lengths, calculate_basis_vector_size and + * eval_basis_vector are needed to evaluate the polynomial basis for any degree + * and dimension For instance, polynomial basis vector for dim = 2 and degree = + * 3 at the point (x,y) looks like {1, x, y, xx, xy, yy, xxx, xxy, xyy,yyy}. The + * slices can be written as [1] degree 0 [x] & [y] degree 1 + * [xx] & [xy, yy] degree 2 + * [xxx] & [xxy,xyy, yyy] degree 3 + * + * the recursive pattern becomes: + * [1] degree 0 + * x*[1] & y*[1] degree 1 + * x*[x] & y*[x, y] degree 2 + * xx*[x] & y*[xx,xy,yy] degree 3 + * + * lengths of the slices: + * Degree \ Dim | x | y | + * ========================= + * 1 | 1 | 1 | + * 2 | 1 | 2 | + * 3 | 1 | 3 | + */ + +namespace pcms +{ + +namespace detail +{ +/** + * @brief Computes the slice lengths of the polynomial basis + * + * This function takes kokkos view array as an input and computes the lengths of + * slices and fills the array + * + * @param[in,out] array The 2D array + * + * @note It takes a host array as an input, could have done for device + * but there was a race conditions for a device + */ +void calculate_basis_slice_lengths(IntHostMatView& array); + +/** + * @brief Computes the size of the polynomial basis vector + * + * @param array The 2D array of the slices length + * @return The basis vector size + * + * @note It takes the host array + */ +int calculate_basis_vector_size(const IntHostMatView& array); + +/** + * @brief Calculates the scratch size required + * + * This function uses the size of the vectors and matrices and + * calculates the scratch size required in each team and extracts the + * maximum shared size + * + * @param None + * + * @return The scratch size + */ +int calculate_scratch_shared_size(const SupportResults& support, + const int nvertices_target, int basis_size); + +/** + * @brief Evaluates the polynomial basis + * + * for example, if it dim = 2 and degree = 2, the basis_vector looks like + * basis_vector = {1,x,y,xx,xy,yy} + * + * @param[in] slice_length A 2D array of slices length + * @param[in] p A reference to the coordinate struct + * @param[in,out] basis_vector The polynomial basis vector + * + */ +KOKKOS_INLINE_FUNCTION +void eval_basis_vector(const IntDeviceMatView& slice_length, const Coord& p, + ScratchVecView& basis_vector) +{ + basis_vector(0) = 1; + int dim = slice_length.extent(1); + int degree = slice_length.extent(0); + + int prev_col = 0; + int curr_col = 1; + + double point[MAX_DIM]; + point[0] = p.x; + point[1] = p.y; + + if (dim == 3) { + point[2] = p.z; + } + + for (int i = 0; i < degree; ++i) { + int offset = curr_col; + for (int j = 0; j < dim; ++j) { + for (int k = 0; k < slice_length(i, j); ++k) { + basis_vector(offset + k) = basis_vector(prev_col + k) * point[j]; + } + + offset += slice_length(i, j); + } + + prev_col = curr_col; + curr_col = offset; + } +} + +/** + * @brief Creates a vandermonde matrix + * + * @param[in] local_source_points The coordinates of the local supports + * @param[in] j The jth row + * @param[in] slice_length The slice lengths of the polynomial basis + * @param[in,out] vandermonde_matrix The Vandermonde Matrix + * + */ +KOKKOS_INLINE_FUNCTION +void create_vandermonde_matrix(const ScratchMatView& local_source_points, int j, + const IntDeviceMatView& slice_length, + ScratchMatView vandermonde_matrix) +{ + int N = local_source_points.extent(0); + int dim = local_source_points.extent(1); + + Coord source_point; + source_point.x = local_source_points(j, 0); + source_point.y = local_source_points(j, 1); + if (dim == 3) { + source_point.z = local_source_points(j, 2); + } + ScratchVecView basis_vector_supports = + Kokkos::subview(vandermonde_matrix, j, Kokkos::ALL()); + eval_basis_vector(slice_length, source_point, basis_vector_supports); +} + +/** + *@brief Computes basis function vector + * + *This function takes a basis function the user wants and calculates the value + *for each local source supports + * + * @tparam Func The type of the radial basis function + * @param[in] target_point The coordinate of the target point where the + *interpolation is carried out + * @param[in] source_points The coordinates of the source support points + * @param[in] j The jth row + * @param[in] cutoff_dis_sq The cutoff radius squared + * @param[in] rbf_func The radial basis function of choice + * @param[in, out] phi A radial basis function value + * + */ +template , + bool> = true> +KOKKOS_INLINE_FUNCTION void compute_phi_vector( + const Coord& target_point, const ScratchMatView& local_source_points, int j, + double cuttoff_dis_sq, Func rbf_func, ScratchVecView phi) +{ + int N = local_source_points.extent(0); + double dx = target_point.x - local_source_points(j, 0); + double dy = target_point.y - local_source_points(j, 1); + double ds_sq = dx * dx + dy * dy; + phi(j) = rbf_func(ds_sq, cuttoff_dis_sq); + OMEGA_H_CHECK_PRINTF(!std::isnan(phi(j)), + "ERROR: Phi(j) in compute_phi_vector is NaN for j = %d " + "ds_sq=%.16f, cuttoff_dis_sq=%.16f", + j, ds_sq, cuttoff_dis_sq); +} + +/** + * @brief Scales the column of the transpose of the given matrix + * + * This function takes a matrix P(m,n) and vector phi(m) and scales the + * each column of the transpose of P by each corresponding element in vector + * phi + * + * @param[in] matrix The matrix + * @param[in] vector The vector + * @param[in] team The team member + * @param[in, out] result_matrix + * + */ +KOKKOS_INLINE_FUNCTION +void scale_column_trans_matrix(const ScratchMatView& matrix, + const ScratchVecView& vector, member_type team, + int j, ScratchMatView result_matrix) +{ + + int M = matrix.extent(0); + int N = matrix.extent(1); + + ScratchVecView matrix_row = Kokkos::subview(matrix, j, Kokkos::ALL()); + for (int k = 0; k < N; k++) { + OMEGA_H_CHECK_PRINTF(!std::isnan(matrix_row(k)), + "ERROR: given matrix is NaN for k = %d\n", k); + + OMEGA_H_CHECK_PRINTF(!std::isnan(vector(j)), + "ERROR: given vector is NaN for j = %d\n", j); + + result_matrix(k, j) = matrix_row(k) * vector(j); + + OMEGA_H_CHECK_PRINTF(!std::isnan(result_matrix(k, j)), + "ERROR: result_matrix is NaN for k = %d, j = %d\n", k, + j); + } +} + +/** + * @struct ResultConvertNormal + * @brief Stores the results of matrix and vector transformations. + * + * This struct represents the results of the following operations: + * - `P^T Q` (scaled matrix): The matrix obtained after scaling the columns of + * the given matrix. + * - `P^T Q P` (square matrix): The matrix obtained after performing the `P^T Q + * P` operation. + * - `P^T b` (transformed RHS): The vector obtained after applying the `P^T b` + * operation. + * + * The struct is used to store: + * - The scaled matrix (`scaled_matrix`). + * - The square matrix (`square_matrix`). + * - The transformed right-hand side vector (`transformed_rhs`). + */ +struct ResultConvertNormal +{ + ScratchMatView scaled_matrix; + ScratchMatView square_matrix; + ScratchVecView transformed_rhs; +}; + +/** + * @brief Converts a normal equation into a simplified form. + * + * This function takes a matrix, a basis/weight vector, and a right-hand side + * (RHS) vector and computes the scaled matrix, square matrix, and transformed + * RHS. + * + * For example, the normal equation `P^T Q P x = P^T Q b` is converted into the + * form `Ax = b'`, where: + * - `A` (square matrix) = `P^T Q P` + * - `b'` (transformed RHS) = `P^T Q b` + * + * @param matrix The input matrix. + * @param weight_vector The weight or basis vector. + * @param rhs The right-hand side vector of the equation. + * @param team The team member executing the task (optional, if applicable). + * + * @return The result of the normal equation conversion as a struct containing: + * - The scaled matrix. + * - The square matrix. + * - The transformed RHS. + * + * @todo Implement QR decomposition to solve the linear system. + */ +KOKKOS_INLINE_FUNCTION +ResultConvertNormal convert_normal_equation(const ScratchMatView& matrix, + const ScratchVecView& weight_vector, + const ScratchVecView& rhs, + member_type team) +{ + + int m = matrix.extent(0); + int n = matrix.extent(1); + + ScratchMatView scaled_matrix(team.team_scratch(0), n, m); + + ScratchMatView square_matrix(team.team_scratch(0), n, n); + + ScratchVecView transformed_rhs(team.team_scratch(0), n); + + // performing P^T Q + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, n), [=](int j) { + for (int k = 0; k < m; ++k) { + scaled_matrix(j, k) = 0; + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, m), [=](int j) { + scale_column_trans_matrix(matrix, weight_vector, team, j, scaled_matrix); + }); + + team.team_barrier(); + + // performing (P^T Q P) + KokkosBatched::TeamGemm< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::Gemm::Unblocked>::invoke(team, 1.0, scaled_matrix, + matrix, 0.0, square_matrix); + + team.team_barrier(); + + KokkosBlas::Experimental:: + Gemv::invoke( + team, 'N', 1.0, scaled_matrix, rhs, 0.0, transformed_rhs); + + team.team_barrier(); + + return ResultConvertNormal{scaled_matrix, square_matrix, transformed_rhs}; +} + +/** + * @brief Solves the matrix equation Ax = b' using LU decomposition. + * + * This function solves the given matrix equation using LU decomposition. + * The solution vector `x'` overwrites the input `rhs` vector. + * + * @param square_matrix The input matrix A (must be square). + * @param rhs The right-hand side vector b. After execution, it is overwritten + * with the solution vector x. + * @param team The team member executing the task. + * + * @return None. The solution is directly written to the `rhs` vector. + */ +KOKKOS_INLINE_FUNCTION +void solve_matrix(const ScratchMatView& square_matrix, + const ScratchVecView& rhs, member_type team) +{ + + // Perform LU decomposition + KokkosBatched::TeamLU< + member_type, KokkosBatched::Algo::LU::Unblocked>::invoke(team, + square_matrix); + + team.team_barrier(); + + // Solve the equation with forward and backward solves + KokkosBatched::TeamSolveLU< + member_type, KokkosBatched::Trans::NoTranspose, + KokkosBatched::Algo::SolveLU::Unblocked>::invoke(team, square_matrix, rhs); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param matrix The scratch matrix + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchMatView matrix) +{ + + int row = matrix.extent(0); + int col = matrix.extent(1); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, row), [=](int j) { + for (int k = 0; k < col; ++k) { + matrix(j, k) = value; + } + }); +} + +/** + * @brief Fills the kokkos scratch view + * + * @param value The value to be populated + * @param team The team member + * @param vector The scratch vector + * + */ +KOKKOS_INLINE_FUNCTION +void fill(double value, member_type team, ScratchVecView vector) +{ + + int size = vector.extent(0); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { vector(j) = value; }); +} + +/** + * @brief Maps the data from source mesh to target mesh + * + * @param source_values Scalar array view of source field values + * @param source_coordinates Scalar array view of the coordinates of control + * points of source field + * @param target_coordinates Scalar array view of the coordinates of control + * points of target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param rbf_func The radial basis function choice + * @param[in/out] Scalar array view of the interpolated field in target mesh + * + * + */ +template +void mls_interpolation(RealConstDefaultScalarArrayView source_values, + RealConstDefaultScalarArrayView source_coordinates, + RealConstDefaultScalarArrayView target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Func rbf_func, + RealDefaultScalarArrayView approx_target_values) +{ + PCMS_FUNCTION_TIMER; + static_assert(std::is_invocable_r_v, + "rbf_func, takes radius and cutoff, returns weight"); + static_assert(!std::is_pointer_v, + "function pointer will fail in GPU execution context"); + + const auto nvertices_source = source_coordinates.size() / dim; + const auto nvertices_target = target_coordinates.size() / dim; + + IntHostMatView host_slice_length( + "stores slice length of polynomial basis in host", degree, dim); + + Kokkos::deep_copy(host_slice_length, 0); + + calculate_basis_slice_lengths(host_slice_length); + + auto basis_size = calculate_basis_vector_size(host_slice_length); + + IntDeviceMatView slice_length( + "stores slice length of polynomial basis in device", degree, dim); + + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + + int shared_size = + calculate_scratch_shared_size(support, nvertices_target, basis_size); + + team_policy tp(nvertices_target, Kokkos::AUTO); + + int scratch_size = tp.scratch_size_max(0); + + PCMS_ALWAYS_ASSERT(scratch_size > shared_size); + + // calculates the interpolated values + Kokkos::parallel_for( + "MLS coefficients", tp.set_scratch_size(0, Kokkos::PerTeam(shared_size)), + KOKKOS_LAMBDA(const member_type& team) { + int league_rank = team.league_rank(); + int start_ptr = support.supports_ptr[league_rank]; + int end_ptr = support.supports_ptr[league_rank + 1]; + int nsupports = end_ptr - start_ptr; + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, dim); + int count = -1; + for (int j = start_ptr; j < end_ptr; ++j) { + count++; + auto index = support.supports_idx[j]; + local_source_points(count, 0) = source_coordinates[index * dim]; + local_source_points(count, 1) = source_coordinates[index * dim + 1]; + if (dim == 3) { + local_source_points(count, 2) = source_coordinates[index * dim + 2]; + } + } + + // vondermonde matrix P from the vectors of basis vector of supports + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + basis_size); + + // rbf function values of source supports Phi(n,n) + ScratchVecView phi_vector(team.team_scratch(0), nsupports); + + // stores known vector (b) + ScratchVecView support_values(team.team_scratch(0), nsupports); + + // basis of target + ScratchVecView target_basis_vector(team.team_scratch(0), basis_size); + + // Initialize the scratch matrices and vectors + fill(0.0, team, vandermonde_matrix); + fill(0.0, team, phi_vector); + fill(0.0, team, support_values); + fill(0.0, team, target_basis_vector); + + // evaluates the basis vector of a given target point + Coord target_point; + target_point.x = target_coordinates[league_rank * dim]; + target_point.y = target_coordinates[league_rank * dim + 1]; + + if (dim == 3) { + target_point.z = target_coordinates[league_rank * dim + 2]; + } + eval_basis_vector(slice_length, target_point, target_basis_vector); + + /** vandermonde_matrix(nsupports, basis_size) vandermonde Matrix is + * created with the basis vector of source supports stacking on top of + * each other + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + create_vandermonde_matrix(local_source_points, j, slice_length, + vandermonde_matrix); + }); + + team.team_barrier(); + + OMEGA_H_CHECK_PRINTF( + support.radii2[league_rank] > 0, + "ERROR: radius2 has to be positive but found to be %.16f\n", + support.radii2[league_rank]); + + /** phi(nsupports) is the array of rbf functions evaluated at the + * source supports In the actual implementation, Phi(nsupports, nsupports) + * is the diagonal matrix & each diagonal element is the phi evaluated at + * each source points + */ + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + compute_phi_vector(target_point, local_source_points, j, + support.radii2[league_rank], rbf_func, phi_vector); + }); + + team.team_barrier(); + + /** support_values(nsupports) (or known rhs vector b) is the vector of the + * quantity that we want interpolate + */ + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](const int j) { + support_values(j) = + source_values[support.supports_idx[start_ptr + j]]; + OMEGA_H_CHECK_PRINTF(!std::isnan(support_values(j)), + "ERROR: NaN found: at support %d\n", j); + }); + + team.team_barrier(); + + auto result = convert_normal_equation(vandermonde_matrix, phi_vector, + support_values, team); + + team.team_barrier(); + + // It stores the solution in rhs vector itself + solve_matrix(result.square_matrix, result.transformed_rhs, team); + + team.team_barrier(); + + double target_value = KokkosBlas::Experimental::dot( + team, result.transformed_rhs, target_basis_vector); + + if (team.team_rank() == 0) { + OMEGA_H_CHECK_PRINTF(!std::isnan(target_value), "Nan at %d\n", + league_rank); + approx_target_values[league_rank] = target_value; + } + }); +} + +/** + * @brief \overload + * Maps the data from source mesh to target mesh + * + * @param source_values Read array of Source field values + * @param source_coordinates Read array of the coordinates of control points of + * source field + * @param target_coordinates Read array of the coordinates of control points of + * target field + * @param support The object that enpasulates support info + * @param dim The dimension of the simulations + * @param degree The degree of the interpolation order + * @param rbf_func The radial basis function choice + * @return Write array of the interpolated field in target mesh + * + * + */ +template +Write mls_interpolation(const Reals source_values, + const Reals source_coordinates, + const Reals target_coordinates, + const SupportResults& support, const LO& dim, + const LO& degree, Func rbf_func) +{ + const auto nvertices_source = source_coordinates.size() / dim; + + const auto nvertices_target = target_coordinates.size() / dim; + + RealConstDefaultScalarArrayView source_values_array_view( + source_values.data(), source_values.size()); + + RealConstDefaultScalarArrayView source_coordinates_array_view( + source_coordinates.data(), source_coordinates.size()); + + RealConstDefaultScalarArrayView target_coordinates_array_view( + target_coordinates.data(), target_coordinates.size()); + + RealDefaultScalarArrayView radii2_array_view(support.radii2.data(), + support.radii2.size()); + + Write interpolated_values(nvertices_target, 0, + "approximated target values"); + + RealDefaultScalarArrayView interpolated_values_array_view( + interpolated_values.data(), interpolated_values.size()); + + mls_interpolation(source_values_array_view, source_coordinates_array_view, + target_coordinates_array_view, support, dim, degree, + rbf_func, interpolated_values_array_view); + + return interpolated_values; +} + +} // namespace detail +} // namespace pcms +#endif diff --git a/src/pcms/interpolator/multidimarray.hpp b/src/pcms/interpolator/multidimarray.hpp new file mode 100644 index 00000000..1423f45c --- /dev/null +++ b/src/pcms/interpolator/multidimarray.hpp @@ -0,0 +1,36 @@ +#ifndef MULTIDIMARRAY_HPP +#define MULTIDIMARRAY_HPP + +#include + +using RealMatView = Kokkos::View; +using IntMatView = Kokkos::View; +using RealVecView = Kokkos::View; +using IntVecView = Kokkos::View; + +using HostIntVecView = Kokkos::View; + +KOKKOS_INLINE_FUNCTION +int calculateIndex(const IntVecView& dimensions, const int* indices) +{ + + int dim = dimensions.extent(0); + int index = 0; + int multiplier = 1; + for (int i = dim - 1; i >= 0; --i) { + index += indices[i] * multiplier; + multiplier *= dimensions(i); + } + return index; +} + +int calculateTotalSize(const HostIntVecView& dimensions) +{ + int dim = dimensions.extent(0); + int size = 1; + for (int i = 0; i < dim; ++i) { + size *= dimensions(i); + } + return size; +} +#endif diff --git a/src/pcms/interpolator/pcms_interpolator_aliases.hpp b/src/pcms/interpolator/pcms_interpolator_aliases.hpp new file mode 100644 index 00000000..06d83640 --- /dev/null +++ b/src/pcms/interpolator/pcms_interpolator_aliases.hpp @@ -0,0 +1,47 @@ +#ifndef PCMS_INTERPOLATOR_ALIASES_HPP +#define PCMS_INTERPOLATOR_ALIASES_HPP + +#include +#include "../arrays.h" + +namespace pcms +{ + +struct Coord +{ + double x, y, z; +}; + +using range_policy = typename Kokkos::RangePolicy<>; +using team_policy = typename Kokkos::TeamPolicy<>; +using member_type = typename Kokkos::TeamPolicy<>::member_type; + +// alias for scratch view +using ScratchSpace = + typename Kokkos::DefaultExecutionSpace::scratch_memory_space; + +using ScratchMatView = + typename Kokkos::View>; +using ScratchVecView = + typename Kokkos::View>; + +using PointsViewType = Kokkos::View; + +struct Points +{ + PointsViewType coordinates; +}; + +using IntDeviceMatView = Kokkos::View; +using IntDeviceVecView = Kokkos::View; +using IntHostMatView = Kokkos::View; + +using RealDefaultScalarArrayView = + ScalarArrayView; +using RealConstDefaultScalarArrayView = + ScalarArrayView; + +} // namespace pcms +#endif diff --git a/src/pcms/interpolator/queue_visited.hpp b/src/pcms/interpolator/queue_visited.hpp new file mode 100644 index 00000000..f09d38af --- /dev/null +++ b/src/pcms/interpolator/queue_visited.hpp @@ -0,0 +1,139 @@ +#ifndef QUEUE_VISITED_HPP +#define QUEUE_VISITED_HPP + +#include +#include +#include +#include +#include +#include +#define MAX_SIZE_QUEUE 500 +#define MAX_SIZE_TRACK 800 +using namespace std; +using namespace Omega_h; + +class Queue +{ +private: + LO queue_array[MAX_SIZE_QUEUE]; + int first = 0, last = -1, count = 0; + +public: + OMEGA_H_INLINE + Queue() {} + + OMEGA_H_INLINE + ~Queue() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + void pop_front(); + + OMEGA_H_INLINE + int front(); + + OMEGA_H_INLINE + bool isEmpty() const; + + OMEGA_H_INLINE + bool isFull() const; +}; + +class Track +{ +private: + LO tracking_array[MAX_SIZE_TRACK]; + int first = 0, last = -1, count = 0; + +public: + OMEGA_H_INLINE + Track() {} + + OMEGA_H_INLINE + ~Track() {} + + OMEGA_H_INLINE + void push_back(const int& item); + + OMEGA_H_INLINE + int size(); + + OMEGA_H_INLINE + bool notVisited(const int& item); +}; + +OMEGA_H_INLINE +void Queue::push_back(const int& item) +{ + if (count == MAX_SIZE_QUEUE) { + printf("queue is full %d\n", count); + return; + } + last = (last + 1) % MAX_SIZE_QUEUE; + queue_array[last] = item; + count++; +} + +OMEGA_H_INLINE +void Queue::pop_front() +{ + if (count == 0) { + printf("queue is empty\n"); + return; + } + first = (first + 1) % MAX_SIZE_QUEUE; + count--; +} + +OMEGA_H_INLINE +int Queue::front() +{ + return queue_array[first]; +} + +OMEGA_H_INLINE +bool Queue::isEmpty() const +{ + return count == 0; +} + +OMEGA_H_INLINE +bool Queue::isFull() const +{ + return count == MAX_SIZE_QUEUE; +} + +OMEGA_H_INLINE +void Track::push_back(const int& item) +{ + if (count == MAX_SIZE_TRACK) { + printf("track is full %d\n", count); + return; + } + last = (last + 1) % MAX_SIZE_TRACK; + tracking_array[last] = item; + count++; +} + +OMEGA_H_INLINE +bool Track::notVisited(const int& item) +{ + int id; + for (int i = 0; i < count; ++i) { + id = (first + i) % MAX_SIZE_TRACK; + if (tracking_array[id] == item) { + return false; + } + } + return true; +} + +OMEGA_H_INLINE +int Track::size() +{ + return count; +} + +#endif diff --git a/src/pcms/print.cpp b/src/pcms/print.cpp index e40c7506..c103671e 100644 --- a/src/pcms/print.cpp +++ b/src/pcms/print.cpp @@ -1,20 +1,29 @@ #include "print.h" -namespace pcms { +namespace pcms +{ - FILE* pcms_stdout = stdout; - FILE* pcms_stderr = stderr; +FILE* pcms_stdout = stdout; +FILE* pcms_stderr = stderr; - FILE* getStdout() { return pcms_stdout; } - FILE* getStderr() { return pcms_stderr; } +FILE* getStdout() +{ + return pcms_stdout; +} +FILE* getStderr() +{ + return pcms_stderr; +} - void setStdout(FILE* out) { - assert(out != NULL); - pcms_stdout = out; - } +void setStdout(FILE* out) +{ + assert(out != NULL); + pcms_stdout = out; +} - void setStderr(FILE* err) { - assert(err != NULL); - pcms_stderr = err; - } -} \ No newline at end of file +void setStderr(FILE* err) +{ + assert(err != NULL); + pcms_stderr = err; +} +} // namespace pcms \ No newline at end of file diff --git a/src/pcms/print.h b/src/pcms/print.h index 532c4115..9354946b 100644 --- a/src/pcms/print.h +++ b/src/pcms/print.h @@ -2,43 +2,47 @@ #define PCMS_PRINT_H #ifdef PCMS_SPDLOG_ENABLED - #include "spdlog/spdlog.h" - #include +#include "spdlog/spdlog.h" +#include #endif #include -namespace pcms { - - #if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || defined(__SYCL_DEVICE_ONLY__) - #define ACTIVE_GPU_EXECUTION - #endif - - FILE* getStdout(); - FILE* getStderr(); - - void setStdout(FILE* out); - void setStderr(FILE* err); - - template - void printError(const char* fmt, const Args&... args) { - #if defined(PCMS_SPDLOG_ENABLED) && defined(PCMS_PRINT_ENABLED) - spdlog::error("{}", fmt::sprintf(fmt, args...)); - #elif defined(PCMS_PRINT_ENABLED) - fprintf(getStdout(), fmt, args...); - #endif - } - - template - KOKKOS_INLINE_FUNCTION - void printInfo(const char* fmt, const Args&... args) { - #if defined(PCMS_SPDLOG_ENABLED) && defined(PCMS_PRINT_ENABLED) && !defined(ACTIVE_GPU_EXECUTION) - spdlog::info("{}", fmt::sprintf(fmt, args...)); - #elif defined(PCMS_PRINT_ENABLED) && !defined(ACTIVE_GPU_EXECUTION) - fprintf(getStdout(), fmt, args...); - #endif - } +namespace pcms +{ +#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__) || \ + defined(__SYCL_DEVICE_ONLY__) +#define ACTIVE_GPU_EXECUTION +#endif + +FILE* getStdout(); +FILE* getStderr(); + +void setStdout(FILE* out); +void setStderr(FILE* err); + +template +void printError(const char* fmt, const Args&... args) +{ +#if defined(PCMS_SPDLOG_ENABLED) && defined(PCMS_PRINT_ENABLED) + spdlog::error("{}", fmt::sprintf(fmt, args...)); +#elif defined(PCMS_PRINT_ENABLED) + fprintf(getStdout(), fmt, args...); +#endif } -#endif //PCMS_PRINT_H \ No newline at end of file +template +KOKKOS_INLINE_FUNCTION void printInfo(const char* fmt, const Args&... args) +{ +#if defined(PCMS_SPDLOG_ENABLED) && defined(PCMS_PRINT_ENABLED) && \ + !defined(ACTIVE_GPU_EXECUTION) + spdlog::info("{}", fmt::sprintf(fmt, args...)); +#elif defined(PCMS_PRINT_ENABLED) && !defined(ACTIVE_GPU_EXECUTION) + fprintf(getStdout(), fmt, args...); +#endif +} + +} // namespace pcms + +#endif // PCMS_PRINT_H \ No newline at end of file diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 84257711..a23262b2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -102,16 +102,19 @@ if(Catch2_FOUND) if (PCMS_ENABLE_OMEGA_H) add_executable(field_transfer_example field_transfer_example.cpp) - target_link_libraries(field_transfer_example PUBLIC pcms::core) + target_link_libraries(field_transfer_example PUBLIC pcms::core pcms_interpolator) list(APPEND PCMS_UNIT_TEST_SOURCES test_field_transfer.cpp test_uniform_grid.cpp test_omega_h_copy.cpp test_point_search.cpp + test_mls_basis.cpp + test_rbf_interp.cpp + test_linear_solver.cpp ) endif () add_executable(unit_tests ${PCMS_UNIT_TEST_SOURCES}) - target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core) + target_link_libraries(unit_tests PUBLIC Catch2::Catch2 pcms::core pcms_interpolator) target_include_directories(unit_tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}) include(Catch) diff --git a/test/test_linear_solver.cpp b/test/test_linear_solver.cpp new file mode 100644 index 00000000..2f8664ec --- /dev/null +++ b/test/test_linear_solver.cpp @@ -0,0 +1,235 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace pcms; +using namespace pcms::detail; + +TEST_CASE("solver test") +{ + // This test computes P^TQP and P^TQ b for the normal equation P^TQP x = P^TQ + // b where P(n,m) (n >= m) is a rectangular matrix, Q(m,m) is a diagonal + // matrix with diagonal elements stored as vector and b(n,1) is a vector + SECTION("check convert normal equation ") + { + + int nvertices_target = 2; + + int nsupports = 3; + + int size = 2; + + Kokkos::View Q("phi vector", nvertices_target, nsupports); + + Kokkos::View b("rhs vector", nvertices_target, nsupports); + + Kokkos::View P("vandermonde matrix", nvertices_target, nsupports, + size); + + Kokkos::View TransPQP("moment matrix", nvertices_target, size, + size); + + Kokkos::View TransPQ("scaled matrix", nvertices_target, size, + nsupports); + + Kokkos::View TransPQb("transformed rhs", nvertices_target, size); + + Kokkos::View solution("solution unknown vector", nvertices_target, + size); + + auto host_Q = Kokkos::create_mirror_view(Q); + auto host_P = Kokkos::create_mirror_view(P); + auto host_b = Kokkos::create_mirror_view(b); + + host_Q(0, 0) = 1.0; + host_Q(0, 1) = 1.0; + host_Q(0, 2) = 1.0; + + host_Q(1, 0) = 3.0; + host_Q(1, 1) = 2.0; + host_Q(1, 2) = 1.0; + + Kokkos::deep_copy(Q, host_Q); + + host_P(0, 0, 0) = 1.0; + host_P(0, 0, 1) = 2.0; + host_P(0, 1, 0) = 3.0; + host_P(0, 1, 1) = 4.0; + host_P(0, 2, 0) = 5.0; + host_P(0, 2, 1) = 6.0; + + host_P(1, 0, 0) = 2.0; + host_P(1, 0, 1) = 3.0; + host_P(1, 1, 0) = 1.0; + host_P(1, 1, 1) = -1.0; + host_P(1, 2, 0) = 4.0; + host_P(1, 2, 1) = 1.0; + + Kokkos::deep_copy(P, host_P); + + host_b(0, 0) = 7.0; + host_b(0, 1) = 8.0; + host_b(0, 2) = 9.0; + + host_b(1, 0) = 5.0; + host_b(1, 1) = 6.0; + host_b(1, 2) = 7.0; + + Kokkos::deep_copy(b, host_b); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); + ScratchVecView phi(team.team_scratch(0), nsupports); + ScratchVecView support_values(team.team_scratch(0), nsupports); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + phi(j) = Q(i, j); + support_values(j) = b(i, j); + for (int k = 0; k < size; ++k) { + vandermonde_matrix(j, k) = P(i, j, k); + } + }); + + team.team_barrier(); + + auto result = convert_normal_equation(vandermonde_matrix, phi, + support_values, team); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { + TransPQb(i, j) = result.transformed_rhs(j); + for (int k = 0; k < size; ++k) { + TransPQP(i, j, k) = result.square_matrix(j, k); + } + }); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < size; ++k) { + TransPQ(i, k, j) = result.scaled_matrix(k, j); + } + }); + + team.team_barrier(); + + solve_matrix(result.square_matrix, result.transformed_rhs, team); + + team.team_barrier(); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), [=](int j) { + solution(i, j) = result.transformed_rhs(j); + }); + + team.team_barrier(); + }); + + auto host_result_lhs = Kokkos::create_mirror_view(TransPQP); + auto host_result_rhs = Kokkos::create_mirror_view(TransPQb); + auto host_result_scaled = Kokkos::create_mirror_view(TransPQ); + auto host_result_solution = Kokkos::create_mirror_view(solution); + + Kokkos::deep_copy(host_result_lhs, TransPQP); + Kokkos::deep_copy(host_result_rhs, TransPQb); + Kokkos::deep_copy(host_result_scaled, TransPQ); + Kokkos::deep_copy(host_result_solution, solution); + + Kokkos::View expected_lhs_matrix( + "expected lhs matrix", nvertices_target, size, size); + + Kokkos::View expected_rhs_vector( + "expected rhs vector", nvertices_target, size); + + Kokkos::View expected_scaled_matrix( + "expected scaled matrix", nvertices_target, size, nsupports); + + Kokkos::View expected_solution( + "expected solution", nvertices_target, size); + + expected_lhs_matrix(0, 0, 0) = 35.0; + expected_lhs_matrix(0, 0, 1) = 44.0; + expected_lhs_matrix(0, 1, 0) = 44.0; + expected_lhs_matrix(0, 1, 1) = 56.0; + + expected_rhs_vector(0, 0) = 76.0; + expected_rhs_vector(0, 1) = 100.0; + + expected_scaled_matrix(0, 0, 0) = 1.0; + expected_scaled_matrix(0, 0, 1) = 3.0; + expected_scaled_matrix(0, 0, 2) = 5.0; + expected_scaled_matrix(0, 1, 0) = 2.0; + expected_scaled_matrix(0, 1, 1) = 4.0; + expected_scaled_matrix(0, 1, 2) = 6.0; + + expected_solution(0, 0) = -6.0; + expected_solution(0, 1) = 6.5; + + expected_lhs_matrix(1, 0, 0) = 30.0; + expected_lhs_matrix(1, 0, 1) = 20.0; + expected_lhs_matrix(1, 1, 0) = 20.0; + expected_lhs_matrix(1, 1, 1) = 30.0; + + expected_rhs_vector(1, 0) = 70.0; + expected_rhs_vector(1, 1) = 40.0; + + expected_scaled_matrix(1, 0, 0) = 6.0; + expected_scaled_matrix(1, 0, 1) = 2.0; + expected_scaled_matrix(1, 0, 2) = 4.0; + expected_scaled_matrix(1, 1, 0) = 9.0; + expected_scaled_matrix(1, 1, 1) = -2.0; + expected_scaled_matrix(1, 1, 2) = 1.0; + + expected_solution(1, 0) = 2.6; + expected_solution(1, 1) = -0.4; + + for (int i = 0; i < nvertices_target; ++i) { + for (int j = 0; j < size; ++j) { + for (int l = 0; l < nsupports; ++l) { + printf("scaled_matrix (A^T Q) (%f, %f) at (%d,%d,%d)\n", + expected_scaled_matrix(i, j, l), host_result_scaled(i, j, l), + i, j, l); + REQUIRE_THAT( + expected_scaled_matrix(i, j, l), + Catch::Matchers::WithinAbs(host_result_scaled(i, j, l), 1E-10)); + } + } + for (int j = 0; j < size; ++j) { + for (int k = 0; k < size; ++k) { + printf("A^T Q A (%f, %f) at (%d,%d,%d)\n", + expected_lhs_matrix(i, j, k), host_result_lhs(i, j, k), i, j, + k); + REQUIRE_THAT( + expected_lhs_matrix(i, j, k), + Catch::Matchers::WithinAbs(host_result_lhs(i, j, k), 1E-10)); + } + } + for (int j = 0; j < size; ++j) { + printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_rhs_vector(i, j), + host_result_rhs(i, j), i, j); + REQUIRE_THAT(expected_rhs_vector(i, j), + Catch::Matchers::WithinAbs(host_result_rhs(i, j), 1E-10)); + } + + for (int j = 0; j < size; ++j) { + printf("A^T Q b: (%f,%f) at (%d,%d)\n", expected_solution(i, j), + host_result_solution(i, j), i, j); + REQUIRE_THAT( + expected_solution(i, j), + Catch::Matchers::WithinAbs(host_result_solution(i, j), 1E-10)); + } + } + } +} diff --git a/test/test_mls_basis.cpp b/test/test_mls_basis.cpp new file mode 100644 index 00000000..5664508d --- /dev/null +++ b/test/test_mls_basis.cpp @@ -0,0 +1,282 @@ +#include +#include +#include +#include +#include +#include + +using namespace pcms; + +TEST_CASE("basis test") +{ + SECTION("check basis slice lengths, size of the basis vector and basis " + "vector for degree = 3 and dim = 3") + { + + const int dim = 3; + const int degree = 3; + Kokkos::View array("array", degree, dim); + Kokkos::deep_copy(array, 0); + detail::calculate_basis_slice_lengths(array); + auto size = detail::calculate_basis_vector_size(array); + + int expected[degree][dim] = {{1, 1, 1}, {1, 2, 3}, {1, 3, 6}}; + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + REQUIRE(array(i, j) == expected[i][j]); + } + } + + REQUIRE(size == 20); + + IntDeviceMatView d_array("array in device", degree, dim); + auto array_hd = Kokkos::create_mirror_view(d_array); + Kokkos::deep_copy(array_hd, array); + Kokkos::deep_copy(d_array, array_hd); + + int nvertices_target = 2; + Omega_h::Matrix<3, 2> coords{{2.0, 3.0, 4.0}, {0.4, 0.5, 0.2}}; + Kokkos::View results("stores results", nvertices_target, size); + + auto host_results = Kokkos::create_mirror_view(results); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchVecView basis_vector(team.team_scratch(0), size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { basis_vector(j) = 0; }); + + Coord target_point; + target_point.x = coords[i][0]; + target_point.y = coords[i][1]; + target_point.z = coords[i][2]; + + detail::eval_basis_vector(d_array, target_point, basis_vector); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { results(i, j) = basis_vector(j); }); + }); + + Kokkos::deep_copy(host_results, results); + Kokkos::View expected_basis("expected vector", + size); + std::vector names = {"1", "x", "y", "z", "x^2", + "xy", "y^2", "xz", "yz", "z^2", + "x^3", "x^2y", "xy^2", "Y^3", "x^2z", + "xyz", "y^2z", "xz^2", "yz^2", "z^3"}; + + for (int i = 0; i < nvertices_target; ++i) { + double x = coords[i][0]; + double y = coords[i][1]; + double z = coords[i][2]; + expected_basis(0) = 1; + expected_basis(1) = x; // (x) + expected_basis(2) = y; // (y) + expected_basis(3) = z; // (z) + expected_basis(4) = x * x; // (x^2) + expected_basis(5) = x * y; // (xy) + expected_basis(6) = y * y; // (y^2) + expected_basis(7) = x * z; // (xz) + expected_basis(8) = y * z; // (yz) + expected_basis(9) = z * z; // (z^2) + expected_basis(10) = x * x * x; // (x^3) + expected_basis(11) = x * x * y; // (x^2y) + expected_basis(12) = x * y * y; // (xy^2) + expected_basis(13) = y * y * y; // (y^3) + expected_basis(14) = x * x * z; // (x^2z) + expected_basis(15) = x * y * z; // (xyz) + expected_basis(16) = y * y * z; // (y^2z) + expected_basis(17) = x * z * z; // (xz^2) + expected_basis(18) = y * z * z; // (yz^2) + expected_basis(19) = z * z * z; // (z^3) + for (int j = 0; j < size; ++j) { + std::cout << names[j] << " " << expected_basis(j) << " " + << host_results(i, j) << "\n"; + REQUIRE(expected_basis(j) == host_results(i, j)); + } + } + } + + SECTION("check basis slice lengths, size of the basis vector and basis " + "vector for degree = 1 and dim = 3") + { + + const int dim = 3; + const int degree = 1; + Kokkos::View array("array", degree, dim); + Kokkos::deep_copy(array, 0); + detail::calculate_basis_slice_lengths(array); + + auto size = detail::calculate_basis_vector_size(array); + + int expected[degree][dim] = { + {1, 1, 1}, + }; + + for (int i = 0; i < degree; ++i) { + for (int j = 0; j < dim; ++j) { + REQUIRE(array(i, j) == expected[i][j]); + } + } + + REQUIRE(size == 4); + + IntDeviceMatView d_array("array in device", degree, dim); + auto array_hd = Kokkos::create_mirror_view(d_array); + Kokkos::deep_copy(array_hd, array); + Kokkos::deep_copy(d_array, array_hd); + + int nvertices_target = 2; + Omega_h::Matrix<3, 2> coords{{2.0, 3.0, 4.0}, {0.4, 0.5, 0.2}}; + Kokkos::View results("stores results", nvertices_target, size); + + auto host_results = Kokkos::create_mirror_view(results); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchVecView basis_vector(team.team_scratch(0), size); + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { basis_vector(j) = 0; }); + + Coord target_point; + target_point.x = coords[i][0]; + target_point.y = coords[i][1]; + target_point.z = coords[i][2]; + + detail::eval_basis_vector(d_array, target_point, basis_vector); + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, size), + [=](int j) { results(i, j) = basis_vector(j); }); + }); + + Kokkos::deep_copy(host_results, results); + Kokkos::View expected_basis("expected vector", + size); + std::vector names = {"1", "x", "y", "z"}; + + for (int i = 0; i < nvertices_target; ++i) { + double x = coords[i][0]; + double y = coords[i][1]; + double z = coords[i][2]; + expected_basis(0) = 1; + expected_basis(1) = x; // (x) + expected_basis(2) = y; // (y) + expected_basis(3) = z; // (z) + for (int j = 0; j < size; ++j) { + std::cout << names[j] << " " << expected_basis(j) << " " + << host_results(i, j) << "\n"; + REQUIRE(expected_basis(j) == host_results(i, j)); + } + } + } + + SECTION("check vandermonde matrix") + { + + const int dim = 1; + const int degree = 3; + + Kokkos::View host_slice_length("array", degree, + dim); + Kokkos::deep_copy(host_slice_length, 0); + detail::calculate_basis_slice_lengths(host_slice_length); + + IntDeviceMatView slice_length("slice array in device", degree, dim); + auto slice_length_hd = Kokkos::create_mirror_view(slice_length); + Kokkos::deep_copy(slice_length_hd, host_slice_length); + Kokkos::deep_copy(slice_length, slice_length_hd); + auto size = detail::calculate_basis_vector_size(host_slice_length); + int nvertices_target = 2; + int nsupports = 5; + Kokkos::View points("vandermonde matrix", nvertices_target, + nsupports); + + Kokkos::View vandermonde_matrix_combined( + "stores vandermonde matrix", nvertices_target, nsupports, size); + + auto host_points = Kokkos::create_mirror_view(points); + auto host_vandermonde_matrix_combined = + Kokkos::create_mirror_view(vandermonde_matrix_combined); + + host_points(0, 1) = 0.5; + host_points(0, 2) = 0.1; + host_points(0, 3) = 0.3; + host_points(0, 4) = 0.2; + host_points(0, 5) = 0.6; + + host_points(1, 1) = 1.1; + host_points(1, 2) = 2.6; + host_points(1, 3) = 0.8; + host_points(1, 4) = 0.4; + host_points(1, 5) = 1.7; + + Kokkos::deep_copy(points, host_points); + + team_policy tp(nvertices_target, Kokkos::AUTO); + Kokkos::parallel_for( + "inside team", tp.set_scratch_size(0, Kokkos::PerTeam(200)), + KOKKOS_LAMBDA(const member_type& team) { + int i = team.league_rank(); + ScratchMatView vandermonde_matrix(team.team_scratch(0), nsupports, + size); + + ScratchMatView local_source_points(team.team_scratch(0), nsupports, 1); + for (int j = 0; j < nsupports; ++j) { + local_source_points(j, 0) = points(i, j); + } + + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, nsupports), + [=](int j) { + for (int k = 0; k < size; ++k) { + vandermonde_matrix(j, k) = 0.0; + } + }); + + team.team_barrier(); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + detail::create_vandermonde_matrix(local_source_points, j, + slice_length, vandermonde_matrix); + }); + + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, nsupports), [=](int j) { + for (int k = 0; k < size; ++k) { + vandermonde_matrix_combined(i, j, k) = vandermonde_matrix(j, k); + } + }); + }); + + Kokkos::deep_copy(host_vandermonde_matrix_combined, + vandermonde_matrix_combined); + Kokkos::View expected_vandermonde_matrix( + "expected vector", nvertices_target, nsupports, size); + + for (int i = 0; i < nvertices_target; ++i) { + for (int j = 0; j < nsupports; ++j) { + double x = host_points(i, j); + expected_vandermonde_matrix(i, j, 0) = 1; + expected_vandermonde_matrix(i, j, 1) = x; // (x) + expected_vandermonde_matrix(i, j, 2) = x * x; // (x^2) + expected_vandermonde_matrix(i, j, 3) = x * x * x; // (x^3) + } + } + + for (int i = 0; i < nvertices_target; ++i) { + for (int j = 0; j < nsupports; ++j) { + for (int k = 0; k < size; ++k) { + REQUIRE(expected_vandermonde_matrix(i, j, k) == + host_vandermonde_matrix_combined(i, j, k)); + } + } + } + } +} diff --git a/test/test_ohClassPtn.cpp b/test/test_ohClassPtn.cpp index 5e653063..94c9f355 100644 --- a/test/test_ohClassPtn.cpp +++ b/test/test_ohClassPtn.cpp @@ -44,7 +44,7 @@ int main(int argc, char** argv) adios2::Params params{{"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; auto channel = rdv.CreateAdiosChannel(name, params, redev::TransportType::BP4); - auto commPair = channel.CreateComm(name,rdv.GetMPIComm()); + auto commPair = channel.CreateComm(name, rdv.GetMPIComm()); // build dest, offsets, and permutation arrays ts::OutMsg appOut = @@ -103,6 +103,6 @@ int main(int argc, char** argv) ts::checkAndAttachIds(mesh, "inVtxGids", msgs, rdvInPermute); ts::writeVtk(mesh, "rdvInGids", iter); } // end non-rdv -> rdv - } // end iter loop + } // end iter loop return 0; } diff --git a/test/test_ohClassPtn_appRibPtn.cpp b/test/test_ohClassPtn_appRibPtn.cpp index 612df09b..4179f831 100644 --- a/test/test_ohClassPtn_appRibPtn.cpp +++ b/test/test_ohClassPtn_appRibPtn.cpp @@ -55,7 +55,7 @@ int main(int argc, char** argv) adios2::Params params{{"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; auto channel = rdv.CreateAdiosChannel(name, params, redev::TransportType::BP4); - auto commPair = channel.CreateComm(name,rdv.GetMPIComm()); + auto commPair = channel.CreateComm(name, rdv.GetMPIComm()); // Build the dest, offsets, and permutation arrays for the forward // send from non-rendezvous to rendezvous. @@ -151,6 +151,6 @@ int main(int argc, char** argv) } } } // end rdv -> non-rdv - } // end iter loop + } // end iter loop return 0; } diff --git a/test/test_ohOverlap.cpp b/test/test_ohOverlap.cpp index 56d89e90..8ed9f346 100644 --- a/test/test_ohOverlap.cpp +++ b/test/test_ohOverlap.cpp @@ -12,26 +12,26 @@ namespace ts = test_support; - -//TODO - use attributes on the geometric model to -// define which model entities are in the -// buffer/blended/overlap regions. -// This is currently hardcoded for the D3D -// case in the coupling data repo. +// TODO - use attributes on the geometric model to +// define which model entities are in the +// buffer/blended/overlap regions. +// This is currently hardcoded for the D3D +// case in the coupling data repo. /** * return 1 if the specificed model entity is part of the overlap region, 0 * otherwise */ -OMEGA_H_DEVICE Omega_h::I8 isModelEntInOverlap(const int dim, const int id) { - //the TOMMS generated geometric model has - //entity IDs that increase with the distance - //from the magnetic axis - if (dim == 2 && (id >= 22 && id <= 34) ) { - return 1; - } else if (dim == 1 && (id >= 21 && id <= 34) ) { - return 1; - } else if (dim == 0 && (id >= 21 && id <= 34) ) { - return 1; +OMEGA_H_DEVICE Omega_h::I8 isModelEntInOverlap(const int dim, const int id) +{ + // the TOMMS generated geometric model has + // entity IDs that increase with the distance + // from the magnetic axis + if (dim == 2 && (id >= 22 && id <= 34)) { + return 1; + } else if (dim == 1 && (id >= 21 && id <= 34)) { + return 1; + } else if (dim == 0 && (id >= 21 && id <= 34)) { + return 1; } return 0; } @@ -41,12 +41,14 @@ OMEGA_H_DEVICE Omega_h::I8 isModelEntInOverlap(const int dim, const int id) { * vertex is classified on a model entity in the closure of the geometric model * faces forming the overlap region; the value is 0 otherwise. */ -Omega_h::Read markOverlapMeshEntities(Omega_h::Mesh& mesh) { - //transfer vtx classification to host +Omega_h::Read markOverlapMeshEntities(Omega_h::Mesh& mesh) +{ + // transfer vtx classification to host auto classIds = mesh.get_array(0, "class_id"); auto classDims = mesh.get_array(0, "class_dim"); auto isOverlap = Omega_h::Write(classIds.size(), "isOverlap"); - auto markOverlap = OMEGA_H_LAMBDA(int i) { + auto markOverlap = OMEGA_H_LAMBDA(int i) + { isOverlap[i] = isModelEntInOverlap(classDims[i], classIds[i]); }; Omega_h::parallel_for(classIds.size(), markOverlap); @@ -55,207 +57,229 @@ Omega_h::Read markOverlapMeshEntities(Omega_h::Mesh& mesh) { return isOverlap_r; } -redev::ClassPtn setupClientPartition(Omega_h::Mesh& mesh) { - ts::writeVtk(mesh,"appPartition",0); +redev::ClassPtn setupClientPartition(Omega_h::Mesh& mesh) +{ + ts::writeVtk(mesh, "appPartition", 0); auto ptn = ts::ClassificationPartition(); return redev::ClassPtn(MPI_COMM_WORLD, ptn.ranks, ptn.modelEnts); } -redev::ClassPtn setupServerPartition(Omega_h::Mesh& mesh, std::string_view cpnFileName) { +redev::ClassPtn setupServerPartition(Omega_h::Mesh& mesh, + std::string_view cpnFileName) +{ auto ohComm = mesh.comm(); - const auto facePartition = !ohComm->rank() ? ts::readClassPartitionFile(cpnFileName) : - ts::ClassificationPartition(); + const auto facePartition = !ohComm->rank() + ? ts::readClassPartitionFile(cpnFileName) + : ts::ClassificationPartition(); ts::migrateMeshElms(mesh, facePartition); auto ptn = ts::CreateClassificationPartition(mesh); return redev::ClassPtn(MPI_COMM_WORLD, ptn.ranks, ptn.modelEnts); } -Omega_h::HostRead markMeshOverlapRegion(Omega_h::Mesh& mesh) { +Omega_h::HostRead markMeshOverlapRegion(Omega_h::Mesh& mesh) +{ auto isOverlap = markOverlapMeshEntities(mesh); return Omega_h::HostRead(isOverlap); } void clientCheckIncomingMessages(Omega_h::Mesh& mesh, - Omega_h::HostRead isOverlap_h, - const std::vector& msgsIn, - const ts::OutMsg& appOut) { - //check incoming messages are in the correct order + Omega_h::HostRead isOverlap_h, + const std::vector& msgsIn, + const ts::OutMsg& appOut) +{ + // check incoming messages are in the correct order auto gids = mesh.globals(0); auto gids_h = Omega_h::HostRead(gids); - int j=0; - for(size_t i=0; irank(); - if(argc != 4) { - if(!rank) { - std::cerr << "Usage: " << argv[0] << " <1=isRendezvousApp,0=isParticipant> /path/to/omega_h/mesh /path/to/partitionFile.cpn\n"; + if (argc != 4) { + if (!rank) { + std::cerr << "Usage: " << argv[0] + << " <1=isRendezvousApp,0=isParticipant> /path/to/omega_h/mesh " + "/path/to/partitionFile.cpn\n"; } exit(EXIT_FAILURE); } OMEGA_H_CHECK(argc == 4); const auto isRdv = atoi(argv[1]); - REDEV_ALWAYS_ASSERT(isRdv==1 || isRdv==0); + REDEV_ALWAYS_ASSERT(isRdv == 1 || isRdv == 0); printf("VERSION: %s\n", pcms::pcms_version()); Omega_h::Mesh mesh(&lib); Omega_h::binary::read(argv[2], lib.world(), &mesh); const std::string name = "meshVtxIds"; - if(isRdv) { + if (isRdv) { /////////////////// SERVER ///////////////////////// std::string_view cpnFileName(argv[3]); - auto partition = setupServerPartition(mesh,cpnFileName); - auto rdv = redev::Redev(MPI_COMM_WORLD,redev::Partition{partition},static_cast(isRdv)); - adios2::Params params{ {"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; - auto channel = rdv.CreateAdiosChannel(std::string(name),params,redev::TransportType::BP4); - auto comm = channel.CreateComm(std::string(name),rdv.GetMPIComm()); + auto partition = setupServerPartition(mesh, cpnFileName); + auto rdv = redev::Redev(MPI_COMM_WORLD, redev::Partition{partition}, + static_cast(isRdv)); + adios2::Params params{{"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; + auto channel = rdv.CreateAdiosChannel(std::string(name), params, + redev::TransportType::BP4); + auto comm = + channel.CreateComm(std::string(name), rdv.GetMPIComm()); auto isOverlap_h = markMeshOverlapRegion(mesh); - ts::OutMsg appOut = ts::OutMsg(); //FIXME is this needed? - //TODO - Document why rendezvous needs two permutations but the app does not + ts::OutMsg appOut = ts::OutMsg(); // FIXME is this needed? + // TODO - Document why rendezvous needs two permutations but the app does + // not redev::GOs rdvInPermute; ts::CSR rdvOutPermute; ts::OutMsg rdvOut; ////////////////////////////////////////////////////// - //the non-rendezvous app sends global vtx ids to rendezvous + // the non-rendezvous app sends global vtx ids to rendezvous ////////////////////////////////////////////////////// auto start = std::chrono::steady_clock::now(); channel.BeginReceiveCommunicationPhase(); const auto msgsIn = comm.Recv(redev::Mode::Synchronous); channel.EndReceiveCommunicationPhase(); - ts::getAndPrintTime(start,name + " rdvRead",rank); - //We have received the first input message in the rendezvous - //processes. Using the meta data of the incoming message we will: + ts::getAndPrintTime(start, name + " rdvRead", rank); + // We have received the first input message in the rendezvous + // processes. Using the meta data of the incoming message we will: //- compute the permutation from the incoming vertex global ids to the - // on-process global ids - //- set the message layout for the reverse (rendezvous->non-rendezvous) send by - // building the dest and offsets array. + // on-process global ids + //- set the message layout for the reverse (rendezvous->non-rendezvous) send + //by + // building the dest and offsets array. //- compute the reverse send's permutation array using the layout of - // global vertex ids in 'msgsIn'. - //These operations only need to be done once per coupling as long as - //the topology and partition of the rendezvous and non-rendezvous meshes - //remains the same. + // global vertex ids in 'msgsIn'. + // These operations only need to be done once per coupling as long as + // the topology and partition of the rendezvous and non-rendezvous meshes + // remains the same. auto rdvIn = comm.GetInMessageLayout(); rdvInPermute = ts::getRdvPermutation(mesh, msgsIn); - rdvOut = ts::prepareRdvOutMessage(mesh,rdvIn); - comm.SetOutMessageLayout(rdvOut.dest,rdvOut.offset); + rdvOut = ts::prepareRdvOutMessage(mesh, rdvIn); + comm.SetOutMessageLayout(rdvOut.dest, rdvOut.offset); rdvOutPermute = ts::getRdvOutPermutation(mesh, msgsIn); - //attach ids to the mesh + // attach ids to the mesh ts::checkAndAttachIds(mesh, "inVtxGids", msgsIn, rdvInPermute); - ts::writeVtk(mesh,"rdvInGids",0); + ts::writeVtk(mesh, "rdvInGids", 0); ////////////////////////////////////////////////////// - //the rendezvous app sends global vtx ids to the client + // the rendezvous app sends global vtx ids to the client ////////////////////////////////////////////////////// - //fill message array + // fill message array auto gids = mesh.globals(0); auto gids_h = Omega_h::HostRead(gids); redev::GOs msgs(rdvOutPermute.off.back()); - for(int i=0; i(isRdv)); - adios2::Params params{ {"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; - auto channel = rdv.CreateAdiosChannel(std::string(name),params,redev::TransportType::BP4); - auto comm = channel.CreateComm(std::string(name),rdv.GetMPIComm()); + auto rdv = + redev::Redev(MPI_COMM_WORLD, redev::Partition{std::move(partition)}, + static_cast(isRdv)); + adios2::Params params{{"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; + auto channel = rdv.CreateAdiosChannel(std::string(name), params, + redev::TransportType::BP4); + auto comm = + channel.CreateComm(std::string(name), rdv.GetMPIComm()); auto isOverlap_h = markMeshOverlapRegion(mesh); ////////////////////////////////////////////////////// - //the non-rendezvous app sends global vtx ids to rendezvous + // the non-rendezvous app sends global vtx ids to rendezvous ////////////////////////////////////////////////////// - ts::OutMsg appOut = ts::prepareAppOutMessage(mesh, std::get(rdv.GetPartition())); - //Build the dest, offsets, and permutation arrays for the forward - //send from client to rendezvous/server. - comm.SetOutMessageLayout(appOut.dest, appOut.offset); //TODO - can this be moved to the AdiosComm ctor - //fill message array + ts::OutMsg appOut = ts::prepareAppOutMessage( + mesh, std::get(rdv.GetPartition())); + // Build the dest, offsets, and permutation arrays for the forward + // send from client to rendezvous/server. + comm.SetOutMessageLayout( + appOut.dest, + appOut.offset); // TODO - can this be moved to the AdiosComm ctor + // fill message array auto gids = mesh.globals(0); auto gids_h = Omega_h::HostRead(gids); - redev::GOs msgs(isOverlap_h.size(),0); - int j=0; - for(pcms::LO i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace Omega_h; +using namespace pcms; + +KOKKOS_INLINE_FUNCTION +double func(Coord& p, int degree) +{ + [[maybe_unused]] auto x = p.x; + [[maybe_unused]] auto y = p.y; + if (degree == 0) { + return 3; + } else if (degree == 1) { + return x + y; + } else if (degree == 2) { + return pow(x, 2) + pow(y, 2); + } else if (degree == 3) { + + return pow(x, 3) + pow(y, 3); + } else { + printf("No polynomials with degree = %d\n", degree); + } + return -1; +} + +void test(Mesh& mesh, Real cutoffDistance, int degree, LO min_num_supports, + Reals source_values, Reals exact_target_values, + Reals source_coordinates, Reals target_coordinates) +{ + + int dim = mesh.dim(); + Real tolerance = 0.0005; + + std::vector rbf_types = { + RadialBasisFunction::RBF_GAUSSIAN, RadialBasisFunction::RBF_C4, + RadialBasisFunction::RBF_CONST + + }; + + SupportResults support = + searchNeighbors(mesh, cutoffDistance, min_num_supports); + + for (const auto& rbf : rbf_types) { + auto approx_target_values = + mls_interpolation(source_values, source_coordinates, target_coordinates, + support, dim, degree, rbf); + + auto host_approx_target_values = HostRead(approx_target_values); + + auto host_exact_target_values = HostRead(exact_target_values); + + int m = exact_target_values.size(); + int n = approx_target_values.size(); + + REQUIRE(m == n); + + for (size_t i = 0; i < m; ++i) { + CHECK_THAT( + host_exact_target_values[i], + Catch::Matchers::WithinAbs(host_approx_target_values[i], tolerance)); + } + } +} +// Test cases for centroid to node mapping using MLS +TEST_CASE("mls_interp_test") +{ + + auto lib = Library{}; + auto world = lib.world(); + auto rank = lib.world()->rank(); + auto mesh = build_box(world, OMEGA_H_SIMPLEX, 1, 1, 1, 10, 10, 0, false); + + Real cutoffDistance = 0.3; + cutoffDistance = cutoffDistance * cutoffDistance; + + const auto dim = mesh.dim(); + + const auto& target_coordinates = mesh.coords(); + + const auto& nfaces = mesh.nfaces(); + + const auto& ntargets = mesh.nverts(); + + Write source_coordinates( + dim * nfaces, 0, "stores coordinates of cell centroid of each tri element"); + + const auto& faces2nodes = mesh.ask_down(FACE, VERT).ab2b; + + Kokkos::parallel_for( + "calculate the centroid in each tri element", nfaces, + OMEGA_H_LAMBDA(const LO id) { + const auto current_el_verts = gather_verts<3>(faces2nodes, id); + const Omega_h::Few, 3> current_el_vert_coords = + gather_vectors<3, 2>(target_coordinates, current_el_verts); + auto centroid = average(current_el_vert_coords); + int index = 2 * id; + source_coordinates[index] = centroid[0]; + source_coordinates[index + 1] = centroid[1]; + }); + + Points source_points; + source_points.coordinates = + PointsViewType("Number of local source supports", nfaces); + Kokkos::parallel_for( + "target points", nfaces, KOKKOS_LAMBDA(int j) { + source_points.coordinates(j).x = source_coordinates[j * dim]; + source_points.coordinates(j).y = source_coordinates[j * dim + 1]; + }); + + Points target_points; + + target_points.coordinates = + PointsViewType("Number of local source supports", mesh.nverts()); + Kokkos::parallel_for( + "target points", mesh.nverts(), KOKKOS_LAMBDA(int j) { + target_points.coordinates(j).x = target_coordinates[j * dim]; + target_points.coordinates(j).y = target_coordinates[j * dim + 1]; + }); + + SECTION("test interpolation degree 1, function degree 0") + { + + int degree = 1; + LO min_num_supports = 10; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree - 1); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree - 1); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } + + SECTION("test interpolation degree 1, function degree 1") + { + + int degree = 1; + LO min_num_supports = 10; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } + + SECTION("test interpo degree 2 poly degree 0") + { + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree - 2); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree - 2); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } + + SECTION("test interpolation degree 2 poly degree 1") + { + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree - 1); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree - 1); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } + + SECTION("test interpolation degree 2, function degree 2") + { + + int degree = 2; + LO min_num_supports = 16; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } + + SECTION("test interpolation degree 3, function degree 2") + { + + int degree = 3; + LO min_num_supports = 20; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree - 1); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree - 1); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } + + SECTION("test interpolation degree 3, function degree 3") + { + + int degree = 3; + LO min_num_supports = 20; + + Write source_values(nfaces, 0, "exact target values"); + + Kokkos::parallel_for( + nfaces, KOKKOS_LAMBDA(int i) { + source_values[i] = func(source_points.coordinates(i), degree); + }); + + Write exact_target_values(mesh.nverts(), 0, "exact target values"); + + Kokkos::parallel_for( + mesh.nverts(), KOKKOS_LAMBDA(int i) { + exact_target_values[i] = func(target_points.coordinates(i), degree); + }); + + test(mesh, cutoffDistance, degree, min_num_supports, Reals(source_values), + Reals(exact_target_values), Reals(source_coordinates), + Reals(target_coordinates)); + } +} diff --git a/test/test_twoClientOverlap.cpp b/test/test_twoClientOverlap.cpp index 77dc1ea2..52e6506b 100644 --- a/test/test_twoClientOverlap.cpp +++ b/test/test_twoClientOverlap.cpp @@ -11,26 +11,26 @@ namespace ts = test_support; - -//TODO - use attributes on the geometric model to -// define which model entities are in the -// buffer/blended/overlap regions. -// This is currently hardcoded for the D3D -// case in the coupling data repo. +// TODO - use attributes on the geometric model to +// define which model entities are in the +// buffer/blended/overlap regions. +// This is currently hardcoded for the D3D +// case in the coupling data repo. /** * return 1 if the specificed model entity is part of the overlap region, 0 * otherwise */ -OMEGA_H_DEVICE Omega_h::I8 isModelEntInOverlap(const int dim, const int id) { - //the TOMMS generated geometric model has - //entity IDs that increase with the distance - //from the magnetic axis - if (dim == 2 && (id >= 22 && id <= 34) ) { - return 1; - } else if (dim == 1 && (id >= 21 && id <= 34) ) { - return 1; - } else if (dim == 0 && (id >= 21 && id <= 34) ) { - return 1; +OMEGA_H_DEVICE Omega_h::I8 isModelEntInOverlap(const int dim, const int id) +{ + // the TOMMS generated geometric model has + // entity IDs that increase with the distance + // from the magnetic axis + if (dim == 2 && (id >= 22 && id <= 34)) { + return 1; + } else if (dim == 1 && (id >= 21 && id <= 34)) { + return 1; + } else if (dim == 0 && (id >= 21 && id <= 34)) { + return 1; } return 0; } @@ -40,12 +40,14 @@ OMEGA_H_DEVICE Omega_h::I8 isModelEntInOverlap(const int dim, const int id) { * vertex is classified on a model entity in the closure of the geometric model * faces forming the overlap region; the value is 0 otherwise. */ -Omega_h::Read markOverlapMeshEntities(Omega_h::Mesh& mesh) { - //transfer vtx classification to host +Omega_h::Read markOverlapMeshEntities(Omega_h::Mesh& mesh) +{ + // transfer vtx classification to host auto classIds = mesh.get_array(0, "class_id"); auto classDims = mesh.get_array(0, "class_dim"); auto isOverlap = Omega_h::Write(classIds.size(), "isOverlap"); - auto markOverlap = OMEGA_H_LAMBDA(int i) { + auto markOverlap = OMEGA_H_LAMBDA(int i) + { isOverlap[i] = isModelEntInOverlap(classDims[i], classIds[i]); }; Omega_h::parallel_for(classIds.size(), markOverlap); @@ -54,164 +56,188 @@ Omega_h::Read markOverlapMeshEntities(Omega_h::Mesh& mesh) { return isOverlap_r; } -redev::ClassPtn setupClientPartition(Omega_h::Mesh& mesh) { - ts::writeVtk(mesh,"appPartition",0); +redev::ClassPtn setupClientPartition(Omega_h::Mesh& mesh) +{ + ts::writeVtk(mesh, "appPartition", 0); auto ptn = ts::ClassificationPartition(); return redev::ClassPtn(MPI_COMM_WORLD, ptn.ranks, ptn.modelEnts); } -redev::ClassPtn setupServerPartition(Omega_h::Mesh& mesh, std::string_view cpnFileName) { +redev::ClassPtn setupServerPartition(Omega_h::Mesh& mesh, + std::string_view cpnFileName) +{ auto ohComm = mesh.comm(); - const auto facePartition = !ohComm->rank() ? ts::readClassPartitionFile(cpnFileName) : - ts::ClassificationPartition(); + const auto facePartition = !ohComm->rank() + ? ts::readClassPartitionFile(cpnFileName) + : ts::ClassificationPartition(); ts::migrateMeshElms(mesh, facePartition); auto ptn = ts::CreateClassificationPartition(mesh); return redev::ClassPtn(MPI_COMM_WORLD, ptn.ranks, ptn.modelEnts); } -auto setupComms(redev::Redev& rdv, std::string clientName, const int clientId) { +auto setupComms(redev::Redev& rdv, std::string clientName, const int clientId) +{ const bool isSST = false; - adios2::Params params{ {"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; - REDEV_ALWAYS_ASSERT(clientId == 0 || clientId ==1); - return rdv.CreateAdiosChannel(clientName, params,static_cast(isSST) ); + adios2::Params params{{"Streaming", "On"}, {"OpenTimeoutSecs", "60"}}; + REDEV_ALWAYS_ASSERT(clientId == 0 || clientId == 1); + return rdv.CreateAdiosChannel(clientName, params, + static_cast(isSST)); } -Omega_h::HostRead markMeshOverlapRegion(Omega_h::Mesh& mesh) { +Omega_h::HostRead markMeshOverlapRegion(Omega_h::Mesh& mesh) +{ auto isOverlap = markOverlapMeshEntities(mesh); return Omega_h::HostRead(isOverlap); } void clientCheckIncomingMessages(Omega_h::Mesh& mesh, - Omega_h::HostRead isOverlap_h, - const std::vector& msgsIn, - const ts::OutMsg& appOut) { - //check incoming messages are in the correct order + Omega_h::HostRead isOverlap_h, + const std::vector& msgsIn, + const ts::OutMsg& appOut) +{ + // check incoming messages are in the correct order auto gids = mesh.globals(0); auto gids_h = Omega_h::HostRead(gids); - int j=0; - for(size_t i=0; irank(); - if(!rank) fprintf(stderr, "clientId %d\n", clientId); + if (!rank) + fprintf(stderr, "clientId %d\n", clientId); auto partition = setupClientPartition(mesh); - auto rdv = redev::Redev(MPI_COMM_WORLD,redev::Partition{std::move(partition)},static_cast(isRdv)); + auto rdv = + redev::Redev(MPI_COMM_WORLD, redev::Partition{std::move(partition)}, + static_cast(isRdv)); std::stringstream clientName; clientName << fieldName << "Client" << clientId; - auto channel = setupComms(rdv,clientName.str(),clientId); + auto channel = setupComms(rdv, clientName.str(), clientId); auto comm = channel.CreateComm(clientName.str(), rdv.GetMPIComm()); auto isOverlap_h = markMeshOverlapRegion(mesh); ////////////////////////////////////////////////////// - //the non-rendezvous app sends global vtx ids to rendezvous + // the non-rendezvous app sends global vtx ids to rendezvous ////////////////////////////////////////////////////// - ts::OutMsg appOut = ts::prepareAppOutMessage(mesh, std::get(rdv.GetPartition())); - //Build the dest, offsets, and permutation arrays for the forward - //send from client to rendezvous/server. - comm.SetOutMessageLayout(appOut.dest, appOut.offset); //TODO - can this be moved to the AdiosComm ctor - //fill message array + ts::OutMsg appOut = ts::prepareAppOutMessage( + mesh, std::get(rdv.GetPartition())); + // Build the dest, offsets, and permutation arrays for the forward + // send from client to rendezvous/server. + comm.SetOutMessageLayout( + appOut.dest, + appOut.offset); // TODO - can this be moved to the AdiosComm ctor + // fill message array auto gids = mesh.globals(0); auto gids_h = Omega_h::HostRead(gids); - redev::GOs msgs(isOverlap_h.size(),0); - int j=0; - for(size_t i=0; i& comm, Omega_h::Mesh& mesh, - std::string_view fieldName, const int rank, const int clientId) { +void serverReceiveFromClient(ClientMetaData& clientMeta, + redev::Channel& channel, + redev::BidirectionalComm& comm, + Omega_h::Mesh& mesh, std::string_view fieldName, + const int rank, const int clientId) +{ std::stringstream ss; ss << fieldName << " rdvRead clientId " << clientId; auto start = std::chrono::steady_clock::now(); channel.BeginReceiveCommunicationPhase(); const auto msgsIn = comm.Recv(redev::Mode::Synchronous); channel.EndReceiveCommunicationPhase(); - ts::getAndPrintTime(start,ss.str(),rank); + ts::getAndPrintTime(start, ss.str(), rank); auto rdvIn = comm.GetInMessageLayout(); - //setup outbound meta data - // FIXME: this assumes we know the correct order of the data already! - // If we transfer data/msg that's not GID we can't do this comparison on the - // message directly, so the global ids may need to be sent during an initialization - // phase + // setup outbound meta data + // FIXME: this assumes we know the correct order of the data already! + // If we transfer data/msg that's not GID we can't do this comparison on the + // message directly, so the global ids may need to be sent during an + // initialization phase clientMeta.inPermute = ts::getRdvPermutation(mesh, msgsIn); - clientMeta.outMsg = ts::prepareRdvOutMessage(mesh,rdvIn); + clientMeta.outMsg = ts::prepareRdvOutMessage(mesh, rdvIn); // FIXME is this necessary to set the outgoing message layout here? The // reciever shouldn't need to know the layout of the sender - comm.SetOutMessageLayout(clientMeta.outMsg.dest,clientMeta.outMsg.offset); + comm.SetOutMessageLayout(clientMeta.outMsg.dest, clientMeta.outMsg.offset); clientMeta.outPermute = ts::getRdvOutPermutation(mesh, msgsIn); - //attach ids to the mesh + // attach ids to the mesh ss.str(""); ss << "inVtxGidsClientId" << clientId; ts::checkAndAttachIds(mesh, ss.str(), msgsIn, clientMeta.inPermute); } void serverSendToClient(ClientMetaData& clientMeta, redev::Channel& channel, - redev::BidirectionalComm& comm, Omega_h::Mesh& mesh, - Omega_h::HostRead& isOverlap_h, - std::string_view fieldName, const int rank, const int clientId) { - //fill message array + redev::BidirectionalComm& comm, + Omega_h::Mesh& mesh, + Omega_h::HostRead& isOverlap_h, + std::string_view fieldName, const int rank, + const int clientId) +{ + // fill message array auto gids = mesh.globals(0); auto gids_h = Omega_h::HostRead(gids); redev::GOs msgs(clientMeta.outPermute.off.back()); - for(int i=0; irank(); - auto partition = setupServerPartition(mesh,cpnFileName); - auto rdv = redev::Redev(MPI_COMM_WORLD,std::move(partition),static_cast(isRdv)); + auto partition = setupServerPartition(mesh, cpnFileName); + auto rdv = redev::Redev(MPI_COMM_WORLD, std::move(partition), + static_cast(isRdv)); std::stringstream clientName0; clientName0 << fieldName << "Client0"; std::stringstream clientName1; clientName1 << fieldName << "Client1"; - auto channel0 = setupComms(rdv,clientName0.str(),0); - auto channel1 = setupComms(rdv,clientName1.str(),1); + auto channel0 = setupComms(rdv, clientName0.str(), 0); + auto channel1 = setupComms(rdv, clientName1.str(), 1); - auto commClient0 = channel0.CreateComm(clientName0.str(), rdv.GetMPIComm()); - auto commClient1 = channel1.CreateComm(clientName1.str(), rdv.GetMPIComm()); + auto commClient0 = + channel0.CreateComm(clientName0.str(), rdv.GetMPIComm()); + auto commClient1 = + channel1.CreateComm(clientName1.str(), rdv.GetMPIComm()); auto isOverlap_h = markMeshOverlapRegion(mesh); - //TODO - Document why rendezvous needs two permutations but the app does not + // TODO - Document why rendezvous needs two permutations but the app does not ClientMetaData client0; ClientMetaData client1; - serverReceiveFromClient(client0,channel0,commClient0,mesh,fieldName,rank,0); - serverReceiveFromClient(client1,channel1,commClient1,mesh,fieldName,rank,1); - ts::writeVtk(mesh,"rdvInGids",0); + serverReceiveFromClient(client0, channel0, commClient0, mesh, fieldName, rank, + 0); + serverReceiveFromClient(client1, channel1, commClient1, mesh, fieldName, rank, + 1); + ts::writeVtk(mesh, "rdvInGids", 0); - serverSendToClient(client0,channel0,commClient0,mesh,isOverlap_h,fieldName,rank,0); - serverSendToClient(client1,channel1,commClient1,mesh,isOverlap_h,fieldName,rank,1); + serverSendToClient(client0, channel0, commClient0, mesh, isOverlap_h, + fieldName, rank, 0); + serverSendToClient(client1, channel1, commClient1, mesh, isOverlap_h, + fieldName, rank, 1); ////////////////////////////////////////////////////// - //communication loop + // communication loop ////////////////////////////////////////////////////// - for(int iter=0; iter<3; iter++) { - if(!rank) fprintf(stderr, "isRdv %d iter %d\n", isRdv, iter); - //receive from clients + for (int iter = 0; iter < 3; iter++) { + if (!rank) + fprintf(stderr, "isRdv %d iter %d\n", isRdv, iter); + // receive from clients auto start = std::chrono::steady_clock::now(); const auto msgsIn0 = commClient0.Recv(redev::Mode::Synchronous); - ts::getAndPrintTime(start,fieldName + " rdvRead clientId 0",rank); + ts::getAndPrintTime(start, fieldName + " rdvRead clientId 0", rank); ts::checkAndAttachIds(mesh, "inVtxGidsClient0", msgsIn0, client0.inPermute); start = std::chrono::steady_clock::now(); const auto msgsIn1 = commClient1.Recv(redev::Mode::Synchronous); - ts::getAndPrintTime(start,fieldName + " rdvRead clientId 1",rank); + ts::getAndPrintTime(start, fieldName + " rdvRead clientId 1", rank); ts::checkAndAttachIds(mesh, "inVtxGidsClient1", msgsIn1, client1.inPermute); - //send to clients - serverSendToClient(client0,channel0,commClient0,mesh,isOverlap_h,fieldName,rank,0); - serverSendToClient(client1,channel1,commClient1,mesh,isOverlap_h,fieldName,rank,1); - } //end iter loop + // send to clients + serverSendToClient(client0, channel0, commClient0, mesh, isOverlap_h, + fieldName, rank, 0); + serverSendToClient(client1, channel1, commClient1, mesh, isOverlap_h, + fieldName, rank, 1); + } // end iter loop } - -int main(int argc, char** argv) { +int main(int argc, char** argv) +{ auto lib = Omega_h::Library(&argc, &argv); auto world = lib.world(); const int rank = world->rank(); - if(argc != 4) { - if(!rank) { - std::cerr << "Usage: " << argv[0] << " /path/to/omega_h/mesh /path/to/partitionFile.cpn\n"; + if (argc != 4) { + if (!rank) { + std::cerr << "Usage: " << argv[0] + << " /path/to/omega_h/mesh " + "/path/to/partitionFile.cpn\n"; } exit(EXIT_FAILURE); } OMEGA_H_CHECK(argc == 4); const auto clientId = atoi(argv[1]); - REDEV_ALWAYS_ASSERT(clientId >= -1 && clientId <=1); + REDEV_ALWAYS_ASSERT(clientId >= -1 && clientId <= 1); const auto meshFile = argv[2]; const auto classPartitionFile = argv[3]; Omega_h::Mesh mesh(&lib); Omega_h::binary::read(meshFile, lib.world(), &mesh); const std::string name = "meshVtxIds"; - if(clientId == -1) { //rendezvous - server(mesh,name,classPartitionFile); + if (clientId == -1) { // rendezvous + server(mesh, name, classPartitionFile); } else { - client(mesh,name,clientId); + client(mesh, name, clientId); } return 0; } diff --git a/tools/CpnFromOsh.cpp b/tools/CpnFromOsh.cpp index 360731a8..9bcd33be 100644 --- a/tools/CpnFromOsh.cpp +++ b/tools/CpnFromOsh.cpp @@ -41,15 +41,14 @@ int main(int argc, char** argv) // roughly how many verts per part const auto verts_per_part = total_verts_in_faces / nparts; - std::cout << nfaces <<"\n"; - int idx=0; - for(int i=0; i verts_per_part*(idx+1)) { - idx++; - } + std::cout << nfaces << "\n"; + int idx = 0; + for (int i = 0; i < nfaces; ++i) { + std::cout << i + 1 << " " << idx << "\n"; + if (verts_per_face[i] > verts_per_part * (idx + 1)) { + idx++; + } } - return 0; } diff --git a/tools/XgcRCfromOsh.cpp b/tools/XgcRCfromOsh.cpp index 35d05fa7..eb84e022 100644 --- a/tools/XgcRCfromOsh.cpp +++ b/tools/XgcRCfromOsh.cpp @@ -26,10 +26,10 @@ int main(int argc, char** argv) } if (Omega_h::is(tag)) { std::cout << pcms::ConstructRCFromOmegaHMesh(mesh, numbering, - index_base); + index_base); } else if (Omega_h::is(tag)) { std::cout << pcms::ConstructRCFromOmegaHMesh(mesh, numbering, - index_base); + index_base); } else { std::cerr << "IDs should be either be LO or GO\n"; std::abort();